# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Sentence splitter: one or more characters that are not a sentence
# terminator, optionally followed by a single terminator (!, ?, 。 or .).
# The trailing terminator is optional so that a final, unterminated fragment
# is still reported as a match.
regex = r"[^!?。\.\!\?]+[!?。\.\!\?]?"

test_str = "热带风暴尚塔尔是2001年大西洋飓风季的一场在8月穿越了加勒比海的北大西洋热带气旋。尚塔尔于8月14日由热带大西洋的一股东风波发展而成,其存在的大部分时间里都在快速向西移动,退化成东风波后穿越了向风群岛。"

# finditer yields non-overlapping match objects in order of appearance.
matches = re.finditer(regex, test_str)

for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Report capture groups, numbered from 1 (group 0 is the whole match and
    # was printed above). The pattern currently defines no groups, so this
    # loop does nothing until groups are added to the regex.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html