# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern with two alternatives:
#   group 1           - a run of ASCII letters, digits and spaces;
#   group 2 + group 3 - one punctuation character followed by either a single
#                       letter/digit/space or the end of the string.
# The punctuation class is closed with a plain "]": an escaped "\]" would keep
# the class open and swallow the following group. The letter ranges are "A-Z",
# not "A-z", because "A-z" also admits "[", "\", "]", "^", "_" and "`".
regex = r"([a-zA-Z0-9 ]+)|([?,.!:'\"=_\-%#@&])([a-zA-Z0-9 ]|$)"

# Sample text the pattern is run against.
test_str = ("Hello!Friend of my friend ... how are you! I'm tired, don ' t I! Two differences: one, two , three,four, five! !Hey!Hey =) How are you?\n\n"
"Oh , we've got plenty of time . __eou__ We haven't . Hurry up ! __eou__ All right.Let ' s go . __eou__ And about time too . __eou__\n"
"line: What dressing would you like on the salad ? __eou__ French dressing , please . __eou__ I beg your pardon ? __eou__ Oh , French dressing . __eou__ Sorry , it's not available now.Anything else ? __eou__ We still prefer French dressing . __eou__ Will you say it again ? __eou__ Who's your manager ? Tell me ! Would you ? __eou__\n"
"line: Excuse me , can I use your computer to type my paper ? __eou__ No problem . __eou__ I am afraid I can't finish typing it this afternoon.When will you use it tonight ? __eou__ Oh , Never mind , I finished my paper.So you can use it tonight . __eou__ Thanks a lot ! __eou__")

# Compile once; the compiled pattern also exposes the number of groups.
pattern = re.compile(regex)
matches = pattern.finditer(test_str)

# Report every match, then each of its capture groups (numbered from 1).
# A group that did not take part in the match is reported as -1--1: None.
for matchNum, match in enumerate(matches, start=1):
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
    for groupNum in range(1, pattern.groups + 1):
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))

# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html