# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern with three alternatives, tried left to right at each position:
#   1. ``.*begin``  - with DOTALL, swallows everything up to and including
#                     the last "begin" reachable from the current position.
#   2. ``end.*``    - with DOTALL, swallows from "end" to the end of input.
#   3. ``[^e]*?\b([A-Z]{2,})\b`` - lazily skips characters other than "e",
#                     then captures a whole word of two or more uppercase
#                     ASCII letters in group 1.
# Only the third alternative sets group 1; for the other two it is None.
regex = r".*begin|end.*|[^e]*?\b([A-Z]{2,})\b"

test_str = ("Lorem ipsum dolor sit amet, consectetur adipiscing elit. \n"
    "NOT MATCHED in HERE Vestibulum vehicula arcu cursus justo luctus gravida. Praesent elefur dictum sollicitudin. \n"
    "begin\n"
    "Nullam mi justo, pulvinar sit amet leo sed, commodo malesuada nulla. \n"
    "but MATCHED HERE Donec lobortis arcu eget turpis commodo, sed interdum leo OK elefur. \n"
    "end\n"
    "NOT MATCHED HERE \n"
    "Cras venenatis felis vel nisi elementum varius. \n"
    "Proin TEST sed magna sed erat consequat pretium eu sit amet nibh.")


def describe_matches(found):
    """Yield the report lines for an iterable of regex match objects.

    For every match (numbered from 1) one "Match ..." line is produced,
    followed by one "Group ..." line per capture group of the pattern.
    A group that did not participate in the match is reported with the
    values the ``re`` module gives for it: span ``-1--1`` and text ``None``.

    :param found: iterable of match objects, e.g. from ``re.finditer``.
    :return: generator of formatted strings, in output order.
    """
    for match_num, match in enumerate(found, start=1):
        yield "Match {matchNum} was found at {start}-{end}: {match}".format(
            matchNum=match_num,
            start=match.start(),
            end=match.end(),
            match=match.group(),
        )
        # Capture groups are numbered from 1; group 0 is the whole match
        # and has already been reported above.
        for group_num in range(1, len(match.groups()) + 1):
            yield "Group {groupNum} found at {start}-{end}: {group}".format(
                groupNum=group_num,
                start=match.start(group_num),
                end=match.end(group_num),
                group=match.group(group_num),
            )


# DOTALL lets "." cross newlines, which alternatives 1 and 2 rely on.
# MULTILINE only affects "^" and "$", which this pattern does not use.
matches = re.finditer(regex, test_str, re.MULTILINE | re.DOTALL)
for line in describe_matches(matches):
    print(line)
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html