import re
# Demo script: group consecutive lines that share the same three-digit prefix
# into "sections" and print where each section (and its captured code) was
# found in the sample text.
#
# Pattern, piece by piece:
#   ^(\d{3}) .*         a line starting with a three-digit code and a space;
#                       the code is captured as group 1
#   (?:\r?\n\1 .*)*     any number of directly following lines that start
#                       with the same code (backreference \1) and a space
# re.MULTILINE makes ^ match at the start of every line, not only at the
# start of the string.
#
# The continuation branch requires the space after \1 just like the first
# line does; without it a non-formatted line such as "0215 ..." would be
# swallowed into section "021".
regex = re.compile(r"^(\d{3}) .*(?:\r?\n\1 .*)*", flags=re.MULTILINE)

# Sample input: six "021" lines, one non-formatted line that no match should
# include, then three "033" lines.
test_str = (
    "021 Line one of section A. \n"
    "021 Line two of Section A. \n"
    "021 Line three of section A. \n"
    "021 Part two of Line three of Section A. \n"
    "021 We just skipped line four, but that's okay. \n"
    "021 Back to line six. \n"
    "Non-formatted lines to be ignored. This can be from 0 lines, to any number of lines, and the content can be any text. \n"
    "033 Line 1 of Section B \n"
    "033 Line 2 of Section B \n"
    "033 Okay, that's enough."
)

# finditer yields one match object per section, in order of appearance.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    # Whole section: character offsets into test_str and the matched text.
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # Capture groups are numbered from 1; the only one here is the
    # three-digit section code.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html