# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for one whole line made of up to four consecutive blocks, each
# introduced by a fixed two-character code:
#   group 1: "01" + exactly 14 digits          (required)
#   group 2: "17" + exactly 6 digits           (optional)
#   group 3: "10" + 1 to 20 word characters    (optional)
#   group 4: "21" + 1 to 20 word characters    (optional)
regex = r"^(01\d{14})(17\d{6})?(10\w{1,20})?(21\w{1,20})?$"

# Sample input. Only the first two lines are expected to match: the
# explanatory text and the space-separated variants at the end do not fit
# the anchored pattern.
test_str = ("01084700069811461719010010285322921DA192089940088\n"
            "01084700088763891719050010BM2120\n\n"
            "Each block start with a fixed code and is followed by digits o characters\n\n"
            "01 + 14 digits\n"
            "17 + 6 digits\n"
            "10 + from 1 to 20 characters\n"
            "21 + from 1 to 20 characters\n\n"
            "0108470006981146 17190100 102853229 21DA192089940088\n\n"
            "0108470008876389 17190500 10BM2120")

# re.MULTILINE lets ^ and $ anchor at the start/end of every line of
# test_str instead of only at the start/end of the whole string.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Report the overall match (group 0) first.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group()))

    # Capture groups are numbered from 1. An optional group that did not
    # take part in the match reports -1 for start/end and None for its text.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html