# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern, read left to right:
#   \d{4}( [A-a]+)   a 4-digit number, then a space and a run of characters
#                    from the class [A-a]                      -> group 1
#   .+?              the shortest possible run of following text
#   (?=...)          lookahead (not consumed): stop right before either
#                    "<4 digits> <[A-a] run> " (that run        -> group 2)
#                    or a space sitting at the end of a line.
# NOTE(review): "[A-a]" is the code-point range 'A'..'a', i.e. the upper-case
# letters, a few punctuation characters and the single lower-case 'a';
# "[A-Za-z]" may have been intended. The spaces around "|" are literal too.
# Both are left untouched so the matches stay the same -- confirm intent.
regex = r"\d{4}( [A-a]+).+?(?=\d{4}( [A-a]+) | $)"
test_str = "1201 FOEZELOLIE 3 F1 II 3 1 L E2 P001 IBC02 R001MP19 T4 TP1 LGBF FL 2(D/E)S2, S2033 1201 FOEZELOLIE1201 FOEZELOLIE 3 F1 III 3 5 L E1 P001 IBC03 LP01 R001MP19 T2 TP1 LGBF FL 3(D/E)V12 S2 30 1201 FOEZELOLIE1202 DIESELOLIE of GASOLIE of STOOKOLIE, LICHT (vlampunt niet meer dan 60 °C)3 F1 III 3 640K 6645 L E1 P001 IBC03 LP01 R001MP19 T2 TP1 LGBF FL 3(D/E)V12 S2 30 1202 DIESELOLIE of GASOLIE of STOOKOLIE, LICHT (vlampunt niet meer dan 60 °C)1202 DIESELOLIE overeenkomstig norm EN 590:2013 + AC:2017 of GASOLIE of STOOKOLIE, LICHT met een vlampunt overeenkomstig norm EN 590:2013 +A1:20173 F1 III 3 640L 6645 L E1 P001 IBC03 LP01 R001MP19 T2 TP1 LGBF AT 3(D/E)V12 S2 30 1202 DIESELOLIE overeenkomstig norm EN 590:2013 + AC:2017 of GASOLIE of STOOKOLIE, LICHT met een vlampunt overeenkomstig norm EN 590:2013 +A1:20171202 DIESELOLIE of GASOLIE of STOOKOLIE, LICHT (vlampunt hoger dan 60 °C maar niet hoger dan 100 °C )3 F1 III 3 640M 6645 L E1 P001 IBC03 LP01 R001MP19 T2 TP1 LGBV AT 3(D/E)V12 30 1202 DIESELOLIE of GASOLIE of STOOKOLIE, LICHT (vlampunt hoger dan 60 °C maar niet hoger dan 100 °C )1203 BENZINE (motorbrandstof) 3 F1 II 3 243534 6641 L E2 P001 IBC02 R001 BB2MP19 T4 TP1 LGBF TU9 FL 2(D/E)S2, S2033 1203 BENZINE (motorbrandstof)1204 NITROGLYCERINE, OPLOSSING IN ALCOHOL, met niet meer dan 1 % nitroglycerine3 D II 3 601 1 L E0 P001 IBC02PP5 MP2 2(B)S2, S141204 NITROGLYCERINE, OPLOSSING IN ALCOHOL, met niet meer dan 1 % nitroglycerine1206 HEPTANEN 3 F1 II 3 1 L E2 P001 IBC02 R001MP19 T4 TP1 LGBF FL 2(D/E)S2, S2033 1206 HEPTANEN1207 HEXALDEHYDE 3 F1 III 3 5 L E1 P001 IBC03 LP01 R001MP19 T2 TP1 LGBF FL 3(D/E)V12 S2 30 "

# re.MULTILINE makes "$" match at every line end, not only at the end of the
# string. finditer yields non-overlapping matches lazily, left to right.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every match (numbered from 1) followed by each of its capture groups.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Group 0 is the whole match (already printed), so start at group 1.
    # A group that did not participate in the match (group 2 when the
    # lookahead succeeded through its end-of-line branch) is reported with
    # position -1 and the value None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html