# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern searched for in the sample text below. It has 17 capture groups.
# Read literally, a match consists of: whitespace, one digit, whitespace,
# 2-3 capitals (1), four digit-or-whitespace characters (2), whitespace,
# one capital (3), TWO whitespace characters, 1-2 capitals (4), whitespace,
# a 1-2 digit number (5) fused to a month abbreviation (6), TWO whitespace
# characters, two 3-letter codes (7, 8), whitespace, a 3-character
# alphanumeric code (9), whitespace, "X" or a whitespace character (10),
# whitespace, two 4-digit numbers each split 2+2 (11-14), a whitespace or
# "*" separator (15), zero or more capitals (16), whitespace, and the rest
# of the line (17).
# NOTE(review): this looks like an airline itinerary segment line - confirm.
regex = r"\s[0-9]\s([A-Z]{2,3})([0-9\s]{4})\s([A-Z])\s\s([A-Z]{1,2})\s([0-9]{1,2})(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)\s\s([A-Z]{3})([A-Z]{3})\s([A-Z0-9]{3})\s(X|\s)\s([0-9]{2})([0-9]{2})\s([0-9]{2})([0-9]{2})(\s|\*)([A-Z]*)\s(.*)"

# NOTE(review): every run of whitespace in this sample is a single
# character, while the pattern requires consecutive whitespace (`\s\s`) in
# several places, so none of these lines match and the loop below prints
# nothing. Presumably the column spacing was collapsed when the sample was
# pasted - restore the original spacing from the real data if available.
test_str = ("1.3SAHIN/FATMAMRS/MUSTAFAMR/SEMIHMSTR 4.I/1SAHIN/MUSABMSTR\n"
    "2ZSTKOS 13SEP UNBFML\n"
    " 1 TK1616 W WE 25NOV FRASAW HK3 X 1745 2145 Y M01.1E\n"
    " 2 TK7230 Y WE 25NOV SAWTZX HK3 2245 0025*Y M01.2E\n"
    " ANADOLUJET IS A TRADEMARK OF TURKISH AIRLINES\n"
    " 3 TK2839 Y SA 12DEC TZXIST HK3 X 0645 0840 Y M02.1E\n"
    " 4 TK1591 S SA 12DEC ISTFRA HK3 1140 1355 Y M02.2E\n"
    "FONE-2ZS00-T REISEBURO SAHIN 06042/951616")

# Lazy iterator over all non-overlapping matches of the pattern in the text.
matches = re.finditer(regex, test_str)

# Report every match (numbered from 1) with its span, then each of its
# capture groups with the group's own span and text.
for match_num, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=match_num, start=match.start(), end=match.end(),
        match=match.group()))

    # Capture groups are numbered from 1; group 0 is the whole match, which
    # was already printed above.
    for group_num in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=group_num, start=match.start(group_num),
            end=match.end(group_num), group=match.group(group_num)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html