# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Verbose-mode pattern for one log record per line; it is meant to be used
# with re.MULTILINE | re.VERBOSE. A record is a timestamp followed by either
# a free-text message or an identifier / status / flag-letter layout.
#
# Python's ``re`` module only accepts the ``(?P<name>...)`` spelling for named
# groups and has no ``\p{Lu}`` Unicode-property escape, so upper-case letters
# are matched with an explicit class: ASCII A-Z plus the Latin-1 capitals
# U+00C0-U+00D6 and U+00D8-U+00DE (this covers the Swedish letters).
regex = r"""
^
(?P<Y>\d{4})-(?P<M>\d{2})-(?P<D>\d{2})\s(?P<T>(?:\d\d\.){3}\d{3})  # date, then dotted time
(?:
    # --- one whitespace char between the timestamp and the rest ---
    \s
    (?:
        # four upper-case words, then end of line
        (?P<msg>[A-Z\u00C0-\u00D6\u00D8-\u00DE]+\s[A-Z\u00C0-\u00D6\u00D8-\u00DE]+\s[A-Z\u00C0-\u00D6\u00D8-\u00DE]+\s[A-Z\u00C0-\u00D6\u00D8-\u00DE]+)\s+$
    |
        (?P<id1>\S+\s?\S+\s?\S+)             # identifier, intended as one to three words
        \s{1,2}
        (?P<status1>\S+|\S+\s\S+)\s{1,2}     # status, one or two words
        (?P<letter1>\S)?\s*$                 # optional single char at end of line (plus any whitespace)
    )
|
    # --- four whitespace chars between the timestamp and the rest ---
    \s{4}
    (?P<id4>\S+\s\S+(?:\s\S+\s\S+)?)         # identifier, two or four words
    (?:\s{3}|\s)                             # three or one whitespace chars
    (?:
        (?P<status4>\S+\s?\S+(?:\s\S+)?)     # status, intended as two or three words
    |                                        # OR
        (?P<statusArg>\S+\s{2})              # one word and two whitespace chars
        (?P<arg>\S+)                         # one word
    )
    (?:\s{4}|\s{2})                          # four or two whitespace chars
    (?:
        (?P<letter4>\S)                      # first flag char
        # NOTE(review): the trailing '?' only makes this quantifier lazy, so
        # exactly two whitespace chars are required here. The original comment
        # said "0 or 2 spaces" - confirm whether (?:\s{2})? was intended.
        \s{2}?
        (?P<letter42>\S)?                    # sometimes there is a second flag char
    )?
    \s*$
)
"""
# Sample text the pattern is run against: a header line followed by one log
# record per line. Records keep their trailing space; the lines are joined
# with "\n" and the text has no final newline.
test_str = "\n".join((
    "Ny loggfil skapas 2020-06-07 23:50:02",
    "2020-06-08 12.56.36.000 MANUELL ÖVERKOPPLING HAR SKETT ",
    "2020-06-08 12.56.37.000 Ethernet anslutning A AC FEL ",
    "2020-06-08 12.56.38.000 FIL-SYNKRONISERING STARTAD ",
    "2020-06-08 12.56.39.000 Ethernet anslutning B AC FEL ",
    "1978-01-01 00.00.00.000 521GW A I DRIFT T ",
    "2020-06-08 12.56.53.000 521GW A MV/TTD-LÄNK ANSL. ",
    "1978-01-01 00.00.00.000 521FE A.2 I DRIFT T ",
    "2020-06-08 12.56.53.000 GW A FELFRI ",
    "1978-01-01 00.00.00.000 521PKT 13A/B I DRIFT T ",
    "1978-01-01 00.00.00.000 521PKT 14A/B I DRIFT T ",
    "2020-06-08 12.56.53.000 PKT NR. 13A/B FELFRI ",
    "2020-06-08 12.56.53.000 PKT NR. 14A/B FELFRI ",
    "1978-01-01 00.00.00.000 521FE A.1 I DRIFT T ",
    "2020-06-08 12.56.54.000 PKT NR. 14A/B HREP S ",
    "2020-06-08 12.56.54.000 521FE A.1 MV/TTD-LÄNK ANSL. ",
    "2020-06-08 12.56.54.000 FRONT END A:1 FELFRI ",
    "1980-01-01 00.00.00.000 521PKT 2 I DRIFT T ",
    "2020-06-08 12.56.54.000 PKT NR. 2 FELFRI ",
    "1980-01-01 00.00.00.000 521PKT 4 I DRIFT T ",
    "1980-01-01 00.00.00.000 521PKT 6 I DRIFT T ",
    "2020-06-08 12.56.54.000 521FE A.2 MV/TTD-LÄNK ANSL. ",
    "2020-06-08 12.56.54.000 PKT NR. 4 FELFRI ",
    "2020-06-08 12.56.54.000 PKT NR. 6 FELFRI ",
    "2020-06-08 12.56.54.000 FRONT END A:2 FELFRI ",
    "1980-01-01 00.00.00.000 521PKT 12 I DRIFT T ",
    "1980-01-01 00.00.00.000 521PKT 8 I DRIFT T ",
    "1980-01-01 00.00.00.000 521PKT 10 I DRIFT T ",
    "1980-01-01 00.00.00.000 521PKT 5 I DRIFT T ",
    "1980-01-01 00.00.00.000 521PKT 3 I DRIFT T ",
    "1978-01-01 00.00.00.000 521PKT 14A/B I DRIFT T ",
    "2020-06-08 12.56.55.000 PKT NR. 12 FELFRI ",
    "2020-06-08 12.56.55.000 PKT NR. 8 FELFRI ",
    "2020-06-08 12.56.55.000 PKT NR. 10 FELFRI ",
    "2020-06-08 12.56.55.000 PKT NR. 5 FELFRI ",
    "2020-06-08 12.56.55.000 PKT NR. 3 FELFRI ",
    "2020-06-08 12.56.55.000 PKT NR. 1 HREP S ",
    "2020-06-08 12.56.55.000 PKT NR. 14A/B HREP K ",
    "2020-06-08 12.56.55.000 PKT NR. 7 HREP S ",
    "1980-01-01 00.00.00.000 521PKT 11 I DRIFT T ",
    "1980-01-01 00.00.00.000 521PKT 9 I DRIFT T ",
    "2020-06-08 12.56.55.000 PKT NR. 11 FELFRI ",
    "2020-06-08 12.56.55.000 PKT NR. 9 FELFRI ",
    "2020-06-08 12.56.56.000 PKT NR. 1 HREP K ",
    "1980-01-01 00.00.00.000 521PKT 7 I DRIFT T ",
    "1980-01-01 00.00.00.000 521PKT 1 I DRIFT T ",
    "2020-06-08 12.56.56.000 PKT NR. 7 HREP K ",
    "2020-06-08 12.56.56.000 PKT NR. 7 FELFRI ",
    "2020-06-08 12.56.56.000 PKT NR. 1 FELFRI ",
    "2020-06-08 12.57.06.000 521FE B.1 I DRIFT R ",
    "2020-06-08 12.57.06.000 FRONT END B:1 FELFRI R ",
    "2020-06-08 12.57.07.000 521FE B.2 I DRIFT R ",
    "2020-06-08 12.57.07.000 FRONT END B:2 FELFRI R ",
    "2020-06-08 12.57.07.000 521GW B I DRIFT R ",
    "2020-06-08 12.57.07.000 GW B FELFRI R ",
    "2020-06-08 12.57.43.000 FIL-SYNKRONISERING AVSLUTAD ",
    "2020-06-08 12.57.51.000 Länk till BIS FEL ",
    "2020-06-08 12.58.07.000 Länk till BIS FELFRI ",
    "2020-06-08 12.58.52.000 Länk till BIS FEL R ",
    "2020-06-08 12.59.07.000 Länk till BIS FELFRI ",
))
# Run the pattern over the sample text. MULTILINE lets ^ and $ anchor at every
# line, and VERBOSE enables the whitespace/comment layout used by the pattern.
matches = re.finditer(regex, test_str, re.MULTILINE | re.VERBOSE)

for matchNum, match in enumerate(matches, start=1):
    # Report the whole match (group 0) with its span in the text.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1. A group that did not take part in
    # the match is still listed: its span is -1--1 and its value is None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html