import re
# Alternation of the three record fields.
#
# The pattern was written for a regex engine that supports the ``\G`` anchor
# ("the position where the previous match ended").  Python's ``re`` module
# does not implement ``\G`` and raises ``re.error`` (bad escape) when asked to
# compile it, so the anchor is left out of the pattern and enforced
# procedurally by ``iter_matches`` below.
#
# Do not call ``regex.finditer`` directly: without the anchor, the
# PERSON/PRODUCT alternatives would also match segments that do not follow a
# COMPANY.
regex = re.compile(r"""
# Find the company at the beginning of the line, always:
^ COMPANY:(?P<company>[^\s|]+(?:[ ][^\s|]+)*)
# After COMPANY, any of PERSON or PRODUCT may be matched:
| [^\S\r\n]*[|][^\S\r\n]*
PERSON:(?P<person>[^\s|]+(?:[ ][^\s|]+)*)
| [^\S\r\n]*[|][^\S\r\n]*
PRODUCT:(?P<product>[^\s|]+(?:[ ][^\s|]+)*)
""", flags=re.MULTILINE | re.VERBOSE)


def iter_matches(text):
    r"""Yield the COMPANY / PERSON / PRODUCT matches found in *text*, in order.

    This emulates the ``\G`` anchor that ``re`` lacks: a ``| PERSON:...`` or
    ``| PRODUCT:...`` segment is reported only when it starts exactly where
    the previously reported match ended.  Every chain therefore has to begin
    with a ``COMPANY:...`` at the start of a line; segments that do not
    continue such a chain (including one at the very start of *text*) are
    skipped.

    Args:
        text: The string to scan.

    Yields:
        Match objects produced by ``regex``.  Exactly one of the named groups
        ``company``, ``person`` or ``product`` is set on each match; the
        other two are ``None``.
    """
    pos = 0
    while True:
        match = regex.search(text, pos)
        if match is None:
            return
        if match.group("company") is None:
            # A PERSON/PRODUCT segment that does not continue a COMPANY chain.
            # It contains no newline, so no line-initial COMPANY can start
            # inside it and resuming after it cannot miss a record.
            pos = match.end()
            continue
        while match is not None:
            yield match
            pos = match.end()
            # Unlike ``search``, ``match`` succeeds only at ``pos`` itself,
            # which is the behaviour ``\G`` would have provided.  ``pos``
            # follows a non-whitespace character here, so ``^`` cannot match
            # and only the PERSON/PRODUCT alternatives are candidates.
            match = regex.match(text, pos)


test_str = ("COMPANY:Pizza\n"
"COMPANY:Pizza \n"
"COMPANY:Pizza | PERSON:Sid Sanders | PERSON:Louise Lane | PRODUCT:Bananas \n"
"COMPANY:Pizza |PERSON:Sid Sanders | PERSON:Louise Lane| PRODUCT:Bananas\n"
"COMPANY:Pizza| PERSON:Sid Sanders | PERSON:Louise Lane|PRODUCT:Bananas\n"
"COMPANY:Pizza|PERSON:Sid Sanders | PERSON:Louise Lane |PRODUCT:Bananas \n\n"
"COMPANY:Food Heaven\n"
"COMPANY:Food 123 Heaven \n"
"COMPANY:Food Heaven | PRODUCT:Bananas | PERSON:Sid Sanders | PERSON:Louise Lane \n"
"COMPANY:Food Heaven123| PRODUCT:Bananas |PERSON:Sid Sanders | PERSON:Louise Lane\n"
"COMPANY:Food Heaven| PERSON:Sid Sanders | PERSON:Louise Lane | PRODUCT:Bananas \n"
"COMPANY:Food Heaven|PERSON:Sid Sanders| PRODUCT:Bananas | PERSON:Louise Lane\n\n"
"COMPANY:Something Somewhere Anywhere - There\n"
"COMPANY:Something 1 Soöômewhere Anywhere There \n"
"COMPANY:Something 2 Somewhere - Anywhere There | PERSON:Sid Sanders | PRODUCT:Bananas| PERSON:Louise Lane\n"
"COMPANY:Something - Somewhere Anywhere There |PERSON:Sid Sanders| PRODUCT:Bananas | PERSON:Louise Lane\n"
"COMPANY:- Something 3 Somewhere Anywhere There| PERSON:Sid Sanders |PRODUCT:Bananas | PERSON:Louise Lane\n"
"COMPANY:Something Somewhere 4 Anywhere There|PERSON:Sid Sanders | PRODUCT:Bananas | PERSON:Louise Lane\n\n"
"# Should not match:\n"
"|PERSON:Fred Rogers\n"
" | PRODUCT:Mr. Roger's Neighborhood\n\n"
"# Should start matching again:\n"
"COMPANY:Fred Rogers Productions |PERSON:Fred Rogers |PRODUCT:Mr. Roger's Neighborhood\n")

# Report every match with its span, followed by the span and text of each of
# the three groups.  Groups that did not take part in a match are printed with
# the span -1--1 and the value None.
matches = iter_matches(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html