import re
# Verbose-mode pattern for a single error-log line (the sample below looks
# like an nginx error log entry -- confirm against the real log source).
#
# re.VERBOSE ignores unescaped whitespace and "#" comments inside the pattern,
# which is why every literal space is written as "\ " and the literal "#" as
# "\#".  re.DOTALL lets "." match newlines as well.
#
# Named groups: timestamp, severity, err_msg, err_type, err_rate, err_zone,
# remote_addr, server, method, uri, uri_query, protocol, upstream, req_host,
# referrer.  Group 3 is an unnamed capturing group and is kept capturing so
# the positional group numbering printed below stays stable.
regex = re.compile(r"""
# Header: 19-character timestamp, then the bracketed severity (4-6 word chars).
^(?P<timestamp>.{19})\ \[(?P<severity>\w{4,6})\]
# "<digits>#<digits>: " followed by an optional "*<digits> " token (group 3).
\ \d+\#\d+:\ (\*\d+\ )?
(?P<err_msg>
# Optional leading '"<token>" is not ' phrase.
(?:\"\S+\"\sis\snot\s)?
# First word of the message; must be followed by whitespace or "(".
(?P<err_type>\w+)[\s(]
# Either the rate-limit form (excess value + zone name) ...
(?:requests,\ excess:\ (?P<err_rate>\d+\.\d+)
\ by\ zone\ \"(?P<err_zone>\S+)\"
# ... or any other free text, matched lazily so the trailer below can anchor.
|.+?)
)
(?:
,\ client:\ (?P<remote_addr>\d+\.\d+\.\d+\.\d+)
# NOTE(review): \a is the BEL control character; possibly a typo -- confirm.
,\ server:\ (?P<server>[^,\a]+)
# Any run of uppercase letters is accepted as the method so that longer verbs
# (DELETE, PATCH, OPTIONS, ...) no longer make the whole trailer fail.
(?:,\ request:\ \"(?P<method>[A-Z]+)
\ (?P<uri>\/\S*?)(?P<uri_query>\?\S+)?
\ (?P<protocol>\S+)\"
)?
(?:,\ upstream:\ \"(?P<upstream>.+?)\")?
(?:,\ host:\ \"(?P<req_host>\S+)\")?
(?:,\ referrer:\ \"(?P<referrer>\S+)\")?
# Empty alternative: lines without a client/server trailer still match.
|)
$
""", flags=re.VERBOSE | re.DOTALL)

# Sample line exercising the rate-limit branch plus request/host/referrer.
test_str = (
    '2023/06/16 17:12:30 [error] 1814859#1814859: *228570402 '
    'limiting requests, excess: 50.120 by zone "resizerzone", '
    'client: 10.5.0.122, server: app.market.com, '
    'request: "GET /resizer/view?key=8e158c8741ce849a09ef84d316bb68e7'
    '&b=productimages&f=webp&s=0 HTTP/1.1", '
    'host: "market.com", referrer: "https://market.com/"\n'
)

match = regex.search(test_str)

if match:
    print(f"Match was found at {match.start()}-{match.end()}: {match.group()}")

    # Groups that did not participate in the match report None with a
    # span of -1--1.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: this code sample was automatically generated and is not guaranteed
# to work. If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html