import re
# Pattern for one access-log line of the shape shown in ``test_str``.
# The pieces below are adjacent raw-string literals, concatenated by the
# parser into a single pattern; each piece ends with the literal space that
# separates it from the next field.
regex = re.compile(
    r"(?P<ip>.*?) (?P<remote_log_name>.*?) (?P<userid>.*?) "
    r"\[(?P<date>.*?)(?= ) (?P<timezone>.*?)\] "
    # ``request_version`` is optional: requests such as "GET /" carry no
    # " HTTP/x.y" suffix, in which case the group is None.
    r"\"(?P<request_method>.*?) (?P<path>.*?)(?P<request_version> HTTP/.*)?\" "
    r"(?P<status>.*?) (?P<length>.*?) "
    r"\"(?P<referrer>.*?)\" \"(?P<user_agent>.*?)\" "
    r"(?P<session_id>.*?) (?P<generation_time_micro>.*?) (?P<virtual_host>.*)"
)

# Sample input: five log lines, each terminated by a newline.
test_str = ("1.1.1.2 - - [11/Nov/2016:03:04:55 +0100] \"GET /\" 200 83 \"-\" \"-\" - 9221 1.1.1.1\n"
            "127.0.0.1 - - [11/Nov/2016:14:24:21 +0100] \"GET /uno dos\" 404 298 \"-\" \"-\" - 400233 1.1.1.1\n"
            "127.0.0.1 - - [11/Nov/2016:14:23:37 +0100] \"GET /uno dos HTTP/1.0\" 404 298 \"-\" \"-\" - 385111 1.1.1.1\n"
            "1.1.1.1 - - [11/Nov/2016:00:00:11 +0100] \"GET /icc HTTP/1.1\" 302 - \"-\" \"XXX XXX XXX\" - 6160 11.1.1.1\n"
            "1.1.1.1 - - [11/Nov/2016:00:00:11 +0100] \"GET /icc/ HTTP/1.1\" 302 - \"-\" \"XXX XXX XXX\" - 2981 1.1.1.1\n")


def describe_match(match_num, match):
    """Return the report lines for a single regex match.

    Args:
        match_num: 1-based ordinal of the match, used only in the text.
        match: an ``re.Match`` object.

    Returns:
        A list of strings: one line for the whole match, followed by one
        line per capture group in group-number order.
    """
    lines = [
        f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}"
    ]
    for group_num, group in enumerate(match.groups(), start=1):
        if group is None:
            # A group that did not participate has start() == end() == -1;
            # say so explicitly rather than printing "-1--1: None".
            lines.append(f"Group {group_num} did not participate in the match")
        else:
            lines.append(
                f"Group {group_num} found at "
                f"{match.start(group_num)}-{match.end(group_num)}: {group}"
            )
    return lines


# ``finditer`` returns a one-shot iterator; the loop below consumes it.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    for line in describe_match(match_num, match):
        print(line)
# Note: this code sample was automatically generated and is not guaranteed
# to work. If you find any syntax errors, please submit a bug report.
# For a full regex reference for Python, see:
# https://docs.python.org/3/library/re.html