import re
# Pattern for one access-log style line, split per field for readability.
# Python's `re` spells named groups `(?P<name>...)`; the `(?<name>...)` form
# is parsed as a lookbehind and fails to compile.
# Groups, in order: ip, date, method, url, protocol, status, length,
# an unnamed optional wrapper (group 8), and useragent (group 9), which
# captures everything left on the line after the length field.
regex = re.compile(
    r"(?P<ip>\d+\.\d+\.\d+\.\d+)[\s-]+"
    r"(?P<date>\[.*\])[\s\"]+"
    r"(?P<method>\w+)\s+"
    r"(?P<url>[^\s]+)\s+"
    r"(?P<protocol>[^\s]+)[\s\"]+"
    r"(?P<status>\d+)\s+"
    r"(?P<length>\d+)"
    r"([ -]+(?P<useragent>.*))?",
    flags=re.MULTILINE,
)

# Two sample log lines, each terminated by a newline.
test_str = ("207.46.13.93 - - [31/Mar/2012:19:43:19 +0530] GET /robots.txt HTTP/1.1 404 613 - Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)\n"
            "202.164.138.60 - - [31/Mar/2012:20:20:10 +0530] GET /kuhsonline/ HTTP/1.1 302 80 http://www.google.co.in/url?sa=t&rct=j&q=www.kuhas.ac.in&source=web&cd=2&sqi=2&ved=0CCwQFjAB&url=http%3A%2F%2Fwww.kuhas.ac.in%2Fkuhsonline%2F&ei=mRl3T-XtMofVrQfJiYWqDQ&usg=AFQjCNF4mlbbtGFCu495PHK2BTGwO4Ol0w Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0)\n")

# Lazy iterator over every non-overlapping match in the sample text.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    # Report the whole match with its span in test_str.
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # Then report each capture group; group numbers are 1-based.
    # A group that did not participate is reported as None with span -1--1.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html