# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
regex = r"^([^\s]+) ([^\s]+) ([^\s]+) \[([\w:/]+\s[+\-]\d{4})\] \"(\S+) /.*() ([^\\\"]+)\" (\d{3}) (\S+)"
test_str = ("ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:43:57 -0400] \"GET /images/ksclogo-medium.gif HTTP/1.0 \" 200 5866\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:44:07 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0 \" 200 786\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:44:11 -0400] \"GET /images/MOSAIC-logosmall.gif HTTP/1.0 \" 200 363\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:44:13 -0400] \"GET /images/USA-logosmall.gif HTTP/1.0 \" 200 234\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:43:39 -0400] \"GET / HTTP/1.0 \" 200 7131\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:44:15 -0400] \"GET /images/WORLD-logosmall.gif HTTP/1.0 \" 200 669\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:44:31 -0400] \"GET /shuttle/countdown/ HTTP/1.0 \" 200 4673\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:44:41 -0400] \"GET /shuttle/missions/sts-69/count69.gif HTTP/1.0 \" 200 46053\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:45:34 -0400] \"GET /images/KSC-logosmall.gif HTTP/1.0 \" 200 1204\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:45:46 -0400] \"GET /cgi-bin/imagemap/countdown69?293,287 HTTP/1.0 \" 302 85\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:45:48 -0400] \"GET /htbin/cdt_main.pl HTTP/1.0 \" 200 3714\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:45:52 -0400] \"GET /shuttle/countdown/images/countclock.gif HTTP/1.0 \" 200 13994\n"
"ix-li1-14.ix.netcom.com - - [08/Aug/1995:14:46:22 -0400] \"GET / HTTP/1.0 \" 200 7131\n"
"ix-li1-14.ix.netcom.com - - [08/Aug/1995:14:46:29 -0400] \"GET /images/ksclogo-medium.gif HTTP/1.0 \" 200 5866\n"
"ix-li1-14.ix.netcom.com - - [08/Aug/1995:14:46:35 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0 \" 200 786\n"
"ix-li1-14.ix.netcom.com - - [08/Aug/1995:14:46:37 -0400] \"GET /images/MOSAIC-logosmall.gif HTTP/1.0 \" 200 363\n"
"ix-li1-14.ix.netcom.com - - [08/Aug/1995:14:46:38 -0400] \"GET /images/USA-logosmall.gif HTTP/1.0 \" 200 234\n"
"ix-li1-14.ix.netcom.com - - [08/Aug/1995:14:46:40 -0400] \"GET /images/WORLD-logosmall.gif HTTP/1.0 \" 200 669\n"
"ix-li1-14.ix.netcom.com - - [08/Aug/1995:14:47:41 -0400] \"GET /shuttle/missions/sts-70/mission-sts-70.html HTTP/1.0 \" 200 20304\n"
"ix-sac6-20.ix.netcom.com - - [08/Aug/1995:14:47:48 -0400] \"GET /shuttle/countdown/count.html HTTP/1.0 \" 200 73231")
matches = re.finditer(regex, test_str)
for matchNum, match in enumerate(matches, start=1):
print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
for groupNum in range(0, len(match.groups())):
groupNum = groupNum + 1
print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html