# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for one access-log entry of the form
#   <ip> - - [<dd/Mon/yyyy:hh:mm:ss +zzzz>] "<request>" <status> <bytes> "<referrer>" "<user agent>"
# Named groups: ipaddress, dateandtime, url, statuscode, bytessent,
# refferer (spelling kept so existing group lookups keep working), useragent.
# The request part is either a quoted `METHOD url HTTP/1.x` line or any text
# containing a literal `\x` sequence (as in the escaped-bytes sample entry).
# Only the status codes 301, 400, 403 and 444 are accepted.
# The pattern is written in lowercase ([a-z]{3}, http/1.x), so it has to be
# applied with re.IGNORECASE to match "Apr" and "HTTP/1.1".
# Fix: the first group used the PCRE-style `(?<name>...)` syntax, which
# Python's `re` module rejects; Python spells named groups `(?P<name>...)`.
regex = r"(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] (((\"(GET|POST|PUT|PATCH|DELETE|OPTIONS|HEAD|TRACE|CONNECT|[.*\\x.*]) )(?P<url>.+)(http\/1\.(1|0)\"))|.*\\x.*) (?P<statuscode>(301|4(44|03|00))) (?P<bytessent>\d+) ([\"](?P<refferer>(\-)|(.+))[\"]) ([\"](?P<useragent>.+)[\"])"
# Sample access-log entries, one per newline-terminated line, used as the
# search text for `regex`. Adjacent string literals inside the parentheses are
# concatenated by Python into a single string.
test_str = ("51.159.23.43 - - [30/Apr/2021:15:26:20 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"-\"\n"
"89.248.170.22 - - [30/Apr/2021:15:26:31 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0\"\n"
"172.104.242.173 - - [30/Apr/2021:15:43:51 +0800] \"\\xBA\\xABd\\xA1EZC\\xDBM\\x87\\xEE^\\xFD\\xBF\\x159 X\\xD4>\\x12\\x98\\xC4<\\xE0\\x13\\xCF\\x00\\xAC\\xA09\\xD7\\x90#8~\\x8C\\xDE\\x9DReF\\xBF%1Q\\xE0\\x9D\\x06&g\\xBB\\x82\\x95\\x19\\xED\\x07\\x14\\x19ZP\\x80+\\x94e\\xC3\\xE6\\x85\\x06\\xA4\\x99\\x8B\\x19l\\x01\\xEA\\x88Y\\x91\\x16\\x95\\xC4\\xC8\\x0EH\\x02\\xC7\\x93g\\xC14FW\\x05|\\xFB\\xF3T\\xB8\\xFD\\xCB\\xBB)\\xE3\\xCE\\xDD\\xCD7\\x9E\\xEFP\\x8C\\xA4[V\\xFD\\x98\\xC9l\\x82\\xF5\\xE4\\xC1d\\x87X\\xF7\\x9B\\xBF\\xE8q\\x12\\x99&\\xDB,\\xF5\\x87\\xD7\\xA8\\x97j;\\xE3\\xEA\\xA7\\xB4\\xB0\\x02\\xAD\\x8DE\\x9B\\xAAB\\x80\\x0E)\\xA9\\xE9\\xAF}\\x18\\x8E\\xB8\\x1E\\x99\\x04\\xEF\\xA8\\x8C\\xE8\\x04\\xE2\\xD3\\xED)1\\x91\\xC1\\x8F\\x88\\x8C\\x81\\xF0\\xDB\\xA5\\x88\\x95H\\x9BZ\\xAB\\xCE\\xBF\\xF4E%P*\\x88KFY6\\x9E\\xE7::j\\xD4\\x8A\\xA8V\\x9A\\xAA\\xAB\\xAF\\xC3&.\\xED[\\x04\\xC5e\\x7F\\x08\\xBE\\x8Ar\\xA7\\xB0\\x99F\\xF7\\x11\\xE5\\xD6\\x96\\x8CIm+w\\x1C\\xFDuU\\x14\\x0F!x\\xAC\\xE8MPy\\xC3\\x19!2\\xA0\\xED\\xC0}!Rw\\x14\\x8E\\x1B\\xC4\\xE1\\xA0\\xAF+\\xADKk\\xC5\\xE0\\x5Cs\\x9C\\xBD\\xCB\" 400 150 \"-\" \"-\"\n"
"192.241.216.138 - - [30/Apr/2021:16:57:52 +0800] \"GET /portal/redlion HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 zgrab/0.x\"\n"
"193.118.53.194 - - [30/Apr/2021:17:00:02 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:35 +0800] \"GET /vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:35 +0800] \"POST /vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:38 +0800] \"GET /index.php?s=/Index/\\x5Cthink\\x5Capp/invokefunction&function=call_user_func_array&vars[0]=md5&vars[1][]=HelloThinkPHP21 HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:40 +0800] \"GET /?XDEBUG_SESSION_START=phpstorm HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:41 +0800] \"POST /mifs/.;/services/LogService HTTP/1.1\" 444 0 \"https://125.160.120.137:443\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:43 +0800] \"GET /wp-content/plugins/wp-file-manager/readme.txt HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:44 +0800] \"GET /console/ HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:45 +0800] \"POST /Autodiscover/Autodiscover.xml HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:47 +0800] \"GET /_ignition/execute-solution HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"45.155.205.84 - - [30/Apr/2021:17:09:47 +0800] \"POST /api/jsonws/invoke HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\"\n"
"89.248.170.22 - - [30/Apr/2021:17:23:51 +0800] \"HEAD / HTTP/1.0\" 444 0 \"-\" \"-\"\n"
"192.241.220.215 - - [30/Apr/2021:17:26:46 +0800] \"GET /actuator/health HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 zgrab/0.x\"\n"
"128.14.133.58 - - [30/Apr/2021:17:40:03 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\"\n"
"74.120.14.37 - - [30/Apr/2021:17:41:07 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"-\"\n"
"74.120.14.37 - - [30/Apr/2021:17:41:09 +0800] \"GET / HTTP/1.1\" 400 248 \"-\" \"-\"\n"
"74.120.14.37 - - [30/Apr/2021:17:41:09 +0800] \"GET / HTTP/1.1\" 400 248 \"-\" \"Mozilla/5.0 (compatible; CensysInspect/1.1; +https://about.censys.io/)\"\n"
"192.241.212.72 - - [30/Apr/2021:18:36:39 +0800] \"GET /hudson HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 zgrab/0.x\"\n"
"189.39.247.159 - - [30/Apr/2021:19:01:06 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\"\n"
"80.82.77.192 - - [30/Apr/2021:19:18:14 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36\"\n"
"80.82.77.192 - - [30/Apr/2021:19:22:45 +0800] \"GET / HTTP/1.1\" 400 248 \"-\" \"Mozilla/5.0 zgrab/0.x\"\n"
"190.94.151.114 - - [30/Apr/2021:19:40:19 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\"\n"
"92.118.161.21 - - [30/Apr/2021:19:47:03 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"NetSystemsResearch studies the availability of various services across the internet. Our website is netsystemsresearch.com\"\n"
"128.14.211.186 - - [30/Apr/2021:19:58:55 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\"\n"
"119.15.88.56 - - [30/Apr/2021:20:46:18 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\"\n"
"34.77.163.42 - - [30/Apr/2021:20:47:53 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"python-requests/2.25.1\"\n"
"194.62.6.193 - - [30/Apr/2021:20:56:30 +0800] \"HEAD / HTTP/1.0\" 444 0 \"-\" \"-\"\n"
"209.141.33.74 - - [30/Apr/2021:20:57:03 +0800] \"POST /boaform/admin/formLogin HTTP/1.1\" 444 0 \"http://125.160.120.137:80/admin/login.asp\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0\"\n"
"216.21.170.197 - - [30/Apr/2021:21:09:26 +0800] \"GET /admin/login.asp HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36\"\n"
"213.163.23.68 - - [30/Apr/2021:21:09:26 +0800] \"GET /:80/admin/login.asp HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36\"\n"
"63.117.14.69 - - [30/Apr/2021:21:09:27 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36\"\n"
"195.111.111.68 - - [30/Apr/2021:21:09:28 +0800] \"GET /admin/login.asp HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36\"\n"
"98.124.44.196 - - [30/Apr/2021:21:19:28 +0800] \"GET /:443 HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36\"\n"
"128.14.133.58 - - [30/Apr/2021:21:25:18 +0800] \"GET / HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36\"\n"
"64.246.133.4 - - [30/Apr/2021:21:27:28 +0800] \"GET /:443 HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36\"\n"
"24.220.112.166 - - [30/Apr/2021:21:43:28 +0800] \"GET /admin/login.asp HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36\"\n"
"64.53.1.196 - - [30/Apr/2021:21:47:29 +0800] \"GET /admin/login.asp HTTP/1.1\" 444 0 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36\"\n")
# Scan the whole sample text for log entries. IGNORECASE is required because
# the pattern spells its literals in lowercase ([a-z]{3}, http/1.x) while the
# log text contains "Apr" and "HTTP/1.1".
matches = re.finditer(regex, test_str, re.IGNORECASE | re.MULTILINE)

for match_num, match in enumerate(matches, start=1):
    # Report the span and text of the full match (group 0).
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum=match_num, start=match.start(), end=match.end(), match=match.group()))

    # Report every capture group, numbered from 1. A group that did not
    # participate in the match prints span -1--1 and the value None.
    for group_num in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum=group_num, start=match.start(group_num), end=match.end(group_num), group=match.group(group_num)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html