import re
# Pattern for URLs that sit at the start of a line (http, https, ws, wss,
# ftp, ftps) and whose host is one of the listed loopback / private /
# link-local forms.  The adjacent raw-string pieces below are joined by the
# parser into one pattern; each alternative repeats the scheme prefix.
regex = re.compile(
    r"\b("
    # 127.x.x.x
    r"^(http|https|wss|ws|ftp|ftps):\/\/127"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    # 10.x.x.x
    r"|^(http|https|wss|ws|ftp|ftps):\/\/(10)"
    r"([.](25[0-5]|2[0-4][0-9]|1[0-9]{1,2}|[0-9]{1,2})){3}"
    # localhost (any case, because of IGNORECASE)
    r"|^(http|https|wss|ws|ftp|ftps):\/\/localhost"
    # 172.16.x.x - 172.31.x.x; the second octet may carry one leading zero
    r"|^(http|https|wss|ws|ftp|ftps):\/\/172"
    r"[.](0?16|0?17|0?18|0?19|0?20|0?21|0?22|0?23"
    r"|0?24|0?25|0?26|0?27|0?28|0?29|0?30|0?31)"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    # 192.168.x.x
    r"|^(http|https|wss|ws|ftp|ftps):\/\/192[.]168"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    # 169.254.x.x
    r"|^(http|https|wss|ws|ftp|ftps):\/\/169[.]254"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    r"[.](?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    # ::1
    r"|^(http|https|wss|ws|ftp|ftps):\/\/::1"
    # IPv6 starting fc.. or fd.., then up to seven ":hhhh" parts
    # (each part may be empty, so runs of colons are accepted)
    r"|^(http|https|wss|ws|ftp|ftps):\/\/[fF][cCdD][0-9a-fA-F]{2}"
    r"(?:[:][0-9a-fA-F]{0,4}){0,7}"
    # IPv6 starting fe8. / fe9. / fea. / feb., then up to seven ":hhhh" parts
    r"|^(http|https|wss|ws|ftp|ftps):\/\/[fF][eE][89aAbB][0-9a-fA-F]"
    r"(?:[:][0-9a-fA-F]{0,4}){0,7}"
    r")"
    # Optional "/N" suffix: a single digit 7-9, or two digits optionally
    # preceded by "1"; the match must end on a word boundary.
    r"(?:\/([789]|1?[0-9]{2}))?\b",
    flags=re.IGNORECASE | re.MULTILINE,
)
# Sample input for the pattern: one candidate per line, grouped under
# "#"-prefixed headings.  Trailing "# ..." remarks on a line are part of the
# sample text itself, not Python comments.  The backslash after the opening
# quotes keeps the text from starting with an empty line.
test_str = """\
# loopback
http://127.0.0.0:80
https://127.0.0.0000 # only three digits supported here

# Localhost
http://localhost:25
https://localhost:8080
HTTP://LoCALhosT:99 # Case insensitive
ws://localhost:7777
wss://localhost:9200
https://google.com/blah/localhost/ # This should not alert

# Web Sockets
ws://localhost:2121/sockjs-node
wss://localhost:5882/

# FTP
ftp://192.168.0.1
ftps://127.0.0.1
ftps://localhost

# A
wss://10.0.0.1/10
ws://10.255.22.33

# B
http://172.17.100.155/32 # 172.16.0.0 to 172.31.255.255
http://172.15.1.1 # This should not fire
http://172.32.1.1 # This should not fire

# C
https://192.168.1.1/random/path
wss://192.168.1.255

# IPv6
http://fc00:833e:d648:f196:5c6c:1d9a:a14b:0d59 # Private IPv6
http://fc01:67e5::6a66:34a7
http://FC02:5fcf:e093:bb10:ce77:b5c9::/112
http://fc03:49bd:b7c1:5685:fa0a:87e9::2/128
http://fc04:::2 hmpf
http://0de9:b022:883f:2c3c:7dba:a184:7576:4531 # public IP addresses
http://04e4:e53b:0c2f:2990:27ad:0464:6dd3:b6b3
http://0319:3b4a:546e:d480:4814:f885:3396:bba2
http://5394:03f5:606f:273e:e343:d94a:610a:3f2e
http://63cd:ad1f:7ffb:62c7:cc17:6e20:9b59:2f7d

False Positives Check # Should not match any
https://c091ab33-ae6f-425e-9b41-a50d59eef025.organizations.api.brightspace.com/888654/image?version=c44605a0-fc75-4c49-85b6-e43cb5f42978
/course-images/images/a02b640d-fd9e-428e-8e77-5ac9c61d21b2
https://10207287.fls.doubleclick.net/activityi;src=10207287;type=counter;cat=uship0;ord=4880235517192;gtm=2wg3h0;auiddc=2059049732.1617109349;u1=https%3A%2F%2Fwww.uship.com%2Fregister%2F;u2=undefined;u3=undefined;u4=undefined;~oref=https%3A%2F%2Fwww.uship.com%2Fregister%2F

# More "crap"
wss://30.168.1.255.1
wss://127.1
https://192.168.1.256
wss://-1.2.3.4
wss://1.1.1.1.
wss://3...3
wss://255.255.255.255
wss://0.0.0.0
wss://1.1.1.01

# decimal vs. octal mix
http://172.031.33.33
http://172.021.133.01

# octal - not supported here
http://0254.0021.0062.0041 # Should not fire

# link-local addresses IPv4
http://169.254.1.0
http://169.254.254.255

# link-local addresses IPv6
http://fe90:1234::/64
http://feaf:ffff::/128
http://febf:ffff:ffff:ffff:ffff:ffff:ffff:ffff
"""
# Report every match of `regex` in `test_str`, followed by the span and text
# of each of its capture groups.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # Groups are numbered from 1.  A group that took no part in the match is
    # reported by the `re` module as None with start/end offsets of -1, and
    # is printed that way here.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html