import re
# URL matcher, anchored with ^ and $ and compiled case-insensitively in
# verbose mode (whitespace and "#" comments inside the pattern are ignored).
# Capture groups, in order:
#   1 scheme   2 userinfo (user@ or user:password@)   3 IPv4 address
#   4 domain name   5 port   6 path   7 query string   8 fragment
# Groups 3 and 4 are alternatives, so at most one of them takes part in a match.
regex = re.compile(r"""
^(https?:\/\/)? # optional scheme
((?:[-a-z0-9._~!$&\'()*+,;=]|%[0-9a-f]{2})+ # optional username@,
(?::(?:[-a-z0-9._~!$&\'()*+,;=]|%[0-9a-f]{2})+)?@)? # or username:password@
(?:((?:(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])) # IPv4 address
|((?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z][a-z0-9-]*[a-z0-9])) # or dot-separated domain labels
(:\d+)? # optional port number
((?:\/(?:[-a-z0-9._~!$&\'()*+,;=:@]|%[0-9a-f]{2})+)*\/?) # path (possibly empty, may end in /, no double-// allowed)
(\?(?:[-a-z0-9._~!$&\'()*+,;=:@\/?]|%[0-9a-f]{2})*)? # optional querystring
(\#(?:[-a-z0-9._~!$&\'()*+,;=:@\/?]|%[0-9a-f]{2})*)?$ # optional fragment
""", flags=re.IGNORECASE | re.VERBOSE)


def describe_match(found: "re.Match[str]") -> "list[str]":
    """Return human-readable report lines for a successful match.

    The first line gives the span and text of the whole match; one line per
    capture group follows, in group-number order.

    Args:
        found: A match object (not ``None``).

    Returns:
        The report as a list of strings, one per output line.
    """
    lines = [
        f"Match was found at {found.start()}-{found.end()}: {found.group()}"
    ]
    for group_num, group in enumerate(found.groups(), start=1):
        if group is None:
            # start()/end() return -1 for a group that took no part in the
            # match, so printing a span for it would be misleading.
            lines.append(f"Group {group_num} did not participate in the match")
        else:
            lines.append(
                f"Group {group_num} found at "
                f"{found.start(group_num)}-{found.end(group_num)}: {group}"
            )
    return lines


test_str = "www.hwholdsworth.com:80/a/b/c?j=k#z"
match = regex.search(test_str)
if match is not None:
    print("\n".join(describe_match(match)))
# NOTE: Please keep in mind that this code sample was generated automatically
# and is not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html