import re
# URI splitter with one named group per component: scheme, authority
# (userinfo -> username/password, host, port), path, query and fragment.
#
# Python's ``re`` module spells named groups ``(?P<name>...)``; the
# ``(?<name>...)`` form used by other regex flavours is rejected with
# ``re.error`` at compile time.
regex = re.compile(
    # scheme: a run of characters up to the first ":".
    r"(?P<scheme>[^!$&'()*,;=@//:\s]+):"
    # Optional authority, introduced by "//".
    r"(?P<authority>//"
    # Optional "username[:password]@" prefix.
    r"(?P<userinfo>(?P<username>[\w\.\-!$&'()*,;=]+):?"
    r"(?P<password>[\w\.\-!$&'()*,;=]+)?@)?"
    # host: either an unbracketed name/address (no "/", ":" or brackets) or
    # a bracketed literal.  ":" is allowed inside the brackets so that IPv6
    # literals such as "[2001:db8::7]" are captured as the host.
    r"(?P<host>[^\s\n\t\r/:\[\]]+|\[[^\s\n\t\r/\[\]]+\])"
    # Optional ":port" (1-5 digits); closes the authority group.
    r":?(?P<port>\d{1,5})?)?"
    # Optional slash between authority and path; not part of any group.
    r"[/]?"
    # path: note that this group also spans the optional "?query" and
    # "#fragment" parts, which are additionally captured on their own.
    r"(?P<path>[^\s\?#]{1,}(?:\?(?P<query>[^\s#]+))?(?:#(?P<fragment>[^\s]+))?)?"
)

# Sample text: annotated URI examples; the box-drawing lines label the
# component each part of the URI below/above them belongs to.
test_str = ("example from wikipedia URI:\n"
    " userinfo host port\n"
    " ┌─┴─────┐\n"
    " user pass\n"
    " ┌──┴───────┴───┐┌──────┴──────┐ ┌┴─┐\n"
    " https://john.doe:example@www.example.com:1234/forum/questions/?tag=networking&order=newest#top\n"
    " └─┬─┘ └─────────────┬─────────────┘└───────┬───────┘ └────────────┬────────────┘ └┬┘\n"
    " scheme authority path query fragment\n"
    " userinfo host port\n"
    " ┌──┴───┐ ┌──────┴──────┐ ┌┴─┐\n"
    " https://john.doe@www.example.com:1234/forum/questions/?tag=networking&order=newest#:~:text=whatever\n"
    " └─┬─┘ └─────────────┬─────────────┘└───────┬───────┘ └────────────┬────────────┘ └───────┬───────┘\n"
    " scheme authority path query fragment\n\n"
    " ldap://[2001:db8::7]/c=GB?objectClass?one\n"
    " └┬─┘ └─────┬─────┘└─┬─┘ └──────┬──────┘\n"
    " scheme authority path query\n\n"
    " mailto:John.Doe@example.com\n"
    " └─┬──┘ └────┬─────────────┘\n"
    " scheme path\n\n"
    " news:comp.infosystems.www.servers.unix\n"
    " └┬─┘ └─────────────┬─────────────────┘\n"
    " scheme path\n\n"
    " tel:+1-816-555-1212\n"
    " └┬┘ └──────┬──────┘\n"
    " scheme path\n\n"
    " telnet://192.0.2.16:80/\n"
    " └─┬──┘ └─────┬─────┘│\n"
    " scheme authority path\n\n"
    " urn:oasis:names:specification:docbook:dtd:xml:4.1.2\n"
    " └┬┘ └──────────────────────┬──────────────────────┘\n"
    " scheme path")

# Report every match together with the span and text of each capture group.
# Groups that did not take part in a match are printed with span -1--1 and
# the value None.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html