import re
# URI-matching pattern. Capture groups, as laid out in the expression:
#   1: scheme characters followed by ":" and two or three slashes
#   2: the run after the slashes (host, and any user:pass@ / :port text);
#      look-arounds reject ".." later on the line, "@." later on the line,
#      a leading "-", ".", or "*", and a trailing "-" or "."
#   3: optional path starting with "/", not ending in "."
#   4: optional text after an optional "?"
#   5: optional text after an optional "#"
regex = re.compile(r"([-\.\w]+:\/{2,3})(?!.*[.]{2})(?![-.*\.])((?!.*@\.)[-_\w@^=%&:;~+\.]+(?<![-\.]))(\/[-_\w@^=%&$:;/~+\.]+(?<!\.))?[?]?([-_\w=&@$!|~+]+)*[#]?([-_\w=&@$!|~+]+)*", flags=re.IGNORECASE)

# Sample corpus: lines expected to match, edge cases, and invalid examples.
test_str = ("DB_URL=postgresql://postgres:postgres@localhost/postgres?timeout=0\n"
            "# Damn, Daniel!\n\n"
            "Bob: have you checked https://www.facebook.com?\n"
            "lorem https://github.com/justsml?tab=activity#top ipsum\n"
            "smb:///winbox/dfs/ - ipp://printer\n"
            "leading text chrome-extension://flags??#\n"
            "s3://buckets/for/all?true=true\n"
            "s3:///////buckets-o-fun\n"
            "pkcs11://because-pkcs7-is-weak#only=hash\n"
            "\\||@@https://www.google.com <- should match when extracting\n"
            "https://we - this is valid\n"
            "s3://buckets-o-fun/hi/raw=val&#keep=hashin'?\n"
            "http://aaa.com.co.com:8080/test?dan=dev#den?\n"
            "hi. pkcs11://because-pkcs7-is-weak\n"
            "hi. a://b/c?d=e#f=g && a://b/c?d]\n"
            "hi. chrome-extension://flags hi. \n"
            "hi. ms-help://good-luck/index.html hi. \n"
            "hi. .iris.beep://really/dots. hi. \n"
            "hi. iris.beep://really/dots?test=qs hi. \n"
            "---\n"
            "a://b]/c - Should match `a://b` - it's smallest match.\n"
            "http://www.c:ool.com.au - this one's tricky, should match auth URIs: proto://user:pass@host. Including cases where the password could be omitted (`proto://user@host`.)\n"
            "boop://really-/ -- technically domains shouldn't end in dashes or other 'special' chars, but this is technically a valid URI.\n"
            "---\n"
            "a://b./c?d. -- bad dns, trailing dot: tricky w/o look-arounds. (Adding an OR pattern would muddle the returned data indexes.) -- should match `a://b`\n\n\n"
            "Invalid Examples:\n"
            "https://www...google...com\n"
            "https://we@.com\n"
            "https://asdas-.com\n"
            "http://-apple-.com\n"
            "a://(b/c\n"
            "chrome-extension)://flags\n"
            "ms-help://|good-luck.html\n"
            ". , ; : ) ] } \n")

# Lazily iterate over every non-overlapping match in the corpus.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # A group that did not participate in the match is reported as
    # "-1--1: None" (start()/end() return -1 and the group value is None).
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")

# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html