import re
# Compiled pattern that extracts Telegram usernames from free text.
#
# Recognised forms:
#   * "@name" mentions (the "@" must start the text or follow whitespace),
#   * links on t.me / telegram.me / telegram.dog, with optional scheme and "www.",
#   * "tg://resolve?domain=name" deep links,
#   * "name.t.me" subdomains, accepted only when ".t.me" ends the line.
#
# The captured `username` group is 4-32 characters long: an ASCII letter
# followed by ASCII letters, digits or underscores, with no double underscore
# and no trailing underscore.
#
# The username character classes sit inside `(?-i:...)` with explicit
# upper- and lower-case ranges. With re.IGNORECASE on a str pattern, `[a-z]`
# also matches four non-ASCII letters (U+0130, U+0131, U+017F, U+212A), which
# would let handles such as "user\u212Aname" through. Prefixes such as
# "HTTPS://T.ME/" stay case-insensitive through the global flag.
regex = re.compile(r"""
    (?:
        (?<!\S)@                            # "@" at start of text or right after whitespace
      |                                     # OR
        (?:
            # Telegram link types reference: https://core.telegram.org/api/links
            (?:https?://)?                  # optional http:// or https:// scheme
            (?:www\.)?                      # optional www subdomain
            (?:t\.me|telegram\.(?:me|dog))  # t.me, telegram.me or telegram.dog
            /                               # slash separating domain and username
          |                                 # OR
            tg://resolve\?domain=           # deep link form tg://resolve?domain=
        )
      |                                     # OR
        (?=                                 # consume nothing: a username used as a t.me subdomain
            \b                              # must begin at a word boundary
            (?!\w*__)                       # no double underscore in the word that follows
            (?-i:[A-Za-z][A-Za-z0-9_]{3,31})  # ASCII-only username body
            (?<!_)                          # must not end with an underscore
            \.t\.me$                        # followed by .t.me at the end of the line
        )
    )
    (?P<username>                           # the captured username
        (?!\w*__)                           # no double underscore in the word that follows
        (?!\w*[^0-9a-z_.,\s])               # the word must be followed by a letter, digit,
                                            # underscore, dot, comma, whitespace or end of text
        (?-i:[A-Za-z][A-Za-z0-9_]{3,31})    # ASCII letter, then 3-31 ASCII letters, digits or underscores
        (?<!_)                              # must not end with an underscore
        \b                                  # must end at a word boundary (rejects over-long names)
    )
    (?:\.t\.me)?                            # swallow a trailing .t.me so it is part of the match
""", flags=re.MULTILINE | re.IGNORECASE | re.VERBOSE)
# Sample input for the pattern: one candidate per line, every line
# (including the last) terminated by a newline.
test_str = "\n".join((
    # "@" mentions, alone or several per line, some mixed with a t.me subdomain
    "@username",
    "@user_name",
    "@username123",
    "@alanbadoev @orkester okinea.t.me",
    "@username123_, @username123",
    "@username123, @username123_",
    "@username123, username123.t.me",
    "@name_with_underscore",
    # mentions with a special character, double or trailing underscore,
    # an embedded space, a very long name, or a leading digit
    "@user$name",
    "@user__name",
    "@username_",
    "@user name",
    "@toolongusernameeeeeeeeeeeeeeeeeee",
    "@123username",
    # link, deep-link and subdomain forms
    "https://t.me/username",
    "https://www.t.me/username",
    "http://t.me/username",
    "t.me/username",
    "tg://resolve?domain=username",
    "username.t.me",
    " username.t.me",
    # links and subdomains carrying an "@" or misplaced underscores
    "https://t.me/@username",
    "http://t.me/user__name",
    "username_.t.me",
    "user__name.t.me",
)) + "\n"
# Report every match in the sample text: first the span and text of the whole
# match, then the span and text of each capture group (numbered from 1).
matches = regex.finditer(test_str)
for index, found in enumerate(matches, start=1):
    begin, stop = found.span()
    print(f"Match {index} was found at {begin}-{stop}: {found.group()}")
    for group_index, captured in enumerate(found.groups(), start=1):
        group_begin, group_stop = found.span(group_index)
        print(f"Group {group_index} found at {group_begin}-{group_stop}: {captured}")
# Note: this code sample was automatically generated and is not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, see https://docs.python.org/3/library/re.html