import re
# Phone-number pattern, compiled once at import time.
#
# Flags:
#   re.MULTILINE  - ^ and $ anchor at every line, so each input line is
#                   checked on its own.
#   re.VERBOSE    - whitespace in the pattern is ignored and the (?# ...)
#                   groups act as inline comments.
#   re.IGNORECASE - the "x" / "ext" extension marker matches in any case.
#
# Capture groups:
#   1     country code following an international prefix (+, 00 or 011)
#   2     single-digit country code (1 or 7) written without a prefix; a
#         separator must follow it
#   3     optional area code written between parentheses (1-4 digits)
#   4-7   optional leading digit groups (1-6 digits each), each followed by
#         exactly one separator (tab is not accepted as a separator here)
#   8     start of the last digit group (may be empty)
#   9     final 1-4 digits of the last digit group (always present in a match)
#   10    optional extension digits after "x", "xt", "ex" or "ext"
#
# Portability note: the last digit group and the separators in front of the
# extension use ordinary greedy quantifiers.  Possessive quantifiers are only
# understood by ``re`` from Python 3.11 on and would make this compile fail on
# older interpreters.  They are not needed: giving back a digit (or a
# separator) can never lead to a match because the next token would then have
# to match that digit (or separator), which neither the extension marker,
# ``$`` nor ``\d`` can do.
regex = re.compile(r"""
^
(?:
(?:\+|00|011)[\.\/\-\ \t]*
(?# group 1: country code after international dialing code)
([17]|2(?:[07]|[1-689]\d)|3(?:[0-4679]|[578]\d)|4(?:[013-9]|2\d)|5(?:[1-8]|[09]\d)|6(?:[0-6]|[789]\d)|8(?:[1246]|[035789]\d)|9(?:[0-58]|[679]\d))
[\.\/\-\ \t]*
| (?# group 2: single-digit country code without international dialing code)
([17])
[\.\/\-\ \t]+(?# a separator is required for disambiguation)
)?
(?# group 3: area code between parentheses, optional)
(?:\((\d{1,4})\)[\.\/\-\ \t]*)?
(?# groups 4-7: leading groups of digits, may be empty)
(?:(\d{1,6})[\.\/\-\ ])?
(?:(\d{1,6})[\.\/\-\ ])?
(?:(\d{1,6})[\.\/\-\ ])?
(?:(\d{1,6})[\.\/\-\ ])?
(?# group 8: start of the last group of digits, may be empty)
(\d{0,10}?)
(?# group 9: up to 4 digits at end of the last group of digits)
(\d{1,4})
(?:
[\.\/\-\ \t]*e?xt?[\.\/\-\ \t]*
(?# group 10: extension code, optional)
(\d{1,14})
)?
$
""", flags=re.MULTILINE | re.VERBOSE | re.IGNORECASE)
# Sample text fed to the pattern: one candidate per line, arranged under
# plain-text headings with blank lines between the sub-groups.  The tuple
# entries are joined with "\n"; an empty entry yields a blank line and the
# trailing empty entry yields the final newline.
test_str = "\n".join((
    "All valid numbers should have a non-empty group 9 for the last digits (before extension):",
    "",
    # International prefix "+"
    "+1(234)567 8901",
    "+1 234 567 8901",
    "+1-234-567-8901",
    "+1-234-567-8901",
    "+1.234.567.8901",
    "+1/234/567/8901",
    "+7-123-456-7890",
    "+7(123)4567890",
    "+27-31-707-1700",
    "+27-84-820-0365",
    "+261-23-456-7890",
    "+212 (34) 567-8901",
    "+34123456789",
    "+34 1 23 45 67 89",
    "+34 123 456 789",
    "+39 0577286143",
    "+44(012)123456789",
    "+49 (1234) 567890",
    "+49 211 828934-0",
    "+49 69 96876-150",
    "",
    # Single-digit country code without a prefix
    "1-234-555-8901",
    "7-123-456-7890",
    "",
    # International prefix "00"
    "0012315557890",
    "001 230 123 456789",
    "001-555012345",
    "00210123456789",
    "00271-12-345-6789",
    "003312345678",
    "009112345678",
    "008001234567890",
    "",
    "With an extension:",
    "",
    "+12345678901x1234",
    "+1-234-567-8901 x1234",
    "+7 123-4567890-x321",
    "+7 123 4 5 6-7890x1234",
    "+7-123-456-7890 ext 1234",
    "+33(1)23.45.67.89 x 1234",
    "+9123-456-7890x12345",
    "+91-92130-25552",
    "",
    "01172312345678x901",
    "011 7 231 234 5678 ext. 901",
    "",
    "0091234567890x1234",
    "0011234567890ext.1234",
    "00223-(4321)-567.89 ext-4321",
    "007-(123)-456-7890 ext 4321",
    "",
    "Local numbers only (or missing/unknown country code):",
    "",
    "(800)5678901",
    "(800) 567 8901",
    "(234) 567 8901",
    "(234) 567 89 01",
    "(1)23 45 67 89",
    "(01)23 45 67 89",
    "(0)1 23 45 67 89",
    "(0)1 23 45 6789",
    "(0)800 800 800",
    "",
    "800-567-8901",
    "234-567-8901",
    "234 567 8901",
    "",
    "01 23 45 67 89",
    "01 23 45 6789",
    "0 800 800 800",
    "00-0-0000",
    "",
    "123456789",
    "123456789012",
    "12345678901",
    "2345678901",
    "12345678",
    "1234567890123",
    "",
    "Local short numbers (up to 4 digits):",
    "",
    "12 34",
    "1234",
    "112",
    "911",
    "15",
    "",
    "Ambiguous or invalid format:",
    "",
    "00-0--0000",
    "(01 55) 1234 5678",
    "(01 551) 234 5678",
    "+012345678",
    " +340123456789",
    "++34123456789",
    " (0)123456789",
    "12(34567890",
    "123)456789012345",
    ")234( 567 8901",
    "ext1234",
    "",
))
# Scan the sample text and report every match together with its capture
# groups.  finditer() yields the matches lazily, in order of appearance.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    for group_num, group in enumerate(match.groups(), start=1):
        if group is None:
            # An optional group that took no part in the match has no span:
            # start()/end() would return -1, so say so instead of printing
            # a bogus "-1--1" position.
            print(f"Group {group_num} did not participate in the match")
            continue
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html