# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Phone-number pattern, one number per line.
#
# It is written for re.VERBOSE (layout whitespace and the (?#...) notes are
# ignored), re.MULTILINE (^ and $ anchor every line) and re.IGNORECASE (the
# "ext"/"x" marker may be in any case).
#
# Capture groups:
#   1     country code following an international prefix (+, 00 or 011)
#   2     single-digit country code (1 or 7) written without a prefix;
#         a separator after it is mandatory
#   3     optional area code written between parentheses
#   4-7   optional leading digit groups, each followed by one separator
#   8     start of the last digit group (may be empty)
#   9     final 1-4 digits of the last digit group (never empty on a match)
#   10    optional extension digits after an "x"/"ext" marker
#
# Only plain greedy quantifiers are used. Possessive forms ({m,n}+ and *+) are
# accepted by `re` only from Python 3.11 on, and they are not needed here:
# whatever follows group 9 (a separator, the "x" marker or end of line) and
# whatever follows the extension separators (a digit) can never match a
# character those quantifiers would give back, so backtracking cannot change
# which text matches or what each group captures.
regex = r"""
^
(?:
(?:\+|00|011)[\.\/\-\ \t]*
(?# group 1: country code after international dialing code)
([17]|2(?:[07]|[1-689]\d)|3(?:[0-4679]|[578]\d)|4(?:[013-9]|2\d)|5(?:[1-8]|[09]\d)|6(?:[0-6]|[789]\d)|8(?:[1246]|[035789]\d)|9(?:[0-58]|[679]\d))
[\.\/\-\ \t]*
| (?# group 2: single-digit country code without international dialing code)
([17])
[\.\/\-\ \t]+(?# a separator is required for disambiguation)
)?
(?# group 3: area code between parentheses, optional)
(?:\((\d{1,4})\)[\.\/\-\ \t]*)?
(?# groups 4-7: leading groups of digits, may be empty)
(?:(\d{1,6})[\.\/\-\ ])?
(?:(\d{1,6})[\.\/\-\ ])?
(?:(\d{1,6})[\.\/\-\ ])?
(?:(\d{1,6})[\.\/\-\ ])?
(?# group 8: start of the last group of digits, may be empty)
(\d{0,10}?)
(?# group 9: up to 4 digits at end of the last group of digits)
(\d{1,4})
(?:
[\.\/\-;\ \t]*e?xt?[\.\/\-=\ \t]*
(?# group 10: extension code, optional)
(\d{1,14})
)?
$
"""
# Sample input for the pattern: one candidate phone number per line, grouped
# under plain-text headings. Empty strings in the tuple are the blank lines
# between sections; the final empty string yields the trailing newline.
test_str = "\n".join((
    "All valid numbers should have a non-empty group 9 for the last digits (before extension):",
    "",
    # International prefix "+" followed by a country code.
    "+1(234)567 8901",
    "+1 234 567 8901",
    "+1-234-567-8901",
    "+1-234-567-8901",
    "+1.234.567.8901",
    "+1/234/567/8901",
    "+7-123-456-7890",
    "+7(123)4567890",
    "+27-31-707-1700",
    "+27-84-820-0365",
    "+261-23-456-7890",
    "+212 (34) 567-8901",
    "+34123456789",
    "+34 1 23 45 67 89",
    "+34 123 456 789",
    "+39 0577286143",
    "+44(012)123456789",
    "+49 (1234) 567890",
    "+49 211 828934-0",
    "+49 69 96876-150",
    "",
    # Single-digit country code without an international prefix.
    "1-234-555-8901",
    "7-123-456-7890",
    "",
    # International prefix "00".
    "0012315557890",
    "001 230 123 456789",
    "001-555012345",
    "00210123456789",
    "00271-12-345-6789",
    "003312345678",
    "009112345678",
    "008001234567890",
    "",
    "With an extension:",
    "",
    "+12345678901x1234",
    "+1-234-567-8901 x1234",
    "+7 123-4567890-x321",
    "+7 123 4 5 6-7890x1234",
    "+7-123-456-7890 ext 1234",
    "+33(1)23.45.67.89 x 1234",
    "+9123-456-7890x12345",
    "+9123-456-7890;ext=12345",
    "+91-92130-25552",
    "",
    "01172312345678x901",
    "011 7 231 234 5678 ext. 901",
    "011 7 231 234 5678;ext=901",
    "",
    "0091234567890x1234",
    "0011234567890ext.1234",
    "00223-(4321)-567.89 ext-4321",
    "007-(123)-456-7890 ext 4321",
    "007-(123)-456-7890;ext=4321",
    "",
    "Local numbers only (or missing/unknown country code):",
    "",
    "(800)5678901",
    "(800) 567 8901",
    "(234) 567 8901",
    "(234) 567 89 01",
    "(1)23 45 67 89",
    "(01)23 45 67 89",
    "(0)1 23 45 67 89",
    "(0)1 23 45 6789",
    "(0)800 800 800",
    "(0)800 800 800;ext=12",
    "",
    "800-567-8901",
    "234-567-8901",
    "234 567 8901",
    "",
    "01 23 45 67 89",
    "01 23 45 6789",
    "0 800 800 800",
    "00-0-0000",
    "",
    "123456789",
    "123456789012",
    "12345678901",
    "2345678901",
    "12345678",
    "1234567890123",
    "",
    "Local short numbers (up to 4 digits):",
    "",
    "12 34",
    "1234",
    "112",
    "911",
    "15",
    "",
    "Ambiguous or invalid format:",
    "",
    "00-0--0000",
    "(01 55) 1234 5678",
    "(01 551) 234 5678",
    "+012345678",
    " +340123456789",
    "++34123456789",
    " (0)123456789",
    "12(34567890",
    "123)456789012345",
    ")234( 567 8901",
    "ext1234",
    "",
))
# Scan the sample text and report every match with its span, followed by the
# span and text of each capture group.
#   MULTILINE  - ^ and $ anchor each line, so every line is tried on its own
#   VERBOSE    - the pattern contains layout whitespace and (?#...) notes
#   IGNORECASE - the "x"/"ext" extension marker is accepted in any case
matches = re.finditer(regex, test_str, re.MULTILINE | re.VERBOSE | re.IGNORECASE)
for matchNum, match in enumerate(matches, start=1):
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum = matchNum,
        start = match.start(),
        end = match.end(),
        match = match.group()))
    # Group 0 is the whole match (printed above), so capture groups are
    # numbered from 1 up to and including the number of groups. A group that
    # did not take part in the match is reported by `re` with span -1/-1 and
    # value None.
    for groupNum in range(1, len(match.groups()) + 1):
        print ("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum = groupNum,
            start = match.start(groupNum),
            end = match.end(groupNum),
            group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html