# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for loosely formatted phone numbers. Reading left to right:
#
#   (?=(.*?\d){9,})
#       Lookahead: at least nine digits must follow the match start. `.` does
#       not cross newlines here, so the digits must be on the same line.
#       NOTE: the lookahead counts digits in the *rest of the line*, not only
#       in the text that ends up being matched, so a short match followed by
#       unrelated digits still passes this check.
#
#   Prefix, one of two alternatives:
#     * unbracketed: (`00` or `+`, then 1-3 digits and an optional single
#       space/`-`/`/`) or a lone `0`; an optional space; then 2-5 digits whose
#       first digit is not 0.
#     * bracketed: (`00` or `+`, then 1-3 digits, optional space, `(`) or
#       `(0`; followed by either `)` directly or an optional space plus 2-5
#       digits (first digit not 0) and `)`.
#     Either alternative ends with: optional space, optional `-` or `/`,
#     optional space.
#
#   [0-9]{1,7}
#       First block of the remaining number.
#
#   ([ \-/]?[0-9]{1,5}){0,4}
#       Up to four more digit blocks of 1-5 digits, each optionally preceded
#       by exactly one space, `-` or `/`.
regex = r"(?=(.*?\d){9,})(((((((00|\+)[0-9]{1,3}[ \-/]?)|0)[ ]?[1-9][0-9]{1,4})[ ]?[\-/]?[ ]?)|((((00|\+)[0-9]{1,3}[ ]?\()|\(0)(\)|[ ]?[1-9][0-9]{1,4}\))[ ]?[\-/]?[ ]?))[0-9]{1,7}([ \-/]?[0-9]{1,5}){0,4})"

# Sample input: two problem cases first, then lines that are expected to match,
# then lines that are expected NOT to match (each with the reason inline).
# The `//...` remarks are part of the searched text, not Python comments.
test_str = ("+33 (33) 3)44444444444 //the found string has only 5 digits, but it shouldn't be found because of (?=(.*?\\d){9,})\n"
    "+49123123123 //how can I exclude that, because there is no white space in the middle\n\n\n\n\n\n\n"
    "// this is the part where I test all the other phone numbers, if you are interested in:\n\n"
    "//it should match these phone numbers:\n"
    "testword +49 30 12345-67 testword \n"
    "testword+49 (0)30 12345-67\n"
    "(0)30 12345-67\n"
    "(0)30 123 234\n"
    "(0123)30 12345-67\n"
    "test (021)30 123 234\n"
    "s030 12345-67 dsd\n"
    "(030) 12345 55 99testword \n"
    "testword (030) 12345 44\n"
    "0351 4640-123\n"
    "09623 12 3 33\n"
    "09234 1233\n"
    "+49 123 1 2 12 31\n"
    "0049 2123 1231\n"
    "+1 3519 1231\n"
    "0 30 / 12 34 56\n"
    "0 30 / 12 34 56\n"
    "030 / 12 34 56\n"
    "0123 / 12312 123\n"
    "testword 0178 1232231\n"
    "+490 178 1232231\n"
    "testword +36 (351)4740-991 testword\n"
    "testword +36(351) 4740-991 testword\n"
    "09623 12333 testword\n\n"
    "should NOT match (with the reason why it shouldn't match):\n"
    "+49123123123 //because there is no white space\n"
    "01781232231 //because there is no white space\n"
    "123456 //because it doesn't have at least 9 digits and no white space\n"
    "123.123 //because it has a dot\n"
    "12-12-12 //because there is no white space and there is more than one dash\n"
    "12-12 -12-12-12 //because there is more than one dash\n"
    "1990 - 2000 //because it doesn't have at least 9 digits\n"
    "1990-2000 //because it doesn't have at least 9 digits and no white space\n"
    "1990-91 //because it doesn't have at least 9 digits and no white space\n"
    "123 //because it doesn't have at least 9 digits and no white space\n"
    "+36 (351) 47(40-991 //because it has more than one left bracket\n"
    "+36 (33) 3)4444\n"
    ")40-991 //because it has more than one right bracket\n"
    "+23+234 +2346 // because it has more than one plus sign\n"
    "234 234 234 234 234 // because it has more than one white space in a row\n"
    "123 123123 // because it has more than one white space in a row\n"
    "01712123123\n"
    "01234")

# Lazily yields one match object per non-overlapping hit, in order of position.
matches = re.finditer(regex, test_str)

# Report every match, followed by the span and text of each capture group.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Capture groups are numbered from 1 (group 0 is the whole match, already
    # printed above). A group that did not participate in the match reports
    # span -1--1 and the text None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html