# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern under test. It is applied line by line (re.MULTILINE, see below):
#   ^(?:[A-Z] \d|[^\W\d_]{2,}\.?)   first token: either one capital A-Z, a
#                                   space and one digit, or two or more
#                                   letters with an optional trailing dot
#   (?:[- '’][^\W\d_]+\.?)*$        then any number of further letter-only
#                                   words, each introduced by '-', ' ', "'"
#                                   or '’' and optionally followed by a dot
# [^\W\d_] means "a word character that is neither a digit nor an
# underscore", i.e. a letter.
regex = r"^(?:[A-Z] \d|[^\W\d_]{2,}\.?)(?:[- '’][^\W\d_]+\.?)*$"

# Sample input, one candidate per line. The "####" banner lines and the blank
# lines are part of the data. Every character is kept verbatim (including the
# "Absolutly" spelling) because the offsets printed below depend on it.
test_str = ("########### Absolutly Fine ################\n"
"Mainstreet. 12\n"
"mainstreet 1\n"
"Mainstreet 12a\n"
"Main Street 12A\n"
"Big New mainstreet 12a\n"
"Mainstreet-New 12b\n"
"Mains Str. 12z\n"
"St. Alexander Street 121 b\n"
"Übermorgen Straße 56/58\n"
"John Kennedy Street 56/58a\n"
"Bahnhofstr. 56-58\n"
"Leonhard-Eck-Str. 56 - 58\n"
"Älterweg 56-58a\n"
"Graf-Anton-Weg 1234\n"
"Alexanderstraße 56/58a\n"
"Prof.-Ernst-Nathan-Straße 18a - 19b\n"
"Prof.-Albert-Einstein-Weg 18a-19b\n"
"Prof. Ernst-Nathan-Straße 12\n"
"Prof. Albert-Einstein-Weg 15a\n\n\n"
"######## Not okay ################\n"
"Mainstreet\n"
"A\n"
"B\n"
"Z\n"
"A 1\n"
"B 2\n"
"Z 99\n"
"Mainstreet #+;:_*´`?=)(/&%$§!\n"
"Mainstreet#+;:_*´`?=)(/&%$§!\n"
"Mainstreet 2\n"
"Mainstreet..\n"
"Mainstreet§\n"
"Mainstreet 12345\n"
"Mainstreet 25-ab\n"
"Mainstreet 12ü\n"
"Mainstreet 12_\n"
"Mainstreet 12!\"§$$%&/()\n"
"Mainstreet a2\n"
"Mainstreet 13àâäèéêë\n"
"Mainstreet 0\n"
"Mainstreet 123aaa123,\n"
"Mainstreet 123 aaa 123\n"
"Mainstreet 1a 1\n"
"Mainstreet 1a 1'\n"
"Mainstreet 1a1\n"
"Mainstreet 00 a\n"
"Mainstreet 0a\n")

# re.MULTILINE makes ^ and $ anchor at every line of test_str rather than only
# at the two ends of the whole string. finditer returns an iterator, so
# `matches` is consumed by the loop below.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Report the whole match (group 0) together with its offsets in test_str.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1. The pattern above contains only
    # non-capturing (?:...) groups, so this loop prints nothing unless capture
    # groups are added to it.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html