# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern with two alternatives:
#   1. "street":  a run of letters (optionally separated by single spaces,
#      hyphens or dots), a space, a 1-3 digit house number and an optional
#      single-letter suffix (e.g. "P.v.-Mallinckrodt-Weg 13a").
#   2. "zipcode" + "city": an optional upper-case/hyphen country prefix and
#      digits, followed by a comma or space and a run of letters
#      (e.g. "D-82937 Schweinfurt").
#
# Python's ``re`` module does not understand the PCRE forms ``(?<name>...)``
# and ``\p{L}``: named groups must be written ``(?P<name>...)`` and "any
# letter" is expressed as ``[^\W\d_]`` (a word character that is neither a
# digit nor an underscore).
regex = (
    r"\b((?P<street>([^\W\d_][ ]?[-.]*)+ [0-9]{1,3}([ ]?[A-Za-z])?))\b"
    r"|"
    r"\b((?P<zipcode>([A-Z-]+)?[0-9]+)[, ](?P<city>([^\W\d_][- ]?)+))\b"
)
test_str = ("Michael Hohmann P.v.-Mallinckrodt-Weg 13a\n"
"Experte für Marketing Automatisierung 33154 Salzkotten\n\n"
" Tel: (0 52 58) 9 90 90-0\n\n\n"
"Dianastraße 1 030 - 40 39 33 33\n"
"13469 Berlin mhugo@stahlsdfsdf.berlin\n\n"
"Dipl.Ing. Arch. Innenraum Großgörschenstrasse 5\n\n"
"10827 Berlin Schöneberg\n\n"
"Geschäftsführer Gubeslstrasse 24\n\n"
"M +41 79 999 99 99 CH-6300 Zug\n\n"
"Friedrichstraße 94\n"
"Telefon +49 30 33333-33333 10117 Berlin\n\n"
"Mobil +49 333 3333333 Lichtenberger Straße 17 a\n\n"
"wilfreid.kroll@sdfsdf.de 10243 Berlin\n\n"
"Unit 13, Le Moulin, Rue de Bali\n"
"Pereybére 99999, Mauritius\n\n"
"Wolfgang Leobner Unterer Grund 10\n"
"Ressavarstraße 45, A-9384 Hartberg\n\n"
"General Manager Horbeller Straße 33\n\n"
"Poppelsdorfer Allee 89\n"
"93828 Bonn\n\n"
"Carl-Benz-Straße 13\n"
"D-82937 Schweinfurt\n\n"
"Luisenstraße 33 | 93720 Bad Dürrheim\n\n"
"Schwändi 7 * 8486 Rikon\n\n"
"Bogener Straße 8 94827 Neukirchen\n\n"
"93939 Neuötting\n\n"
"83927 Mannheim-Schönau\n\n"
"Wörrst Straße 3\n\n"
"Türrschmidtstr. 2a\n\n"
"Lambertusweg 2\n\n"
"Lerchenweg 4 \n\n"
"D-29304 Eching am Ammersee\n\n"
"Schönbuchstr. 34\n\n"
"Neuenhofer Str. 7\n\n"
"Meinekeestr. 26, 29304 Berlin\n\n"
"Carmenstraße 17 - 18\n\n"
"13432 Werder (Havel)\n\n"
"Lenelshof 1 99930 Ratingen\n\n"
"Rothenbasdfchaussee 80c\n\n"
"Poller Kirchweg 990\n"
"Poller Kirchweg 9903\n\n"
"Weilstetter Weg 34B\n\n"
"Gartenstraße 88U Fax: 93 (0) 33333 - 33333\n\n"
"09392 Crottendorf Email: info@sdfsdf.de\n\n"
"hauptstr.51\n\n"
"Pantelsdfs Str. 33, A-3929 St. Pantaleon, Austria\n\n"
"D - 88889 Dreieich\n\n"
"Bahnhofstr. 77 93000 Kornewestheim Tel. 929293-39392\n\n"
"Friedrichstraße 180 - 39302 Berlin\n\n"
"Kupferteichweg 39 44444 Hamburg\n\n"
"HEP Immobilien GmbH - Willicher Straße 1 - 33333 Willich\n\n"
"Rosengasse 19\n"
"A-3456 Groß Enzersdorf\n\n"
"Karsten Poppe Neue Straße 22\n"
"Finanz-/Versicherungmakler 33333 Wildeshausen\n\n")
# re.UNICODE is already the default for str patterns on Python 3; it is
# passed explicitly so that \W and \d in the letter class are Unicode-aware
# on Python 2 as well (needed for letters such as "ß" or "ö").
matches = re.finditer(regex, test_str, re.MULTILINE | re.UNICODE)
for matchNum, match in enumerate(matches, start=1):
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
    # Group 0 is the whole match (printed above); report groups 1..N.
    # Groups belonging to the alternative that did not participate are
    # reported by ``re`` with start/end -1 and value None.
    for groupNum in range(1, len(match.groups()) + 1):
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html