# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Address pattern, assembled from adjacent raw-string pieces (two alternatives):
#   1. ^.*( ... ).*$ : text containing "po box" or "private bag" (optional
#      whitespace between the two words); that keyword is capture group 1.
#   2. ^\d[ ... ]*$  : a digit followed by zero or more characters drawn from the
#      class: "/", a-z, the Ā-ū code-point range, digits, whitespace, ",", "'", "-".
# Case folding and per-line anchoring depend on the flags passed when matching.
regex = (
    r"^.*(po\s*box|private\s*bag).*$"
    r"|"
    r"^\d[\/a-zĀ-ū0-9\s\,\'\-]*$"
)
# Sample text the pattern is run against: labelled address examples, each label
# underlined with a row of "¯", followed by a list of addresses from other countries.
# The adjacent string literals below are concatenated into one multi-line string.
test_str = (
    "Street address:\n"
    "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n"
    "224B Clarence Street, Vogeltown, Wellington 6023\n\n"
    "Rural address:\n"
    "¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n"
    "128 Valley Road North, RD 2, Middlemarch 9597\n\n"
    "PO Box:\n"
    "¯¯¯¯¯¯¯\n"
    "PO Box 17999, Greenlane, Auckland 1546\n\n"
    "Street – with unit:\n"
    "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n"
    "3/123 Johnstone Street West, Point Chevalier, Auckland 1022\n\n"
    "Rural address with unit:\n"
    "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n"
    "8/56 Maple Boulevard, RD 2, Wanaka 9382\n\n"
    "Private Bag:\n"
    "¯¯¯¯¯¯¯¯¯¯¯¯\n"
    "Private Bag 93899, Auckland 0753\n\n"
    "Street – with building name:\n"
    "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n"
    "Level 2 Colombo House, 3A/1222 Colombo Street, St Albans, Christchurch 8014\n\n"
    "Rural address – with building name:\n"
    "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n"
    "Totârä Farm, 2/12543 Farm Road, RD 1, Outram 9073\n\n\n\n"
    "Other addresses around the world:\n"
    "¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯\n"
    "813 Howard Street, Oswego, NY 13126, USA\n"
    "1 Beacon Street, 33rd Floor, Boston, MA 02108, USA\n"
    "Hagagatan 1 vi, SE-113 49 Stockholm, Sweden\n"
    "John A. Smith, ACME Innovations Inc., PO BOX 1033, Los Angeles, CA\n"
    "c/o Henry Roth, 50 Oakland Ave,#206, A City, Florida, 32104, USA\n"
    "10 Downing Street, London, Greater London, SW1A 2AA, United Kingdom\n"
    "Av. de Senalèche 27, 1009 Pully, Switzerland\n"
    "17, Rue Bergère, 75009 Paris, France\n"
    "11 Bis Av. Gabriel Péri, 38150 Roussillon, France\n"
    "Apostel-Paulus-Strasse 7, 10823 Berlin, Deutschland\n"
)
# Find every match of `regex` in `test_str` (case-insensitive; with MULTILINE,
# ^ and $ also match at line boundaries) and print each match with its offsets,
# followed by one line per capture group.
matches = re.finditer(regex, test_str, re.IGNORECASE | re.MULTILINE)
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum, start=match.start(), end=match.end(), match=match.group()))
    # Capture groups are numbered from 1 (group 0 is the whole match, printed above).
    # A group that did not take part in the match reports offsets of -1 and None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum, start=match.start(groupNum), end=match.end(groupNum),
            group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html