# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern under test. It has five capture groups, numbered by opening
# parenthesis:
#   1  an optional, repeatable comma-terminated leading segment
#   2  the word-plus-whitespace part at the start of that segment
#   3  a whitespace-led word inside that segment
#   4  a whitespace-led word of the segment preceding the 5-digit code
#   5  the five-digit number ([\d]{5})
# For the repeated groups (1-4) `re` reports only the last repetition, and an
# optional group that took no part in the match is reported as None at
# position -1.
# The "-" inside each character class follows a completed range (A-Z), so
# Python's `re` treats it as a literal hyphen.
regex = r"(([a-zA-Z-éÉèÈàÀùÙâÂêÊîÎôÔûÛïÏëËüÜçÇæœ'.]*\s)\d*(\s[a-zA-Z-éÉèÈàÀùÙâÂêÊîÎôÔûÛïÏëËüÜçÇæœ']*)*,)*\d*(\s[a-zA-Z-éÉèÈàÀùÙâÂêÊîÎôÔûÛïÏëËüÜçÇæœ']*)+,\s([\d]{5})\s[a-zA-Z-éÉèÈàÀùÙâÂêÊîÎôÔûÛïÏëËüÜçÇæœ']+"

# Sample input: one address-like entry per line.
test_str = ("Apt. 120, 9 Quai de Caumartin, 21305 Tourcoing\n"
            "8 étage, 229 Boulevard de Caumartin, 41224 Clichy\n"
            "8116 Boulevard Pierre Charron, 51703 Bordeaux\n"
            "9 Quai de Caumartin, 21305 Tourcoing\n"
            "Apt. 901, 8431 Boulevard Vaneau, 85330 Mérignac\n"
            "314 Passage de la Férronnerie, 76818 Villejuif\n"
            "1 étage, 6645 Avenue des Grands Augustins, 47851 Lyon\n"
            "Apt. 889, 301 Place Adolphe Mille, 04320 Issy-les-Moulineaux\n"
            "Apt. 470, 147 Allée, Voie des Grands Augustins, 37097 Asnières-sur-Seine\n"
            "Apt. 356, 1 Allée, Voie de l'Abbaye, 83865 Champigny-sur-Marne\n")


def report_lines(found):
    """Yield the report lines for an iterable of match objects.

    For every match (numbered from 1) one "Match ..." line is produced with
    the span and text of the whole match, followed by one "Group ..." line
    per capture group of the pattern (also numbered from 1) with that
    group's span and text.

    Args:
        found: iterable of ``re`` match objects, e.g. from ``re.finditer``.

    Yields:
        The formatted lines, in the order they should be printed.
    """
    for match_num, match in enumerate(found, start=1):
        yield "Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = match_num, start = match.start(), end = match.end(), match = match.group())
        # Group 0 is the whole match (already reported above), so the
        # capture groups run from 1 up to and including the group count.
        for group_num in range(1, match.re.groups + 1):
            yield "Group {groupNum} found at {start}-{end}: {group}".format(groupNum = group_num, start = match.start(group_num), end = match.end(group_num), group = match.group(group_num))


# re.MULTILINE only changes the meaning of ^ and $; the pattern uses neither,
# so the flag has no effect here. It is kept to leave the call unchanged.
matches = re.finditer(regex, test_str, re.MULTILINE)
for line in report_lines(matches):
    print(line)
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html