# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern under test. Judging by the sample lines below it is meant to accept
# lines that look like personal names. It is applied per line ((?m)) and
# case-insensitively ((?i)); piece by piece:
#   ^[A-Za-zÀ-ú]            the line starts with a letter
#   (?!(?:.*\.\/\ ){2})     the rest of the line must not contain "./ " twice
#   (?!(?:.* ){10})         ... nor ten or more spaces
#   (?!.*\.[a-z])           ... nor a dot immediately followed by a letter
#   [A-Za-zÀ-ú. '-]{5,99}   5 to 99 letters, dots, spaces, apostrophes, hyphens
#   [A-Za-zÀ-ú]$            the line ends with a letter
# NOTE(review): the À-ú range (U+00C0-U+00FA) also spans the "×" and "÷"
# signs -- confirm whether those should really be accepted.
regex = r"(?mi)^[A-Za-zÀ-ú](?!(?:.*\.\/\ ){2})(?!(?:.* ){10})(?!.*\.[a-z])[A-Za-zÀ-ú. '-]{5,99}[A-Za-zÀ-ú]$"

# Sample input: one candidate per line, mixing lines the pattern accepts with
# lines it rejects (leading/trailing space, "/", trailing "." or "'", too short).
test_str = ("Maria Luiza Barreto\n"
            "Maria Luíza barreto da silva\n"
            "José João\n"
            "José João Paulo\n"
            "José João Silva\n"
            "Maria Silva\n"
            "Maria silva\n"
            "maria Silva\n"
            "MariaSilva\n"
            " Maria Silva\n"
            "Maria Silva \n"
            "Maria da Silva\n"
            "Marina Silva\n"
            "Maria / Silva\n"
            "Maria Silva\n"
            "Maria Silva\n"
            "Maria G. Silva\n"
            "Maria McDuffy\n"
            "Getúlio Dornelles Vargas\n"
            "Maria das Flores\n"
            "John Smith\n"
            "John D'Largy\n"
            "John Doe-Smith\n"
            "John Doe Smith\n"
            "Hector Sausage-Hausen\n"
            "Mathias d'Arras\n"
            "Martin Luther King Jr.\n"
            "Ai Wong\n"
            "Chao Chang\n"
            "Alzbeta Bara\n"
            "Marcos Assunção\n"
            "Maria da Silva e Silva\n"
            "Juscelino Kubitschek De Oliveira\n"
            "Natalia maria\n"
            "Natalia aria\n"
            "Natalia orea\n"
            "Maria dornelas\n"
            "María Luiza\n"
            "Samuel eto'\n"
            "Maria da Costa e Silva\n"
            "Samuel Eto'o\n"
            "Maria Luiza Barreto Da Silva\n"
            "Nathali Grasiela Quintans Andrade da Silva Barreto\n"
            "Nathali G. Q. A. Da S. Barreto\n"
            "María \n"
            " M \n"
            "Petros barreto da silva \n"
            "M D \n"
            "(?<! )[A-Za-zÀ-ú']{2,}\\\\s[A-Za-zÀ-ú' ]{2,}")

# re.MULTILINE duplicates the inline (?m) flag; it is passed explicitly so the
# per-line anchoring is visible at the call site as well.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every match with its 1-based ordinal and its character offsets.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group()))

    # Capturing groups are numbered from 1. The pattern above defines none
    # (only lookaheads and non-capturing groups), so this loop is a no-op for
    # it; it only produces output if capturing groups are added to the pattern.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html