# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for validating a personal name occupying one whole line (the script
# applies it with re.MULTILINE, so ^ and $ anchor at each line).
#   ^(?![ ])              - the line must not begin with a space.
#   (?!.*(?:\d|[ ]{2}|[...]))
#                         - reject the line if anything after the start is a
#                           digit, two consecutive spaces, or one of the
#                           listed punctuation characters.
#   (?: particle | word )+ - one or more tokens, each optionally followed by
#                           whitespace (lazy \s*?):
#       particle: e, da, do, das, dos, de, d', D', la, las, el, los, l'
#       word:     starts with A-Z, one of the listed accented/special
#                 letters, or an apostrophe, then any non-whitespace run.
#   (?!.*[ ]$)            - follows the word alternative; rejects a remainder
#                           of the line that ends in a space.
# All groups are non-capturing, so a match exposes only group 0.
regex = r"^(?![ ])(?!.*(?:\d|[ ]{2}|[!$%^&*()_+|~=`\{\}\[\]:\";<>?,\/]))(?:(?:e|da|do|das|dos|de|d'|D'|la|las|el|los|l')\s*?|(?:[A-ZàáâäãåąčćęèéêëėįìíîïłńòóôöõøùúûüųūÿýżźñçčšžÀÁÂÄÃÅĄĆČĖĘÈÉÊËÌÍÎÏĮŁŃÒÓÔÖÕØÙÚÛÜŲŪŸÝŻŹÑßÇŒÆČŠŽ∂ð'][^\s]*\s*?)(?!.*[ ]$))+$"
# Sample input for the pattern: one candidate name per line. Every entry,
# including the last, is terminated by a newline when the text is assembled.
test_str = "".join(
    sample + "\n"
    for sample in (
        "Maria Silva",
        "Maria silva",
        "maria Silva",
        "MariaSilva",
        " Maria Silva",
        "Maria Silva ",
        "Maria da Silva",
        "Marina Silva",
        "Maria / Silva",
        "Maria . Silva",
        "Maria Silva",
        "Maria G. Silva",
        "Maria McDuffy",
        "Getúlio Dornelles Vargas",
        "Maria das Flores",
        "John Smith",
        "John D'Largy",
        "John Doe-Smith",
        "John Doe Smith",
        "Hector Sausage-Hausen",
        "Mathias d'Arras",
        "Martin Luther King Jr.",
        "Ai Wong",
        "Chao Chang",
        "Alzbeta Bara",
        "Marcos Assunção",
        "Maria da Silva e Silva",
        "Juscelino Kubitschek de Oliveira",
        "Natalia maria",
        "Natalia aria",
        "Natalia orea",
        "Maria dornelas",
        "Samuel eto'",
        "Maria da Costa e Silva",
        "Samuel Eto'o",
        "María Antonieta de las Nieves",
        "Eugène",
        "Antòny de Homé April",
        "àntony de Home ùpril",
        "Antony de Home Aprìl",
        "Antony1 de Home Ap*ril",
        "Ap*ril Willians",
        "Antony_ de Home Apr+il",
        "Ant_ony de Home Apr#il",
        "Antony@ de Ho@me Apr^il",
        "Pierre de l'Estache",
        "Pierre de L'Estoile",
        "Akihito",
    )
)
# Scan the sample text for every match of the pattern. re.MULTILINE makes
# ^ and $ anchor at each line of test_str; re.UNICODE is passed as in the
# generated sample (relevant to Python 2, see the compatibility note below).
matches = re.finditer(regex, test_str, re.MULTILINE | re.UNICODE)
for matchNum, match in enumerate(matches, start=1):
    # Report the whole match (group 0) together with its span.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
    # Capture groups are numbered from 1, so iterate 1..len(groups) directly
    # instead of offsetting a zero-based index inside the loop.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html