import re
# Demo script: split sample identifiers/names into word-like pieces and print
# the position and text of every match.
#
# In the pattern, "lowercase" means a-z plus à-ö and ø-ÿ, and "uppercase"
# means A-Z plus À-Ö and Ø-Þ. The alternatives are tried in this order:
#   1. a run of lowercase letters, apostrophes or backticks that is not
#      preceded by a lowercase letter
#   2. one uppercase letter followed by any number of lowercase letters,
#      apostrophes or backticks
#   3. lowercase letters, an apostrophe, then optional lowercase letters
#   4. lowercase letters directly followed by "_" or "-"
#   5. lowercase letters directly followed by an uppercase letter
#   6. a run of digits
#
# NOTE: re.MULTILINE only changes how ^ and $ behave; the pattern uses
# neither, so the flag has no effect here. It is kept as generated.
regex = re.compile(r"(?<![a-zà-öø-ÿ])[a-zà-öø-ÿ'`]+|[A-ZÀ-ÖØ-Þ][a-zà-öø-ÿ'`]*|[a-zà-öø-ÿ]+'[a-zà-öø-ÿ]*|[a-zà-öø-ÿ]+(?=[_-])|[a-zà-öø-ÿ]+(?=[A-ZÀ-ÖØ-Þ])|\d+", flags=re.MULTILINE)

# Sample input, one case per line: plain words, camelCase / PascalCase,
# "-" and "_" separators, apostrophe and backtick contractions, and names
# containing accented vowels.
test_str = (
    "tset\n"
    "mestTest\n"
    "Sest-test\n"
    "zest-ests\n"
    "best_Tets\n"
    "Last_etss\n"
    "LestTset\n"
    "veryLongCamel\n"
    "VeryLongCamel\n"
    "wasn't\n"
    "wasn`t\n"
    "Wasn't\n"
    "Wasn`t\n"
    "Guárin-Lassous\n"
    "Guérin-Lassous\n"
    "Guírin-Lassous\n"
    "Guórin-Lassous\n"
    "Guúrin-Lassous\n"
    "Guârin-Lassous\n"
    "Guêrin-Lassous\n"
    "Guîrin-Lassous\n"
    "Guôrin-Lassous\n"
    "Guûrin-Lassous\n"
    "Guürin-Lassous\n"
    "Guärin-Lassous\n"
    "Guërin-Lassous\n"
    "Guïrin-Lassous\n"
    "Guörin-Lassous\n"
    "Guürin-Lassous\n"
)

# finditer returns a lazy, single-use iterator of match objects.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # The pattern has no capturing groups, so match.groups() is empty and
    # this loop prints nothing; it only produces output if groups are added.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html