import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scans a multi-line sample of personal names and prints every line that the
 * name-validation pattern accepts.
 *
 * <p>The pattern was originally written with PCRE-only syntax
 * ({@code (?(DEFINE)...)}, {@code (?&name)} subroutine calls, {@code (?#...)}
 * comments and group names containing underscores). {@link Pattern} supports
 * none of these and rejects the expression at compile time, so the expression
 * is assembled here from plain string fragments: every former subroutine is a
 * constant that is inlined wherever it was called.
 */
public class Example {

    // ---- single-character building blocks ----

    /** A letter ({@code \p{L}}) or letter-number ({@code \p{Nl}}). */
    private static final String NAME_CHAR = "[\\p{L}\\p{Nl}]";

    /** Anything that is neither a name character nor a space separator. */
    private static final String NON_NAME_CHAR = "[^\\p{L}\\p{Nl}\\p{Zs}]";

    /** First character of the first name: a letter other than ASCII a-z, or an apostrophe. */
    private static final String FIRST_NAME_START = "(?![a-z])[\\p{L}']";

    /** Any letter other than ASCII a-z. */
    private static final String UPPER_CHAR = "(?![a-z])\\p{L}";

    /** Separators allowed inside a single name part: dash punctuation or an apostrophe. */
    private static final String SOFT_SEPARATOR = "[\\p{Pd}']";

    /** Separator between name parts: a space separator ({@code \p{Zs}}). */
    private static final String HARD_SEPARATOR = "\\p{Zs}";

    /** Either kind of separator. */
    private static final String ANY_SEPARATOR =
            "(?:" + SOFT_SEPARATOR + "|" + HARD_SEPARATOR + ")";

    // ---- combinations that disqualify a line (checked from the line start) ----

    /**
     * Alternation of the rejected shapes, in this order: line starts with a
     * space separator or ASCII lowercase letter; line ends with a character
     * that is not a letter, letter-number, dot or {@code \p{C}} character;
     * a non-name character stands alone between two space separators; three
     * uppercase letters occur in the leading run of name characters.
     */
    private static final String INVALID =
            "(?:"
            + "^[\\p{Zs}a-z]"
            + "|.*[^\\p{L}\\p{Nl}.\\p{C}]$"
            + "|.*" + HARD_SEPARATOR + NON_NAME_CHAR + HARD_SEPARATOR
            + "|(?:" + NAME_CHAR + "*\\p{Lu}){3}"
            + ")";

    // ---- accepted combinations ----

    /**
     * One name part: name characters and soft separators containing at least
     * one name character, with an optional trailing dot. It is an atomic group
     * so that a line which fails later on cannot re-enter the nested
     * quantifiers and backtrack exponentially; the part always extends to the
     * first character that cannot belong to it, so no accepted line is lost.
     */
    private static final String VALID_NAME =
            "(?>(?:(?:" + NAME_CHAR + "|" + SOFT_SEPARATOR + ")*"
            + NAME_CHAR + "+"
            + "(?:" + NAME_CHAR + "|" + SOFT_SEPARATOR + ")*)+\\.?)";

    /** First name: a valid start character followed by a dot or by name parts (atomic, see above). */
    private static final String VALID_FIRST_NAME =
            "(?>" + FIRST_NAME_START + "(?:\\.|" + VALID_NAME + "*))";

    /**
     * First name followed by one or more space-separated name parts; the
     * lookahead requires that some separator later on the line is followed by
     * a letter other than ASCII a-z.
     */
    private static final String VALID_MULTIPLE_NAME =
            VALID_FIRST_NAME
            + "(?=.*" + ANY_SEPARATOR + UPPER_CHAR + ")"
            + "(?:" + HARD_SEPARATOR + VALID_NAME + ")+";

    /** A full name: several parts, or a first name on its own. */
    private static final String VALID =
            "(?:" + VALID_MULTIPLE_NAME + "|" + VALID_FIRST_NAME + ")";

    /**
     * Whole-line matcher, compiled once. {@link Pattern#MULTILINE} makes
     * {@code ^} and {@code $} anchor at every line of the sample text.
     * {@code Pattern.COMMENTS} is not needed because the assembled expression
     * contains no layout whitespace.
     */
    private static final Pattern NAME_PATTERN =
            Pattern.compile("^(?!" + INVALID + ")" + VALID + "$", Pattern.MULTILINE);

    /**
     * Runs the pattern over the built-in sample and prints each accepted line
     * as {@code Full match: <line>}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String string = "== 1NcOrrect N4M3S ==\n"
                + "CAPITAL LETTER\n"
                + "AlTeRnAtE LeTtEr\n"
                + "Natalia maria\n"
                + "Natalia aria\n"
                + "Natalia orea\n"
                + "Maria dornelas\n"
                + "Samuel eto'\n"
                + "Miguel lasagna\n"
                + "Antony1 de Home Ap*ril\n"
                + "Ap*ril Willians\n"
                + "Antony_ de Home Apr+il\n"
                + "Ant_ony de Home Apr#il\n"
                + "Antony@ de Ho@me Apr^il\n"
                + "Maria Silva\n"
                + "Maria silva\n"
                + "maria Silva\n"
                + " Maria Silva\n"
                + "Maria Silva \n"
                + "Maria / Silva\n"
                + "Maria . Silva\n"
                + "John W8\n\n"
                + "==Correct Names==\n"
                + "Urxan Əbűlhəsənzadə\n"
                + "İsmət Jafarov\n"
                + "Şükür Hagverdiyev\n"
                + "Űmid Abdurrahimov\n"
                + "Ġerardo Seralta\n"
                + "Ċikku Paris\n"
                + "Hind ibn Sheik\n"
                + "Colop-U-Uichikin\n"
                + "Lażżru Role\n"
                + "Alaksiej Taraškievič\n"
                + "Petruso Husoǔski\n"
                + "Sumu-la-El\n"
                + "Valeh ßlÿsgÿroğlu\n"
                + "'Arab al-Rashayida\n"
                + "Tariq al-Hashimi\n"
                + "Nabeeh el-Mady\n"
                + "Tariq Al-Hashimi\n"
                + "Brian O'Conner\n"
                + "Maria da Silva\n"
                + "Maria Silva\n"
                + "Maria G. Silva\n"
                + "Maria McDuffy\n"
                + "Getúlio Dornelles Vargas\n"
                + "Maria das Flores\n"
                + "John Smith\n"
                + "John D'Largy\n"
                + "John Doe-Smith\n"
                + "John Doe Smith\n"
                + "Hector Sausage-Hausen\n"
                + "Mathias d'Arras\n"
                + "Martin Luther King Jr.\n"
                + "Ai Wong\n"
                + "Chao Chang\n"
                + "Alzbeta Bara\n"
                + "Marcos Assunção\n"
                + "Maria da Silva e Silva\n"
                + "Juscelino Kubitschek de Oliveira\n"
                + "Maria da Costa e Silva\n"
                + "Samuel Eto'o\n"
                + "María Antonieta de las Nieves\n"
                + "Eugène\n"
                + "Antòny de Homé April\n"
                + "àntony de Home ùpril\n"
                + "Antony de Home Aprìl\n"
                + "Pierre de l'Estache\n"
                + "Pierre de L'Estoile\n"
                + "Akihito\n"
                + "Nadine Schröder\n"
                + "Anna A. Møller\n"
                + "D. Pedro I\n"
                + "Pope Benedict XVI\n"
                + "Marsibil Ragnarsdóttir\n"
                + "Natanaël Morel\n"
                + "Isaac De la Croix\n"
                + "Jean-Michel Bozonnet\n"
                + "Qutaibah Mu'tazz Abadi\n"
                + "Rushd Jawna' Kassab\n"
                + "Khaldun Abdul-Qahhar Sabbag\n"
                + "'Awad Bashshar Asker\n"
                + "Al B. Zellweger\n"
                + "Gunnleif Snæ-Ulfsson\n"
                + "Käre Toresson\n"
                + "Sorli Ærnmundsson\n"
                + "Arnkel Øystæinsson\n"
                + "Ástríður Dórey\n"
                + "Åsmund Kåresson\n"
                + "Yahatti-Il\n"
                + "Ipqu-Annunitum\n"
                + "Nabu-zar-adan\n"
                + "Eskopas Cañaverri\n"
                + "Botolph of Langchester\n"
                + "Aelfhun the Cantrell\n"
                + "Fraco di Natale\n"
                + "Fraco Di Natale\n"
                + "Iván de Luca\n"
                + "Iván De Luca\n"
                + "Man'nah\n"
                + "Atabala Aüamusalü\n"
                + "Ramiz Ağasəfalu\n"
                + "Dadaş Aghakhanov\n"
                + "Fÿrxad Mübarizlı\n"
                + "Vaclaǔ Šupa\n"
                + "Yakiv Volacič\n"
                + "Flor Van Vaerenbergh\n"
                + "Flor van Vaerenbergh\n"
                + "Edwin van der Sar\n"
                + "Husein Ekmečić\n"
                + "Álvaro Guimarães Alencar\n"
                + "Phone U Yaza Arkar\n"
                + "Seocan MacGhille\n"
                + "X'wat'e Tlekadugovy\n"
                + "Albert-Jan Bootsveld\n"
                + "Maurits-jan Kuipers op den Kollenstaart\n"
                + "Elco ter Hoek\n"
                + "Robbert te Poele\n"
                + "Aad ten Have\n"
                + "'Ehu Kali\n"
                + "Ho'opa'a Loni\n"
                + "Aukanai'i Mahi'ai\n"
                + "Kalman ben Tal El\n"
                + "Żytomir Roszkowski\n"
                + "K'awai\n\n"
                + "==EXTRA== only if possible, strange ones\n"
                + "Maol-Moire Mac'IlleBhuidh\n"
                + "Tòmas MacIlleChruim\n"
                + "Aindreas MacIllEathain\n"
                + "Eanruig MacGilleBhreac\n"
                + "Peadar MacGilleDhonaghart\n"
                + "Maolmhuire MacGill-Eain\n"
                + "Eanruig MacGilleBhreac\n"
                + "Wim van 't Plasman";

        final Matcher matcher = NAME_PATTERN.matcher(string);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            // The assembled pattern uses only non-capturing groups, so
            // groupCount() is 0 and this loop prints nothing; it is kept so
            // that any capturing group added later is reported automatically.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html