import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates matching place names that contain diacritics, one name per line.
 *
 * <p>The sample text is scanned line by line; every line made up solely of the
 * characters allowed by {@link #REGEX} is printed as a "Full match".
 */
public class Example {

    /**
     * Whole-line pattern: one or more of hyphen, the okina, apostrophe, backtick,
     * the ASCII letters, the listed accented letters, or a space.
     */
    private static final String REGEX = "^[-ʻ'`a-zġūĀñēöåàíáüóéōāī ]+$";

    /**
     * Compiled once and reused.
     *
     * <ul>
     *   <li>{@code MULTILINE} makes {@code ^} and {@code $} anchor at every line
     *       of the input rather than only at its start and end.</li>
     *   <li>{@code CASE_INSENSITIVE} alone only folds US-ASCII letters in Java, so
     *       {@code UNICODE_CASE} is required for the accented letters in the class
     *       to also match their uppercase forms (the class lists the lowercase
     *       o-with-macron only, while several sample names start with the
     *       uppercase one).</li>
     * </ul>
     */
    private static final Pattern PATTERN = Pattern.compile(
            REGEX,
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.MULTILINE);

    /** Sample input: a source URL followed by state/territory headings and place names. */
    private static final String SAMPLE_TEXT =
            "https://en.wikipedia.org/wiki/List_of_U.S._cities_with_diacritics\n\n"
            + "Alaska\n\n"
            + "Alaska\n"
            + "Utqiaġvik\n\n\n"
            + "American Samoa\n"
            + "Āfono\n"
            + "Ālega\n"
            + "'Āmanave\n"
            + "Āmouli\n"
            + "Aūa\n"
            + "Fagasā\n"
            + "Faleāsao\n"
            + "Lumā\n"
            + "Tāfuna\n\n\n"
            + "California\n"
            + "La Cañada Flintridge\n"
            + "Piñon Hills\n"
            + "San José\n\n\n"
            + "Colorado\n"
            + "Cañon\n"
            + "Cañon City\n"
            + "Piñon\n"
            + "Piñon\n"
            + "Piñon Acres\n\n\n"
            + "Guam\n"
            + "Hagåtña\n"
            + "Hagåtña Heights\n\n\n"
            + "Hawaii\n"
            + "ʻĀhuimanu\n"
            + "ʻ````Āinaloa\n"
            + "Hanapēpē\n"
            + "Haʻikū-Pauwela\n"
            + "Hālawa\n"
            + "Hāliʻimaile\n"
            + "Hāmoa\n"
            + "Hāna\n"
            + "Hāʻōʻū\n"
            + "Hāwī\n"
            + "Hīlea\n"
            + "Hōlualoa\n"
            + "Hōnaunau-Nāpōʻopoʻo\n"
            + "Honokōhau\n"
            + "Hoʻōpūloa\n"
            + "Kāʻanapali\n"
            + "Kaimū\n"
            + "Kākiʻo\n"
            + "Kalāheo\n"
            + "Kamalō\n"
            + "Kāneʻohe\n"
            + "Kaupō\n"
            + "Kaʻūpūlehu\n"
            + "Keālia\n"
            + "Kēōkea\n"
            + "Kēōkea\n"
            + "Kīhei\n"
            + "Kīholo\n"
            + "Kīlauea\n"
            + "Kīpahulu\n"
            + "Kīpū\n"
            + "Kōloa\n"
            + "Kūkaʻiau\n"
            + "Kūkiʻo\n"
            + "Lāʻie\n"
            + "Lānaʻi City\n"
            + "Laupāhoehoe\n"
            + "Lāwaʻi\n"
            + "Līhuʻe\n"
            + "Māʻalaea\n"
            + "Māʻili\n"
            + "Mākaha\n"
            + "Mākaha Valley\n"
            + "Mākena\n"
            + "Mānā\n"
            + "Mokulēʻia\n"
            + "Mōpua\n"
            + "Mūʻolea\n"
            + "Nāʻālehu\n"
            + "Nāhiku\n"
            + "Nānākuli\n"
            + "Nānāwale Estates\n"
            + "Nāpili-Honokōwai\n"
            + "Nīnole\n"
            + "Nīnole\n"
            + "ʻŌmaʻo\n"
            + "ʻŌmaʻopio\n"
            + "ʻŌʻōkala\n"
            + "Pāʻauhau\n"
            + "Pāhala\n"
            + "Pāhoa\n"
            + "Pāʻia\n"
            + "Pākalā Village\n"
            + "Pālehua\n"
            + "Pāpā Bay Estates\n"
            + "Pāpaʻaloa\n"
            + "Pāpaʻikou\n"
            + "Poʻipū\n"
            + "Puaʻākala\n"
            + "Pūʻālaʻa\n"
            + "Puakō\n"
            + "Pūkoʻo\n"
            + "Pūlehu\n"
            + "Pūpūkea\n"
            + "Puʻunēnē\n"
            + "Wahiawā\n"
            + "Wahīlauhue\n"
            + "Waikāne\n"
            + "Waikapū\n"
            + "Waimānalo\n"
            + "Waimānalo Beach\n"
            + "Waiʻōhinu\n"
            + "Waipāhoehoe\n"
            + "Welokā\n\n\n"
            + "Louisiana\n"
            + "Pointe à la Hache\n"
            + "West Pointe à la Hache\n\n\n"
            + "Minnesota\n"
            + "Arnesén\n"
            + "Lindström\n\n\n"
            + "New Mexico\n"
            + "Cañada de los Alamos\n"
            + "Cañon\n"
            + "Cañon\n"
            + "Cañoncito\n"
            + "Cañoncito\n"
            + "Cañoncito\n"
            + "Cañoncito\n"
            + "Cañones\n"
            + "Doña Ana\n"
            + "Española\n"
            + "Lower Cañones\n"
            + "Peña Blanca\n"
            + "Peñasco\n"
            + "Peñasco Blanco\n"
            + "Piñon\n"
            + "Señorito\n\n\n"
            + "Puerto Rico\n"
            + "Añasco\n"
            + "Bayamón\n"
            + "Canóvanas\n"
            + "Cataño\n"
            + "Comerío\n"
            + "Guánica\n"
            + "Juana Díaz\n"
            + "Las Marías\n"
            + "Loíza\n"
            + "Manatí\n"
            + "Mayagüez\n"
            + "Peñuelas\n"
            + "Rincón\n"
            + "Río Grande\n"
            + "San Germán\n"
            + "San Sebastián\n"
            + "Cañabón\n"
            + "Castañer\n"
            + "Río Piedras\n"
            + "Texas\n"
            + "César Chávez\n"
            + "La Peñusca\n"
            + "Lopeño\n"
            + "Salineño\n"
            + "Salineño North\n";

    /**
     * Prints every line of the sample text that fully matches the pattern.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final Matcher matcher = PATTERN.matcher(SAMPLE_TEXT);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            // The current pattern declares no capturing groups, so this loop does
            // nothing today; it reports groups if any are added to REGEX later.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not
// guaranteed to work. If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit:
// https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html