import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Runs a fixed regular expression over a fixed block of sample text and
 * prints every match (plus any capturing groups) to standard output.
 *
 * <p>The expression matches, in order: an optional consonant from
 * {@code ptkmnlswj}, a vowel whose admissible values depend on the character
 * immediately before it (enforced with lookbehinds), and an optional trailing
 * {@code n} that is only taken when it is not followed by an optional
 * {@code n}/{@code m} and then a vowel.
 */
public class Example {

    /**
     * Pattern source. The line breaks between the pieces are layout only:
     * the pattern is compiled with {@link Pattern#COMMENTS}, which makes the
     * regex engine ignore whitespace inside the expression.
     */
    private static final String SYLLABLE_REGEX =
            "[ptkmnlswj]?\n"
            + "(?:(?<=w)[aei]|\n"
            + "(?<=[jt])[aeou]|\n"
            + "(?<=[pkmnls])[aeiou]|\n"
            + "(?<=\\b)[aeiou])\n"
            + "(?:n(?![nm]?[aeiou]))?";

    /**
     * Input that gets scanned. The '#' headings and '---' separators are part
     * of the input too, so the matcher walks over them like any other text.
     */
    private static final String SAMPLE_TEXT =
            "#Sanity Checks:\n"
            + "---\n"
            + "toki pona\n"
            + "nanpa wan\n"
            + "toki li pona mama tuliwan sulisuli \n"
            + "mi olin e sina\n"
            + "---\n\n"
            + "#These should not work:\n"
            + "---\n"
            + "titijiji\n"
            + "tanna\n"
            + "nanma\n"
            + "---\n\n"
            + "#Some sentences from Wikipesija page for Linguistics:\n"
            + "---\n"
            + "toki Inli (kepeken toki Inli: EngLish) li toki suli.\n"
            + "toki Inli li kama tan ma Piten. taso tenpo ni la jan li kepeken kin e toki Inli lon ma Mewika lon ma Kanata lon ma Oselija lon ma Nusilan. jan li toki kin e toki Inli lon ma ante mute. toki Inli li toki pi ma sike. toki Inli li jo e kepeken mute.\n"
            + "---\n"
            + "a an e en i in o on u un\n"
            + "pa pan pe pen pi pin po pon pu pun\n"
            + "ta tan te ten ti tin to ton tu tun\n"
            + "ka kan ke ken ki kin ko kon ku kun\n"
            + "ma man me men mi min mo mon mu mun\n"
            + "na nan ne nen ni nin no non nu nun\n"
            + "sa san se sen si sin so son su sun\n"
            + "ja jan je jen ji jin jo jon ju jun\n"
            + "la lan le len li lin lo lon lu lun\n"
            + "wa wan we wen wi win wo won wu wun\n\n";

    /** Compiled form of {@link #SYLLABLE_REGEX}: case-insensitive, whitespace in the pattern ignored. */
    private static final Pattern SYLLABLE_PATTERN =
            Pattern.compile(SYLLABLE_REGEX, Pattern.CASE_INSENSITIVE | Pattern.COMMENTS);

    /**
     * Scans {@link #SAMPLE_TEXT} from start to end and reports each match in
     * the order it is found.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final Matcher scanner = SYLLABLE_PATTERN.matcher(SAMPLE_TEXT);
        while (scanner.find()) {
            report(scanner);
        }
    }

    /**
     * Prints the text of the current match, followed by one line per
     * capturing group.
     *
     * @param hit a matcher positioned on a successful match
     */
    private static void report(Matcher hit) {
        System.out.println("Full match: " + hit.group());

        // The pattern above only uses non-capturing groups and lookarounds,
        // so groupCount() is 0 and this loop prints nothing for it; it is
        // kept so that adding a capturing group is reported automatically.
        final int groupTotal = hit.groupCount();
        int groupIndex = 1;
        while (groupIndex <= groupTotal) {
            System.out.println("Group " + groupIndex + ": " + hit.group(groupIndex));
            groupIndex++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html