import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates matching lines of the form {@code (start, end, token)} with a
 * multi-line, free-spacing regular expression and printing every match with
 * its capture groups.
 */
public class Example {

    /**
     * Free-spacing pattern for one line holding {@code (digits, digits, token)},
     * with optional spaces/tabs around the parentheses and commas.
     *
     * <p>The pattern is compiled with {@link Pattern#COMMENTS}. In that mode Java
     * discards unescaped whitespace everywhere in the pattern, including inside
     * character classes, so a class written as "[ \t]" would only match a tab.
     * The space is therefore spelled {@code \x20} so it survives.
     *
     * <p>The whole tuple is optional, so an empty line also matches (as an empty
     * match whose groups are all {@code null}).
     */
    private static final String REGEX = "^\n"
            + "(?:\n"
            + "  [\\x20\\t]*[(][\\x20\\t]*\n"
            + "  (\\d+)\n"
            + "  [\\x20\\t]*,[\\x20\\t]*\n"
            + "  (\\d+)\n"
            + "  [\\x20\\t]*,[\\x20\\t]*\n"
            + "  (\\S+)\n"
            + "  [\\x20\\t]*[)][\\x20\\t]*\n"
            + ")?\n"
            + "$";

    /**
     * Compiled once. MULTILINE makes {@code ^} and {@code $} anchor at every line;
     * COMMENTS allows the layout whitespace used in {@link #REGEX}.
     */
    private static final Pattern TOKEN_LINE =
            Pattern.compile(REGEX, Pattern.MULTILINE | Pattern.COMMENTS);

    /**
     * Runs the pattern over the built-in sample text and prints, for every match,
     * the full match followed by each capture group (1 = first number,
     * 2 = second number, 3 = token text).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String string = "( 0, 12, Tokenization ) \n"
                + "( 13 , 15 , is ) \n"
                + " ( 16, 22, widely)\n"
                + "( 23, 31, regarded )\n"
                + "(32, 34, as )\n"
                + "(35, 36, a) \n"
                + "(37, 43, solved ) \n"
                + "(44, 51, problem)\n"
                + "(52, 55, due)\n"
                + "(56, 58, to)\n"
                + "(59, 62, the)\n"
                + "(63, 67, high)\n"
                + "(68, 76, accuracy)\n"
                + "(77, 81, that)\n"
                + "(82, 91, rulebased)\n"
                + "(92, 102, tokenizers)\n"
                + "(103, 110, achieve)\n"
                + "(110, 111, .)\n\n"
                + "(0, 3, But)\n"
                + "(4, 14, rule-based)\n"
                + "(15, 25, tokenizers)\n"
                + "(26, 29, are)\n"
                + "(30, 34, hard)\n"
                + "(35, 37, to)\n"
                + "(38, 46, maintain)\n"
                + "(47, 50, and)\n"
                + "(51, 56, their)\n"
                + "(57, 62, rules)\n"
                + "(63, 71, language)\n"
                + "(72, 80, specific)\n"
                + "(80, 81, .)\n\n"
                + "(0, 2, We)\n"
                + "(3, 7, show)\n"
                + "(8, 12, that)\n"
                + "(13, 17, high)\n"
                + "(18, 26, accuracy)\n"
                + "(27, 31, word)\n"
                + "(32, 35, and)\n"
                + "(36, 44, sentence)\n"
                + "(45, 57, segmentation)\n"
                + "(58, 61, can)\n"
                + "(62, 64, be)\n"
                + "(65, 73, achieved)\n"
                + "(74, 76, by)\n"
                + "(77, 82, using)\n"
                + "(83, 93, supervised)\n"
                + "(94, 102, sequence)\n"
                + "(103, 111, labeling)\n"
                + "(112, 114, on)\n"
                + "(115, 118, the)\n"
                + "(119, 128, character)\n"
                + "(129, 134, level)\n"
                + "(135, 143, combined)\n"
                + "(144, 148, with)\n"
                + "(149, 161, unsupervised)\n"
                + "(162, 169, feature)\n"
                + "(170, 178, learning)\n"
                + "(178, 179, .)\n\n"
                + "(0, 2, We)\n"
                + "(3, 12, evaluated)\n"
                + "(13, 16, our)\n"
                + "(17, 23, method)\n"
                + "(24, 26, on)\n"
                + "(27, 32, three)\n"
                + "(33, 42, languages)\n"
                + "(43, 46, and)\n"
                + "(47, 55, obtained)\n"
                + "(56, 61, error)\n"
                + "(62, 67, rates)\n"
                + "(68, 70, of)\n"
                + "(71, 75, 0.27)\n"
                + "(76, 77, ‰)\n"
                + "(78, 79, ()\n"
                + "(79, 86, English)\n"
                + "(86, 87, ))\n"
                + "(87, 88, ,)\n"
                + "(89, 93, 0.35)\n"
                + "(94, 95, ‰)\n"
                + "(96, 97, ( )\n"
                + "(97, 102, Dutch)\n"
                + "(102, 103, ) )\n"
                + "(104, 107, and)\n"
                + "(108, 112, 0.76)\n"
                + "(113, 114, ‰)\n"
                + "(115, 116, ()\n"
                + "(116, 123, Italian)\n"
                + "(123, 124, ))\n"
                + "(125, 128, for)\n"
                + "(129, 132, our)\n"
                + "(133, 137, best)\n"
                + "(138, 144, models)\n"
                + "(144, 145, .)";

        final Matcher matcher = TOKEN_LINE.matcher(string);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            // Groups are null for the empty-line matches allowed by the optional tuple.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html