import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scans a block of table-style text for code point entries and prints every
 * match together with all of its capturing groups.
 *
 * <p>The pattern was originally written with PCRE-style named groups
 * ({@code (?'name'...)}), reused the same group name in several alternatives
 * and used underscores in names. {@link Pattern} accepts none of these, so
 * compiling it threw {@link java.util.regex.PatternSyntaxException}. This
 * version is a port to Java syntax:
 * <ul>
 *   <li>named groups are spelled {@code (?<name>...)};</li>
 *   <li>every name is unique and alphanumeric; the trailing digit is the
 *       number of the alternative the group belongs to (for example
 *       {@code char4}, {@code comment4}, {@code rangeStart7});</li>
 *   <li>{@link Pattern#COMMENTS} is no longer used, because the alternatives
 *       are concatenated without layout whitespace inside the pattern.</li>
 * </ul>
 * Groups of alternatives that did not take part in a match are reported as
 * {@code null}.
 */
public class Example {

    /** One value written as 4 to 7 characters from {@code A-F} and {@code 0-9}. */
    private static final String HEX = "[A-F0-9]{4,7}";

    /**
     * The line pattern: nine alternatives, tried in order at each position.
     * {@link Pattern#MULTILINE} is needed only by alternative 8, the single
     * alternative anchored with {@code ^}.
     */
    static final Pattern PATTERN = Pattern.compile(
            // 1: value; free text, then at least three space-separated values; comment
            "(?<char1>" + HEX + ");(?<replace1a>.*)"
                    + "(?:\\s(?<replace1b>" + HEX + "))+(?=\\s)"
                    + "(?:\\s(?<replace1c>" + HEX + "))+(?=\\s)"
                    + "(?:\\s(?<replace1d>" + HEX + "))+; (?<comment1>.*)\\n"
            // 2: as 1, but with at least two space-separated values
            + "|(?<char2>" + HEX + ");(?<replace2a>.*)"
                    + "(?:\\s(?<replace2b>" + HEX + "))+(?=\\s)"
                    + "(?:\\s(?<replace2c>" + HEX + "))+; (?<comment2>.*)\\n"
            // 3: as 2, without the leading free text
            + "|(?<char3>" + HEX + ");"
                    + "(?:\\s(?<replace3a>" + HEX + "))+(?=\\s)"
                    + "(?:\\s(?<replace3b>" + HEX + "))+; (?<comment3>.*)\\n"
            // 4: value; arbitrary replacement text (possibly blank); comment
            + "|(?<char4>" + HEX + ");(?<replace4>.*); (?<comment4>.*)\\n"
            // 5: start-end range followed by a comment
            + "|(?<rangeStart5>" + HEX + ")-(?<rangeEnd5>" + HEX + "); (?<comment5>.*)\\n"
            // 6: single value followed by a comment
            + "|(?<char6>" + HEX + "); (?<comment6>.*)\\n"
            // 7: bare start-end range
            + "|(?<rangeStart7>" + HEX + ")-(?<rangeEnd7>" + HEX + ")\\n"
            // 8: bare value on a line that starts with three spaces.
            // NOTE(review): the sample lines start with a single space, so this
            // alternative matches none of them - confirm whether the sample's
            // indentation was collapsed when it was pasted.
            + "|^ {3}(?<char8>" + HEX + ")\\n"
            // 9: "Start Table X.n" / "End Table X.n" marker followed by " -----"
            + "|(?<appendix>(?<appendixType>Start|End)\\sTable\\s"
                    + "(?<appendixNumber>(\\w).(?<appendixOrder>(\\d)))(?= -----\\n))",
            Pattern.MULTILINE);

    /**
     * Sample input scanned by {@link #main(String[])}. It contains the text
     * "RFC 3454", so it is presumably an excerpt of that document's tables.
     */
    static final String SAMPLE = " F001-AB12\n"
            + " 0221\n"
            + " 0234-024F\n"
            + " 02AE-02AF\n"
            + " 03AB; 03CB; Case map\n"
            + " 03B0; 03C5 0308 0301; Case map\n"
            + " 03C2; 03C3; Case map\n"
            + " 03B0; 03C5 0308 0301 0A1B; Case map\n"
            + " ----- Start Table A.1 -----\n"
            + " 03D0; 03B2; Case map\n"
            + " 03D1; 03B8; Case map\n"
            + " ----- End Table A.1 -----\n"
            + " 03D2; 03C5; Additional folding\n"
            + " 03D3; 03CD; Additional folding\n"
            + " 00DF; 0073 0073; Case map\n"
            + " 037B-037D\n"
            + " 037F-0383\n"
            + " 038B\n"
            + "Hoffman & Blanchet Standards Track [Page 89]\n"
            + "\n"
            + "RFC 3454 Preparation of Internationalized Strings December 2002\n"
            + " 1806; ; Map to nothing\n"
            + " 1806; ; Map to nothing\n"
            + " 1806; ; Map to nothing\n"
            + " 1806; ; Map to nothing\n"
            + " F0000-FFFFD\n"
            + " 100000-10FFFD\n"
            + " F0000\n"
            + " 013B; 013C; Case map\n"
            + " 013D; 013E; Case map\n"
            + " 0080-009F; [CONTROL CHARACTERS]\n"
            + " 06DD; ARABIC END OF AYAH\n"
            + " 070F; SYRIAC ABBREVIATION MARK\n"
            + " 180E; MONGOLIAN VOWEL SEPARATOR\n"
            + " 200C; ZERO WIDTH NON-JOINER\n"
            + " 200D; ZERO WIDTH JOINER\n"
            + " 2028; LINE SEPARATOR\n"
            + " 2029; PARAGRAPH SEPARATOR\n"
            + " 2060; WORD JOINER\n"
            + " 2061; FUNCTION APPLICATION\n"
            + " 2062; INVISIBLE TIMES\n"
            + " 013F; 0140; Case map\n"
            + " 0141; 0142; Case map\n"
            + " 0143; 0144; Case map\n"
            + " 0145; 0146; Case map\n"
            + " 0147; 0148; Case map\n"
            + " 0149; 02BC 006E; Case map\n"
            + " 014A; 014B; Case map\n"
            + " 014C; 014D; Case map\n";

    /**
     * Prints each match of {@link #PATTERN} in {@link #SAMPLE}, followed by
     * every capturing group by index.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final Matcher matcher = PATTERN.matcher(SAMPLE);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html