import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits a small XML-like sample document into tokens with one regular expression and prints
 * every match together with all of its capture groups.
 *
 * <p>Capture groups, in the order they are printed:
 * <ol>
 *   <li>{@code cdata} - content of a CDATA section</li>
 *   <li>{@code xml} - a complete processing instruction / XML declaration</li>
 *   <li>{@code comment} - content of a comment</li>
 *   <li>{@code tag} - element name</li>
 *   <li>{@code quote} - quote character of the last attribute value seen</li>
 *   <li>(unnamed) - the last attribute value seen, without its quotes</li>
 *   <li>(unnamed) - the closing quote of that attribute value</li>
 *   <li>{@code innerHTML} - everything between an opening tag and its closing tag</li>
 *   <li>{@code text} - a run of characters containing no {@code <}</li>
 * </ol>
 *
 * <p>This is a tokenising demo, not an XML parser: {@code innerHTML} is matched greedily, so the
 * element match runs up to the <em>last</em> closing tag with the same name in the input.
 */
public class Example {

    /**
     * Element / attribute name: an ASCII letter or underscore followed by letters, digits,
     * {@code _}, {@code .}, {@code :} or {@code -}. The ranges are spelled out explicitly because
     * {@code [A-z]} would also accept the punctuation that sits between 'Z' and 'a' in ASCII.
     */
    private static final String NAME = "[A-Za-z_][A-Za-z0-9_.:-]*";

    /** The token expression, one alternative per kind of markup. */
    private static final String REGEX =
            "(?:"
            // CDATA section
            + "<!\\[CDATA\\[(?<cdata>.*?)\\]\\]>"
            // processing instruction / XML declaration
            + "|(?<xml><\\?.*?\\?>)"
            // comment
            + "|<!--(?<comment>.*?)-->"
            // opening tag name
            + "|<\\s*(?<tag>" + NAME + ")"
            // zero or more attributes, each with an optional quoted value; inside a value a
            // backslash followed by the active quote character is consumed as an escaped quote
            + "(?:\\s+(?:" + NAME + ")\\s*"
            + "(?:=\\s*(?<quote>[\"'])((?:\\\\\\k<quote>|(?:(?!\\k<quote>)).)*)(\\k<quote>)\\s*)?)*"
            // either self-closing, or content followed by the matching closing tag
            + "\\s*(?:/\\s*>|>(?<innerHTML>.*)?<\\s*/\\s*\\k<tag>\\s*>)"
            // plain text; '+' (not '*') so that an empty string is never reported as a match
            + "|(?<text>[^<]+)"
            + ")";

    /**
     * Compiled once. DOTALL lets '.' cross line breaks so multi-line comments, CDATA sections and
     * element bodies match. MULTILINE is not needed: the expression has no '^' or '$' anchors.
     */
    private static final Pattern TOKEN = Pattern.compile(REGEX, Pattern.DOTALL);

    /** Sample document exercising every alternative of the expression. */
    private static final String SAMPLE = "<?xml version='1.0' encoding='shift_jis'?>\n"
            + "<!-- foo -->\n"
            + "<foo>bar</foo>\n"
            + "Baz\n"
            + "<foo asdf=\"foo\" />\n"
            + "<CharacterInfo defaultUid=''>\n"
            + "<Character \n"
            + " name='??????'\n"
            + " uid='511111'\n"
            + " weapon='??' \n"
            + " HR='7' GR='0' lastLogin='1645561498'\n"
            + " sex='M' />\n"
            + "<Character name ='Potatoe/>sss' uid= '511111' weapon = '??' HR='7' GR=\"0\" lastLogin='1645561498' sex='M' />\n"
            + "</CharacterInfo>\n\n\n"
            + "Plain text\n"
            + "<hr/>\n"
            + "Multi\n"
            + "line\n"
            + "text\n"
            + "<br />\n"
            + "<!-- \n"
            + "This is a comment!\n"
            + "<foo />\n"
            + "-->\n"
            + "<foo \n"
            + "bar/>\n"
            + "<baz asdf=\"2\"></baz>\n\n"
            + "<asd foo=\"asd\\\"q'we\" / >\n"
            + "<b>Hello, <i>World</i>!</b>\n\n"
            + "<Character \n"
            + " name='??????'\n"
            + " uid='511111'\n"
            + " weapon='??' \n"
            + " HR='7' GR='0' lastLogin='1645561498'\n"
            + " sex='M' />\n"
            + "<Character name ='Potatoe/>sss' uid= '511111' weapon = '??' HR='7' GR=\"0\" lastLogin='1645561498' sex='M' />\n\n"
            + "<![CDATA[]]>\n"
            + "<![CDATA[Foobar<>]]>\n"
            + "<![CDATA[\n"
            + "asdasdasd\n"
            + "adsd\n"
            + "sd\n"
            + "sd\n"
            + "]]>\n";

    /**
     * Scans the sample document and writes each match to standard output: first a
     * {@code "Full match: ..."} line, then one {@code "Group N: ..."} line per capture group.
     * A group that did not take part in the match is printed as {@code null}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final Matcher matcher = TOKEN.matcher(SAMPLE);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
// If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html