import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Demo program that tries to validate a small XHTML document with a single
 * regular expression and prints every match together with its groups.
 *
 * <p>The expression is written in PCRE syntax: it relies on a
 * {@code (?(DEFINE)...)} group, named subroutine calls such as
 * {@code (?&XHTML)}, and a relative reference written as a backslash followed
 * by {@code g'-1'}. {@code java.util.regex} implements none of these, so
 * {@link Pattern#compile(String, int)} rejects the expression. Instead of
 * letting that failure escape as an uncaught exception, {@link #main} reports
 * it on standard error and returns.
 */
public class Example {

    /** The PCRE-flavoured expression, kept exactly as it was generated. */
    private static final String REGEX = "(?(DEFINE)\n"
            + " (?<XHTML>\n"
            + " (?&DOCTYPE)\n"
            + " \\s*\n"
            + " (?&HTML)\n"
            + " )\n\n"
            + " # HTML element\n"
            + " (?<HTML>\n"
            + " <html(?&attrs)?>(?&content)<\\/html>\n"
            + " )\n\n"
            + " # Match content\n"
            + " (?<content>\n"
            + " \\s*\n"
            + " (?:\n"
            + " ((?&tag) | [^<>]+)\\s*\n"
            + " )*\n"
            + " )\n\n"
            + " # General tag\n"
            + " (?<tag>\n"
            + " <((?&tagname))(?&attrs)?\\s*(?:\n"
            + " \\/>|\n"
            + " >\\s*(?&content)\\s*\n"
            + " <\\/\\g'-1'>\n"
            + " )\n"
            + " )\n\n"
            + " # Attributes\n"
            + " (?<attrs>\\s+\n"
            + " # The name\n"
            + " (?&keyword) (\n"
            + " \\s*=\\s*\n"
            + " (?:\n"
            + " (?&keyword)|\n"
            + " \"(?:\\\\.|.)+?\"|\n"
            + " '(?:\\\\.|.)+?'\n"
            + " )\n"
            + " )?\n"
            + " (?&attrs)?\n"
            + " )\n\n"
            + " # Match keyword\n"
            + " (?<keyword>[^\\s\\/>\"'=]+)\n"
            + " # Match tag name\n"
            + " (?<tagname>(?!xml)[A-Za-z_][A-Za-z\\d_.-]*)\n\n"
            + " # DOCTYPE expression\n"
            + " (?<DOCTYPE>\n"
            + " <!doctype\\s+x?html>\n"
            + " )\n"
            + ")\n\n"
            + "(?&XHTML)";

    /** Sample document the expression is applied to. */
    private static final String INPUT = "<!doctype html>\n"
            + " <html style=\"''inva'lid css I know\">textttt<head></head><body><div class=\"onfoodstamps\"><div class=\"upper\">foo<p>ayyy</p>bar</div>baz</div><br/></head></html>";

    /** Free-spacing mode with comments, case-insensitive matching, and '.' matching line breaks. */
    private static final int FLAGS = Pattern.COMMENTS | Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

    /**
     * Compiles {@link #REGEX} and, if that succeeds, prints every match found
     * in {@link #INPUT}. When the expression cannot be compiled, a diagnostic
     * is written to standard error and the method returns without matching.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final Pattern pattern;
        try {
            pattern = Pattern.compile(REGEX, FLAGS);
        } catch (PatternSyntaxException e) {
            // java.util.regex has no DEFINE groups or subroutine calls, so the
            // PCRE expression above is rejected at compile time. Say so plainly
            // rather than dying with a bare stack trace.
            System.err.println("Pattern rejected by java.util.regex: "
                    + e.getDescription() + " (near index " + e.getIndex() + ")");
            System.err.println("The expression uses PCRE-only constructs such as (?(DEFINE)...), "
                    + "(?&name) and \\g'-1'; run it with a PCRE engine instead.");
            return;
        }
        printMatches(pattern.matcher(INPUT));
    }

    /**
     * Prints the full text of each successive match followed by one line per
     * capturing group; a group that did not participate prints as {@code null}.
     *
     * @param matcher matcher positioned before the first match attempt
     */
    private static void printMatches(Matcher matcher) {
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            final int groups = matcher.groupCount();
            for (int i = 1; i <= groups; i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that this code sample was generated automatically and is not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html