import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates a regular expression that picks raw HTML blocks out of
 * Markdown-like text and prints every match together with its capture groups.
 *
 * <p>The expression was originally written for PCRE and relied on constructs
 * that {@link java.util.regex.Pattern} rejects (underscores in group names,
 * duplicate group names, subroutine calls, a DEFINE block, conditional groups,
 * {@code \g{name}} back-references and POSIX bracket classes), so compiling it
 * always threw {@code PatternSyntaxException}. It is ported here to Java syntax:
 * <ul>
 *   <li>group names are camelCase and unique;</li>
 *   <li>the shared attribute sub-pattern is inlined as a non-capturing
 *       fragment ({@link #ATTRIBUTE});</li>
 *   <li>back-references use {@code \k<name>};</li>
 *   <li>blank / horizontal / vertical whitespace classes use {@code \h} and
 *       {@code \v};</li>
 *   <li>the conditional on the leading-space group is replaced by its "yes"
 *       branch, because that group always participates in the match (it may
 *       match the empty string), so the condition was always true.</li>
 * </ul>
 */
public class Example {

    /**
     * One HTML attribute, {@code name=value}, with optional horizontal
     * whitespace around it. The value is either unquoted (no quotes or
     * horizontal whitespace inside) or wrapped in matching single or double
     * quotes and running up to the next quote of the same kind.
     * The fragment contains no capturing groups, so it can be inlined more
     * than once without disturbing group numbering.
     */
    private static final String ATTRIBUTE =
            "(?:"
            // Unquoted value.
            + "\\h*[\\w\\-]+\\h*=[^\"'\\h]+\\h*"
            + "|"
            // Quoted value; the closing quote must be the same as the opening one.
            + "\\h*[\\w\\-]+\\h*=\\h*(?:\"[^\"]*\"|'[^']*')\\h*"
            + ")";

    /**
     * Full expression. Capture groups, in order: {@code leadingSpace},
     * {@code tagAll}, {@code divOpen}, {@code tagName}, {@code tagAttributes},
     * {@code content}, {@code divClose}, {@code htmlComment},
     * {@code standaloneTagName}, {@code standaloneTagAttributes}.
     */
    private static final String HTML_BLOCK_REGEX =
            // Necessarily at the beginning of a new line or at the start of the input.
            "(?:(?<=\\n)|\\A)"
            + "(?<leadingSpace>[ ]{0,3})"
            + "(?<tagAll>"
            // Alternative 1: an opening tag, its content and the matching closing tag.
            + "(?:"
            + "(?<divOpen><(?<tagName>\\S+)(?<tagAttributes>" + ATTRIBUTE + ")*[^>]*>\\n*)"
            + "(?<content>.+?)"
            + "\\n*"
            + "(?<divClose>"
            // Either the closing tag starts a line with the same indentation as
            // the opening tag, or it follows preceding data on the same line.
            + "(?:(?<=\\n)\\k<leadingSpace>|(?<=\\S)\\h*)"
            + "</\\k<tagName>>"
            + ")"
            + "\\h*\\n"
            + ")"
            + "|"
            // Alternative 2: an HTML comment.
            + "(?:<!--[\\h\\v]*(?<htmlComment>.*?)[\\h\\v]*-->)"
            + "|"
            // Alternative 3: a single tag, optionally self-closing.
            + "(?:<[\\h\\v]*(?<standaloneTagName>[a-zA-Z0-9][\\w\\-]+)"
            + "(?<standaloneTagAttributes>" + ATTRIBUTE + ")*"
            + "[\\h\\v]*/?[\\h\\v]*>)"
            + ")";

    /**
     * Compiled once. DOTALL lets {@code .} cross line breaks so that
     * {@code content} can span several lines. COMMENTS mode is deliberately not
     * used: the expression is documented with Java comments instead, and in
     * Java that mode would also strip the literal space inside {@code [ ]}.
     */
    private static final Pattern HTML_BLOCK =
            Pattern.compile(HTML_BLOCK_REGEX, Pattern.DOTALL | Pattern.MULTILINE);

    /** Sample Markdown input containing inline and block-level HTML. */
    private static final String SAMPLE =
            "<abbr title=\"`first backtick!\">SB</abbr> \n\n"
            + "<abbr title=\"`second backtick!\">SB</abbr>\n\n"
            + "<table>\n"
            + "<tr><td markdown=\"block\">test _emphasis_ (block)</td></tr>\n"
            + "</table>\n\n"
            + "## More complicated\n\n"
            + "<table>\n"
            + "<tr><td markdown=\"1\">\n"
            + "* this is _not_ a list item</td></tr>\n"
            + "<tr><td markdown=\"span\">\n"
            + "* this is _not_ a list item</td></tr>\n"
            + "<tr><td markdown=\"block\">\n"
            + "* this _is_ a list item\n"
            + "</td></tr>\n"
            + "</table>\n";

    /**
     * Runs the expression over the sample text and prints, for each match, the
     * full match followed by every capture group by number. Groups that did not
     * participate in a match are printed as {@code null}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final Matcher matcher = HTML_BLOCK.matcher(SAMPLE);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html