import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small regex demonstration: scans a fixed HTML snippet with a pattern that
 * matches either a complete tag or a run of tag-free text ending in the
 * literal word {@code querry}, and prints every match with all of its groups.
 *
 * <p>Capture groups of the pattern, in order:
 * <ol>
 *   <li>a whole tag, including the surrounding {@code <} and {@code >};</li>
 *   <li>the element name when the tag is one of the listed raw-content
 *       elements ({@code script}, {@code style}, ...);</li>
 *   <li>the attribute text captured by the lookahead of such an element;</li>
 *   <li>the text (free of {@code <} and {@code >}) preceding {@code querry};</li>
 *   <li>the literal {@code querry}.</li>
 * </ol>
 * Groups that did not take part in a match are printed as {@code null}.
 */
public class Example {

    /**
     * The pattern source, split by alternative for readability. The pieces
     * concatenate to one single-line expression.
     */
    private static final String TAG_OR_QUERRY_REGEX =
            // Group 1 opens: "<" followed by one of the tag bodies below.
            "(<(?:"
            // Raw-content element together with its body and closing tag.
            + "(?:(?:(script|style|object|embed|applet|noframes|noscript|noembed)(?:\\s+(?=((?:\"[\\S\\s]*?\"|'[\\S\\s]*?'|(?:(?!\\/>)[^>])?)+))\\3)?\\s*>)[\\S\\s]*?<\\/\\2\\s*(?=>))"
            // Opening/closing/self-closing tag without attributes.
            + "|(?:\\/?[\\w:]+\\s*\\/?)"
            // Tag with attributes; quoted values may contain any character.
            + "|(?:[\\w:]+\\s+(?:\"[\\S\\s]*?\"|'[\\S\\s]*?'|[^>]?)+\\s*\\/?)"
            // "<? ... ?>" form.
            + "|\\?[\\S\\s]*?\\?"
            // "<! ... >" forms: DOCTYPE, CDATA, comments, ATTLIST, ENTITY, ELEMENT.
            + "|(?:!(?:(?:DOCTYPE[\\S\\s]*?)|(?:\\[CDATA\\[[\\S\\s]*?\\]\\])|(?:--[\\S\\s]*?--)|(?:ATTLIST[\\S\\s]*?)|(?:ENTITY[\\S\\s]*?)|(?:ELEMENT[\\S\\s]*?)))"
            // Group 1 closes after the terminating ">".
            + ")>)"
            // Otherwise: tag-free text (group 4) followed by "querry" (group 5).
            + "|([^<>]*?)(querry)";

    /** The HTML snippet that is searched. */
    private static final String SAMPLE_HTML =
            "<body class=\"mediawiki querry ltr sitedir-ltr ns-0 ns-subject page-International_English_Language_Testing_System skin-vector action-view vector-animateLayout\">\n"
            + " <div id=\"mw-page-base\" class=\"noprint\"></div>\n"
            + " <div id=\"mw-head-base\" class=\"noprint\"></div>\n"
            + " <div id=\"content\" class=\"mw-body\" role=\"main\">\n"
            + " <a id=\"top\"></a>\n\n"
            + "and also a normal querry\n\n"
            + " <div id=\"siteNotice\"><!-- CentralNotice --></div>\n"
            + " <h1 id=\"firstHeading\" class=\"firstHeading\" lang=\"en\"><span dir=\"auto\">International English Language Testing System</span></h1>\n"
            + " <div id=\"bodyContent\" class=\"mw-body-content\">\n"
            + " <div id=\"siteSub\">From Wikipedia, the free encyclopedia</div>\n"
            + " <div id=\"contentSub\"> (Redirected from <a href=\"/w/index.php?title=IELTS&redirect=no\" title=\"IELTS\">IELTS</a>)</div>\n"
            + " <div id=\"jump-to-nav\" class=\"mw-jump\">\n"
            + " Jump to: <a href=\"#mw-navigation\">navigation</a>, <a href=\"#p-search\">search</a>\n"
            + " </div>\n"
            + " <div id=\"mw-content-text\" lang=\"en\" dir=\"ltr\" class=\"mw-content-ltr\"><table class=\"infobox vevent\" cellspacing=\"3\" style=\"border-spacing:3px;width:22em;\">\n"
            + "<caption class=\"summary\">International English Language Testing System</caption>\n"
            + "<tr>\n"
            + "<td colspan=\"2\" style=\"text-align:center;\"><a href=\"/wiki/File:IELTS_logo.svg\" class=\"image\"><img alt=\"IELTS logo.svg\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/7/7f/IELTS_logo.svg/300px-IELTS_logo.svg.png\" width=\"300\" height=\"113\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/7/7f/IELTS_logo.svg/450px-IELTS_logo.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/7/7f/IELTS_logo.svg/600px-IELTS_logo.svg.png 2x\" data-file-width=\"778\" data-file-height=\"293\" /></a></td>\n"
            + "</tr>\n"
            + "<tr>\n"
            + "<th scope=\"row\" style=\"text-align:left;\">Acronym</th>\n"
            + "<td>IELTS</td>\n"
            + "</tr>\n"
            + "<tr>\n"
            + "<th scope=\"row\" style=\"text-align:left;\">Type</th>\n"
            + "<td>Standardized test. Available in 2 versions: \"Academic\" and \"General training\".</td>\n"
            + "</tr>\n"
            + "<tr>\n"
            + "<th scope=\"row\" style=\"text-align:left;\">Developer / administrator</th>\n"
            + "<td><a href=\"/wiki/Cambridge_English_Language_Assessment\" title=\"Cambridge English Language Assessment\">Cambridge English Language Assessment</a>, <a href=\"/wiki/British_Council\" title=\"British Council\">British Council</a>, <a href=\"/wiki/IDP_Education\" title=\"IDP Education\">IDP Education</a>.</td>\n"
            + "</tr>\n"
            + "<tr>\n"
            + "<th scope=\"row\" style=\"text-align:left;\">Knowledge/skill(s) tested</th>\n"
            + "<td>Listening, reading, writing and speaking of the <a href=\"/wiki/English_language\" title=\"English language\">English language</a>.</td>\n"
            + "</tr>\n"
            + "<tr>\n"
            + "<th scope=\"row\" style=\"text-align:left;\">Purpose</th>\n"
            + "<td>To assess the English language proficiency of non-native English speakers.</td>\n"
            + "</tr>\n";

    /**
     * Runs the pattern over the sample snippet and prints each successive
     * match to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // The expression has no ^ or $ anchors, so MULTILINE does not alter
        // what it matches; the flag is kept as the sample was generated with it.
        final Pattern compiled = Pattern.compile(TAG_OR_QUERRY_REGEX, Pattern.MULTILINE);
        final Matcher hits = compiled.matcher(SAMPLE_HTML);
        while (hits.find()) {
            printHit(hits);
        }
    }

    /**
     * Prints the current match of {@code hit}: first the full matched text,
     * then one line per capture group, numbered from 1.
     */
    private static void printHit(Matcher hit) {
        System.out.println("Full match: " + hit.group());
        final int groupTotal = hit.groupCount();
        for (int idx = 1; idx <= groupTotal; idx++) {
            final String captured = hit.group(idx); // null when the group did not participate
            System.out.println("Group " + idx + ": " + captured);
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html