import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that extracts every balanced {@code <div class="aawp">...</div>} block from an HTML
 * snippet and prints it.
 *
 * <p>The sample was originally written as a single PCRE-style pattern that used sub-pattern
 * recursion ({@code (?1)}) to follow nested tags. {@code java.util.regex} has no recursion
 * construct, so that pattern cannot be compiled by {@link Pattern#compile(String, int)}.
 * The same grammar is implemented here with small, non-recursive token patterns driven by
 * a recursive-descent matcher:
 *
 * <pre>
 *   node    := text | void-tag | comment | element
 *   element := open-tag node* matching-close-tag
 * </pre>
 *
 * <p>Unlike a backtracking regex, the matcher commits to the first alternative that matches,
 * so void elements are recognised by their exact tag name.
 */
public class Example {

    /** Opening tag that marks the start of a block to extract. */
    private static final Pattern BLOCK_START = Pattern.compile("<div[ ]class=\"aawp\">");

    /** Character data: one or more characters that are not angle brackets. */
    private static final Pattern TEXT = Pattern.compile("[^<>]+");

    /** A void element (never has a closing tag), e.g. {@code <br>} or {@code <img src="x">}. */
    private static final Pattern VOID_TAG = Pattern.compile(
            "<(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)\\b[^>]*>");

    /** An HTML comment; DOTALL lets it span several lines. */
    private static final Pattern COMMENT = Pattern.compile("<!--.*?-->", Pattern.DOTALL);

    /** Any opening tag; group 1 captures the tag name so the matching close tag can be required. */
    private static final Pattern OPEN_TAG = Pattern.compile("<(\\w+)[^>]*>");

    /**
     * Scans the sample document and prints each balanced {@code <div class="aawp">} block.
     *
     * <p>For every block three lines are printed, mirroring the capture groups of the
     * original pattern: the full match, group 1 (the same text) and group 2 (the name of
     * the outermost tag).
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String string = "<div class=\"aawp\">\n"
                + "<br>\n"
                + "<div>\n"
                + " <div>asd</div>\n"
                + "</div>\n"
                + "</div>\n\n"
                + "<div class=\"aawp\">\n"
                + " <div id=\"aawp-tb-445\">\n"
                + " <div class=\"aawp-tb aawp-tb--desktop aawp-tb--cols-5 aawp-tb--hide-labe\">\n"
                + " <a>a</a>\n"
                + " <br />\n"
                + " <img src=\"abc\">\n"
                + " <hr>\n"
                + " </div>\n"
                + " <div class=\"aawp-tb aawp-tb--desktop aawp-tb--cols-5 aawp-tb--hide-labe\">\n"
                + " ...\n"
                + " </div>\n"
                + " <div class=\"aawp-tb aawp-tb--desktop aawp-tb--cols-5 aawp-tb--hide-labe\">\n"
                + " ...\n"
                + " </div>\n"
                + " </div>\n"
                + "</div>\n\n"
                + "<div class=\"aawp\">\n"
                + " ...\n"
                + "</div>\n\n"
                + "<div class=\"aawp\">\n"
                + " <div id=\"aawp-tb-445\">\n"
                + " <div class=\"aawp-tb aawp-tb--desktop aawp-tb--cols-5 aawp-tb--hide-labe\">\n"
                + " ...\n"
                + " </div>\n"
                + " </div>\n"
                + "</div>\n\n\n"
                + "<div class=\"aawp\">\n"
                + " <div> x </div>\n"
                + " <div> x </div>\n"
                + "</div>\n\n\n";

        final Matcher blockStart = BLOCK_START.matcher(string);
        int searchFrom = 0;
        while (blockStart.find(searchFrom)) {
            final int start = blockStart.start();
            final int end = matchElement(string, start);
            if (end < 0) {
                // Unbalanced block: skip this opening tag and keep scanning after it.
                searchFrom = start + 1;
                continue;
            }

            final String block = string.substring(start, end);
            System.out.println("Full match: " + block);
            System.out.println("Group 1: " + block);
            final Matcher outerTag = OPEN_TAG.matcher(block);
            if (outerTag.lookingAt()) {
                System.out.println("Group 2: " + outerTag.group(1));
            }

            // Resume after the block so nested content is never reported twice.
            searchFrom = end;
        }
    }

    /**
     * Matches one complete element (opening tag, any number of child nodes, and the closing
     * tag with the same name) starting exactly at {@code from}.
     *
     * @param html the document being scanned
     * @param from index at which the opening tag must begin
     * @return the index just past the closing tag, or {@code -1} if no balanced element
     *         starts at {@code from}
     */
    private static int matchElement(String html, int from) {
        final Matcher open = OPEN_TAG.matcher(html).region(from, html.length());
        if (!open.lookingAt()) {
            return -1;
        }

        // Consume children greedily; every successful node advances by at least one char.
        int pos = open.end();
        for (int next = matchNode(html, pos); next >= 0; next = matchNode(html, pos)) {
            pos = next;
        }

        final String closing = "</" + open.group(1) + ">";
        return html.startsWith(closing, pos) ? pos + closing.length() : -1;
    }

    /**
     * Matches a single node at {@code from}, trying text, void tag, comment and nested
     * element in that order.
     *
     * @param html the document being scanned
     * @param from index at which the node must begin
     * @return the index just past the node, or {@code -1} if nothing matches there
     */
    private static int matchNode(String html, int from) {
        int end = matchAt(TEXT, html, from);
        if (end < 0) {
            end = matchAt(VOID_TAG, html, from);
        }
        if (end < 0) {
            end = matchAt(COMMENT, html, from);
        }
        if (end < 0) {
            end = matchElement(html, from);
        }
        return end;
    }

    /**
     * Applies {@code pattern} anchored at {@code from}.
     *
     * @param pattern token pattern to try
     * @param html    the document being scanned
     * @param from    index at which the match must begin
     * @return the index just past the match, or {@code -1} if the pattern does not match there
     */
    private static int matchAt(Pattern pattern, String html, int from) {
        final Matcher matcher = pattern.matcher(html).region(from, html.length());
        return matcher.lookingAt() ? matcher.end() : -1;
    }
}
// Please keep in mind that these code samples are automatically generated and are not
// guaranteed to work. If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit:
// https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html