import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demonstration harness: runs one regular expression over a fixed
 * HTML-like sample and prints, for every match, the matched text followed
 * by each capture group.
 *
 * <p>The expression matches the opening of a {@code div}, {@code ul} or
 * {@code li} tag. Two zero-width lookaheads, both confined to the current
 * tag because they cannot step over a closing angle bracket, then capture:
 * <ol>
 *   <li>group 1 - the value of the tag's {@code class} attribute (required);</li>
 *   <li>group 2 - the value of a {@code data-*} attribute (optional; the
 *       group is {@code null} when the tag has none).</li>
 * </ol>
 *
 * <p>NOTE(review): the tag-name alternation is not followed by a word
 * boundary, so it would also match the start of longer tag names that begin
 * with one of the three (for example a {@code link} tag). The sample text
 * contains no such tag - confirm whether that is acceptable for real input.
 */
public class Example {

    /** Pattern source; the match itself is only the tag opening, attributes are captured via lookahead. */
    private static final String TAG_ATTRIBUTE_REGEX =
            "<(?:div|ul|li)(?=[^>]*\\bclass=\"([^\"]+)\")(?=(?:[^>]*\\bdata-\\w+=\"([^\"]+)\")?)";

    /** Fixed input: three paragraphs of prose followed by three nested HTML fragments. */
    private static final String SAMPLE_TEXT =
            "With preg_match_all I want to get class and data-attributes in html.\n\n"
            + "I asked a similar question before. The correct answer to the previous responsibility was done with DOM. But as an alternative to the DOM structure, I also need a regex version.\n\n"
            + "The pattern works fine. However, if the lines are side-by-side, they also take class names from tags that should not be accepted.\n\n\n"
            + "<div class=\"noproblem\"> \n"
            + " <ul class=\"noproblem\" data-ss=\"1\">\n"
            + " <li class=\"noproblem\" data-ss=\"1\">\n"
            + " <!-- <i> is not my tag. but there s no problem with that. because it s underneath . -->\n"
            + " <i class=\"no_problem\"></i>\n"
            + " </li>\n"
            + " </ul>\n"
            + "</div>\n\n"
            + "<div class=\"noproblem\" data-ss=\"1\"> <!-- problem: data-ss is not accepted -->\n"
            + " <ul class=\"noproblem\" data-ss=\"1\">\n"
            + " <!-- <i> is not my tag. my tags: div|ul|li . -->\n"
            + " <li class=\"noproblem\"><i class=\"this_is_problem\"></i>\n"
            + " </li>\n"
            + " </ul>\n"
            + "</div>\n\n"
            + "<div class=\"noproblem\">\n"
            + " <ul class=\"noproblem\">\n"
            + " <!-- <i> is not my tag. my tags: div|ul|li . -->\n"
            + " <li class=\"noproblem\"><i class=\"this_is_problem\"></i>\n"
            + " </li>\n"
            + " <!-- <span> is not my tag. my tags: div|ul|li . -->\n"
            + " <li class=\"test\"><span class=\"this_is_problem\"></span></li>\n"
            + " <!-- (li class empty version): <span> is not my tag. my tags: div|ul|li . -->\n"
            + " <li><span class=\"this_is_problem\"></span></li>\n"
            + " </ul>\n"
            + "</div>";

    /**
     * Scans the sample text and writes every match to standard output.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        // MULTILINE only changes how line anchors behave; the pattern has
        // none, so the flag is kept purely to mirror the original call.
        final Matcher hits =
                Pattern.compile(TAG_ATTRIBUTE_REGEX, Pattern.MULTILINE).matcher(SAMPLE_TEXT);

        // The number of capture groups is a property of the pattern, so it
        // is read once rather than on every iteration.
        final int captureCount = hits.groupCount();

        while (hits.find()) {
            report(hits, captureCount);
        }
    }

    /**
     * Prints the current match of {@code hit}: the whole match first, then
     * groups 1 through {@code captureCount}, one per line.
     */
    private static void report(Matcher hit, int captureCount) {
        System.out.println("Full match: " + hit.group());

        int group = 1;
        while (group <= captureCount) {
            // A group that did not take part in the match is printed as "null".
            System.out.println("Group " + group + ": " + hit.group(group));
            group++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html