import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demonstration of {@link Pattern}/{@link Matcher}: runs one fixed
 * regular expression over a fixed HTML-like sample and prints every match
 * together with each of its capturing groups to standard output.
 */
public class Example {

    /**
     * Expression under test. It has two alternatives:
     * <ul>
     *   <li>a zero-width look-ahead that succeeds at a {@code <} whose text up
     *       to the next {@code <} contains whitespace followed by
     *       {@code title="..."}; the attribute value is captured in group 1
     *       and the overall match is empty;</li>
     *   <li>an {@code <a ...>} opening tag followed by the text up to the next
     *       {@code <}, captured in group 2. Group 3 repeats the same character
     *       class right after the greedy group 2, so it only ever captures the
     *       empty string.</li>
     * </ul>
     */
    private static final String REGEX =
            "(?=<[^<]*[\\s]+title=\"([^\"]*)\")|<\\s*a\\s+[^<]*>([^<]*)([^<]*)";

    /** Sample markup the expression is applied to. */
    private static final String SAMPLE =
            "<p title=\"caca\"></p>\n"
            + "<p> </p>\n"
            + "<b>\n"
            + " \n"
            + " </b> \n"
            + "<B>title=\" gdd \"</B>\n"
            + "<i title=\"caca\">dsfgjdgfs</i>\n"
            + "<i title=\" boudin \">dsfgjdgfs</i>\n"
            + "<i> </i>\n"
            + "<i> </i>\n"
            + "<strong></strong>\n"
            + "<i style=\"color: orange\"></i>\n"
            + "<a href=\"#\"> bizurot </a> !probleme \n"
            + "<o: class=\"ms-crap\"></o>\n"
            + "<a href=\"#\" title=\" kameleo \" > test </a> !probleme <a href=\"#\"> cramoute </a> !probleme \n\n"
            + "<a href=\"#\" title=\" banane \" > test <strong>hello</strong> asaidali </a><a href=\"#\"> hdsfahsdgf </a> !probleme </br>\n\n"
            + "<p>Not empty</p>\n"
            + "<p>Not \n"
            + " empty</p>\n"
            + "<i> \n\n"
            + " </i>";

    /** Compiled form of {@link #REGEX}. */
    private static final Pattern COMPILED = Pattern.compile(REGEX);

    /**
     * Scans {@link #SAMPLE} from start to end and reports each match found.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Matcher scanner = COMPILED.matcher(SAMPLE);
        while (scanner.find()) {
            report(scanner);
        }
    }

    /**
     * Prints the current match of {@code hit}: first the whole matched text,
     * then one line per capturing group in ascending order. A group that did
     * not take part in the match is printed as {@code null}.
     *
     * @param hit matcher positioned on a successful match
     */
    private static void report(Matcher hit) {
        System.out.println("Full match: " + hit.group(0));
        final int groups = hit.groupCount();
        int index = 1;
        while (index <= groups) {
            System.out.println("Group " + index + ": " + hit.group(index));
            index++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html