import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demonstration of {@link Pattern}/{@link Matcher}: scans a fixed,
 * HTML-like sample text and prints every opening or closing {@code html}
 * tag that the regular expression finds.
 */
public class Example {

    /**
     * Matches {@code <}, an optional {@code /}, the literal {@code html}, and
     * then as few characters as possible up to the next {@code >}. The
     * expression declares no capturing groups.
     */
    private static final String HTML_TAG_REGEX = "<\\/?html.*?>";

    /** Sample input: three concatenated HTML-like documents separated by newlines. */
    private static final String SAMPLE_TEXT =
            "<html xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:w=\"urn:schemas-microsoft-com:office:word\" xmlns:x=\"urn:schemas-microsoft-com:office:excel\" xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\" xmlns=\"http://www.w3.org/TR/REC-html40\">\n"
            + "<head>\n"
            + "some head info \n"
            + "</head>\n"
            + "<body>\n"
            + "<div > some content with other HTML tags that I want to preserve </div>\n"
            + "<body>\n"
            + "</html>\n"
            + "<html>\n"
            + "<div> another content with other HTML tags that I want to preserve </div>\n"
            + "</html>\n"
            + "<html xmlns=\"http://www.w3.org/TR/REC-html40\">\n"
            + "<head>\n"
            + "some head info \n"
            + "</head>\n"
            + "<body>\n"
            + "<div> some other content with other HTML tags that I want to preserve </div>\n"
            + "<body>\n"
            + "</html>";

    /**
     * Runs the regular expression over the sample text and reports each match
     * on standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // MULTILINE only changes how ^ and $ behave; the expression uses
        // neither, but the flag is kept exactly as in the generated sample.
        Matcher tagMatcher =
                Pattern.compile(HTML_TAG_REGEX, Pattern.MULTILINE).matcher(SAMPLE_TEXT);
        while (tagMatcher.find()) {
            report(tagMatcher);
        }
    }

    /**
     * Prints the text of the current match followed by one line per
     * capturing group (none are printed when the pattern has no groups).
     *
     * @param hit a matcher positioned on a successful match
     */
    private static void report(Matcher hit) {
        System.out.println("Full match: " + hit.group(0));
        int groupTotal = hit.groupCount();
        for (int idx = 1; idx <= groupTotal; idx++) {
            System.out.println("Group " + idx + ": " + hit.group(idx));
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html