import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates a regex substitution that strips everything before and after a
 * pair of loosely matched sentences, keeping only the sentences and the text
 * between them (capture group 2 of {@link #REGEX}).
 */
public class Example {

    /**
     * Verbose pattern, written for {@link Pattern#COMMENTS} mode: whitespace inside
     * the pattern is ignored and everything from {@code #} to the end of a line is a
     * comment. Group 1 is the leading text, group 2 spans from the first sentence to
     * the end of the second one, and the last group is the trailing text.
     */
    private static final String REGEX = "(.*?) # Data before sentences (to be removed)\n"
            + "( # Capture Both sentences and text in between\n"
            + " H.*?e.*?l.*?l.*?o.*?\\s # Hello[space]\n"
            + " (<.*?>)* # Optional Opening Tag(s)\n"
            + " 進.*?撃.*?の.*?巨.*?人.*? # 進撃の巨人\n"
            + " (<\\/.*?>)* # Optional Closing Tag(s)\n"
            + " (.*?) # Optional Data in between sentences\n"
            + " (<.*?>)* # Optional Opening Tag(s)\n"
            + " L.*?o.*?r.*?e.*?m.*?\\s # Lorem[space]\n"
            + " (<.*?>)* # Optional Opening Tag(s)\n"
            + " i.*?p.*?s.*?u.*?m.*? # ipsum\n"
            + ")\n"
            + "(.*) # Data after sentences (to be removed)";

    /**
     * Compiled once. DOTALL lets {@code .} cross line terminators, which the
     * multi-line input below relies on; COMMENTS enables the verbose syntax above.
     */
    private static final Pattern PATTERN =
            Pattern.compile(REGEX, Pattern.DOTALL | Pattern.COMMENTS);

    /**
     * Replacement referring to capture group 2. Java uses {@code $n} for group
     * references in replacement strings; a backslash merely escapes the following
     * character, so {@code "\\2"} would insert the literal character {@code 2}.
     */
    private static final String REPLACEMENT = "$2";

    /**
     * Runs the substitution against a fixed sample document and prints the result
     * to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String string = "\n"
                + "<html>\n"
                + "<body>\n"
                + "<header>Hello <p> </p> 進撃<em>の巨</人!</em></header>\n"
                + "random code\n"
                + "random code\n"
                + "<p>Lorem <span>ipsum<span>.<p>\n"
                + "</body>\n"
                + "</html>";

        final Matcher matcher = PATTERN.matcher(string);

        // Each match is replaced by the text captured in group 2.
        final String result = matcher.replaceAll(REPLACEMENT);

        System.out.println("Substitution result: " + result);
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
// If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html