// Extracts the span of $str that starts at "Hello 進撃の巨人" and ends at
// "Lorem ipsum", tolerating arbitrary characters (e.g. markup) between the
// individual letters, and discards everything before and after that span.
//
// Pattern modifiers:
//   s - "." also matches newlines, so the match may span several lines
//   x - whitespace and "# ..." comments inside the pattern are ignored
//   u - pattern and subject are treated as UTF-8, so "." consumes whole
//       characters (the pattern contains multibyte text)
$re = '/(.*?) # Data before sentences (to be removed)
( # Capture Both sentences and text in between
H.*?e.*?l.*?l.*?o.*?\s # Hello[space]
(<.*?>)* # Optional Opening Tag(s)
進.*?撃.*?の.*?巨.*?人.*? # 進撃の巨人
(<\/.*?>)* # Optional Closing Tag(s)
(.*?) # Optional Data in between sentences
(<.*?>)* # Optional Opening Tag(s)
L.*?o.*?r.*?e.*?m.*?\s # Lorem[space]
(<.*?>)* # Optional Opening Tag(s)
i.*?p.*?s.*?u.*?m.*? # ipsum
)
(.*) # Data after sentences (to be removed)/sxu';

// Sample subject: an HTML fragment with the two sentences broken up by tags.
$str = '
<html>
<body>
<header>Hello <p> </p> 進撃<em>の巨</人!</em></header>
random code
random code
<p>Lorem <span>ipsum<span>.<p>
</body>
</html>';

// Replacement is the second capture group (both sentences plus the text in
// between). It must be single-quoted: inside double quotes "\2" is an octal
// escape producing the byte 0x02, not a backreference.
$subst = '$2';

$result = preg_replace($re, $subst, $str);

// preg_replace() returns null on failure (e.g. invalid UTF-8 in the subject or
// a backtrack-limit hit); surface that instead of echoing an empty result.
if ($result === null) {
    throw new RuntimeException('preg_replace failed with PCRE error code ' . preg_last_error());
}

echo "The result of the substitution is ".$result;
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for PHP, please visit: http://php.net/manual/en/ref.pcre.php