# Prints the part of a document that runs from the sentence "Hello 進撃の巨人"
# through the sentence "Lorem ipsum", dropping the data before and after it.
# Input that does not contain both sentences is printed unchanged; to print
# nothing in that case, change the `-e 'b'` line in extract_sentences to
# `-e 'd'`.

#######################################
# Reduce stdin to the span "Hello ... 進撃の巨人 ... Lorem ... ipsum".
#
# The letters of each sentence may be separated by arbitrary text (markup,
# line breaks). "Hello" and "Lorem" must be followed, possibly after other
# text, by a whitespace character and optional <tags> that sit directly in
# front of the next word.
#
# sed's extended regular expressions have no lazy quantifiers, no
# free-spacing mode and no "dot matches newline" flag, so:
#   * the whole input is first joined into the pattern space, where `.` and
#     bracket expressions also match the embedded newlines;
#   * the expression is assembled from named shell fragments instead of
#     being commented inline;
#   * "shortest match" is emulated: `[^H]*` pins the span to the first `H`,
#     and a loop keeps cutting the span back while a strictly shorter
#     prefix still matches.
#
# Globals:   none
# Arguments: none
# Outputs:   the extracted span (or the unchanged input) on stdout
# Returns:   exit status of sed
#######################################
extract_sentences() {
  local -r space='[[:space:]]'
  local -r tags='(<.*>)*' # optional opening tag(s)
  local -r hello="H.*e.*l.*l.*o.*${space}"
  local -r title='進.*撃.*の.*巨.*人'
  local -r lorem="L.*o.*r.*e.*m.*${space}"
  local -r ipsum='i.*p.*s.*u.*m'
  # The single `.*` between the sentences also covers the optional closing
  # tags, free text and optional opening tags that may appear there.
  local -r span="${hello}${tags}${title}.*${lorem}${tags}${ipsum}"

  # 1. :slurp  - append every remaining line to the pattern space.
  # 2. first s - drop the data before the first `H` and after the span.
  # 3. b       - no match: skip the loop and print the input as it is.
  # 4. :shrink - while a proper prefix still matches, keep only that prefix.
  sed -E \
    -e ':slurp' -e '$!{' -e 'N' -e 'b slurp' -e '}' \
    -e "s/^[^H]*(${span}).*\$/\\1/" \
    -e 't shrink' \
    -e 'b' \
    -e ':shrink' \
    -e "s/^(${span}).+\$/\\1/" \
    -e 't shrink'
}

extract_sentences <<'EOF'
<html>
<body>
<header>Hello <p> </p> 進撃<em>の巨</人!</em></header>
random code
random code
<p>Lorem <span>ipsum<span>.<p>
</body>
</html>
EOF
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full reference on sed's `s` command, please visit:
# https://www.gnu.org/software/sed/manual/html_node/The-_0022s_0022-Command.html