// include the latest version of the regex crate in your Cargo.toml
extern crate regex;
use regex::Regex;
/// HTML void elements: they never have a closing tag, so they must not take
/// part in open/close balancing.
const VOID_ELEMENTS: [&str; 14] = [
    "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param",
    "source", "track", "wbr",
];

/// The exact opening tag an element must start with to be reported.
const TARGET_OPEN_TAG: &str = "<div class=\"aawp\">";

/// Whether a tag opens or closes an element.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TagKind {
    Open,
    Close,
}

/// One structural (non-comment, non-void) tag found in the input.
#[derive(Debug, Clone, Copy)]
struct Tag<'a> {
    kind: TagKind,
    /// Tag name exactly as written (comparison is case-sensitive).
    name: &'a str,
    /// Full tag text, from `<` to `>`.
    text: &'a str,
    /// Byte offset of the leading `<`.
    start: usize,
    /// Byte offset one past the trailing `>`.
    end: usize,
}

/// Collects every opening/closing tag of `html` in document order.
///
/// HTML comments are matched as a whole and dropped, so tags written inside a
/// comment are ignored. Opening tags of void elements are dropped as well.
fn structural_tags(html: &str) -> Vec<Tag<'_>> {
    // Only syntax supported by the `regex` crate: no look-around, no
    // backreferences, no recursion. Group 1 is the optional `/`, group 2 the
    // tag name; neither participates when the comment alternative matches.
    let tag_re = Regex::new(r"(?s)<!--.*?-->|<(/?)(\w+)[^>]*>")
        .expect("tag pattern uses only syntax supported by the regex crate");

    tag_re
        .captures_iter(html)
        .filter_map(|caps| {
            let name = caps.get(2)?.as_str(); // `None` => this token is a comment
            let whole = caps.get(0)?;
            let is_close = caps.get(1).map_or(false, |slash| !slash.as_str().is_empty());
            let kind = if is_close { TagKind::Close } else { TagKind::Open };
            if kind == TagKind::Open && VOID_ELEMENTS.contains(&name) {
                return None;
            }
            Some(Tag {
                kind,
                name,
                text: whole.as_str(),
                start: whole.start(),
                end: whole.end(),
            })
        })
        .collect()
}

/// Returns the index within `tags` of the closing tag that balances `tags[0]`.
///
/// Yields `None` when a closing tag does not match the innermost open element
/// or when the input ends before the first element is closed.
fn closing_tag_offset(tags: &[Tag<'_>]) -> Option<usize> {
    let mut open_names: Vec<&str> = Vec::new();
    for (offset, tag) in tags.iter().enumerate() {
        match tag.kind {
            TagKind::Open => open_names.push(tag.name),
            TagKind::Close => {
                if open_names.pop() != Some(tag.name) {
                    return None;
                }
                if open_names.is_empty() {
                    return Some(offset);
                }
            }
        }
    }
    None
}

/// Finds every balanced element that starts with `<div class="aawp">`.
///
/// Returns `(start, end)` byte ranges into `html`, in document order and
/// non-overlapping: an `aawp` div nested inside a reported one is not reported
/// separately. Unlike a strict grammar, stray `<` / `>` characters in the text
/// between tags are tolerated.
fn find_aawp_blocks(html: &str) -> Vec<(usize, usize)> {
    let tags = structural_tags(html);
    let mut spans = Vec::new();
    let mut idx = 0;
    while let Some(tag) = tags.get(idx) {
        let closing = if tag.kind == TagKind::Open && tag.text == TARGET_OPEN_TAG {
            closing_tag_offset(&tags[idx..])
        } else {
            None
        };
        match closing {
            Some(offset) => {
                spans.push((tag.start, tags[idx + offset].end));
                // Resume after the closing tag so nested matches are skipped.
                idx += offset + 1;
            }
            // Not a target, or unbalanced: try the next tag as a candidate.
            None => idx += 1,
        }
    }
    spans
}

/// Prints every balanced `<div class="aawp">…</div>` element of the sample input.
///
/// The pattern this sample was generated from relied on look-ahead (`(?=…)`),
/// subroutine recursion (`(?1)`) and a backreference (`\2`). The `regex` crate
/// rejects all three, so compiling it failed and `unwrap()` panicked. The
/// nesting is therefore tracked with an explicit stack instead, and the regex
/// is only used to tokenize tags.
fn main() {
    let string = "<div class=\"aawp\">
<br>
<div>
<div>asd</div>
</div>
</div>
<div class=\"aawp\">
<div id=\"aawp-tb-445\">
<div class=\"aawp-tb aawp-tb--desktop aawp-tb--cols-5 aawp-tb--hide-labe\">
<a>a</a>
<br />
<img src=\"abc\">
<hr>
</div>
<div class=\"aawp-tb aawp-tb--desktop aawp-tb--cols-5 aawp-tb--hide-labe\">
...
</div>
<div class=\"aawp-tb aawp-tb--desktop aawp-tb--cols-5 aawp-tb--hide-labe\">
...
</div>
</div>
</div>
<div class=\"aawp\">
...
</div>
<div class=\"aawp\">
<div id=\"aawp-tb-445\">
<div class=\"aawp-tb aawp-tb--desktop aawp-tb--cols-5 aawp-tb--hide-labe\">
...
</div>
</div>
</div>
<div class=\"aawp\">
<div> x </div>
<div> x </div>
</div>
";
    // Each result is the start and end byte index of one match, printed
    // together with the matched text.
    for (start, end) in find_aawp_blocks(string) {
        println!("{:?}", (start, end, &string[start..end]));
    }
}
// Note: this code sample was automatically generated and is not guaranteed to work.
// If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Rust, see https://docs.rs/regex/latest/regex/