// include the latest version of the regex crate in your Cargo.toml
extern crate regex;
use regex::Regex;
/// Prints every `<a href="…">…</a>` anchor in the sample text whose visible text starts with
/// a URL-like domain while the domain of its `href` does not occur in the rest of that line.
///
/// The pattern this sample was generated from contained `(?!.*\1.*)`, a negative look-ahead
/// with a back-reference. The `regex` crate supports neither look-around nor back-references,
/// so `Regex::new` returned an error and the `unwrap` panicked before any matching happened.
/// The look-ahead is therefore removed from the pattern and re-checked in plain Rust for each
/// candidate match.
fn main() {
    // Same pattern as before minus the look-ahead. The groups are named, and the extra `text`
    // group marks where the anchor text begins (the position the look-ahead was evaluated at).
    // `/` is not a metacharacter, so the `\/` escapes were dropped; `(?m)` was dropped because
    // the pattern contains no `^` or `$`.
    let regex = Regex::new(concat!(
        r#"(?i)<a\s+href="(?:https?://)?(?:w{3}\.)?(?:[^"/]*\.)?"#,
        r#"(?P<href_domain>[a-z0-9_-]+\.[a-z0-9_-]{2,6})(?P<href_path>/[^"]*)?"[^>]*>"#,
        r#"(?P<text>(?:https?://)?(?:w{3}\.)?(?:[^"/]*\.)?"#,
        r#"(?P<text_domain>[a-z0-9_-]+\.[a-z0-9_-]{2,6})(?P<text_path>/[^"]*)?.*?)</a>"#,
    ))
    .expect("hard-coded pattern uses only syntax the regex crate supports");

    // The trailing `\` joins the source lines without adding anything to the string, so the
    // sample text is exactly one anchor per line, separated by the explicit `\n`.
    let haystack = "<a href=\"http://www.test1.net/dir1/index.html\" target=\"_blank\">test1.net/admin</a> <-- NOT MATCH\n\
        <a href=\"https://test2.com\">THIS SITE</a> <-- NOT MATCH\n\
        <a href=\"https://subdomain.test3.org\">test2.org</a> <-- MATCH\n\
        <a href=\"http://www2.test4.com\" target=\"_blank\">https://global.test4.com/index.html</a> <-- NOT MATCH\n\
        <a href=\"http://eu.test5.com\">https://evil.com/eu.test5.com/</a> <-- MATCH\n\
        <a href=\"http://eu.site6.com/index.html\" target=\"_blank\">https://eu.evil.com</a> <-- MATCH\n\
        <a href=\"https://site7.com/\">http://www.site7.com/123/test</a> <-- NOT MATCH";

    // Manual search loop instead of `captures_iter`: a candidate that fails the emulated
    // look-ahead must not consume the text it spanned, because the candidate may have run
    // past the end of its own line and swallowed the next anchor.
    let mut search_from = 0;
    while let Some(caps) = regex.captures_at(haystack, search_from) {
        let whole = caps.get(0).expect("group 0 is present in every match");
        let text_start = caps
            .name("text")
            .expect("`text` is a mandatory group of the pattern")
            .start();

        // Emulates `(?!.*\1.*)`: `.` stops at `\n`, so only the remainder of the current line
        // is inspected. ASCII lower-casing stands in for the case-insensitive back-reference.
        let rest_of_line = haystack[text_start..]
            .split('\n')
            .next()
            .unwrap_or_default();
        let href_domain_repeated = rest_of_line
            .to_ascii_lowercase()
            .contains(&caps["href_domain"].to_ascii_lowercase());

        if href_domain_repeated {
            // Rejected: retry one byte further on, like a backtracking engine would. Every
            // match starts with the single-byte `<`, so this stays on a char boundary.
            search_from = whole.start() + 1;
        } else {
            // Each accepted match is printed as its `Captures` value (all groups, with the
            // byte range and text of each).
            println!("{:?}", caps);
            search_from = whole.end();
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not
// guaranteed to work. If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Rust, please visit: https://docs.rs/regex/latest/regex/