import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo of a regular expression that flags HTML anchors whose visible link text starts with a
 * URL/host naming a different domain (last two host labels) than the anchor's {@code href}.
 *
 * <p>Capturing groups of {@link #MISMATCHED_LINK}:
 * <ol>
 *   <li>domain of the {@code href} target</li>
 *   <li>path of the {@code href} target (may be {@code null})</li>
 *   <li>domain shown in the link text</li>
 *   <li>path shown in the link text (may be {@code null})</li>
 * </ol>
 */
public class Example {

    /** Optional {@code http://} or {@code https://} prefix. */
    private static final String SCHEME = "(?:https?://)?";

    /** Optional {@code www.} label followed by any further leading host labels. */
    private static final String SUBDOMAINS = "(?:w{3}\\.)?(?:[^\"/]*\\.)?";

    /** Captures the last two host labels, e.g. {@code example.com}. */
    private static final String DOMAIN = "([a-z0-9_-]+\\.[a-z0-9_-]{2,6})";

    /**
     * Compiled once; matching is case-insensitive via the inline {@code (?i)} flag.
     *
     * <p>The negative lookahead directly after the opening tag inspects only the host part of the
     * link text. Scanning the whole rest of the line (as {@code (?!.*\1.*)} would) wrongly accepts
     * a foreign link as soon as the real domain shows up anywhere later, e.g. in the path of
     * {@code https://evil.com/eu.test5.com/}. The inner {@code (?!\.?[a-z0-9_-])} makes sure the
     * back-reference ends the host, so {@code test.com.evil.org} is not mistaken for
     * {@code test.com}.
     *
     * <p>The text path group excludes {@code <} so it stops at the closing tag instead of running
     * on to the next double quote, which may belong to a following anchor.
     */
    private static final Pattern MISMATCHED_LINK = Pattern.compile(
            "(?i)<a\\s+href=\"" + SCHEME + SUBDOMAINS + DOMAIN + "(/[^\"]*)?\"[^>]*>"
            + "(?!" + SCHEME + "(?:[^\"/<]*\\.)?\\1(?!\\.?[a-z0-9_-]))"
            + SCHEME + SUBDOMAINS + DOMAIN + "(/[^\"<]*)?.*?</a>");

    /** Sample anchors; the trailing arrows state the expected outcome for each line. */
    private static final String SAMPLE =
            "<a href=\"http://www.test1.net/dir1/index.html\" target=\"_blank\">test1.net/admin</a> <-- NOT MATCH\n"
            + "<a href=\"https://test2.com\">THIS SITE</a> <-- NOT MATCH\n"
            + "<a href=\"https://subdomain.test3.org\">test2.org</a> <-- MATCH\n"
            + "<a href=\"http://www2.test4.com\" target=\"_blank\">https://global.test4.com/index.html</a> <-- NOT MATCH\n"
            + "<a href=\"http://eu.test5.com\">https://evil.com/eu.test5.com/</a> <-- MATCH\n"
            + "<a href=\"http://eu.site6.com/index.html\" target=\"_blank\">https://eu.evil.com</a> <-- MATCH\n"
            + "<a href=\"https://site7.com/\">http://www.site7.com/123/test</a> <-- NOT MATCH";

    /**
     * Prints every match found in the sample text, followed by each of its capturing groups.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final Matcher matcher = MISMATCHED_LINK.matcher(SAMPLE);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            for (int i = 1; i <= matcher.groupCount(); i++) {
                // Groups that did not participate in the match print as "null".
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed
// to work. If you find any syntax errors, feel free to submit a bug report. For a full regex
// reference for Java, please visit:
// https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html