using System;
using System.Text.RegularExpressions;
/// <summary>
/// Demonstrates a regular expression that flags HTML anchors whose visible link
/// text names a different domain than the one the <c>href</c> attribute points to.
/// </summary>
public class Example
{
    // Optional scheme, optional "www.", and optional sub-domain labels in front of
    // the domain that is captured right after this fragment.
    private const string HostPrefix = @"(?:https?:\/\/)?(?:w{3}\.)?(?:[^""\/]*\.)?";

    // Captures the last two host labels ("name.tld", tld of 2-6 characters).
    private const string Domain = @"([a-z0-9_-]+\.[a-z0-9_-]{2,6})";

    // Capture groups: 1 = href domain, 2 = href path, 3 = link-text domain, 4 = link-text path.
    private const string MismatchedLinkPattern =
        // Opening tag: href host (group 1), optional path (group 2), remaining attributes.
        @"(?i)<a\s+href=""" + HostPrefix + Domain + @"(\/[^""]*)?""[^>]*>"
        // Reject the anchor only when the HOST at the start of the link text is the href
        // domain. The previous unanchored check (?!.*\1.*) also rejected anchors where the
        // href domain merely appeared later on the line (e.g. inside the text URL's path).
        // The inner (?!\.?[a-z0-9_-]) stops "test1.net" from being accepted as a prefix of
        // "test1.network" or "test1.net.evil.com".
        + @"(?!" + HostPrefix + @"\1(?!\.?[a-z0-9_-]))"
        // Link text: host (group 3) and optional path (group 4). The path stops at '<' so
        // it cannot run past the closing tag into a following anchor.
        + HostPrefix + Domain + @"(\/[^""<]*)?.*?<\/a>";

    /// <summary>
    /// Runs the pattern over a fixed set of sample anchors and writes every match,
    /// together with its start index, to standard output.
    /// </summary>
    public static void Main()
    {
        // Each sample line ends with a marker stating whether it is expected to match.
        string input = @"<a href=""http://www.test1.net/dir1/index.html"" target=""_blank"">test1.net/admin</a> <-- NOT MATCH
<a href=""https://test2.com"">THIS SITE</a> <-- NOT MATCH
<a href=""https://subdomain.test3.org"">test2.org</a> <-- MATCH
<a href=""http://www2.test4.com"" target=""_blank"">https://global.test4.com/index.html</a> <-- NOT MATCH
<a href=""http://eu.test5.com"">https://evil.com/eu.test5.com/</a> <-- MATCH
<a href=""http://eu.site6.com/index.html"" target=""_blank"">https://eu.evil.com</a> <-- MATCH
<a href=""https://site7.com/"">http://www.site7.com/123/test</a> <-- NOT MATCH";

        // Multiline only changes the meaning of ^ and $; the pattern uses neither,
        // so the option is kept purely for parity with the original sample.
        RegexOptions options = RegexOptions.Multiline;

        foreach (Match m in Regex.Matches(input, MismatchedLinkPattern, options))
        {
            Console.WriteLine("'{0}' found at index {1}.", m.Value, m.Index);
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for C#, please visit: https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex(v=vs.110).aspx