using System;
using System.Text.RegularExpressions;
/// <summary>
/// Console sample that runs one regular expression over a fixed block of
/// HTML-like text and prints every match together with its start index.
/// </summary>
public class Example
{
    // The expression looks for a "<" followed by an "on<letters> = value"
    // attribute that is preceded by whitespace, where the value is either
    // quoted or an unquoted token containing "(...)", and then continues
    // up to a following ">".
    private const string HandlerPattern = @"(<.+?)(?<=\s)on[a-z]+\s*=\s*(?:(['""])(?!\2).+?\2|(?:\S+?\(.*?\)(?=[\s>])))(.*?>)";

    // Sample text the expression is run against. It is a verbatim literal, so
    // the continuation lines must stay at column 0 to keep the text unchanged.
    private const string SampleText = @"<meta name=""keywords"" content=""keyword1, keyword2, keyword3"">
<a href=""something"" onclick= ""bad()"">text</a> onclick not in tags
<a href=""something"" onclick =bad()>text</a>
<a href=""something"" onclick=bad('test')>text</a>
<a href=""something"" onclick=bad(""test"")>text</a>
<a href=""something"" onclick=""bad()"" >text</a>
<a href=""http://mydomain.com/index.php?oninaval=12"" class=""titi"">text</a>
What if I write john+onelia=love forever?
<a href=""something"" onclick=""bad()"">text</a> onclick not in tags
<a href=""something"" onclick=bad()>text</a>
<a href=""something"" onclick=""bad()"" >text</a>
<a href=""something"" onclick=a++ >text</a>
onclick=""asd <span class=""myclass""> not in tag too.</span>
<!-- onclick="" --><a href=""something"" onclick= ""bad()"">text</a>
<textarea><enter onclick=""dothat()"" text here></textarea>
yoko ono=""john lennon""
<img src=""/images/img1.jpg"" alt=""onclick=thegood() onclick=thebad() ""/>
<img alt=""onclick="" src=/images/theugly.jpg> the most important part of the message <p class=""disappears""></p>
<a href="" onmouseover=a=7>button1</a>
<a href=""something"" onclick=a++>text</a>
<a href=""something"" onclick=a<<1>text</a>
<a href="" onmouseover=""alert(a);"">button2</a>";

    /// <summary>
    /// Entry point: matches the pattern case-insensitively against the sample
    /// text and writes one line per match to standard output.
    /// </summary>
    public static void Main()
    {
        Regex matcher = new Regex(HandlerPattern, RegexOptions.IgnoreCase);

        // Walk the matches one at a time, in order, until no further match is found.
        Match hit = matcher.Match(SampleText);
        while (hit.Success)
        {
            Console.WriteLine($"'{hit.Value}' found at index {hit.Index}.");
            hit = hit.NextMatch();
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
// If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for C#, please visit: https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex(v=vs.110).aspx