using System;
using System.Text.RegularExpressions;
/// <summary>
/// Sample program: finds every <c>&lt;page&gt; ... &lt;/page&gt;</c> section that
/// contains <c>{{Infobox</c> and replaces the whole section with its inner text
/// (the tags are dropped). Sections without <c>{{Infobox</c> are left untouched.
/// </summary>
public class Example
{
    // The original pattern relied on the PCRE backtracking verbs (*SKIP)(*FAIL).
    // The .NET regex engine does not implement them and rejects the pattern at
    // construction time, so the same intent is expressed with a tempered
    // negative lookahead: the body may never step over a <page> or </page> tag.
    private const string PagePattern = @"
<page>
(                           # group 1: the page body that is kept
    (?:(?!</?page>).)*?     # any text that does not cross a page tag
    \{\{Infobox             # the marker that qualifies the page
    (?:(?!</?page>).)*?     # remaining body, again without crossing a tag
)
</page>
";

    // Singleline lets '.' match newlines (the role of the former inline (?s));
    // IgnorePatternWhitespace enables the layout and '#' comments used above.
    private static readonly Regex PageRegex = new Regex(
        PagePattern,
        RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);

    /// <summary>
    /// Runs the replacement on the built-in sample text and writes the result
    /// to standard output.
    /// </summary>
    public static void Main()
    {
        string substitution = @"$1";
        string input = @"List of keywords :
keyword1 = <page>
keyword2 = </page>
keyword3 = {{Infobox
Example A:
<page>
text to consider without {{ not-Infobox
</page>
Result => do not extract (or keep or split) this part.
Example B:
<page>
text to consider with {{Infobox here}}
</page>
Result => extract (or keep or split) this part.";

        string result = PageRegex.Replace(input, substitution);

        // The original computed the result and silently discarded it.
        Console.WriteLine(result);
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for C#, please visit: https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex(v=vs.110).aspx