import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates a regex-based rewrite of {@code <ref ... name=... >} tags so that the
 * {@code name} attribute value is always wrapped in double quotes.
 *
 * <p>The rewrite keeps any {@code group}/{@code follow}/{@code extends} attributes found
 * before or after {@code name}, and emits {@code " />"} for tags that were self-closing.
 * Double quotes that already occur inside a name are copied through unescaped, so such
 * inputs still yield malformed markup (the sample data calls these cases out).
 */
public class Example {

    /**
     * Pattern text for a {@code <ref>} tag carrying a {@code name} attribute.
     *
     * <p>Capture groups:
     * <ol>
     *   <li>{@code group}/{@code follow}/{@code extends} attributes preceding {@code name}
     *       (including trailing whitespace)</li>
     *   <li>the name when it was double-quoted, without surrounding whitespace</li>
     *   <li>the name when it was single-quoted, without surrounding whitespace</li>
     *   <li>the name when it was unquoted</li>
     *   <li>a {@code group}/{@code follow}/{@code extends} attribute run following {@code name}</li>
     *   <li>the {@code /} of a self-closing tag; unset for an opening tag</li>
     * </ol>
     */
    private static final String REGEX = "<ref\\s+((?:group|follow|extends)\\s*=(?:(?!name\\s*=)[\\s\\S])*)?name\\s*=\\s*(?:\"\\s*([^\"](?:(?!\\s*\\/>|\\s*\"\\s*>|\\s+(?:group|follow|extends)).)*?)\\s*\"|'\\s*([^'\"](?:(?!\\s*\\/>|\\s*'>|\\s+(?:group|follow|extends)).)*?)\\s*'|([^\"](?:(?!\\s*\\/>|\\s*>|\\s+(?:group|follow|extends)).)*))(\\s+(?:group|follow|extends)\\s*=(?:(?!\\s*\\/>|\"\\s*>|'\\s*>)[\\s\\S])*)*\\s*(?:(\\/)|)>";

    /**
     * Compiled once and reused. The MULTILINE flag is kept from the generated sample; the
     * pattern contains no {@code ^} or {@code $} anchors, so the flag does not change matching.
     */
    private static final Pattern REF_TAG = Pattern.compile(REGEX, Pattern.MULTILINE);

    /**
     * Replacement template shared by both tag forms. Groups 2-4 are mutually exclusive
     * alternatives, and a group that did not participate in the match expands to nothing.
     */
    private static final String REPLACEMENT_HEAD = "<ref $1name=\"$2$3$4\"$5";

    /** Capture group index that is set only for self-closing tags. */
    private static final int SELF_CLOSING_GROUP = 6;

    /**
     * Rewrites every matching {@code <ref>} tag in {@code input} to the normalized form.
     *
     * <p>The original sample used the replacement suffix <code>${6:+ /:}</code>, which is
     * PCRE2 conditional-substitution syntax. {@link Matcher} parses <code>${</code> as the
     * start of a named-group reference and throws {@link IllegalArgumentException}, so the
     * conditional is evaluated here in Java instead.
     *
     * @param input text possibly containing {@code <ref>} tags; must not be {@code null}
     * @return {@code input} with each matched tag replaced; text outside matches is unchanged
     */
    public static String normalizeRefs(String input) {
        final Matcher matcher = REF_TAG.matcher(input);
        // StringBuffer (not StringBuilder) keeps this working on pre-Java-9 runtimes,
        // where appendReplacement/appendTail only accept StringBuffer.
        final StringBuffer rewritten = new StringBuffer();
        while (matcher.find()) {
            final boolean selfClosing = matcher.group(SELF_CLOSING_GROUP) != null;
            // The suffixes contain neither '$' nor '\', so no quoting is required.
            final String replacement = REPLACEMENT_HEAD + (selfClosing ? " />" : ">");
            matcher.appendReplacement(rewritten, replacement);
        }
        matcher.appendTail(rewritten);
        return rewritten.toString();
    }

    /**
     * Runs the rewrite over a fixed set of sample {@code <ref>} tags and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String string = "<ref name=Test1 /><ref name=Test2 />\n"
                + "<ref name=Bad 1 /><ref name=Bad 2 />\n"
                + "<ref name=\"Test 3\"/>\n"
                + "<ref name=\"Test 4\" />\n"
                + "<ref name=Test5>Foo</ref>\n"
                + "<ref name=\"Test 6\">Foo</ref>\n"
                + "<ref \n"
                + " name=Test7>Foo\n"
                + "</ref>\n"
                + "<ref \n"
                + " name = Test8 >\n"
                + " Foo\n"
                + "</ref> \n"
                + "<ref name = Test9 > Foo </ref>\n"
                + "<ref \n"
                + " name=\"Test 10\">Foo\n"
                + "</ref>\n"
                + "<ref \n"
                + " name = \"Test 11\" >\n"
                + " Foo\n"
                + "</ref>\n"
                + "<ref name = Test12 > Foo </ref>\n"
                + "<ref name = Test13 />\n"
                + "<ref name = \"Test 14\" />\n"
                + "<ref \n"
                + " name = Test15 />\n"
                + "<ref\n"
                + " name =\n"
                + " \"Test 16\"\n"
                + "/>\n"
                + "<ref\n"
                + " name\n"
                + " =\n"
                + " Test17\n"
                + " />\n"
                + "<ref\n"
                + " name\n"
                + " =\n"
                + " \"Test 18\"\n"
                + " />\n"
                + "<ref name=\"Test/19\" />\n"
                + "==A heading==\n"
                + "<ref name = \"Test/20\" />\n"
                + "<ref name = \"Test 21\"/>\n"
                + "<ref name = \"Test / 22\"/>\n"
                + "<ref name = \"Test > 23\"/>\n"
                + "<ref name=\"Te>st/ 23.1\"/>\n"
                + "<ref\n"
                + " name\n"
                + " =\"\n"
                + " Test 24\n"
                + " \"/>\n"
                + "<ref name='Test/ 25' />\n"
                + "<ref name='Test/ 26'> Foo </ref>\n"
                + "<ref name='Te>st/ 27' />\n"
                + "<ref name = \" Test 28 \" >Foo</ref>\n"
                + "<ref name='Test \"29\"' /> - This one is expected to produce invalid XML \"'Test \"29\"'\"; surprisingly, MW can parse it anyway for a case this simple, but not if there's a space within the interior-most quotes: \"'Test \"2 9\"'\"\n"
                + "<ref name = \"Test 30\" >\n"
                + "<ref name = \"Test 31\">\n"
                + "<ref name = \"Test 32\" />\n"
                + "<ref name = \"Test 33\"/>\n"
                + "<ref name = Test34 >\n"
                + "<ref name = Test35>\n"
                + "<ref name = Test36 />\n"
                + "<ref name = Test37/>\n"
                + "<ref name = \" Test 38 \"/>\n"
                + "<ref name = \" Test 39 \" />\n"
                + "<ref name='Test 40'/>\n"
                + "<ref name=AAA/>\n"
                + "<ref name=\"BBB AAA BBB\" />\n"
                + "<ref name=\"This \"isjustnot\" valid\"/> - This one is expected to produce invalid XML \"This \"isjustnot\" valid\"; surprisingly, MW can parse it anyway for a case this simple, but not if there's a space within the interior-most quotes: \"This \"is just not\" valid\".\n"
                + "<ref name=\"This 'actually is' valid\"/>\n"
                + "<ref name='But this \"is also\" a problem' /> (or will be after our \" substitution)\n"
                + "<ref name=apostrophe's />\n"
                + "<ref name=\"more apostrophe's\" />\n"
                + "<ref name='this's bad, but we can fix it!' />\n"
                + "<ref name='this is okay'/>\n"
                + "<ref name=\"'this is dumb'\"/>\n"
                + "<ref name=\"this is 'less' dumb\" />\n"
                + "<ref name=\"what about 'this case'\"/>\n"
                + "<ref name=\"'or this' one\" />\n\n"
                + "<ref name=\"foo\" group=\"bar\"/>\n"
                + "<ref name=\"foo\" group=\"bar\" />\n"
                + "<ref group=\"bar\" name=\"foo\" />\n"
                + "<ref name='foo' group='bar'/>\n"
                + "<ref name='foo' group='bar' />\n"
                + "<ref group='bar' name='foo' />\n"
                + "<ref name=foo group=bar/>\n"
                + "<ref name=foo group=bar />\n"
                + "<ref group=bar name=foo/>\n"
                + "<ref name=\"foo\" follow=\"bar\" />\n"
                + "<ref name='foo' follow='bar'/>\n"
                + "<ref name=foo follow=bar/>\n"
                + "<ref name=\"foo\" extends=\"bar\" />\n"
                + "<ref name='foo' extends='bar'/>\n"
                + "<ref name=foo extends=bar/>\n"
                + "<ref name='foo' group=\"bar\" extends=baz follow=\"quux quux\" />\n"
                + "<ref group=\"bar\" name='foo' extends=baz follow=\"quux quux\" />\n"
                + "<ref group=\"bar\" extends=baz name='foo' follow=\"quux quux\" />\n"
                + "<ref group=\"bar\" extends=baz follow=\"quux quux\" name='foo' />\n"
                + "<ref group=\"bar\" extends=baz name=foo follow=\"quux quux\" />\n"
                + "<ref group=\"bar\" extends=baz name=\"foo's foo\" follow=\"quux quux\" />\n"
                + "<ref group=\"bar's > / bar\" extends=baz name='foos > / foo' follow=\"quux quux\" />\n"
                + "<ref group=\n"
                + " \"bar's > / bar\"\n"
                + " extends=\n"
                + " baz\n"
                + " name=\n"
                + " '\n"
                + " foos > / foo\n"
                + " '\n"
                + " follow=\n"
                + " \"quux > quux\"\n"
                + " />\n"
                + "<ref name=\"foos > / foo\" group=\"bar's > / bar\" extends=baz follow=\"quux quux\"/>\n"
                + "<ref \n"
                + " name = \n"
                + " '\n"
                + " foos > / foo\n"
                + " '\n"
                + " group = \n"
                + " \"bar's > / bar\"\n"
                + " extends\n"
                + " =baz\n"
                + " follow\n"
                + " =\"\n"
                + " quux quux\n"
                + " \"\n"
                + " />\n"
                + "<ref name=\"foos > / foo\" group=\"bar's > / bar\" extends=baz follow='quux quux'/>\n";

        // The substituted value will be contained in the result variable
        final String result = normalizeRefs(string);
        System.out.println("Substitution result: " + result);
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html