import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo program: runs one regular expression over a fixed block of sample
 * HTML-like text and prints every match followed by each of its capture groups.
 */
public class Example {

    /**
     * The expression under test. It is split across several literals purely for
     * readability; the pieces are concatenated at compile time into one pattern.
     *
     * <p>Capture groups, in order of their opening parenthesis:
     * <ol>
     *   <li>the run of non-whitespace characters directly after a {@code <}</li>
     *   <li>the text before an {@code =} (no whitespace, no {@code =})</li>
     *   <li>the text after the {@code =}, without surrounding quotes</li>
     * </ol>
     */
    private static final String REGEX =
            // Alternative 1: "<!--", a lazily matched body, then "-->". No capture groups.
            "(?:\\<\\!\\-\\-(?:(?!\\-\\-\\>)\\r\\n?|\\n|.)*?-\\-\\>)"
            // Alternative 2 starts either at "<name" + whitespace (with a ">" somewhere
            // ahead), or exactly where the previous match ended (\G) after "=" or whitespace.
            + "|(?:<(\\S+)\\s+(?=.*>)|(?<=[=\\s])\\G)"
            // Group 2, then "=" with optional whitespace around it and an optional opening quote.
            + "(?:((?:(?!\\s|=).)*)\\s*?=\\s*?[\\\"']?"
            // Group 3: a double-quoted or single-quoted body (a quote preceded by a
            // backslash does not end it), or an unquoted run ending at "/>", ">" or whitespace.
            + "((?:(?<=\\\")(?:(?<=\\\\)\\\"|[^\\\"])*|(?<=')(?:(?<=\\\\)'|[^'])*)|(?:(?!\\\"|')(?:(?!\\/>|>|\\s).)+))"
            // Optional closing quote and any trailing whitespace.
            + "[\\\"']?\\s*)";

    /** Fixed input the expression is applied to. */
    private static final String SAMPLE_TEXT =
            "In this case, $url will indeed contain http://example.com/whatever.jpg. But what happens when you start getting HTML like this:\n\n"
            + "<img src='http://example.com/whatever.jpg'>\n"
            + "or\n\n"
            + "<img src=http://example.com/whatever.jpg>\n"
            + "or\n\n"
            + "<img border=0 src=\"http://example.com/whatever.jpg\">\n"
            + "or\n\n"
            + "<img\n"
            + " src=\"http://example.com/whatever.jpg\">\n"
            + "or you start getting false positives from\n\n"
            + "<!-- // commented out\n"
            + "<img src=\"http://example.com/outdated.png\">\n"
            + "-->\n\n\n\n"
            + "<asd ASD=asd>\n\n"
            + "<!-- // commented out <img src=\"http://example.com/outdated.png\"> -->\n\n"
            + "No quotes:\n"
            + "<iframe src=test.html target=xyz></iframe>\n"
            + "Self-closing tag:\n"
            + "<a href=test.html target=xyz/>\n"
            + "Self closing tag with a space before closure:\n"
            + "<a href=test.html target=xyz />\n"
            + "Double quotes:\n"
            + "<a href=\"test.html\" target=\"xyz\">\n"
            + "Single quotes:\n"
            + "<a href='test.html' target='xyz'>\n"
            + "Escaping double quotes:\n"
            + "<a href=\"test.html?val=1\" title=\"\\\"No rules exist\\\" Andre Breton's quote\">\n"
            + "Escaping single quotes (also with spaces between equals signs):\n"
            + "<a href = \"test.html?val=1\" title = 'Charlie\\'s Angels'>\n"
            + "Tag without opening (ignore attributes):\n"
            + "a href = \"test.html?val=1\" title='Charlie\\'s Angels'>\n"
            + "Tag without closure (ignore attributes):\n"
            + "<a href = \"test.html?val=1\" title='Charlie\\'s Angels'\n\n";

    /**
     * Compiles the expression, scans the sample text, and prints each match to
     * standard output.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        // DOTALL lets '.' cross line breaks; COMMENTS makes the engine skip literal
        // whitespace and '#' comments inside the pattern text.
        int flags = Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.COMMENTS;
        Matcher hits = Pattern.compile(REGEX, flags).matcher(SAMPLE_TEXT);
        while (hits.find()) {
            report(hits);
        }
    }

    /**
     * Prints the current match of {@code hit}: one "Full match" line, then one
     * "Group n" line per capture group. A group that did not take part in the
     * match is printed as {@code null}.
     */
    private static void report(Matcher hit) {
        System.out.println("Full match: " + hit.group());
        int groupTotal = hit.groupCount();
        int index = 1;
        while (index <= groupTotal) {
            System.out.println("Group " + index + ": " + hit.group(index));
            index++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html