import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demonstration of a commented, multi-line regular expression that pulls
 * an image URL, a seven-digit title id, a title, a four-digit year and an
 * optional parenthesised type out of a fragment of HTML search results.
 *
 * <p>The expression declares the named groups {@code imageURL}, {@code imdbid},
 * {@code title}, {@code year} and {@code type}; this sample reports them by
 * their numeric index.
 */
public class Example {

    /**
     * The expression, written for {@link Pattern#COMMENTS} mode: whitespace in
     * the pattern is ignored and everything from {@code #} to the end of a
     * line is a comment.
     */
    private static final String RESULT_REGEX =
            "<img\\s[^>]*src=\"(?<imageURL>[^\"]*)\"\\s*\\/> # IMG tag\n"
            + ".*? # Anything in-between IMG and A\n"
            + "<a\\s[^>]*?href=\"\\/title\\/tt\n"
            + " (?<imdbid>\\d{7}) # Got the imdbid\n"
            + " \\/[^>]*>(?<title>.*?) # Got title\n"
            + " <\\/a> # End of A tag\n"
            + " \\s*\\(\n"
            + " (?<year>\\d{4}) # Year\n"
            + " \\)\\s*(?:\\( # Type is optional \n"
            + " (?<type>[^<]*) # Type\n"
            + " \\))? # End of optional group";

    /** Two sample table rows the expression is run against. */
    private static final String SAMPLE_HTML =
            "<tr class=\"findResult odd\">\n"
            + " <td class=\"primary_photo\"><a href=\"/title/tt0499549/?ref_=fn_tt_tt_1\" ><img src=\"http://ia.media-imdb.com/images/M/MV5BMTYwOTEwNjAzMl5BMl5BanBnXkFtZTcwODc5MTUwMw@@._V1_SX32_CR0,0,32,44_AL_.jpg\" /></a></td>\n"
            + " <td class=\"result_text\"><a href=\"/title/tt0499549/?ref_=fn_tt_tt_1\" >Avatar</a> (2009) </td>\n"
            + " </tr>\n"
            + " <tr class=\"findResult even\">\n"
            + " <td class=\"primary_photo\"><a href=\"/title/tt0417299/?ref_=fn_tt_tt_2\" ><img src=\"http://ia.media-imdb.com/images/M/MV5BMTM3MTc3OTc0NF5BMl5BanBnXkFtZTcwOTQ0OTM1MQ@@._V1._CR34,0,295,440_SX32_CR0,0,32,44_AL_.jpg\" /></a></td>\n"
            + " <td class=\"result_text\"><a href=\"/title/tt0417299/?ref_=fn_tt_tt_2\" >Avatar: The Last Airbender</a> (2005) (TV Series) </td>\n"
            + " </tr>";

    /**
     * Compiled form of {@link #RESULT_REGEX}. DOTALL lets {@code .} cross the
     * line breaks that separate the image cell from the title cell.
     */
    private static final Pattern RESULT_PATTERN = Pattern.compile(
            RESULT_REGEX,
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.COMMENTS);

    /**
     * Scans the sample HTML and prints every match to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Matcher rows = RESULT_PATTERN.matcher(SAMPLE_HTML);
        while (rows.find()) {
            printCurrentMatch(rows);
        }
    }

    /**
     * Prints the full text of the current match followed by one line per
     * capturing group, numbered from 1. A group that did not take part in the
     * match is printed as {@code null}.
     */
    private static void printCurrentMatch(Matcher current) {
        System.out.println("Full match: " + current.group(0));
        int groupIndex = 1;
        while (groupIndex <= current.groupCount()) {
            System.out.println("Group " + groupIndex + ": " + current.group(groupIndex));
            groupIndex++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and
// are not guaranteed to work. If you find any syntax errors, feel free to
// submit a bug report. For a full regex reference for Java, please visit:
// https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html