import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts book metadata from a Douban-style book page using one regular expression and prints
 * every capture group of every match to standard output.
 *
 * <p>The embedded sample page is matched by {@link #BOOK_INFO}; see that constant for the meaning
 * of each capture group.
 */
public class Example {

    /**
     * Pattern that pulls the book fields out of the page markup.
     *
     * <p>The whole expression is wrapped in {@code (?s: ... )} so that {@code .} also matches line
     * terminators; the fields are spread over many lines of markup. No {@code ^}/{@code $} anchors
     * are used, therefore no {@code MULTILINE} flag is needed.
     *
     * <p>Capture groups, in order:
     * <ol>
     *   <li>the entire matched span (same text as group 0)</li>
     *   <li>value of the {@code "name"} entry</li>
     *   <li>text of the first element following the 作者 label</li>
     *   <li>text following the 出版社 label</li>
     *   <li>year and month following the 出版年 label, e.g. {@code 2021-8} or {@code 2021-10}</li>
     *   <li>text following the 定价 label</li>
     *   <li>text following the ISBN label</li>
     *   <li>the {@code v:average} score, one or two digits, a literal dot, one digit</li>
     *   <li>the {@code v:votes} count</li>
     *   <li>first {@code rating_per} value</li>
     *   <li>second {@code rating_per} value</li>
     *   <li>third {@code rating_per} value</li>
     *   <li>fourth {@code rating_per} value</li>
     *   <li>fifth {@code rating_per} value</li>
     * </ol>
     */
    static final Pattern BOOK_INFO = Pattern.compile(
            "(?s:(\"name\" : \"(.*?)\".*?作者[:]*</span>[:]*.*?>(.*?)<"
            + ".*?出版社.*?> (.*?)<"
            // Month may have one or two digits; a single \d would truncate "2021-10" to "2021-1".
            + ".*?>出版年.*?(\\d{4}-\\d{1,2})"
            + ".*?定价:</span> (.*?)<"
            + ".*?ISBN:</span> (.*?)<"
            // The dot is escaped so only a real decimal point is accepted; {1,2} admits "10.0".
            + ".*?v:average\"> (\\d{1,2}\\.\\d)"
            + ".*?v:votes\">(\\d+?)<"
            + ".*?\"rating_per\">(.*?)<"
            + ".*?\"rating_per\">(.*?)<"
            + ".*?\"rating_per\">(.*?)<"
            + ".*?\"rating_per\">(.*?)<"
            + ".*?\"rating_per\">(.*?)<))");

    /** Sample page fragment that {@link #main(String[])} runs the pattern against. */
    private static final String SAMPLE_PAGE = "\n"
            + "<script type=\"application/ld+json\">\n"
            + "{\n"
            + " \"@context\":\"http://schema.org\",\n"
            + " \"@type\":\"Book\",\n"
            + " \"workExample\": [],\n"
            + " \"name\" : \"索拉里斯星\",\n\n"
            + " <span class=\"pl\"> 作者</span>:\n"
            + " \n"
            + " \n"
            + " <a class=\"\" href=\"/search/%E6%96%AF%E5%9D%A6%E5%B0%BC%E6%96%AF%E7%93%A6%E5%A4%AB%C2%B7%E8%8E%B1%E5%A7%86\">[波] 斯坦尼斯瓦夫·莱姆</a>\n"
            + " </span><br/>\n\n"
            + " \n"
            + " \n"
            + " \n"
            + " <span class=\"pl\">出版社:</span> 译林出版社<br/>\n\n"
            + " \n"
            + " \n"
            + " \n\n"
            + " \n"
            + " \n"
            + " \n\n"
            + " \n"
            + " \n"
            + " \n"
            + " <span class=\"pl\">原作名:</span> Solaris<br/>\n\n"
            + " \n"
            + " \n"
            + " \n"
            + " <span>\n"
            + " <span class=\"pl\"> 译者</span>:\n"
            + " \n"
            + " \n"
            + " <a class=\"\" href=\"/search/%E9%9D%96%E6%8C%AF%E5%BF%A0\">靖振忠</a>\n"
            + " </span><br/>\n\n"
            + " \n"
            + " \n"
            + " \n"
            + " <span class=\"pl\">出版年:</span> 2021-8<br/>\n\n"
            + " \n"
            + " \n"
            + " \n"
            + " <span class=\"pl\">页数:</span> 254<br/>\n\n"
            + " \n"
            + " \n"
            + " \n"
            + " <span class=\"pl\">定价:</span> 49.00元<br/>\n\n"
            + " \n"
            + " \n"
            + " \n"
            + " <span class=\"pl\">装帧:</span> 平装<br/>\n\n"
            + " \n"
            + " \n"
            + " \n"
            + " <span class=\"pl\">丛书:</span> <a href=\"https://book.douban.com/series/54333\">译林幻系列</a><br>\n\n"
            + " \n"
            + " \n"
            + " \n"
            + " \n"
            + " \n"
            + " <span class=\"pl\">ISBN:</span> 9787544782173<br/>\n\n"
            + " \n"
            + " \n"
            + " \n\n\n"
            + "</div>\n\n"
            + "</div>\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
            + " \n\n\n\n\n\n"
            + "<div id=\"interest_sectl\" class=\"\">\n"
            + " <div class=\"rating_wrap clearbox\" rel=\"v:rating\">\n"
            + " <div class=\"rating_logo\">豆瓣评分</div>\n"
            + " <div class=\"rating_self clearfix\" typeof=\"v:Rating\">\n"
            + " <strong class=\"ll rating_num \" property=\"v:average\"> 8.7 </strong>\n"
            + " <span property=\"v:best\" content=\"10.0\"></span>\n"
            + " <div class=\"rating_right \">\n"
            + " <div class=\"ll bigstar45\"></div>\n"
            + " <div class=\"rating_sum\">\n"
            + " <span class=\"\">\n"
            + " <a href=\"comments\" class=\"rating_people\"><span property=\"v:votes\">8085</span>人评价</a>\n"
            + " </span>\n"
            + " </div>\n\n\n"
            + " </div>\n"
            + " </div>\n"
            + " \n"
            + " \n"
            + " \n"
            + "<span class=\"stars5 starstop\" title=\"力荐\">\n"
            + " 5星\n"
            + "</span>\n\n"
            + " \n"
            + "<div class=\"power\" style=\"width:64px\"></div>\n\n"
            + " <span class=\"rating_per\">46.0%</span>\n"
            + " <br>\n"
            + " \n"
            + " \n"
            + "<span class=\"stars4 starstop\" title=\"推荐\">\n"
            + " 4星\n"
            + "</span>\n\n"
            + " \n"
            + "<div class=\"power\" style=\"width:54px\"></div>\n\n"
            + " <span class=\"rating_per\">39.4%</span>\n"
            + " <br>\n"
            + " \n"
            + " \n"
            + "<span class=\"stars3 starstop\" title=\"还行\">\n"
            + " 3星\n"
            + "</span>\n\n"
            + " \n"
            + "<div class=\"power\" style=\"width:17px\"></div>\n\n"
            + " <span class=\"rating_per\">12.5%</span>\n"
            + " <br>\n"
            + " \n"
            + " \n"
            + "<span class=\"stars2 starstop\" title=\"较差\">\n"
            + " 2星\n"
            + "</span>\n\n"
            + " \n"
            + "<div class=\"power\" style=\"width:2px\"></div>\n\n"
            + " <span class=\"rating_per\">1.8%</span>\n"
            + " <br>\n"
            + " \n"
            + " \n"
            + "<span class=\"stars1 starstop\" title=\"很差\">\n"
            + " 1星\n"
            + "</span>\n\n"
            + " \n"
            + "<div class=\"power\" style=\"width:0px\"></div>\n\n"
            + " <span class=\"rating_per\">0.3%</span>\n\n";

    /**
     * Runs {@link #BOOK_INFO} over the embedded sample page and prints the results.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        printMatches(SAMPLE_PAGE);
    }

    /**
     * Prints the full text and every numbered capture group of each match found in {@code page}.
     *
     * @param page text to search with {@link #BOOK_INFO}
     */
    private static void printMatches(String page) {
        final Matcher matcher = BOOK_INFO.matcher(page);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html