import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Runs a fixed regular expression over a built-in sample of HTML-like tags and
 * prints every match, followed by each of its capture groups, to standard output.
 */
public class Example {

    /**
     * Matches text that begins with {@code <a}, contains {@code href} followed by
     * {@code =} and a quoted value, and runs up to a closing {@code >}. Group 1
     * captures the dot-separated run of word characters and hyphens that follows
     * the optional {@code scheme://} prefix at the start of the quoted value.
     */
    private static final Pattern ANCHOR_HOST = Pattern.compile(
            "<a.*href.?=.?[\"'](?:\\b\\w+://)?([\\w-]+(?:\\.[\\w-]+)+)[/:?]?.*?[\"'].*?>?(?:</)?.?>");

    /** Sample tags, one per input line; they are joined with a newline before matching. */
    private static final String[] SAMPLE_TAGS = {
        "<a href=\"http://stepic.org/courses\">",
        "<a href='https://stepic.org'>",
        "<a href='http://neerc.ifmo.ru:1345'>",
        "<a href=\"ftp://mail.ru/distib\" >",
        "<a href=\"ya.ru\">",
        "<a href=\"www.ya.ru\">",
        "<a href=\"../skip_relative_links\">",
        "<a target=\"blank\" href='http://sas-_0123d.ifmo.ru:1345'>",
        "<a href='http://neerc.ifmo.ru:1345'>",
        "<a href=\"../some_path/index.html\">",
        "<a href=\"https://www.ya.ru\">",
        "<a href=\"ftp://mail.ru/distib\" >",
        "<a href=\"bya.ru\">",
        "<a href=\"http://www.ya.ru\">",
        "<a href=\"www.kya.ru\">",
        "<a href=\"../skip_relative_links\">",
        "<a href=\"http://stepic.org/courses\">",
        "<a class = \"hello\" href= \"http://ftepic.org/courses\" id=\"dfdf\">",
        "<p class = \"hello\" href= \"http://dtepic.org/courses\">",
        "<a class = \"hello\" href = \"http://a.b.vc.ttepic.org/courses\">",
        "<a href='https://stepic.org'>",
        "<a href='http://neerc.ifmo.ru:1345' >",
        "<a href = \"ftp://mail.ru/distib\" >",
        "<a href= \"ya.ru\">",
        "<a href =\"www.ya.ru\">",
        "<a href=\"../skip_relative_links\">",
        "<link rel=\"image_src\" href=\"https://examaple.org/files/6a2/72d/e09/6a272de0944f447fb5972c44cc02f795.png\" />",
        "<a href=\"http://www.gtu.edu.ge/index_e.htm\" target=\"_top\">Georgian Technical University</a>",
        "<a href=\"http://stepic-2.org/courses\">",
        "<a href=\"ftp://www.mya-2.ru\">",
        "<a href='https://stepic-2.org'>",
        "<a link href='http://neerc.ifmo-2.ru:1345'>",
        "<a title=test download=\"http://test.com\"; href=\"test.com\" class=\"my test\" style=>",
        "<a title=test class=\"my test\" href= \"test1.com:8080/test/path?get=http://test2.ru/?true\"; rel=\"nofollow\" style=>",
        "<a title=test meta=\"whatever http://test1.com\"; href = \"test.com?get=http://test2.ru/?true\"; class=\"my test\" style= >",
        "<a target=\"blank\" href='http://sasd.ifmo-2.ru:1345'>",
        "<a href='http://neerc.ifmo-2.ru:1345'>",
        "<a href=\"../some_path/index-2.html\">",
        "<a href=\"https://www.ya-2.ru\">",
        "<a href=\"ftp://mail-2.ru/distib\" >",
        "<a href=\"bya-2.ru\">",
        "<a href=\"http://www.ya-2.ru\">",
        "<a href=\"www.kya-2.ru\" >",
        "<a href=\"../skip_relative_links-2\">",
        "<a href=\"http://stepic-2.org/courses\">",
        "<a class = \"hello-2\" href= \"http://ftepic-2.org/courses\" id=\"dfdf\">",
        "<p class = \"hello-2\" href= \"http://dtepic-2.org/courses\">",
        "<a class = \"hello-2\" href = \"http://a.b.vc.ttepic-2.org/courses\">",
        "<a href='https://stepic-2.org'>",
        "<a href='http://neerc.ifmo-2.ru:1345' >",
        "<a href = \"ftp://mail-2.ru/distib\" >",
        "<a href= \"ya-2.ru\">",
        "<a href =\"www.ya-2.ru\">",
        "<a href=\"../skip_relative_links\">",
        "<link rel=\"image_src\" href=\"https://examaple.org/files/6a2/72d/e09/6a272de0944f447fb5972c44cc02f795.png\" />",
        "<a href=\"http://www.gtu.edu-2.ge/index_e.htm\" target=\"_top\">Georgian Technical University</a>",
        "<a class-8 = \"hello-2\" href= \"http://zzz.last.test-1.stepic.org/courses\" id=\"dfdf\">",
        "<a class-4-4 = \"hello-2-raz\" href = \"http://zzz.last.test-2.stepic.org/courses\" >",
        "<a href =\"https://stepic.org/media/attachments/lesson/24471/02\">"
    };

    /**
     * Scans the built-in sample and prints each match with its capture groups.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        // Newline-separated with no trailing newline after the last tag.
        String sample = String.join("\n", SAMPLE_TAGS);
        Matcher scanner = ANCHOR_HOST.matcher(sample);
        while (scanner.find()) {
            printMatch(scanner);
        }
    }

    /** Prints the current match of {@code hit}, then one line per capture group. */
    private static void printMatch(Matcher hit) {
        System.out.println("Full match: " + hit.group(0));
        int total = hit.groupCount();
        int index = 1;
        while (index <= total) {
            System.out.println("Group " + index + ": " + hit.group(index));
            index++;
        }
    }
}
// Note: this code sample was automatically generated and is not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, see: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html