import re
# Pattern for anchor tags whose href value starts with a host name;
# capture group 1 holds that host name. The adjacent raw-string pieces
# below are concatenated into a single pattern string at compile time.
regex = re.compile(
    r"<a.*href.?=.?"          # "<a", any text, "href", then "=" with at most one char on each side
    r"[\"']"                  # opening quote of the attribute value
    r"(?:\b\w+://)?"          # optional scheme prefix such as "http://"
    r"([\w-]+(?:\.[\w-]+)+)"  # group 1: two or more dot-separated labels of word chars / hyphens
    r"[/:?]?.*?"              # optional "/", ":" or "?", then lazily any characters
    r"[\"']"                  # a closing quote
    r".*?>?(?:</)?.?>"        # lazily any remaining characters, ending at a ">"
)
# Sample HTML tags, one per line (no trailing newline), that the script
# scans with the compiled pattern.
test_str = """<a href="http://stepic.org/courses">
<a href='https://stepic.org'>
<a href='http://neerc.ifmo.ru:1345'>
<a href="ftp://mail.ru/distib" >
<a href="ya.ru">
<a href="www.ya.ru">
<a href="../skip_relative_links">
<a target="blank" href='http://sas-_0123d.ifmo.ru:1345'>
<a href='http://neerc.ifmo.ru:1345'>
<a href="../some_path/index.html">
<a href="https://www.ya.ru">
<a href="ftp://mail.ru/distib" >
<a href="bya.ru">
<a href="http://www.ya.ru">
<a href="www.kya.ru">
<a href="../skip_relative_links">
<a href="http://stepic.org/courses">
<a class = "hello" href= "http://ftepic.org/courses" id="dfdf">
<p class = "hello" href= "http://dtepic.org/courses">
<a class = "hello" href = "http://a.b.vc.ttepic.org/courses">
<a href='https://stepic.org'>
<a href='http://neerc.ifmo.ru:1345' >
<a href = "ftp://mail.ru/distib" >
<a href= "ya.ru">
<a href ="www.ya.ru">
<a href="../skip_relative_links">
<link rel="image_src" href="https://examaple.org/files/6a2/72d/e09/6a272de0944f447fb5972c44cc02f795.png" />
<a href="http://www.gtu.edu.ge/index_e.htm" target="_top">Georgian Technical University</a>
<a href="http://stepic-2.org/courses">
<a href="ftp://www.mya-2.ru">
<a href='https://stepic-2.org'>
<a link href='http://neerc.ifmo-2.ru:1345'>
<a title=test download="http://test.com"; href="test.com" class="my test" style=>
<a title=test class="my test" href= "test1.com:8080/test/path?get=http://test2.ru/?true"; rel="nofollow" style=>
<a title=test meta="whatever http://test1.com"; href = "test.com?get=http://test2.ru/?true"; class="my test" style= >
<a target="blank" href='http://sasd.ifmo-2.ru:1345'>
<a href='http://neerc.ifmo-2.ru:1345'>
<a href="../some_path/index-2.html">
<a href="https://www.ya-2.ru">
<a href="ftp://mail-2.ru/distib" >
<a href="bya-2.ru">
<a href="http://www.ya-2.ru">
<a href="www.kya-2.ru" >
<a href="../skip_relative_links-2">
<a href="http://stepic-2.org/courses">
<a class = "hello-2" href= "http://ftepic-2.org/courses" id="dfdf">
<p class = "hello-2" href= "http://dtepic-2.org/courses">
<a class = "hello-2" href = "http://a.b.vc.ttepic-2.org/courses">
<a href='https://stepic-2.org'>
<a href='http://neerc.ifmo-2.ru:1345' >
<a href = "ftp://mail-2.ru/distib" >
<a href= "ya-2.ru">
<a href ="www.ya-2.ru">
<a href="../skip_relative_links">
<link rel="image_src" href="https://examaple.org/files/6a2/72d/e09/6a272de0944f447fb5972c44cc02f795.png" />
<a href="http://www.gtu.edu-2.ge/index_e.htm" target="_top">Georgian Technical University</a>
<a class-8 = "hello-2" href= "http://zzz.last.test-1.stepic.org/courses" id="dfdf">
<a class-4-4 = "hello-2-raz" href = "http://zzz.last.test-2.stepic.org/courses" >
<a href ="https://stepic.org/media/attachments/lesson/24471/02">"""
# Lazily iterate over every non-overlapping match of the pattern in the sample text.
matches = regex.finditer(test_str)

# Print each match (numbered from 1) with its character span, followed by the
# span and text of every capture group inside that match.
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # The group report belongs to the current match, so it is nested in the outer loop.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html