import re
# Demo script: find Tumblr blog root URLs ("<blog>.tumblr.com" with an optional
# trailing slash and nothing after it) inside a block of sample text, and print
# every match together with its capture groups.
#
# Capture groups (numbering is kept stable for anyone indexing into matches):
#   1 - the blog label including its trailing dot, e.g. "cdn."
#   2 - only the LAST character of the label (a repeated capturing group keeps
#       just its final iteration); kept so that group 3 does not get renumbered
#   3 - the site domain, "tumblr.com" or "tmblr.co"
regex = re.compile(
    # Left boundary: the match must begin at the start of the host label.
    # Without it the exclusion list below is bypassed by simply starting one
    # character later ("ww.tumblr.com/" inside "www.tumblr.com/"), and a match
    # could also begin in the middle of a deeper host ("media.tumblr.com"
    # inside "64.media.tumblr.com").
    r"(?<![\w.-])"
    # Reserved, non-blog subdomains that must never match.
    r"(?!letsgo\.|shop\.|advertise\.|static\.|www\.)"
    # Blog label: word characters and hyphens, followed by a dot.
    r"((\w|-)+\.)"
    r"(tumblr\.com|tmblr\.co)"
    # Optional trailing slash, then whitespace or end of input: any path,
    # query string or fragment disqualifies the URL.
    r"\/?(?!\S)",
    flags=re.IGNORECASE | re.DOTALL,
)

test_str = ("Match the following:\n"
            "https://dhabitahpunk-art.tumblr.com/\n"
            "https://cdn.tumblr.com\n\n"
            "DON'T match the following:\n"
            "https://www.tumblr.com/todayontumblr\n"
            "tumblr.com/todayontumblr\n"
            "https://www.tumblr.com/todayontumblr/\n"
            "tumblr.com/todayontumblr/\n"
            "https://www.tumblr.com/dhabitahpunk-art\n"
            "tumblr.com/dhabitahpunk-art\n"
            "https://www.tumblr.com/dhabitahpunk-art/\n"
            "tumblr.com/dhabitahpunk-art/\n"
            "https://tmblr.co/MzB5rhaOrTp3uwB5NhcQsEw\n"
            "https://www.tumblr.com/blog/view/grouper/\n"
            "https://www.tumblr.com/internships\n"
            "https://www.tumblr.com/transparency\n"
            "https://www.tumblr.com/search/lana\n"
            "https://www.tumblr.com/tagged/lwaxana%20troi?sort=top\n"
            "https://www.tumblr.com/following\n"
            "https://tumblr.com/customize/\n"
            "https://letsgo.tumblr.com/\n"
            "https://letsgo.tumblr.com/welcome-guide\n"
            "https://64.media.tumblr.com/6c05087e5dbaccedf651d421148dfaa0/e7eb8d73a9a6cc70-8e/s540x810/e9cc402adacca3e983fc0448b94919b9f809c719.gifv\n"
            "https://shop.tumblr.com/\n"
            "https://shop.tumblr.com/new/\n"
            "https://www.tumblr.com/tumblrmart/blue-checkmark\n"
            "https://help.tumblr.com/hc/articles/115001572547\n"
            "https://help.tumblr.com/hc\n"
            "https://tumblr.com/help\n"
            "https://api.tumblr.com/console\n"
            "https://www.tumblr.com/oauth/apps\n"
            "https://www.tumblr.com/api\n"
            "https://assets.tumblr.com/downloads\n"
            "https://www.tumblr.com/abuse\n"
            "https://www.tumblr.com/themes/\n"
            "https://www.tumblr.com/themes/tagged/two_column\n"
            "https://www.tumblr.com/dmca\n"
            "https://www.tumblr.com/docs/api_agreement\n"
            "https://www.tumblr.com/account/delete\n"
            "https://www.tumblr.com/settings\n"
            "https://www.tumblr.com/settings/blog\n"
            "https://www.tumblr.com/security\n"
            "https://www.tumblr.com/developers\n"
            "https://www.tumblr.com/press\n"
            "https://www.tumblr.com/buttons\n"
            "https://www.tumblr.com/logo\n"
            "https://www.tumblr.com/tips\n"
            "https://www.tumblr.com/support\n"
            "https://www.tumblr.com/auth\n"
            "https://www.tumblr.com/auth/google?redirectTo=undefined\n"
            "https://www.tumblr.com/register\n"
            "https://www.tumblr.com/register?source=new_to_tumblr\n"
            "https://www.tumblr.com/login\n"
            "https://www.tumblr.com/login?redirect_to=%2Fexplore%2Ftoday\n"
            "https://www.tumblr.com/explore\n"
            "https://www.tumblr.com/explore/trending\n"
            "https://advertise.tumblr.com/\n"
            "https://advertise.tumblr.com/#why\n"
            "https://www.tumblr.com/jobs\n"
            "https://www.tumblr.com/policy\n"
            "https://www.tumblr.com/privacy_policy\n"
            "https://www.tumblr.com/policy/privacy\n"
            "https://www.tumblr.com/policy/terms-of-service\n"
            "https://www.tumblr.com/apps\n"
            "https://www.tumblr.com/about\n"
            "https://about.tumblr.com/#quick-facts\n"
            "https://www.tumblr.com/\n"
            "https://www.tumblr.com/dhabitahpunk-art/tagged/Picturesque\n"
            "tumblr.com/dhabitahpunk-art/tagged/Picturesque\n"
            "tumblr.com\n"
            "https://dhabitahpunk-art.tumblr.com/page/2\n"
            "dhabitahpunk-art.tumblr.com/page\n"
            "https://dhabitahpunk-art.tumblr.com/page/\n"
            "https://dhabitahpunk-art.tumblr.com/commissions\n"
            "https://dhabitahpunk-art.tumblr.com/post/714262490632044544/time-taken-355-hoursreference-unknown\n"
            "https://static.tumblr.com/\n"
            "https://static.tumblr.com/zyubucd/CIjopeea7/transparencyreport2016b.pdf")

# Lazy iterator over every non-overlapping match in the sample text.
matches = regex.finditer(test_str)

# Report each match with its span, then each capture group with its own span.
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html