# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# URL pattern, applied line by line (re.MULTILINE) and case-insensitively.
# Capture groups:
#   1  optional scheme, "http://" or "https://"
#   2  optional literal "www." prefix
#   3  host label: digits, ASCII letters, "_" or "-"
#   4  "com", or whatever group 5 matched
#   5  empty string, or 2-6 alphanumeric characters
#   6  optional run of path/query characters
#   7  the final character of the line, which must not be "/", "|" or
#      whitespace -- so a URL with a trailing slash never matches.
# Group 5 can match the empty string, so for an input such as
# "http://regex101.com" the text after the dot ends up split between
# groups 6 and 7 rather than in group 4.
#
# Two fixes relative to the generated pattern:
#   * "A-z" in the host class is now "A-Z"; the old range also admitted
#     the punctuation between "Z" and "a" in ASCII ([ \ ] ^ `).
#   * the dot in "www." is escaped so it only matches a literal ".".
regex = r"^(https?:\/\/)?(www\.)?([\da-zA-Z\_\-]+)\.(com|(|[\da-zA-Z]{2,6}))([\/\w \.\-\#\&\?\%\_]*)?([^\/| |\s])$"

# Sample input: one URL per line, with and without scheme variants, "www.",
# paths and trailing slashes, for .com, .co.uk and .de hosts.
# NOTE(review): two pairs of lines are exact duplicates of the pair four
# lines before them (the "http://www.regex101.co.uk/blah.../" pair and the
# "http://www.regex101.de/blah.../" pair); by analogy with the .com section
# they were probably meant to be "https://www." -- left unchanged here so
# reported match offsets stay the same.
test_str = ("http://regex101.com/\n"
    "http://regex101.com\n"
    "http://www.regex101.com/\n"
    "http://www.regex101.com\n\n"
    "https://regex101.com/\n"
    "https://regex101.com\n"
    "https://www.regex101.com/\n"
    "https://www.regex101.com\n\n"
    "http://regex101.com/blahblahblahblahblah/\n"
    "http://regex101.com/blahblahblahblahblah\n"
    "http://www.regex101.com/blahblahblahblahblah/\n"
    "http://www.regex101.com/blahblahblahblahblah\n\n"
    "https://regex101.com/blahblahblahblahblah/\n"
    "https://regex101.com/blahblahblahblahblah\n"
    "https://www.regex101.com/blahblahblahblahblah/\n"
    "https://www.regex101.com/blahblahblahblahblah\n\n"
    "http://regex101.com/blahblahblahblahblah/blahblahblahblahblah/\n"
    "http://regex101.com/blahblahblahblahblah/blahblahblahblahblah\n"
    "http://www.regex101.com/blahblahblahblahblah/blahblahblahblahblah/\n"
    "http://www.regex101.com/blahblahblahblahblah/blahblahblahblahblah\n\n"
    "https://regex101.com/blahblahblahblahblah/blahblahblahblahblah/\n"
    "https://regex101.com/blahblahblahblahblah/blahblahblahblahblah\n"
    "https://www.regex101.com/blahblahblahblahblah/blahblahblahblahblah/\n"
    "https://www.regex101.com/blahblahblahblahblah/blahblahblahblahblah\n\n\n\n"
    "http://regex101.co.uk/\n"
    "http://regex101.co.uk\n"
    "http://www.regex101.co.uk/\n"
    "http://www.regex101.co.uk\n\n"
    "https://regex101.co.uk/\n"
    "https://regex101.co.uk\n"
    "https://www.regex101.co.uk/\n"
    "https://www.regex101.co.uk\n\n"
    "http://regex101.co.uk/blahblahblahblahblah/\n"
    "http://regex101.co.uk/blahblahblahblahblah\n"
    "http://www.regex101.co.uk/blahblahblahblahblah/\n"
    "http://www.regex101.co.uk/blahblahblahblahblah\n\n"
    "https://regex101.co.uk/blahblahblahblahblah/\n"
    "https://regex101.co.uk/blahblahblahblahblah\n"
    "http://www.regex101.co.uk/blahblahblahblahblah/\n"
    "http://www.regex101.co.uk/blahblahblahblahblah\n\n"
    "http://regex101.co.uk/blahblahblahblahblah/blahblahblahblahblah/\n"
    "http://regex101.co.uk/blahblahblahblahblah/blahblahblahblahblah\n"
    "http://www.regex101.co.uk/blahblahblahblahblah/blahblahblahblahblah/\n"
    "http://www.regex101.co.uk/blahblahblahblahblah/blahblahblahblahblah\n\n"
    "https://regex101.co.uk/blahblahblahblahblah/blahblahblahblahblah/\n"
    "https://regex101.co.uk/blahblahblahblahblah/blahblahblahblahblah\n"
    "https://www.regex101.co.uk/blahblahblahblahblah/blahblahblahblahblah/\n"
    "https://www.regex101.co.uk/blahblahblahblahblah/blahblahblahblahblah\n\n"
    "http://regex101.de/\n"
    "http://regex101.de\n"
    "http://www.regex101.de/\n"
    "http://www.regex101.de\n\n"
    "https://regex101.de/\n"
    "https://regex101.de\n"
    "https://www.regex101.de/\n"
    "https://www.regex101.de\n\n"
    "http://regex101.de/blahblahblahblahblah/\n"
    "http://regex101.de/blahblahblahblahblah\n"
    "http://www.regex101.de/blahblahblahblahblah/\n"
    "http://www.regex101.de/blahblahblahblahblah\n\n"
    "https://regex101.de/blahblahblahblahblah/\n"
    "https://regex101.de/blahblahblahblahblah\n"
    "http://www.regex101.de/blahblahblahblahblah/\n"
    "http://www.regex101.de/blahblahblahblahblah\n\n"
    "http://regex101.de/blahblahblahblahblah/blahblahblahblahblah/\n"
    "http://regex101.de/blahblahblahblahblah/blahblahblahblahblah\n"
    "http://www.regex101.de/blahblahblahblahblah/blahblahblahblahblah/\n"
    "http://www.regex101.de/blahblahblahblahblah/blahblahblahblahblah\n\n"
    "https://regex101.de/blahblahblahblahblah/blahblahblahblahblah/\n"
    "https://regex101.de/blahblahblahblahblah/blahblahblahblahblah\n"
    "https://www.regex101.de/blahblahblahblahblah/blahblahblahblahblah/\n"
    "https://www.regex101.de/blahblahblahblahblah/blahblahblahblahblah\n")

# MULTILINE makes ^ and $ anchor at every line of test_str; IGNORECASE makes
# the letter ranges and literals case-insensitive.
matches = re.finditer(regex, test_str, re.MULTILINE | re.IGNORECASE)

# Report every match, then each of its capture groups (numbered from 1).
# A group that did not participate is reported with position -1 and the
# value None, which is what match.start()/end()/group() return for it.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html