# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for an IPv6 address in text form, built from adjacent raw-string
# fragments (one per alternative) that Python concatenates at compile time.
# Group 1 is the address itself; the surrounding pieces only anchor it.
regex = (
    # Left boundary: start of line (with re.MULTILINE) or a whitespace char.
    r"(?:^|\s)("
    # Eight hextets, no compression.
    r"([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}"
    # One to seven hextets followed by a trailing "::".
    r"|([0-9a-fA-F]{1,4}:){1,7}:"
    # One to six "hextet:" groups followed directly by a final hextet.
    r"|([0-9a-fA-F]{1,4}:){1,6}[0-9a-fA-F]{1,4}"
    # "::" in the middle, with progressively fewer leading hextets and
    # progressively more trailing ones.
    r"|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}"
    r"|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}"
    r"|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}"
    r"|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}"
    r"|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})"
    # Leading "::" followed by one to seven hextets, or a bare "::".
    r"|:((:[0-9a-fA-F]{1,4}){1,7}|:)"
    # "fe80:" prefix with a "%zone" suffix.
    r"|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]+"
    # "::" (optionally "::ffff:" / "::ffff:0:") followed by a dotted quad.
    r"|::(ffff(:0{1,4})?:)?((25[0-5]|2[0-4]\d|1\d{0,2}|[1-9]?\d)\.){3}(25[0-5]|2[0-4]\d|1\d{0,2}|[1-9]?\d)"
    # One to four hextets, "::", then a dotted quad.
    r"|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|2[0-4]\d|1\d{0,2}|[1-9]?\d)\.){3}(25[0-5]|2[0-4]\d|1\d{0,2}|[1-9]?\d)"
    # Right boundary: whitespace or end of line must follow, and the final
    # \n then consumes a newline, so every match ends with a line break.
    r")(?=\s|$)\n"
)
# Sample input for the pattern: brace-delimited notes describing the expected
# outcome, each followed by the candidate address on its own line. Empty
# entries below are the blank separator lines; the text has no final newline.
test_str = "\n".join((
    "{1: Initial address, regex should say valid/match}",
    "2001:0db8:0000:0000:0000:ff00:0042:8329",
    "",
    "{2: After removing all leading zeroes, regex should say valid/match}",
    "2001:db8:0:0:0:ff00:42:8329",
    "",
    "{3: After omitting consecutive sections of zeroes, regex should say valid/match}",
    "2001:db8::ff00:42:8329",
    "",
    "{4: The loopback address, regex should say valid/match}",
    "0000:0000:0000:0000:0000:0000:0000:0001",
    "",
    "{5: The loopback address be abbreviated to ::1 by using both rules, regex should say valid/match}",
    "::1",
    "",
    "{6: This should be valid, regex should say valid/match}",
    "ABCD:ABCD:ABCD:ABCD:ABCD:ABCD:192.168.158.190",
    "",
    "{7: This should NOT be valid/match}",
    "::",
    "",
    "{These two formats allows IPv6 applications to communicate directly with IPv4 applications, regex should say valid/match}",
    "{8}",
    "0:0:0:0:0:ffff:192.1.56.10",
    "{9}",
    "::ffff:192.1.56.10/96",
    "",
    "{These next two formats are used for tunneling. It allows IPv6 nodes to communicate across an IPv4 infrastructure, regex should say valid/match}",
    "{10}",
    "0:0:0:0:0:0:192.1.56.10",
    "{11}",
    "::192.1.56.10/96",
    "",
    "{These 4 should be valid/match}",
    "{12}",
    "::FFFF:129.144.52.38",
    "{13}",
    "::129.144.52.38",
    "{14}",
    "::FFFF:d",
    "{15}",
    "1080:0:0:0:8:800:200C:417A",
    "",
    "{These 4 should NOT be valid/match}",
    "{16}",
    "::FFFF:d.d.d",
    "{17}",
    "::FFFF:d.d",
    "{18}",
    "::d.d.d",
    "{19}",
    "::d.d",
))
# Scan the sample text for every non-overlapping match of the pattern.
# IGNORECASE lets letters match in either case; MULTILINE makes ^ and $
# anchor at every line of the text rather than only at its two ends.
matches = re.finditer(regex, test_str, re.IGNORECASE | re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Report the overall match (group 0) with its character offsets.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group()))

    # Capture groups are numbered from 1. A group that took no part in the
    # match reports start/end of -1 and a value of None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html