# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for a single CVE ID occupying a whole line (use with re.MULTILINE
# when scanning multi-line text).
#   group 1 - year: 1999, or any four-digit year starting with 2
#   group 2 - sequence number, one of:
#       * exactly four digits with a leading zero, 0001-0999; the
#         (?!000) lookahead rejects only 0000, so values ending in zero
#         such as 0010 or 0100 are accepted
#       * four or more digits with no leading zero
# Explicit [0-9] classes are used instead of \d because \d on a text pattern
# also matches non-ASCII Unicode digits.
regex = r"^CVE-(1999|2[0-9]{3})-(0(?!000)[0-9]{3}|[1-9][0-9]{3,})$"
# Test corpus scanned by the regex, one candidate ID (or '#' comment) per line.
# It starts with one extra invalid ID (CVE-2001-0000) and then carries two
# sections of CVE ID syntax test data attributed to MITRE in the text itself:
# a "VALID SYNTAX" section (containing a single deliberately invalid ID) and a
# "SYNTAX VIOLATIONS" section (containing a single deliberately valid ID).
# Built from adjacent string literals, so the whole thing is one str.
test_str = ("#This one is not valid but wasn't covered by the test cases provided by MITRE\n"
"CVE-2001-0000\n\n"
"# This file contains test data for implementations to verify that\n"
"# CVE IDs are properly parsed and handled to conform with the\n"
"# 2014 CVE ID Syntax change.\n"
"#\n"
"# About this test data: README-tests.txt\n"
"# More info: http://cve.mitre.org/cve/identifiers/syntaxchange.html\n"
"#\n"
"# ****** VALID SYNTAX ******\n"
"#\n"
"# Publicly-referenced IDs for the new syntax (formerly \"Option B\")\n"
"#\n"
"CVE-2014-0001\n"
"CVE-2014-0999\n"
"CVE-2014-1234\n"
"CVE-2014-3127\n"
"CVE-2014-9999\n"
"CVE-2014-10000\n"
"CVE-2014-54321\n"
"CVE-2014-99999\n"
"CVE-2014-100000\n"
"CVE-2014-123456\n"
"CVE-2014-456132\n"
"CVE-2014-999999\n"
"CVE-2014-1000000\n"
"CVE-2014-1234567\n"
"CVE-2014-7654321\n"
"CVE-2014-9999999\n"
"#\n"
"# Invalid ID. This is the only invalid ID in this file, and it's\n"
"# intended to help spot incorrect tests that mistakenly accept all\n"
"# inputs. See README.\n"
"#\n"
"CVE-ABCD-EFGH\n"
"#\n"
"# These are valid but could cause problems if IDs are stored in bytes\n"
"# due to numeric overflows (stranger things have happened).\n"
"#\n"
"CVE-2014-16385\n"
"CVE-2014-32769\n"
"CVE-2014-65537\n"
"CVE-2014-131073\n"
"#\n"
"# unusually large number of trailing zeros\n"
"#\n"
"CVE-2014-100000000\n"
"#\n"
"# storing CVE number portion as 32-bit signed integer (seen in at\n"
"# least one real-world implementation)\n"
"#\n"
"CVE-2014-2147483647\n"
"CVE-2014-2147483648\n"
"#\n"
"# storing CVE number portion as 32-bit unsigned integer (possibly seen\n"
"# in at least one real-world implementation)\n"
"#\n"
"CVE-2014-4294967295\n"
"CVE-2014-4294967296\n"
"#\n"
"# storing CVE ID string in a fixed-length 32-byte buffer, with or\n"
"# without a required trailing '\\0' character\n"
"#\n"
"CVE-2014-1111111111111111111111\n"
"CVE-2014-11111111111111111111111\n"
"CVE-2014-111111111111111111111111\n\n"
"####################################################################\n\n"
"# This file contains test data for implementations to verify that\n"
"# CVE IDs are properly parsed and handled to conform with the\n"
"# 2014 CVE ID Syntax change.\n"
"#\n"
"# About this test data: README-tests.txt\n"
"# More info: http://cve.mitre.org/cve/identifiers/syntaxchange.html\n"
"#\n"
"#\n"
"# ****** SYNTAX VIOLATIONS ******\n"
"#\n"
"# Option A syntax from early 2013 - option not chosen. These might look\n"
"# good at first glance, but have leading 0's with more than 4 digits.\n"
"#\n"
"CVE-2014-000001\n"
"CVE-2014-009999\n"
"CVE-2014-000001\n"
"CVE-2014-000999\n"
"CVE-2014-001234\n"
"CVE-2014-009999\n"
"CVE-2014-010000\n"
"CVE-2014-054321\n"
"CVE-2014-099999\n"
"#\n"
"# Option A' syntax - modified Option A for second vote - option not chosen.\n"
"# Similar to original Option A, there are leading 0's with more than 4 digits.\n"
"#\n"
"CVE-2014-00000001\n"
"CVE-2014-00000999\n"
"CVE-2014-00001234\n"
"CVE-2014-00009999\n"
"CVE-2014-00010000\n"
"CVE-2014-00123456\n"
"CVE-2014-01234567\n"
"#\n"
"# Option C syntax from early 2013 - option not chosen\n"
"#\n"
"CVE-2014-1-8\n"
"CVE-2014-999-3\n"
"CVE-2014-1234-3\n"
"CVE-2014-9999-3\n"
"CVE-2014-10000-8\n"
"CVE-2014-54321-5\n"
"CVE-2014-123456-5\n"
"CVE-2014-999999-5\n"
"CVE-2014-1234567-4\n"
"#\n"
"# Intentionally valid ID. This is the only valid ID in this file, and\n"
"# it's intended to help spot incorrect tests that mistakenly reject\n"
"# all inputs. See README.\n"
"#\n"
"CVE-2014-1234\n"
"#\n"
"# Miscellaneous examples used during discussion of syntax\n"
"#\n"
"CVE-YYYY-NNNN\n"
"CVE-YYYY-NNNNN\n"
"CVE-YYYY-NNNNNN\n"
"#\n"
"# Loose extraction assuming only CVE prefix and two alphanumerics\n"
"# separated by hyphens\n"
"#\n"
"CVE-SRC-OHA\n"
"CVE-2AAA-3BBB\n"
"#\n"
"# Missing sequence number / invalid year\n"
"#\n"
"CVE-114\n"
"CVE-73\n"
"#\n"
"# Malformed sequence number\n"
"#\n"
"CVE-2014-789\n"
"CVE-2014-\n"
"CVE-2014-9\n"
"CVE-2014-98\n"
"#\n"
"# leading 0's - prohibited except for 999 and less (i.e., \"0001\"\n"
"# through \"0999\"\n"
"#\n"
"CVE-2015-010000\n"
"CVE-2015-09999\n"
"CVE-2014-00001\n"
"#\n"
"# CR/LF in middle of ID\n"
"#\n"
"CVE-2014-\n"
"1234\n"
"CVE-2014\n"
"-1234\n"
"CVE-201\n"
"4-1235\n"
"#\n"
"# no year provided\n"
"#\n"
"CVE-3153\n"
"#\n"
"# position-oriented (assume columns 5 through 8 are year). The first one\n"
"# is a real-world conversion error by CVE code (oops).\n"
"#\n"
"CVE- 14-1236\n"
"CVE-AAAA-1237\n"
"#\n"
"# missing/invalid \"CVE-\" prefix\n"
"#\n"
"C-2014-1238\n"
"2014-1240\n"
"CVE:2014-1241\n"
"CVE 2014 1242\n"
"#\n"
"# invalid year\n"
"#\n"
"CVE-201-0771\n"
"CVE-14-1239\n"
"CVE-20132-0169\n"
"#\n"
"# Odd stuff straight from CVE web logs (thanks, random anonymous\n"
"# people!). Includes some real-world typos or, in some cases,\n"
"# security-related IDs that utilize portions of the CVE ID.\n"
"#\n"
"2013\n"
"0497\n"
"2010-270\n"
"2013-199\n"
"2013-6XXX\n"
"CVE2014-0591\n"
"CVE:13-7108\n"
"CVE-XXXX-XXXX\n"
"CVE-TODO\n"
"1421010/13\n"
"CVE20076753\n"
"CVE:2013-4547\n"
"(CVE-2013-136\n"
"CVE - 2006 - 0788\n"
"CVE-2008-600\n"
"199-0618\n"
"CVE-199-0618\n"
"CA-2003-16\n"
"# URL-encoded\n"
"+CVE+-+2006+-+0788\n"
"CVE-2013%2D4345\n"
"CVE -20093103\n"
"CVE-'2014-1610\n"
"CVE--2009-3555\n"
"CVE-1999-077\n"
"CVE-2006.1737\n"
"CVE-20076-4704\n"
"CVE-2010--0281\n"
"CVE-2010-\n"
"CVE-2013-*\n"
"CVE-2013-167`\n"
"CVE-2013-00XX\n"
"CVE-2013--4339\n"
"CVE-2013-****\n"
"CVE-2013-3.893\n"
"CVE-CVE:2013-4883\n"
"CVE-CVE-2013-4883\n"
"CVE2010-3333.J\n"
"2013-A-0196\n"
"CVE-2013-A-0196\n"
"#\n"
"# common shorthand for multiple IDs\n"
"#\n"
"CVE-2007-{4352,5392,5393}\n"
"CVE:2012-0013\n"
"CVE_2013-7063\n"
"E-2011-3192\n"
"EXPLOIT-CVE2013-2465\n"
"VE-2012-0158\n"
"VE-2013-5875C\n"
"ZDI-12-170\n"
"CVE-YYYY-XXXX\n"
"CVE-2012=1234\n"
"#\n"
"# these originated in late 1999/early 2000 era\n"
"#\n"
"GENERIC-MAP-NOMATCH\n"
"CVE-MAP-NOMATCH\n"
"CVE-NO-MATCH\n"
"CVE-NO-NAME\n"
"CVE-NONE-0662\n"
"#\n"
"# Arbitrary 13-character string\n"
"#\n"
"ABCDEFGHIJKLM\n"
"#\n"
"# NOCVE identifiers, e.g., http://cs.coresecurity.com/core-impact-pro/exploits?page=11\n"
"#\n"
"NOCVE-9999-54104\n"
"NOCVE-9999-46110\n"
"CVE-9999-1\n"
"CVE-9999-11\n"
"CVE-9999-111\n"
"#\n"
"# erroneous attempts to convert certain homoglyphs / Unicode to 7-bit\n"
"# ASCII\n"
"CVE?2014?0001\n"
"#\n"
"# mashups of CVEs and telephone numbers\n"
"#\n"
"CVE-555-1212\n"
"CVE-800-555-1212\n"
"CVE-1-800-555-1212\n"
"#\n"
"# mashups of CVEs and Jenny\n"
"#\n"
"CVE-867-5309\n"
"CVE-867-5309(1981)\n"
"#\n"
"# extraneous spaces (very common in disclosures from multiple sources)\n"
"#\n"
"CVE-2014- 0001\n"
"CVE- 2014-0001\n"
"CVE- 2014- 0001\n"
"CVE-2014- 13001\n"
"CVE- 2014-13001\n"
"CVE- 2014- 13001\n"
"#\n"
"# non-dash format (widely used by IBM ISS X-Force, e.g., the http://xforce.iss.net/xforce/xfdb/89235 page)\n"
"#\n"
"CVE20140001\n"
"cve20140001\n"
"CVE201413001\n"
"cve201413001\n"
"#\n"
"# traditional VUPEN style - which happens to match CVE except for the\n"
"# \"ADV-\" prefix instead of \"CVE-\"\n"
"#\n"
"ADV-2006-0001\n"
"#\n"
"# exploit-db.com format\n"
"#\n"
"CVE: 2014-0001\n"
"CVE: 2014-13001\n"
"#\n"
"# OSVDB format\n"
"#\n"
"CVE ID: 2014-0001\n"
"CVE ID: 2014-13001\n"
"2014-0001\n"
"2014-13001\n"
"#\n"
"# results of bad global search/replace of CVE with CVE®\n"
"# (registered trademark symbol)\n"
"#\n"
"CVE®-2014-0001\n"
"#\n"
"# attempts at XML conversion\n"
"#\n"
"<CVE>-2014-0001\n"
"<CVE>2014-0001\n"
"<CVE>2014-0001</CVE>\n"
"<CVE>2014-0001</>\n"
"#\n"
"# attempts at JSON conversion\n"
"#\n"
"\"CVE\": \"2014-0001\"\n"
"\"cve\": \"2004-0001\"\n"
"\"CVE\":\"2014-0001\"\n"
"\"cve\":\"2004-0001\"\n"
"#\n"
"# use of the letter 'O' instead of the number '0'\n"
"#\n"
"CVE-2014-OOO1\n"
"CVE-2O14-0001\n"
"#\n"
"# use of the letter 'l' instead of the number '1'\n"
"#\n"
"CVE-2014-000l\n"
"CVE-20l4-0001\n"
"#\n"
"# regular expressions or various other groupings\n"
"#\n"
"CVE-2014-130[12]\n"
"CVE-[0-9]{4}-[0-9]{4}\n"
"CVE-[0-9]{4,}-[0-9]{4,}\n"
"# \"sticky\" keyboards\n"
"#\n"
"CVEE-2014-0001\n"
"CVEEEEEEE-2014-0001\n"
"# attempts at plurals\n"
"#\n"
"CVEs-2014-0001 and 2014-0002\n"
"# misplaced organizational specifiers\n"
"#\n"
"CVE[MITRE]-2014-0001\n"
"CVE[Mitre]-2014-0001\n"
"# confusion with National Vulnerability Database\n"
"#\n"
"NVD-2014-0001\n"
"# confusion with Defense Vulnerability Database\n"
"#\n"
"DVD-2014-0001\n"
"# confusion with other organizations\n"
"#\n"
"CERT-2014-0001\n"
"JVN-2014-0001\n"
"JVNDB-2014-000001\n"
"# intraword footnotes\n"
"#\n"
"CVE[1]-2014-0001\n"
"CVE*-2014-0001\n"
"CVE**-2014-0001\n"
"# Literal tab character.\n"
"#\n"
"CVE 2014-0001\n"
"# erroneous generation of a -1 value\n"
"#\n"
"CVE-2014--1\n"
"# erroneous generation of a zero value\n"
"#\n"
"CVE-2014-0\n"
"# ordering confusion\n"
"#\n"
"2014-0001-CVE\n"
"# this is technically valid syntax, but since the year can never be before\n"
"# 1999, this could be rejected based on CVE \"business rules\".\n"
"CVE-0001-2014\n"
"# wildcards or meta-expressions\n"
"#\n"
"CVE-2014-*\n"
"CVE-2014-####\n"
"CVE-2014-****\n"
"CVE-2014-?\n"
"CVE-2014-????\n"
"CVE-2014*\n"
"CVE-2014?\n"
"# extraneous dashes\n"
"#\n"
"CVE-2014--0001\n"
"CVE--2014-0001\n"
"# typos of dash\n"
"#\n"
"CVE=2014=0001\n"
"CVE0201400001\n"
"# various uncategorized examples\n"
"#\n"
"CVE_2014_0001\n"
"CVE-ID-2014-0001\n"
"CVEID-2014-0001\n"
"CVE#2014-0001\n"
"CVE# 2014-0001\n"
"CVEID#2014-0001\n"
"CVEID# 2014-0001\n"
"CVE-ID#2014-0001\n"
"CVE-ID# 2014-0001\n"
"CVE#2014-0001\n"
"CVE# 2014-0001\n"
"CEV-2014-0001\n"
"VCE-2014-0001\n"
"VEC-2014-0001\n"
"CWE-2014-0001\n"
"CPE-2014-0001\n"
"CME-2014-0001\n"
"CE-2014-0001\n"
"VE-2014-0001\n"
"E-2014-0001\n"
"-2014-0001\n"
"CVE-2014-000{1,2}\n"
"CVE/MITRE-2014-0001\n")
# Scan the corpus for CVE IDs. re.MULTILINE makes ^ and $ anchor at every
# line boundary, so each line of test_str is checked on its own.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Report the whole match (group 0) with its character offsets in test_str.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Capture groups are numbered from 1; group 0 is the whole match, which
    # was already reported above.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html