import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates validating CVE identifiers against the 2014 CVE ID syntax with a
 * regular expression.
 *
 * <p>The sample input embedded in {@link #main(String[])} is scanned line by line
 * and every line that is, in its entirety, a well-formed CVE ID is printed together
 * with its captured year and sequence number.
 */
public class Example {

    /**
     * Regular expression for a single CVE ID occupying a whole line.
     *
     * <ul>
     *   <li>Group 1 (year): {@code 1999} or any four-digit year starting with {@code 2}.</li>
     *   <li>Group 2 (sequence number): either exactly four digits with a leading zero
     *       ({@code 0001} through {@code 0999}), or four or more digits without a
     *       leading zero.</li>
     * </ul>
     *
     * <p>The negative lookahead {@code (?!000)} rejects the all-zero sequence number
     * {@code 0000} while still accepting zero-padded numbers that end in {@code 0}
     * (for example {@code 0010} or {@code 0160}).
     */
    private static final String CVE_ID_REGEX =
            "^CVE-(1999|2\\d{3})-(0(?!000)\\d{3}|[1-9]\\d{3,})$";

    /**
     * Compiled form of {@link #CVE_ID_REGEX}. {@link Pattern#MULTILINE} makes
     * {@code ^} and {@code $} anchor at every line boundary of the input rather than
     * only at the start and end of the whole text.
     */
    private static final Pattern CVE_ID_PATTERN = Pattern.compile(CVE_ID_REGEX, Pattern.MULTILINE);

    /**
     * Scans the embedded sample data and prints each matching CVE ID followed by its
     * capture groups to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final String string = "#This one is not valid but wasn't covered by the test cases provided by MITRE\n"
                + "CVE-2001-0000\n\n"
                // Regression inputs: zero-padded sequence numbers whose last digit is 0.
                + "#These are valid (a 4-digit sequence number may end in 0) but weren't covered by the test cases provided by MITRE\n"
                + "CVE-2014-0010\n"
                + "CVE-2014-0160\n\n"
                + "# This file contains test data for implementations to verify that\n"
                + "# CVE IDs are properly parsed and handled to conform with the\n"
                + "# 2014 CVE ID Syntax change.\n"
                + "#\n"
                + "# About this test data: README-tests.txt\n"
                + "# More info: http://cve.mitre.org/cve/identifiers/syntaxchange.html\n"
                + "#\n"
                + "# ****** VALID SYNTAX ******\n"
                + "#\n"
                + "# Publicly-referenced IDs for the new syntax (formerly \"Option B\")\n"
                + "#\n"
                + "CVE-2014-0001\n"
                + "CVE-2014-0999\n"
                + "CVE-2014-1234\n"
                + "CVE-2014-3127\n"
                + "CVE-2014-9999\n"
                + "CVE-2014-10000\n"
                + "CVE-2014-54321\n"
                + "CVE-2014-99999\n"
                + "CVE-2014-100000\n"
                + "CVE-2014-123456\n"
                + "CVE-2014-456132\n"
                + "CVE-2014-999999\n"
                + "CVE-2014-1000000\n"
                + "CVE-2014-1234567\n"
                + "CVE-2014-7654321\n"
                + "CVE-2014-9999999\n"
                + "#\n"
                + "# Invalid ID. This is the only invalid ID in this file, and it's\n"
                + "# intended to help spot incorrect tests that mistakenly accept all\n"
                + "# inputs. See README.\n"
                + "#\n"
                + "CVE-ABCD-EFGH\n"
                + "#\n"
                + "# These are valid but could cause problems if IDs are stored in bytes\n"
                + "# due to numeric overflows (stranger things have happened).\n"
                + "#\n"
                + "CVE-2014-16385\n"
                + "CVE-2014-32769\n"
                + "CVE-2014-65537\n"
                + "CVE-2014-131073\n"
                + "#\n"
                + "# unusually large number of trailing zeros\n"
                + "#\n"
                + "CVE-2014-100000000\n"
                + "#\n"
                + "# storing CVE number portion as 32-bit signed integer (seen in at\n"
                + "# least one real-world implementation)\n"
                + "#\n"
                + "CVE-2014-2147483647\n"
                + "CVE-2014-2147483648\n"
                + "#\n"
                + "# storing CVE number portion as 32-bit unsigned integer (possibly seen\n"
                + "# in at least one real-world implementation)\n"
                + "#\n"
                + "CVE-2014-4294967295\n"
                + "CVE-2014-4294967296\n"
                + "#\n"
                + "# storing CVE ID string in a fixed-length 32-byte buffer, with or\n"
                + "# without a required trailing '\\0' character\n"
                + "#\n"
                + "CVE-2014-1111111111111111111111\n"
                + "CVE-2014-11111111111111111111111\n"
                + "CVE-2014-111111111111111111111111\n\n"
                + "####################################################################\n\n"
                + "# This file contains test data for implementations to verify that\n"
                + "# CVE IDs are properly parsed and handled to conform with the\n"
                + "# 2014 CVE ID Syntax change.\n"
                + "#\n"
                + "# About this test data: README-tests.txt\n"
                + "# More info: http://cve.mitre.org/cve/identifiers/syntaxchange.html\n"
                + "#\n"
                + "#\n"
                + "# ****** SYNTAX VIOLATIONS ******\n"
                + "#\n"
                + "# Option A syntax from early 2013 - option not chosen. These might look\n"
                + "# good at first glance, but have leading 0's with more than 4 digits.\n"
                + "#\n"
                + "CVE-2014-000001\n"
                + "CVE-2014-009999\n"
                + "CVE-2014-000001\n"
                + "CVE-2014-000999\n"
                + "CVE-2014-001234\n"
                + "CVE-2014-009999\n"
                + "CVE-2014-010000\n"
                + "CVE-2014-054321\n"
                + "CVE-2014-099999\n"
                + "#\n"
                + "# Option A' syntax - modified Option A for second vote - option not chosen.\n"
                + "# Similar to original Option A, there are leading 0's with more than 4 digits.\n"
                + "#\n"
                + "CVE-2014-00000001\n"
                + "CVE-2014-00000999\n"
                + "CVE-2014-00001234\n"
                + "CVE-2014-00009999\n"
                + "CVE-2014-00010000\n"
                + "CVE-2014-00123456\n"
                + "CVE-2014-01234567\n"
                + "#\n"
                + "# Option C syntax from early 2013 - option not chosen\n"
                + "#\n"
                + "CVE-2014-1-8\n"
                + "CVE-2014-999-3\n"
                + "CVE-2014-1234-3\n"
                + "CVE-2014-9999-3\n"
                + "CVE-2014-10000-8\n"
                + "CVE-2014-54321-5\n"
                + "CVE-2014-123456-5\n"
                + "CVE-2014-999999-5\n"
                + "CVE-2014-1234567-4\n"
                + "#\n"
                + "# Intentionally valid ID. This is the only valid ID in this file, and\n"
                + "# it's intended to help spot incorrect tests that mistakenly reject\n"
                + "# all inputs. See README.\n"
                + "#\n"
                + "CVE-2014-1234\n"
                + "#\n"
                + "# Miscellaneous examples used during discussion of syntax\n"
                + "#\n"
                + "CVE-YYYY-NNNN\n"
                + "CVE-YYYY-NNNNN\n"
                + "CVE-YYYY-NNNNNN\n"
                + "#\n"
                + "# Loose extraction assuming only CVE prefix and two alphanumerics\n"
                + "# separated by hyphens\n"
                + "#\n"
                + "CVE-SRC-OHA\n"
                + "CVE-2AAA-3BBB\n"
                + "#\n"
                + "# Missing sequence number / invalid year\n"
                + "#\n"
                + "CVE-114\n"
                + "CVE-73\n"
                + "#\n"
                + "# Malformed sequence number\n"
                + "#\n"
                + "CVE-2014-789\n"
                + "CVE-2014-\n"
                + "CVE-2014-9\n"
                + "CVE-2014-98\n"
                + "#\n"
                + "# leading 0's - prohibited except for 999 and less (i.e., \"0001\"\n"
                + "# through \"0999\"\n"
                + "#\n"
                + "CVE-2015-010000\n"
                + "CVE-2015-09999\n"
                + "CVE-2014-00001\n"
                + "#\n"
                + "# CR/LF in middle of ID\n"
                + "#\n"
                + "CVE-2014-\n"
                + "1234\n"
                + "CVE-2014\n"
                + "-1234\n"
                + "CVE-201\n"
                + "4-1235\n"
                + "#\n"
                + "# no year provided\n"
                + "#\n"
                + "CVE-3153\n"
                + "#\n"
                + "# position-oriented (assume columns 5 through 8 are year). The first one\n"
                + "# is a real-world conversion error by CVE code (oops).\n"
                + "#\n"
                + "CVE- 14-1236\n"
                + "CVE-AAAA-1237\n"
                + "#\n"
                + "# missing/invalid \"CVE-\" prefix\n"
                + "#\n"
                + "C-2014-1238\n"
                + "2014-1240\n"
                + "CVE:2014-1241\n"
                + "CVE 2014 1242\n"
                + "#\n"
                + "# invalid year\n"
                + "#\n"
                + "CVE-201-0771\n"
                + "CVE-14-1239\n"
                + "CVE-20132-0169\n"
                + "#\n"
                + "# Odd stuff straight from CVE web logs (thanks, random anonymous\n"
                + "# people!). Includes some real-world typos or, in some cases,\n"
                + "# security-related IDs that utilize portions of the CVE ID.\n"
                + "#\n"
                + "2013\n"
                + "0497\n"
                + "2010-270\n"
                + "2013-199\n"
                + "2013-6XXX\n"
                + "CVE2014-0591\n"
                + "CVE:13-7108\n"
                + "CVE-XXXX-XXXX\n"
                + "CVE-TODO\n"
                + "1421010/13\n"
                + "CVE20076753\n"
                + "CVE:2013-4547\n"
                + "(CVE-2013-136\n"
                + "CVE - 2006 - 0788\n"
                + "CVE-2008-600\n"
                + "199-0618\n"
                + "CVE-199-0618\n"
                + "CA-2003-16\n"
                + "# URL-encoded\n"
                + "+CVE+-+2006+-+0788\n"
                + "CVE-2013%2D4345\n"
                + "CVE -20093103\n"
                + "CVE-'2014-1610\n"
                + "CVE--2009-3555\n"
                + "CVE-1999-077\n"
                + "CVE-2006.1737\n"
                + "CVE-20076-4704\n"
                + "CVE-2010--0281\n"
                + "CVE-2010-\n"
                + "CVE-2013-*\n"
                + "CVE-2013-167`\n"
                + "CVE-2013-00XX\n"
                + "CVE-2013--4339\n"
                + "CVE-2013-****\n"
                + "CVE-2013-3.893\n"
                + "CVE-CVE:2013-4883\n"
                + "CVE-CVE-2013-4883\n"
                + "CVE2010-3333.J\n"
                + "2013-A-0196\n"
                + "CVE-2013-A-0196\n"
                + "#\n"
                + "# common shorthand for multiple IDs\n"
                + "#\n"
                + "CVE-2007-{4352,5392,5393}\n"
                + "CVE:2012-0013\n"
                + "CVE_2013-7063\n"
                + "E-2011-3192\n"
                + "EXPLOIT-CVE2013-2465\n"
                + "VE-2012-0158\n"
                + "VE-2013-5875C\n"
                + "ZDI-12-170\n"
                + "CVE-YYYY-XXXX\n"
                + "CVE-2012=1234\n"
                + "#\n"
                + "# these originated in late 1999/early 2000 era\n"
                + "#\n"
                + "GENERIC-MAP-NOMATCH\n"
                + "CVE-MAP-NOMATCH\n"
                + "CVE-NO-MATCH\n"
                + "CVE-NO-NAME\n"
                + "CVE-NONE-0662\n"
                + "#\n"
                + "# Arbitrary 13-character string\n"
                + "#\n"
                + "ABCDEFGHIJKLM\n"
                + "#\n"
                + "# NOCVE identifiers, e.g., http://cs.coresecurity.com/core-impact-pro/exploits?page=11\n"
                + "#\n"
                + "NOCVE-9999-54104\n"
                + "NOCVE-9999-46110\n"
                + "CVE-9999-1\n"
                + "CVE-9999-11\n"
                + "CVE-9999-111\n"
                + "#\n"
                + "# erroneous attempts to convert certain homoglyphs / Unicode to 7-bit\n"
                + "# ASCII\n"
                + "CVE?2014?0001\n"
                + "#\n"
                + "# mashups of CVEs and telephone numbers\n"
                + "#\n"
                + "CVE-555-1212\n"
                + "CVE-800-555-1212\n"
                + "CVE-1-800-555-1212\n"
                + "#\n"
                + "# mashups of CVEs and Jenny\n"
                + "#\n"
                + "CVE-867-5309\n"
                + "CVE-867-5309(1981)\n"
                + "#\n"
                + "# extraneous spaces (very common in disclosures from multiple sources)\n"
                + "#\n"
                + "CVE-2014- 0001\n"
                + "CVE- 2014-0001\n"
                + "CVE- 2014- 0001\n"
                + "CVE-2014- 13001\n"
                + "CVE- 2014-13001\n"
                + "CVE- 2014- 13001\n"
                + "#\n"
                + "# non-dash format (widely used by IBM ISS X-Force, e.g., the http://xforce.iss.net/xforce/xfdb/89235 page)\n"
                + "#\n"
                + "CVE20140001\n"
                + "cve20140001\n"
                + "CVE201413001\n"
                + "cve201413001\n"
                + "#\n"
                + "# traditional VUPEN style - which happens to match CVE except for the\n"
                + "# \"ADV-\" prefix instead of \"CVE-\"\n"
                + "#\n"
                + "ADV-2006-0001\n"
                + "#\n"
                + "# exploit-db.com format\n"
                + "#\n"
                + "CVE: 2014-0001\n"
                + "CVE: 2014-13001\n"
                + "#\n"
                + "# OSVDB format\n"
                + "#\n"
                + "CVE ID: 2014-0001\n"
                + "CVE ID: 2014-13001\n"
                + "2014-0001\n"
                + "2014-13001\n"
                + "#\n"
                + "# results of bad global search/replace of CVE with CVE®\n"
                + "# (registered trademark symbol)\n"
                + "#\n"
                + "CVE®-2014-0001\n"
                + "#\n"
                + "# attempts at XML conversion\n"
                + "#\n"
                + "<CVE>-2014-0001\n"
                + "<CVE>2014-0001\n"
                + "<CVE>2014-0001</CVE>\n"
                + "<CVE>2014-0001</>\n"
                + "#\n"
                + "# attempts at JSON conversion\n"
                + "#\n"
                + "\"CVE\": \"2014-0001\"\n"
                + "\"cve\": \"2004-0001\"\n"
                + "\"CVE\":\"2014-0001\"\n"
                + "\"cve\":\"2004-0001\"\n"
                + "#\n"
                + "# use of the letter 'O' instead of the number '0'\n"
                + "#\n"
                + "CVE-2014-OOO1\n"
                + "CVE-2O14-0001\n"
                + "#\n"
                + "# use of the letter 'l' instead of the number '1'\n"
                + "#\n"
                + "CVE-2014-000l\n"
                + "CVE-20l4-0001\n"
                + "#\n"
                + "# regular expressions or various other groupings\n"
                + "#\n"
                + "CVE-2014-130[12]\n"
                + "CVE-[0-9]{4}-[0-9]{4}\n"
                + "CVE-[0-9]{4,}-[0-9]{4,}\n"
                + "# \"sticky\" keyboards\n"
                + "#\n"
                + "CVEE-2014-0001\n"
                + "CVEEEEEEE-2014-0001\n"
                + "# attempts at plurals\n"
                + "#\n"
                + "CVEs-2014-0001 and 2014-0002\n"
                + "# misplaced organizational specifiers\n"
                + "#\n"
                + "CVE[MITRE]-2014-0001\n"
                + "CVE[Mitre]-2014-0001\n"
                + "# confusion with National Vulnerability Database\n"
                + "#\n"
                + "NVD-2014-0001\n"
                + "# confusion with Defense Vulnerability Database\n"
                + "#\n"
                + "DVD-2014-0001\n"
                + "# confusion with other organizations\n"
                + "#\n"
                + "CERT-2014-0001\n"
                + "JVN-2014-0001\n"
                + "JVNDB-2014-000001\n"
                + "# intraword footnotes\n"
                + "#\n"
                + "CVE[1]-2014-0001\n"
                + "CVE*-2014-0001\n"
                + "CVE**-2014-0001\n"
                + "# Literal tab character.\n"
                + "#\n"
                // The separator is spelled as an escape so the tab survives editors and reformatting.
                + "CVE\t2014-0001\n"
                + "# erroneous generation of a -1 value\n"
                + "#\n"
                + "CVE-2014--1\n"
                + "# erroneous generation of a zero value\n"
                + "#\n"
                + "CVE-2014-0\n"
                + "# ordering confusion\n"
                + "#\n"
                + "2014-0001-CVE\n"
                + "# this is technically valid syntax, but since the year can never be before\n"
                + "# 1999, this could be rejected based on CVE \"business rules\".\n"
                + "CVE-0001-2014\n"
                + "# wildcards or meta-expressions\n"
                + "#\n"
                + "CVE-2014-*\n"
                + "CVE-2014-####\n"
                + "CVE-2014-****\n"
                + "CVE-2014-?\n"
                + "CVE-2014-????\n"
                + "CVE-2014*\n"
                + "CVE-2014?\n"
                + "# extraneous dashes\n"
                + "#\n"
                + "CVE-2014--0001\n"
                + "CVE--2014-0001\n"
                + "# typos of dash\n"
                + "#\n"
                + "CVE=2014=0001\n"
                + "CVE0201400001\n"
                + "# various uncategorized examples\n"
                + "#\n"
                + "CVE_2014_0001\n"
                + "CVE-ID-2014-0001\n"
                + "CVEID-2014-0001\n"
                + "CVE#2014-0001\n"
                + "CVE# 2014-0001\n"
                + "CVEID#2014-0001\n"
                + "CVEID# 2014-0001\n"
                + "CVE-ID#2014-0001\n"
                + "CVE-ID# 2014-0001\n"
                + "CVE#2014-0001\n"
                + "CVE# 2014-0001\n"
                + "CEV-2014-0001\n"
                + "VCE-2014-0001\n"
                + "VEC-2014-0001\n"
                + "CWE-2014-0001\n"
                + "CPE-2014-0001\n"
                + "CME-2014-0001\n"
                + "CE-2014-0001\n"
                + "VE-2014-0001\n"
                + "E-2014-0001\n"
                + "-2014-0001\n"
                + "CVE-2014-000{1,2}\n"
                + "CVE/MITRE-2014-0001\n";

        final Matcher matcher = CVE_ID_PATTERN.matcher(string);
        while (matcher.find()) {
            // Group 0 is the whole matched line, i.e. the complete CVE ID.
            System.out.println("Full match: " + matcher.group(0));
            // Group 1 is the year, group 2 the sequence number.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
// If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html