import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Auto-generated regex demo: compiles one large pattern, runs it once against an
 * embedded C++ snippet, and prints the full match followed by every capture group.
 *
 * <p>NOTE(review): the pattern text is written in a PCRE dialect. It relies on a
 * {@code (?(DEFINE)...)} block, {@code (?&name)} subroutine calls, conditional groups
 * such as {@code (?(matched)|(?!))} and duplicate group names. The
 * {@code java.util.regex.Pattern} documentation lists none of these constructs, so
 * {@code Pattern.compile} is expected to reject the pattern with a
 * {@code PatternSyntaxException} before any matching happens. Confirm before relying
 * on the printed output.
 *
 * <p>NOTE(review): occurrences of {@code (?¶ms)} inside the pattern look like an
 * entity-decoding artifact of {@code (?&params)}; they are reproduced unchanged here.
 */
public class Example {
    /**
     * Builds the pattern and the sample input, performs a single {@code find()}, and
     * writes the match and its groups to standard output.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        // Pattern source. The leading '#' line is skipped as a comment under Pattern.COMMENTS.
        final String patternSource = "#OxJm)\n"
            + " (?(DEFINE)\n"
            + " # Tokens that act as one unit.\n"
            + " (?<rem>\n"
            + " /\\* (?:[^*]++|\\*(?!/))*+ \\*/ # block comment\n"
            + " | //[^\\r\\n]++(?:\\r?+\\n?+|$) # line comment\n"
            + " )\n"
            + " (?<squote> '(?:[^'\\\\]++|\\\\.)*+')\n"
            + " (?<dquote> \"(?:[^\"\\\\]++|\\\\.)*+\")\n"
            + " (?<rquote>R \"(?<delim>[^(]*+)\\((?:[^)]++|\\)(?!\\k\\{delim\\}\"))*+\\)\\k\\{delim\\}\")\n"
            + " (?<string_literal>\n"
            + " (?<string_literal_helper>(?&squote)|(?&dquote)|(?&rquote))\n"
            + " (?:\\s*+(?&string_literal_helper))*+\n"
            + " )\n"
            + " (?<int_literal>[-+]?+\\s*+\n"
            + " (?:\n"
            + " 0[xX](?<hex>[a-fA-F\\d]++ (?:'(?&hex))?+)\n"
            + " | 0[bB](?<bin> [01]++ (?:'(?&bin))?+)\n"
            + " | 0 (?<oct> [0-7]++ (?:'(?&oct))?+)\n"
            + " | (?<dec> \\d++ (?:'(?&dec))?+)\n"
            + " )\n"
            + " (?:[uU](?:ll|LL|[lLzZ])?+)?+\n"
            + " )\n"
            + " (?<float_literal>[-+]?+\\s*+\n"
            + " (?:\n"
            + " 0[xX] (?&hex)\n"
            + " (?: \\.(?&hex) (?<matched>))?+\n"
            + " (?:[pP][-=]?+(?&hex) (?<matched>))?+\n"
            + " (?(matched)|(?!)) # at least one of the above must match\n"
            + " | (?&dec)\n"
            + " (?: \\.(?&dec) (?<matched>))?+\n"
            + " (?:[eE][-=]?+(?&dec) (?<matched>))?+\n"
            + " (?(matched)|(?!)) # at least one of the above must match\n"
            + " )\n"
            + " (?:f16|f32|f64|f128|bf16|F16|F32|F64|F128|BF16|[flFL])?+\n"
            + " )\n"
            + " (?<literal> (?&string_literal) | (?&float_literal) | (?&int_literal) )\n\n"
            + " (?<token>(?&literal) | (?&rem))\n"
            + " (?<params> \\((?:(?&token)|[^()] | (?¶ms))*+\\))\n"
            + " (?<tparams> <(?:(?&token)|[^<>()]|(?&tparams) |(?¶ms))*+> )\n"
            + " (?<brace_list> \\{(?:(?&token)|[^\\{\\}()]|(?&brace_list)|(?¶ms))*+\\})\n"
            + " (?<index> \\[(?:(?&token)|[^[\\]] |(?&index) )*+\\])\n"
            + " (?<lambda_capture> (?&index)) # not the same but this will still match it correctly\n\n"
            + " # a simple symbol\n"
            + " (?<symbol>[a-zA-Z_][a-zA-Z\\d_]*+)\n"
            + " # symbols used for operator overloading\n"
            + " (?<operator_symbol>\n"
            + " operator\\s*+\n"
            + " (?:\n"
            + " &&|\\|\\||\\[\\]|\\(\\)|\\+\\+|--|\"\"|<=>|->\\*?+|,|(?:new|delete)(?:\\s*+(?:\\[\\]))?+\n"
            + " | (?:<<|>>|[-+~!%^&*<=>])=?+\n"
            + " | (?&type)\n"
            + " )\n"
            + " )\n\n"
            + " (?<scoped_type_symbol_name>\n"
            + " (?:::\\s*+)?+\n"
            + " (?<scoped_type_symbol_name_helper>\n"
            + " (?:template(?<template_keyword_matched>)\\s++)?+\n"
            + " (?&symbol)\\s*+\n"
            + " # Check for template parameters\n"
            + " (?(template_keyword_matched)\n"
            + " # MANDITORY if template keyword is used.\n"
            + " (?:(?&tparams)\\s*+)\n"
            + " | # OPTIONAL if template keyword is omitted.\n"
            + " # TODO: Look into possible edge cases where this might\n"
            + " # not work as expected.\n"
            + " (?:(?&tparams)\\s*+)?+ \n"
            + " )\n"
            + " (?:\\s*+::\\s*+(?&scoped_type_symbol_name_helper))?+\n"
            + " )\n"
            + " )\n\n"
            + " (?<scoped_value_symbol_name>\n"
            + " (?:::\\s*+)?+\n"
            + " (?<scoped_value_symbol_name_helper>\n"
            + " (?:template(?<template_keyword_matched>)\\s++)?+\n"
            + " (?:(?&operator_symbol)(?<operator_symbol_matched>)|(?&symbol))\\s*+\n"
            + " # Check for template parameters\n"
            + " (?(template_keyword_matched)\n"
            + " # MANDITORY if template keyword is used.\n"
            + " (?:(?&tparams)\\s*+)\n"
            + " | # OPTIONAL if template keyword is omitted.\n"
            + " # TODO: Look into possible edge cases where this might\n"
            + " # not work as expected.\n"
            + " (?:(?&tparams)\\s*+)?+ \n"
            + " )\n"
            + " (?(operator_symbol_matched)\n"
            + " # Operator symbol is at deepest scope.\n"
            + " | # If didn't match operator symbol, then see if there is a deeper scope\n"
            + " (?:\\s*+::\\s*+(?&scoped_value_symbol_name_helper))?+\n"
            + " )\n"
            + " )\n"
            + " )\n\n"
            + " # types\n"
            + " (?<cv>\n"
            + " const \\b(?:\\s++volitile\\b)?+\n"
            + " | volitile\\b(?:\\s++const \\b)?+\n"
            + " )\n"
            + " (?<user_type>\n"
            + " (?:typename\\s*+)?+\n"
            + " (?&scoped_type_symbol_name)\n"
            + " )\n\n"
            + " (?<type>\n"
            + " # not going to do function pointers yet\n"
            + " (?:(?&cv)\\s++)?+\n"
            + " (?:auto\\b\n"
            + " # Yes, this will capture illegal types such as short char, but I'm ok with that.\n"
            + " # This is to help document, not validate the code!\n"
            + " | (?: (?:(?:un)?+signed) \\b (?<matched>))?+\n"
            + " (?:\\s*+ (?:short|long) \\b (?<matched>))?+\n"
            + " (?:\\s*+ (?:long) \\b (?<matched>))?+\n"
            + " (?:\\s*+ (?:int|char) \\b (?<matched>))?+\n"
            + " (?(matched)|(?!)) # at least one of the items above must have matched\n"
            + " | ((?:long\\s++)?+double|float)\\b\n"
            + " | decltype\\s*+(?¶ms)\n"
            + " | (?&user_type)\n"
            + " )\n"
            + " (?:\\s*+(?&cv))?+\n"
            + " # pointers\n"
            + " (?:\\s*+\\*\\s*+(?&cv)?+)*+\n"
            + " # reference\n"
            + " &?+\n"
            + " )\n\n"
            + " # expressions\n"
            + " (?<fn_called_or_var>\n"
            + " (?&scoped_value_symbol_name)\\s*+\n"
            + " (?¶ms)?+\n"
            + " )\n"
            + " (?<lambda>(?&lambda_capture)\\s*+(?¶ms)\\s*+(?&brace_list))\n"
            + " (?<evaluable>\n"
            + " (?:\\*\\s*+)*+ # dereference\n"
            + " (?:(?&fn_called_or_var)|(?&lambda)\\s*+(?¶ms)|(?¶ms)|(?&literal))\n"
            + " (?:\\s*+(?&index))*+\n"
            + " )\n"
            + " (?<binary_op> &&|<=>|<<|>>|\\|\\||[-+*/%|&<=>])\n"
            + " (?<unary_prefix_op> --|\\+\\+|[-+!~])\n"
            + " (?<unary_suffix_op> --|\\+\\+)\n\n"
            + " (?<expression>\n"
            + " (?:(?&unary_prefix_op)++\\s*+)?+(?&evaluable)(\\s*+(?&unary_suffix_op)++)?+\n"
            + " (?:\\s*+(?&binary_op)\\s*+(?&expression))*+\n"
            + " )\n"
            + " )\n"
            + " (?<whitespace>^[ \\t]*+)\n"
            + " (?:\n"
            + " # check for enum\n"
            + " enum\\s++(?<enum_name>(?&symbol))\n"
            + " | # check for template\n"
            + " (?:template\\s*+(?<template_parameters>(?&tparams)))?+\\s*+\n"
            + " (?:\n"
            + " # check for struct/class\n"
            + " (?:struct|class)\\s++(?<class_name>(?&symbol))\n"
            + " | # check for function or variable\n"
            + " (?<extern>extern\\s++)?+\n"
            + " (?<static>static\\s++)?+\n"
            + " (?<constexpr>constexpr\\s++)?+\n"
            + " (?<static>static\\s++)?+\n"
            + " (?<return_type>auto(?<auto_return_type>)|(?&type))\\s*+\n"
            + " (?<symbol_name>(?&scoped_value_symbol_name))\\s*+\n"
            + " (?<function_parameters>(?¶ms)\n"
            + " (?(auto_return_type)\n"
            + " # optional trailing return type\n"
            + " (?:\\s*+->\\s*+(?<return_type>(?&type)))?+\n"
            + " )\n"
            + " )?+\n"
            + " )\n"
            + " )\n";

        // Sample subject text: the start of a C++ function template definition.
        final String subject = "\n"
            + " template <typename ElementGenerator, typename StoredType, typename...Rest>\n"
            + " constexpr auto generate_tuple(\n"
            + " ElementGenerator&& element_generator, DiagnosticReportCode code, bool database_query, StoredType\n"
            + " , Rest&&...args\n"
            + " ) {\n"
            + " static_assert(!std::is_same<StoredType, DiagnosticReportCode>::value\n"
            + " ";

        // COMMENTS permits whitespace and '#' comments in the pattern; MULTILINE makes
        // '^' and '$' match at line boundaries.
        final int compileFlags = Pattern.COMMENTS | Pattern.MULTILINE;
        final Matcher hit = Pattern.compile(patternSource, compileFlags).matcher(subject);

        // Only the first match is reported; nothing is printed when there is none.
        if (!hit.find()) {
            return;
        }

        System.out.println("Full match: " + hit.group(0));
        int groupIndex = 1;
        while (groupIndex <= hit.groupCount()) {
            System.out.println("Group " + groupIndex + ": " + hit.group(groupIndex));
            groupIndex++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html