// PCRE pattern that recognises the start of a C++ declaration at the beginning of a line.
//
// Delimiter: "@". Flags: x (free-spacing, "#" starts a comment inside the pattern),
// J (duplicate group names allowed - "matched", "static", "return_type" and
// "template_keyword_matched" are deliberately declared more than once) and
// m ("^" / "$" match at line boundaries).
//
// Layout:
//   * The DEFINE group only declares named sub-patterns that are invoked by name
//     elsewhere: comments, string/int/float literals, balanced (), <>, {} and []
//     groups, symbols, scoped names, types and expressions. It matches nothing itself.
//   * The part after DEFINE captures "whitespace" and then one of:
//       - enum_name,
//       - an optional template_parameters followed by class_name, or
//       - a function/variable declaration: extern, static, constexpr, return_type,
//         auto_return_type (empty marker group), symbol_name, function_parameters.
//
// Maintenance notes:
//   * The balanced-parentheses group is called as (?&params). Keep the ampersand
//     literal: HTML-entity decoding turns "&para" into a pilcrow, which PCRE rejects
//     at compile time.
//   * The empty "matched" groups combined with the (?(matched)|(?!)) conditionals
//     implement "at least one of the optional parts above must have matched".
//   * Exponent signs are [-+] and the cv-qualifier keyword is spelled "volatile".
$re = '@#OxJm)
(?(DEFINE)
# Tokens that act as one unit.
(?<rem>
/\* (?:[^*]++|\*(?!/))*+ \*/ # block comment
| //[^\r\n]++(?:\r?+\n?+|$) # line comment
)
(?<squote> \'(?:[^\'\\\\]++|\\\\.)*+\')
(?<dquote> "(?:[^"\\\\]++|\\\\.)*+")
(?<rquote>R "(?<delim>[^(]*+)\((?:[^)]++|\)(?!\k{delim}"))*+\)\k{delim}")
(?<string_literal>
(?<string_literal_helper>(?&squote)|(?&dquote)|(?&rquote))
(?:\s*+(?&string_literal_helper))*+
)
(?<int_literal>[-+]?+\s*+
(?:
0[xX](?<hex>[a-fA-F\d]++ (?:\'(?&hex))?+)
| 0[bB](?<bin> [01]++ (?:\'(?&bin))?+)
| 0 (?<oct> [0-7]++ (?:\'(?&oct))?+)
| (?<dec> \d++ (?:\'(?&dec))?+)
)
(?:[uU](?:ll|LL|[lLzZ])?+)?+
)
(?<float_literal>[-+]?+\s*+
(?:
0[xX] (?&hex)
(?: \.(?&hex) (?<matched>))?+
(?:[pP][-+]?+(?&hex) (?<matched>))?+
(?(matched)|(?!)) # at least one of the above must match
| (?&dec)
(?: \.(?&dec) (?<matched>))?+
(?:[eE][-+]?+(?&dec) (?<matched>))?+
(?(matched)|(?!)) # at least one of the above must match
)
(?:f16|f32|f64|f128|bf16|F16|F32|F64|F128|BF16|[flFL])?+
)
(?<literal> (?&string_literal) | (?&float_literal) | (?&int_literal) )
(?<token>(?&literal) | (?&rem))
(?<params> \((?:(?&token)|[^()] | (?&params))*+\))
(?<tparams> <(?:(?&token)|[^<>()]|(?&tparams) |(?&params))*+> )
(?<brace_list> \{(?:(?&token)|[^{}()]|(?&brace_list)|(?&params))*+\})
(?<index> \[(?:(?&token)|[^[\]] |(?&index) )*+\])
(?<lambda_capture> (?&index)) # not the same but this will still match it correctly
# a simple symbol
(?<symbol>[a-zA-Z_][a-zA-Z\d_]*+)
# symbols used for operator overloading
(?<operator_symbol>
operator\s*+
(?:
&&|\|\||\[\]|\(\)|\+\+|--|""|<=>|->\*?+|,|(?:new|delete)(?:\s*+(?:\[\]))?+
| (?:<<|>>|[-+~!%^&*<=>])=?+
| (?&type)
)
)
(?<scoped_type_symbol_name>
(?:::\s*+)?+
(?<scoped_type_symbol_name_helper>
(?:template(?<template_keyword_matched>)\s++)?+
(?&symbol)\s*+
# Check for template parameters
(?(template_keyword_matched)
# MANDITORY if template keyword is used.
(?:(?&tparams)\s*+)
| # OPTIONAL if template keyword is omitted.
# TODO: Look into possible edge cases where this might
# not work as expected.
(?:(?&tparams)\s*+)?+
)
(?:\s*+::\s*+(?&scoped_type_symbol_name_helper))?+
)
)
(?<scoped_value_symbol_name>
(?:::\s*+)?+
(?<scoped_value_symbol_name_helper>
(?:template(?<template_keyword_matched>)\s++)?+
(?:(?&operator_symbol)(?<operator_symbol_matched>)|(?&symbol))\s*+
# Check for template parameters
(?(template_keyword_matched)
# MANDITORY if template keyword is used.
(?:(?&tparams)\s*+)
| # OPTIONAL if template keyword is omitted.
# TODO: Look into possible edge cases where this might
# not work as expected.
(?:(?&tparams)\s*+)?+
)
(?(operator_symbol_matched)
# Operator symbol is at deepest scope.
| # If didn\'t match operator symbol, then see if there is a deeper scope
(?:\s*+::\s*+(?&scoped_value_symbol_name_helper))?+
)
)
)
# types
(?<cv>
const \b(?:\s++volatile\b)?+
| volatile\b(?:\s++const \b)?+
)
(?<user_type>
(?:typename\s*+)?+
(?&scoped_type_symbol_name)
)
(?<type>
# not going to do function pointers yet
(?:(?&cv)\s++)?+
(?:auto\b
# Yes, this will capture illegal types such as short char, but I\'m ok with that.
# This is to help document, not validate the code!
| (?: (?:(?:un)?+signed) \b (?<matched>))?+
(?:\s*+ (?:short|long) \b (?<matched>))?+
(?:\s*+ (?:long) \b (?<matched>))?+
(?:\s*+ (?:int|char) \b (?<matched>))?+
(?(matched)|(?!)) # at least one of the items above must have matched
| ((?:long\s++)?+double|float)\b
| decltype\s*+(?&params)
| (?&user_type)
)
(?:\s*+(?&cv))?+
# pointers
(?:\s*+\*\s*+(?&cv)?+)*+
# reference
&?+
)
# expressions
(?<fn_called_or_var>
(?&scoped_value_symbol_name)\s*+
(?&params)?+
)
(?<lambda>(?&lambda_capture)\s*+(?&params)\s*+(?&brace_list))
(?<evaluable>
(?:\*\s*+)*+ # dereference
(?:(?&fn_called_or_var)|(?&lambda)\s*+(?&params)|(?&params)|(?&literal))
(?:\s*+(?&index))*+
)
(?<binary_op> &&|<=>|<<|>>|\|\||[-+*/%|&<=>])
(?<unary_prefix_op> --|\+\+|[-+!~])
(?<unary_suffix_op> --|\+\+)
(?<expression>
(?:(?&unary_prefix_op)++\s*+)?+(?&evaluable)(\s*+(?&unary_suffix_op)++)?+
(?:\s*+(?&binary_op)\s*+(?&expression))*+
)
)
(?<whitespace>^[ \t]*+)
(?:
# check for enum
enum\s++(?<enum_name>(?&symbol))
| # check for template
(?:template\s*+(?<template_parameters>(?&tparams)))?+\s*+
(?:
# check for struct/class
(?:struct|class)\s++(?<class_name>(?&symbol))
| # check for function or variable
(?<extern>extern\s++)?+
(?<static>static\s++)?+
(?<constexpr>constexpr\s++)?+
(?<static>static\s++)?+
(?<return_type>auto(?<auto_return_type>)|(?&type))\s*+
(?<symbol_name>(?&scoped_value_symbol_name))\s*+
(?<function_parameters>(?&params)
(?(auto_return_type)
# optional trailing return type
(?:\s*+->\s*+(?<return_type>(?&type)))?+
)
)?+
)
)
@xJm';
// Sample subject: the beginning of a C++ function template whose definition is cut
// off inside its body. The literal starts and ends with a newline.
$str = '
template <typename ElementGenerator, typename StoredType, typename...Rest>
constexpr auto generate_tuple(
ElementGenerator&& element_generator, DiagnosticReportCode code, bool database_query, StoredType
, Rest&&...args
) {
static_assert(!std::is_same<StoredType, DiagnosticReportCode>::value
';

// Search $str once from offset 0. PREG_OFFSET_CAPTURE makes every entry of $matches
// a [text, byte offset] pair instead of a plain string.
$matchCount = preg_match($re, $str, $matches, PREG_OFFSET_CAPTURE, 0);
if ($matchCount === false) {
    // preg_match() returns false on failure. Runtime PCRE failures (for example a
    // backtrack or JIT stack limit) are otherwise silent, so report the error code.
    trigger_error(
        'preg_match() failed with PCRE error code ' . preg_last_error(),
        E_USER_WARNING
    );
}

// Print the entire match result
var_dump($matches);
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for PHP, please visit: http://php.net/manual/en/ref.pcre.php