// Matches `(* ... *)` comments, including comments nested up to three levels
// deep (the p{3} pattern derived step by step in the sample text below).
// Every group in the pattern is non-capturing, so a match only ever exposes
// group 0 (the full matched comment).
const regex = /\(+\*+(?:(?:\(+\*+(?:(?:\(+\*+(?:[^*(]|(?:\*+[^)*])|(?:\(+[^*(]))*\*+\)+)|[^*(]|(?:\*+[^)*])|(?:\(+[^*(]))*\*+\)+)|[^*(]|(?:\*+[^)*])|(?:\(+[^*(]))*\*+\)+/g;

// Sample input: a write-up of how the pattern is built, with example comments
// embedded in it. Backslashes are doubled so the template literal contains
// single literal backslashes.
const str = `The following regex handles any (* *) style string. Let's call it p{1}
p{1} = \\(+\\*+(?:[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+
The targeted string can include the characters {*, (, )}, but not the character sequences "(*" or "*)" (on which the regex terminates).
Examples:
(* simple one *)
(* more * ) ( * () **( difficult * ))()*(( one *)
(* r *( e*a) () * * l ()* * ( ) ) l * ( ) ) * **( y bad *)
Explanation:
\\(+\\*+ # begin with a (* sequence (or some variation like ((* or ((**, etc.)
(?: # begin comment content
[^*(] # allow any non-* non-( characters (which begin open/close brackets)
| # also
(?:\\*+[^)*]) # allow * 1+ times in a row ONLY if it's not immediately followed by ) or *
| # also
(?:\\(+[^*(]) # allow ( 1+ times in a row ONLY if it's not immediately followed by * or (
)* # allow any number of these characters / character sequences
\\*+\\)+ # then close the comment with a *) (or some variation like **) or *))), etc.)
To capture un-nested and nested comments, simply allow comments inside of comments. ie:
p{2} = \\(+\\*+(?:(?:p{1})|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+
in other words
p{2} = \\(+\\*+(?:(?:\\(+\\*+(?:[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+)|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+
You'll see that this works on the following examples:
(* test (* one *) *)
(* a bit * ( ) * harder (* ) * () (( *) *)
(* r *( (( e **( a ) ((* l *() l *) y * bad * ( *)
To capture up to triply-nested comments, follow the pattern set by p{2}:
p{3} = \\(+\\*+(?:(?:p{2})|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+
p{3} = \\(+\\*+(?:(?:\\(+\\*+(?:(?:\\(+\\*+(?:[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+)|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+)|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+
Examples:
(* an (* easy (* one *) *) *)
(* only (* some (* levels (* captured *) because *) 4x *) nested *)
The pattern can be followed to allow any depth of nested comments to be captured, by defining
p{N} = \\(+\\*+(?:(?:p{N-1})|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+
for N > 1
`;

// Walk every match in order. `matchAll` iterates over an internal copy of the
// regex, so the shared `regex.lastIndex` is left untouched and no manual
// zero-width-match bookkeeping is needed.
for (const found of str.matchAll(regex)) {
  // Index 0 is the whole match; any further entries would be capture groups.
  for (const [groupIndex, match] of found.entries()) {
    console.log(`Found match, group ${groupIndex}: ${match}`);
  }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for JavaScript, please visit: https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions