import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Runs a fixed regular expression for {@code (* ... *)} style comments over a
 * fixed sample text and prints every match to standard output.
 */
public class Example {

    /**
     * Pattern source for {@code (* ... *)} comments, allowing comments nested
     * inside comments (the form the sample text calls p{3}). It uses only
     * non-capturing groups.
     */
    private static final String COMMENT_REGEX =
            "\\(+\\*+(?:(?:\\(+\\*+(?:(?:\\(+\\*+(?:[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+)|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+)|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+";

    /** Text that is scanned; it is the explanatory write-up of the pattern itself. */
    private static final String SAMPLE_TEXT =
            "The following regex handles any (* *) style string. Let's call it p{1}\n\n"
            + " p{1} = \\(+\\*+(?:[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+\n\n"
            + "The targeted string can include the characters {*, (, )}, but not the character sequences \"(*\" or \"*)\" (on which the regex terminates).\n\n"
            + "Examples:\n\n"
            + " (* simple one *)\n"
            + " (* more * ) ( * () **( difficult * ))()*(( one *)\n"
            + " (* r *( e*a) () * * l ()* * ( ) ) l * ( ) ) * **( y bad *)\n\n"
            + "Explanation:\n\n"
            + " \\(+\\*+ # begin with a (* sequence (or some variation like ((* or ((**, etc.)\n"
            + " (?: # begin comment content\n"
            + " [^*(] # allow any non-* non-( characters (which begin open/close brackets)\n"
            + " | # also\n"
            + " (?:\\*+[^)*]) # allow * 1+ times in a row ONLY if it's not immediately followed by ) or *\n"
            + " | # also\n"
            + " (?:\\(+[^*(]) # allow ( 1+ times in a row ONLY if it's not immediately followed by * or (\n"
            + " )* # allow any number of these characters / character sequences\n"
            + " \\*+\\)+ # then close the comment with a *) (or some variation like **) or *))), etc.)\n\n"
            + "To capture un-nested and nested comments, simply allow comments inside of comments. ie:\n\n"
            + " p{2} = \\(+\\*+(?:(?:p{1})|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+\n\n"
            + "in other words\n\n"
            + " p{2} = \\(+\\*+(?:(?:\\(+\\*+(?:[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+)|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+\n\n"
            + "You'll see that this works on the following examples:\n"
            + " (* test (* one *) *)\n"
            + " (* a bit * ( ) * harder (* ) * () (( *) *)\n"
            + " (* r *( (( e **( a ) ((* l *() l *) y * bad * ( *)\n\n"
            + "To capture up to triply-nested comments, follow the pattern set by p{2}:\n\n"
            + " p{3} = \\(+\\*+(?:(?:p{2})|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+\n\n"
            + " p{3} = \\(+\\*+(?:(?:\\(+\\*+(?:(?:\\(+\\*+(?:[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+)|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+)|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+\n\n"
            + "Examples:\n"
            + " (* an (* easy (* one *) *) *)\n"
            + " (* only (* some (* levels (* captured *) because *) 4x *) nested *)\n\n"
            + "The pattern can be followed to allow any depth of nested comments to be captured, by defining\n\n"
            + " p{N} = \\(+\\*+(?:(?:p{N-1})|[^*(]|(?:\\*+[^)*])|(?:\\(+[^*(]))*\\*+\\)+\n\n"
            + "for N > 1\n\n";

    /**
     * Scans the sample text from left to right and prints each match as
     * {@code "Full match: ..."}, followed by one {@code "Group i: ..."} line
     * per capturing group of the pattern.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        final Matcher hits = Pattern.compile(COMMENT_REGEX).matcher(SAMPLE_TEXT);
        while (hits.find()) {
            System.out.println("Full match: " + hits.group(0));

            // The group count is a property of the pattern, so read it once per match.
            // This pattern has no capturing groups, so the loop below prints nothing.
            final int groupTotal = hits.groupCount();
            int groupIndex = 1;
            while (groupIndex <= groupTotal) {
                System.out.println("Group " + groupIndex + ": " + hits.group(groupIndex));
                groupIndex++;
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html