import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small command-line demo that runs one regular expression over a fixed block of
 * sample text and prints every match together with its capture groups.
 */
public class Example {

    /**
     * Expression under test. It matches the literal {@code educacao/busca}, an optional
     * hyphen, and then up to three optional segments separated by optional slashes:
     * <ol>
     *   <li>group 1 - first run of word characters, {@code %} or {@code -};</li>
     *   <li>group 2 - second run, which a negative lookahead prevents from starting
     *       with {@code em-};</li>
     *   <li>group 3 - an optional literal {@code em-} prefix;</li>
     *   <li>group 4 - the run that follows that prefix.</li>
     * </ol>
     * NOTE(review): judging by the sample headings these presumably correspond to
     * term / provider / location - confirm with the owner of the URL scheme.
     */
    private static final String SEARCH_URL_REGEX =
            "educacao\\/busca-?([\\w%-]+)?\\/?(?!em-)([\\w%-]+)?\\/?(em-)?([\\w%-]+)?\\/?";

    /** Sample input: annotated URL lists, one URL per line, grouped by scenario. */
    private static final String SAMPLE_TEXT =
            "###################\n\n"
                    + "# bug cases\n"
                    + "https://www.catho.com.br/educacao/busca/em-em-em-em-extrema-mg\n"
                    + "https://www.catho.com.br/educacao/busca/em-extrema-mg\n"
                    + "https://www.catho.com.br/educacao/busca/em-mextrema-mg\n"
                    + "https://www.catho.com.br/educacao/busca-curso-admin/usp/em-mextrema-mg\n"
                    + "https://www.catho.com.br/educacao/busca-curso-admin/em-mextrema-mg\n"
                    + "https://www.catho.com.br/educacao/busca-admin/em-mextrema-mg\n\n"
                    + "#####################\n\n"
                    + "// without term or filters\n"
                    + "https://www.catho.com.br/educacao/busca\n"
                    + "https://www.catho.com.br/educacao/busca/\n\n"
                    + "###################\n\n"
                    + "// with term or match but no filters\n"
                    + "https://www.catho.com.br/educacao/busca-curso-design-industrial\n"
                    + "https://www.catho.com.br/educacao/busca-curso-design-industrial/\n"
                    + "https://www.catho.com.br/educacao/busca-de%20csign-industrial\n"
                    + "https://www.catho.com.br/educacao/busca-de%20csign-industrial/\n"
                    + "https://www.catho.com.br/educacao/busca-design-industrial\n"
                    + "https://www.catho.com.br/educacao/busca-design-industrial/\n\n"
                    + "###################\n\n"
                    + "// without term but with provider\n"
                    + "https://www.catho.com.br/educacao/busca/centro-universitario-senac\n"
                    + "https://www.catho.com.br/educacao/busca/centro-universitario-senac/\n\n"
                    + "// without term but with location\n"
                    + "https://www.catho.com.br/educacao/busca/em-sao-paolo\n"
                    + "https://www.catho.com.br/educacao/busca/em-sao-paolo/\n\n"
                    + "// without term but with provider & location\n"
                    + "https://www.catho.com.br/educacao/busca/centro-universitario-senac/em-sao-paolo\n"
                    + "https://www.catho.com.br/educacao/busca/centro-universitario-senac/em-sao-paolo/\n\n"
                    + "###################\n\n"
                    + "// with term & provider\n"
                    + "https://www.catho.com.br/educacao/busca-design-industrial/centro-universitario-senac\n"
                    + "https://www.catho.com.br/educacao/busca-design-industrial/centro-universitario-senac/\n\n"
                    + "// with term & location\n"
                    + "https://www.catho.com.br/educacao/busca-design-industrial/em-sao-paolo\n"
                    + "https://www.catho.com.br/educacao/busca-design-industrial/em-sao-paolo/\n\n"
                    + "// with term & provider & location\n"
                    + "https://www.catho.com.br/educacao/busca-design-industrial/centro-universitario-senac/em-sao-paolo\n"
                    + "https://www.catho.com.br/educacao/busca-design-industrial/centro-universitario-senac/em-sao-paolo/\n\n"
                    + "###################\n\n"
                    + "// with match & provider\n"
                    + "https://www.catho.com.br/educacao/busca-curso-design-industrial/centro-universitario-senac\n"
                    + "https://www.catho.com.br/educacao/busca-curso-design-industrial/centro-universitario-senac/\n\n"
                    + "// with match & location\n"
                    + "https://www.catho.com.br/educacao/busca-curso-design-industrial/em-sao-paolo\n"
                    + "https://www.catho.com.br/educacao/busca-curso-design-industrial/em-sao-paolo/\n\n"
                    + "// with match & provider & location\n"
                    + "https://www.catho.com.br/educacao/busca-curso-design-industrial/centro-universitario-senac/em-sao-paolo\n"
                    + "https://www.catho.com.br/educacao/busca-curso-design-industrial/centro-universitario-senac/em-sao-paolo/";

    /**
     * Scans the sample text from left to right and prints a report for every
     * successive match of the expression.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        Matcher hits = Pattern.compile(SEARCH_URL_REGEX).matcher(SAMPLE_TEXT);
        while (hits.find()) {
            report(hits);
        }
    }

    /**
     * Writes the current match to standard output: the whole matched text first,
     * then one line per capture group. A group that did not take part in the match
     * is printed as {@code null}.
     *
     * @param hit matcher positioned on a successful match
     */
    private static void report(Matcher hit) {
        System.out.println("Full match: " + hit.group(0));
        int groupTotal = hit.groupCount();
        for (int g = 1; g <= groupTotal; g++) {
            System.out.println("Group " + g + ": " + hit.group(g));
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
// If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html