import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demo that runs a definition-list regular expression over a fixed
 * sample text and prints every match to standard output.
 */
public class Example {

    /**
     * Pattern source. It looks for one or more non-blank lines, an optional
     * single blank line, and then a line beginning with {@code ": "}
     * (optionally preceded by spaces). The match is extended lazily until a
     * run of two or more newlines that is followed by a non-whitespace
     * character which does not itself start a {@code ": "} line or another
     * term/definition group, or until the end of the input.
     * The expression uses only non-capturing groups and lookaheads.
     */
    private static final String DEFINITION_REGEX =
            " *(?:.*\\S.*\\n)+\\n? *: +(?:.|\\n)+?(?:\\n{2,}(?=\\S)(?! *: +)(?! *(?:\\S.*\\n)+?\\n? *: +)|$)";

    /**
     * Sample input: several "===" headed sections, each holding terms
     * followed by {@code ": "} definition lines in different layouts.
     */
    private static final String SAMPLE_TEXT =
            "=== Single <dt> and <dd>\n\n"
            + "GitHub\n"
            + ": a site where over 73 million developers shape the future of software, together\n\n"
            + "=== Multiple <dt>-<dd> group\n\n"
            + "Firefox\n"
            + ": a free, open source, cross-platform, graphical web browser developed by the Mozilla Corporation and hundreds of volunteers\n\n"
            + "MDN\n"
            + ": provides information about Open Web technologies including HTML, CSS, and APIs for both Web sites and progressive web apps\n\n"
            + "=== Single <dt> with multiple <dd>s\n\n"
            + "Apple\n"
            + " : an American multinational technology company that specializes in consumer electronics, computer software and online services.\n"
            + " : an edible fruit produced by an apple tree\n\n"
            + "=== Multiple <dt>s\n\n"
            + "Windows\n"
            + "Microsoft Windows\n"
            + "Win\n"
            + ": a group of several proprietary graphical operating system families, all of which are developed and marketed by Microsoft\n\n"
            + "=== separated by blank lines\n\n"
            + "Solaris\n\n"
            + ": a 1961 science fiction novel by Polish writer Stanisław Lem\n\n\n\n"
            + ": a proprietary Unix operating system originally developed by Sun Microsystems\n\n"
            + "=== <dd>s with inline breaks\n\n"
            + "Oracle\n"
            + ": an American multinational computer technology\n"
            + "corporation headquartered in Austin, Texas\n\n"
            + ": the priest or priestess uttering the prediction,\n"
            + "may also refer to the site of the oracle\n\n"
            + "===\n\n"
            + "😂\n"
            + ": 😂 face with tears of joy\n\n"
            + "===\n\n"
            + "Term 1\n\n"
            + ": This is a definition with two paragraphs. Lorem ipsum \n"
            + " dolor sit amet, consectetuer adipiscing elit. Aliquam \n"
            + " hendrerit mi posuere lectus.\n\n"
            + " Vestibulum enim wisi, viverra nec, fringilla in, laoreet\n"
            + " vitae, risus.\n\n"
            + ": Second definition for term 1, also wrapped in a paragraph\n"
            + " because of the blank line preceding it.\n\n"
            + "Term 2\n\n"
            + ": This definition has a code block, a blockquote and a list.\n\n"
            + " code block.\n\n"
            + " > block quote\n"
            + " > on two lines.\n\n"
            + " 1. first list item\n"
            + " 2. second list item";

    /**
     * Compiles the pattern, scans the sample text from start to end and
     * prints each successive match.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        Matcher scanner = Pattern.compile(DEFINITION_REGEX).matcher(SAMPLE_TEXT);
        for (boolean found = scanner.find(); found; found = scanner.find()) {
            printCurrentMatch(scanner);
        }
    }

    /**
     * Prints the matcher's current match, then one line per capturing group.
     *
     * @param current matcher positioned on a successful match
     */
    private static void printCurrentMatch(Matcher current) {
        System.out.println("Full match: " + current.group(0));
        // The pattern above declares no capturing groups, so this loop is
        // only exercised if the expression is edited to add some.
        int groupIndex = 1;
        while (groupIndex <= current.groupCount()) {
            System.out.println("Group " + groupIndex + ": " + current.group(groupIndex));
            groupIndex++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html