import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstration program that runs a free-spacing ("comments" mode) regular
 * expression against an embedded multi-line sample text and prints every
 * match together with its capture groups.
 *
 * <p>The pattern accepts a line only when the whole line can be consumed by
 * repeating the alternatives listed in {@link #REGEX}: a literal space, a
 * single-letter symbol, a two-letter symbol, or one of the listed trigrams.
 * Matching is case-insensitive, and {@code ^}/{@code $} anchor at line
 * boundaries because {@link Pattern#MULTILINE} is set.
 */
public class Example {

    /**
     * Pattern source, written for {@link Pattern#COMMENTS} mode: unescaped
     * whitespace and everything from {@code #} to end of line are ignored.
     *
     * <p>The first alternative of the repeated group is an escaped literal
     * space ({@code "\\ "}). The space after the backslash is essential: a
     * backslash directly followed by the line break would instead match a
     * newline and let one match run across several lines. This mirrors the
     * {@code (\ |...)} alternative in the one-line variant embedded at the end
     * of {@link #SAMPLE_TEXT}.
     */
    private static final String REGEX = "^ \n"
            + "# Uncomment to allow prefix continuation (second letter of a dangling match)\n"
            + "# ([abcdefghiklmnoprstuvy\\ ])?\n"
            + "( \\ \n"
            + " |(?: # Single-letter elements\n"
            + " [BCFHIKNOPSUVWY]\n"
            + " )\n"
            + " |(?: # Two-letter elements.\n"
            + " (?:A[cglmrstu])|(?:B[aehikr])|(?:C[adeflmnorsu])|(?:D[bsy])\n"
            + " | (?:E[rsu])|(?:F[elmr])|(?:G[ade])|(?:H[efgos])|(?:I[nr])\n"
            + " | (?:K[r])|(?:L[airuv])|(?:M[cdgnot])|(?:N[abdehiop])\n"
            + " | (?:O[gs])|(?:P[abdmortu])|(?:R[abefghnu])|(?:S[bcegimnr])\n"
            + " | (?:T[abcehilms])|(?:X[e])|(?:Yb)|(?:Z[nr])\n"
            + " )\n"
            + " |(?: # Smushed trigrams. No need to go past trigrams because two twos make four\n"
            + " (?:A(?:ga|gd|ge|la|lr|md|mg|mt|ra|re|rg|ta|te|tl|tm))\n"
            + " |(?:E(?:ra|re|rg))\n"
            + " |(?:G(?:ag|al|am|ar|at|er))\n"
            + " |(?:L(?:ag|al|am|ar|at|ra|re|rg))\n"
            + " |(?:M(?:ga|gd|ge|ta|te|tl|tm))\n"
            + " |(?:R(?:ag|al|am|ar|at|er|ga|gd|ge))\n"
            + " |(?:T(?:ag|al|am|ar|at|er|la|lr|md|mg|mt))\n"
            + " |(?:X(?:er))\n"
            + " |(?:Z(?:ra|re|rg))\n"
            + " )\n"
            + "# repeated any number of times\n"
            + ")+\n"
            + "# Uncomment to allow last letter if it's a potential first letter of the next word\n"
            + "# ([ADEGLMRTXZ]\\ *)?\n"
            + "$";

    /** Multi-line sample input; each line is tested independently by the pattern. */
    private static final String SAMPLE_TEXT = "**(See bottom for Javascript/one-line version)**\n"
            + "If you want continuation -- to look for \"fez\" \"rave\" \"rites\" chains, strike the second and second-to-last lines. Latex needs a suffix and ear needs a prefix.\n"
            + "latex\n"
            + "ear\n"
            + "latexear\n"
            + "fez\n"
            + "rave\n"
            + "rites\n"
            + "fezraverites\n"
            + "if you don't want overlap, comment out the big group with (?<=..) tests. Walter and lag will not match: lag is La + Ag but can't be L+Ag or La+G; walter is W+Al+Te+Er but there's no single A, L, T, E or R to make it a strict chain of elements.\n"
            + "Walter\n"
            + "aga\n"
            + "McLvinandfezrobHogwartsWizrdsofMoney\n"
            + "## Match:\n"
            + "McLvIn\n"
            + "McLvinandfezrobHogwartsWizrdsofMoney\n"
            + "zr\n"
            + "McLvinand\n"
            + "ergo\n"
            + "Orgasmicallabkitscashflow\n"
            + "update\n"
            + "Organicfurbies\n"
            + "Babkes\n"
            + "WalterWhite\n"
            + "That\n"
            + "ibexesnogladybirds\n"
            + "Xenophobic\n"
            + "picnicforgus\n"
            + "snapes\n"
            + "siriusblack\n\n\n"
            + "## match until last letter, and last letter potentially starts a new word (allows continuation)\n"
            + "fez\n"
            + "McLvinandfez\n"
            + "bobatea\n\n"
            + "# La + At + Te\n"
            + "late\n"
            + "# the x awaits an e in the next word\n"
            + "latex\n"
            + "# N + Nd + Dy + Y + Yb\n"
            + "ndyb\n"
            + "# the a is a potential continuation\n"
            + "andy\n"
            + "# ...but the x can't be a second letter\n"
            + "xndyb\n\n"
            + "## Don't Match at some interior point in line\n"
            + "Jemmamead\n"
            + "## Don't Match anywhere in line\n"
            + "JemmaQmead\n\n"
            + "### For Javascript:\n\n"
            + "^([abcdefghiklmnoprstuvy\\s])?(\\ |(?:[BCFHIKNOPSUVWY])|(?:(?:A[cglmrstu])|(?:B[aehikr])|(?:C[adeflmnorsu])|(?:D[bsy])|(?:E[rsu])|(?:F[elmr])|(?:G[ade])|(?:H[efgos])|(?:I[nr])|(?:K[r])|(?:L[airuv])|(?:M[cdgnot])|(?:N[abdehiop])|(?:O[gs])|(?:P[abdmortu])|(?:R[abefghnu])|(?:S[bcegimnr])|(?:T[abcehilms])|(?:X[e])|(?:Yb)|(?:Z[nr]))|(?:(?:A(?:ga|gd|ge|la|lr|md|mg|mt|ra|re|rg|ta|te|tl|tm))|(?:E(?:ra|re|rg))|(?:G(?:ag|al|am|ar|at|er))|(?:L(?:ag|al|am|ar|at|ra|re|rg))|(?:M(?:ga|gd|ge|ta|te|tl|tm))|(?:R(?:ag|al|am|ar|at|er|ga|gd|ge))|(?:T(?:ag|al|am|ar|at|er|la|lr|md|mg|mt))|(?:X(?:er))|(?:Z(?:ra|re|rg))))+([ADEGLMRTXZ]\\ *)?$";

    /** Compiled once; the flags make it case-insensitive, line-anchored and free-spacing. */
    private static final Pattern PATTERN = Pattern.compile(
            REGEX, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.COMMENTS);

    /**
     * Scans {@link #SAMPLE_TEXT} and prints, for every match, the full match
     * followed by each numbered capture group on its own line.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final Matcher matcher = PATTERN.matcher(SAMPLE_TEXT);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            // Group numbering starts at 1; group 0 is the full match printed above.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html