import re
# Pattern for one "$...$" span, split into three capture groups:
#   1. the opening dollar sign,
#   2. one or more characters that are not a dollar sign,
#   3. the closing dollar sign.
# "[^$]+" also consumes newlines, so a single match may run across lines.
# NOTE: re.MULTILINE only changes how the "^" and "$" anchors behave; every
# dollar sign here is either escaped or inside a character class, so the
# flag does not alter what this pattern matches.
regex = re.compile(
    r"(\$)([^$]+)(\$)",
    re.MULTILINE,
)
# Sample input that the compiled pattern is run against further down: the
# text of a LaTeX font-test document, assembled from adjacent string literals
# (implicit concatenation inside the parentheses), one literal per source
# line, each carrying its own explicit "\n".
# The text contains "$" delimiters in ordinary paragraphs, in table cells, in
# \hbox arguments, and also on LaTeX lines that start with "%" (commented out
# in LaTeX); the pattern is applied to the raw text, so all of them are
# candidates for matching.
test_str = ("\\documentclass{article}\n"
"%\\usepackage[T1]{fontenc}\n"
"%\\usepackage{textcomp}\n"
"%\\usepackage[math]{iwona}\n"
"\\usepackage[lf,minionint]{MinionPro}\n"
"\\usepackage{a4,bm}\n"
"%\\usepackage{dotlessj}\n"
"\\pagestyle{empty}\n"
"\\begin{document}\n"
"\\begin{center}\n"
"{\\Large{\\bfseries Test of the font {\\scshape MinionPro}.}}\\bigskip\\\\\n"
"{\\Large\\verb|\\usepackage[lf,minionint]{MinionPro}|}\n"
"%\\medskip\\\\\n"
"%{\\verb|\\usepackage[romanfamily=bright-osf]{lucimatx}|} if you want old-style figures (like this: \\oldstylenums{1695})\\\\\n"
"\\end{center}\n\n"
"The default math mode font is $Math\\ Italic$. This should not be\n"
"confused with ordinary \\emph{Text Italic} -- notice the different spacing\\,!\n"
"\\verb|\\mathbf| produces bold roman letters: $ \\mathbf{abcABC} $.\n"
"If you wish to embolden complete formulas,\n"
"use the \\verb|\\boldmath| command \\emph{before} going into math mode. \n"
"This changes the default math fonts to bold. \n"
" \n"
"\\begin{tabular}{ll}\n"
"\\texttt{normal} & $ x = 2\\pi \\Rightarrow x \\simeq 6.28 $\\\\\n"
"\\texttt{mathbf} & $\\mathbf{x} = 2\\pi \\Rightarrow \\mathbf{x} \\simeq 6.28 $\\\\\n"
"\\texttt{boldmath} & {\\boldmath $x = \\mathbf{2}\\pi \\Rightarrow x \n"
" \\simeq{\\mathbf{6.28}} $}\\\\\n"
"\\end{tabular}\n"
"\\smallskip\n\n"
"Greek is available in upper and lower case:\n"
"$\\alpha,\\beta \\dots \\Omega$, and there are special\n"
"symbols such as $ \\hbar$.\n"
"%The following letters should be upright: $\\upGamma, \\upDelta\\dots \\upOmega$.\n"
"Digits in formulas $1, 2, 3\\dots$ may differ from those in text: 1, 2, 3\\dots\n\n"
"There is a calligraphic alphabet \\verb|\\mathcal| for upper case letters\n"
"$ \\mathcal{ABCDE}\\dots $.\n"
"%and there are letters for number sets: $\\mathbb{A\\dots Z} $, which are produced using \\verb|\\mathbb|.\n\n"
"\\noindent\n"
"This font has both lining figures (13589, default) and oldstyle figures (\\oldstylenums{13589}, select with {\\tt$\\backslash$oldstylenums\\{..\\}}). \n"
"%{\\em \\swshape{I}t \\swshape{A}lso \\swshape{H}as \\swshape{S}wash \\swshape{I}talics} {\\tt$\\backslash$swshape\\{..\\}}\n"
"\\\\\n"
"{\\fontseries{c}\\selectfont there is also a condensed weight} {\\tt$\\backslash$fontseries\\{c\\}$\\backslash$selectfont}\n"
" \n"
"\\begin{equation}\n"
" \\phi(t)=\\frac{1}{\\sqrt{2\\pi}}\n"
" \\int^t_0 e^{-x^2/2} dx \n"
"\\end{equation}\n\n"
"\\begin{equation}\n"
" \\prod_{j\\geq 0}\n"
" \\left(\\sum_{k\\geq 0}a_{jk} z^k\\right) \n"
"= \\sum_{k\\geq 0} z^n\n"
" \\left( \\sum_{{k_0,k_1,\\ldots\\geq 0}\n"
" \\atop{k_0+k_1+\\ldots=n} }\n"
" a{_0k_0}a_{1k_1}\\ldots \\right) \n"
"\\end{equation}\n\n"
"\\begin{equation}\n"
"\\pi(n) = \\sum_{m=2}^{n}\n"
" \\left\\lfloor \\left(\\sum_{k=1}^{m-1}\n"
" \\lfloor(m/k)/\\lceil m/k\\rceil \n"
" \\rfloor \\right)^{-1}\n"
" \\right\\rfloor\n"
"\\end{equation}\n\n"
"\\begin{equation}\n"
"\\{\\underbrace{%\n"
" \\overbrace{\\mathstrut a,\\ldots,a}^{k\\ a's},\n"
" \\overbrace{\\mathstrut b,\\ldots,b}^{l\\ b's}}\n"
" _{k+1\\ \\mathrm{elements}} \\}\n"
"\\end{equation}\n\n"
"\\begin{displaymath}\n"
"\\mbox{W}^+\\\n"
"\\begin{array}{l}\n"
"\\nearrow\\raise5pt\\hbox{$\\mu^+ + \\nu_{\\mu}$}\\\\\n"
"\\rightarrow \\pi^+ +\\pi^0 \\\\[5pt]\n"
"\\rightarrow \\kappa^+ +\\pi^0 \\\\\n"
"\\searrow\\lower5pt\\hbox{$\\mathrm{e}^+ \n"
" +\\nu_{\\scriptstyle\\mathrm{e}}$}\n"
"\\end{array}\n"
"\\end{displaymath}\n\n"
"\\begin{displaymath}\n"
"\\frac{\\pm\n"
"\\left|\\begin{array}{ccc}\n"
"x_1-x_2 & y_1-y_2 & z_1-z_2 \\\\\n"
"l_1 & m_1 & n_1 \\\\\n"
"l_2 & m_2 & n_2\n"
"\\end{array}\\right|}{\n"
"\\sqrt{\\left|\\begin{array}{cc}l_1&m_1\\\\\n"
"l_2&m_2\\end{array}\\right|^2\n"
"+ \\left|\\begin{array}{cc}m_1&n_1\\\\\n"
"n_1&l_1\\end{array}\\right|^2\n"
"+ \\left|\\begin{array}{cc}m_2&n_2\\\\\n"
"n_2&l_2\\end{array}\\right|^2}}\n"
"\\end{displaymath}\n\n"
"text accents: \\`{a},\\'{a},\\\"{a},\\^{a}\n"
"may differ from math accents:\n"
"\\begin{displaymath}\n"
"\\mbox{ acute=}\\acute{a}\n"
"\\mbox{ grave=}\\grave{a}\n"
"\\mbox{ ddot=}\\ddot {a}\n"
"\\mbox{ tilde=}\\tilde{a}\n"
"\\mbox{ bar=}\\bar {a}\n"
"\\mbox{ breve=}\\breve{a}\n"
"\\mbox{ check=}\\check{a}\n"
"\\mbox{ hat=}\\hat {a}\n"
"\\mbox{ vec=}\\vec {a}\n"
"\\mbox{ dot=}\\dot {a}\n"
"\\end{displaymath}\n\n"
"dotlessi=\\i\\ \n"
"dotlessj=\\j\\ \n"
"dagger=$\\dagger$\\ \\ \\ \n"
"\\verb|\\bm{x}|\\ $\\bm{x}$ \n\n"
"%$\\hbar$ $\\hslash$\n"
"\\end{document}\n")
# Report every match of `regex` in `test_str`, followed by each of its
# capture groups, using 1-based numbering for both matches and groups.
# finditer() yields matches lazily, left to right, without overlap.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    # Whole-match span: start() / end() with no argument refer to group 0.
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # The group loop must sit inside the match loop so each match is followed
    # by its own groups; groups() excludes group 0, hence start=1 lines the
    # counter up with the index expected by start(n) / end(n).
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html