import re
# Demo script: locate a "list of figures" section in free text and print the
# span and capture groups of every match.
#
# Pattern outline (the whole pattern is a single capture group, so group 1
# always equals the full match):
#   * a heading at the start of a line: "List of Figure(s)",
#     "Figure(s) Legend(s)", "Caption(s)" or just "List", followed by
#     optional whitespace and an optional colon;
#   * then, repeated as often as possible, one of:
#       - the remainder of the current line (only when not at a line start),
#       - a newline, unless it starts a run of four or more newlines or is
#         followed by a short heading-like line (one or two runs of word
#         characters, optionally followed by a colon) that ends its line,
#       - the literal "Fig", which is the only way a new line's text can be
#         entered, so the section only continues onto lines starting with it.
#
# MULTILINE makes ^ and $ anchor at every line; IGNORECASE applies to the
# heading words and to "Fig".  DOTALL has no effect because the pattern
# contains no "." — it is kept only to leave the compiled flags unchanged.
regex = re.compile(r"(^(?:List\sof\sFigures?|Figures?\sLegends?|Captions?|List)\s*:?(?:(?!^)[^\n]+|(?!\n{4,}|\n\w+\s*\w+\s*:?\s*$)\n|Fig)*)", flags=re.MULTILINE | re.IGNORECASE | re.DOTALL)

# Sample input.  The expected single match covers the heading plus Figure 1
# and Figure 2, and stops before the "Not figures:" line.
test_str = ("List of Figures:\n\n"
            "Figure 1 Kalpakkam study area and locations of borewell and surface water sampling points. Low lying area on the western side is clearly delineated.\n\n"
            "Figure 2 displays Local meteoric water line (in thick line) of Chennai region (Bhishm Kumar et al., 2010) and Evaporation line (in dashed line). The stable isotope (δD and δ18O) composition of borewell and surface water body samples are plotted to analyse their origin.\n\n"
            "Not figures:\n\n"
            "Figure 3 depicts the plot of salinity vs. δD for various borewell and surface water samples. It can be seen that borewell (freshwater) and lake water samples are clustered at the lower range of salinity and δD levels. Similarly, the saline water samples are clustered at the higher range of salinity and δD levels.\n\n"
            "Figure 4 depicts the plot of salinity vs. δ18O for various borewell and surface water samples. It can be seen that borewell (freshwater) and lake water samples are clustered at the lower range of salinity and δ18O levels. Similarly, the saline water samples are clustered at the higher range of salinity and δ18O levels.\n\n"
            "Not starting with figure")

# finditer yields non-overlapping matches lazily, in order of appearance.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # The group loop must sit inside the match loop: it reports the capture
    # groups of the match currently being printed (numbered from 1).
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")

# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html