import re
# Pattern for the textual label fragments of a financial-statement line:
# a run of letters, optional separators (whitespace , - &), an optional
# parenthesised alphabetic/currency token, and at most one trailing
# whitespace character.  The (?!\d) lookahead stops a parenthesised number
# such as "(63)" from being consumed as part of the label.
# NOTE: re.MULTILINE has no effect here because the pattern contains no
# ^ or $ anchors; it is kept so the compiled flags stay unchanged.
regex = re.compile(r"[a-zA-Z]+[\s\,\-\&]*\(*(?!\d)[a-zA-Z\€\$]*\)*\s?", flags=re.MULTILINE)

# Sample input: a consolidated cash-flow statement, one row per line.
test_str = ("I. CASH FLOW FROM OPERATING ACTIVITIES\n"
    "Profit attributable to owners of the parent 1,908 4,790\n"
    "Depreciation, amortization, impairment and provisions (1) 927 1,080\n"
    "Share in profit (loss) from equity-accounted companies (net of dividends received) 13 (63) (110)\n"
    "Change in fair value of currency and commodity derivatives (2) 26 81 (3,608)\n"
    "Capital gains and losses on asset disposals (3) (364) (20)\n"
    "Profit (loss) from discontinued operations and disposal gain (before tax) (110) (990)\n"
    "Profit attributable to non-controlling interests 55 62\n"
    "Other (4) 217 1,206\n"
    "Cash flow from operations, before change in working capital 2,651 2,410\n"
    "Change in inventories and work-in-progress 14 (347) (308)\n"
    "Change in operating receivables and payables (5) 15, 23, 26 219 561\n"
    "Change in other receivables and payables 15, 23 (40) 63\n"
    "Change in working capital (168) 316\n"
    "TOTAL I (6) 2,483 2,726\n"
    "II. CASH FLOW USED IN INVESTING ACTIVITIES\n"
    "Capitalization of R&D expenditure (7) 10 (364) (286)\n"
    "Payments for the purchase of intangible assets, net of proceeds (8) (324) (262)\n"
    "Payments for the purchase of property, plant and equipment, net of proceeds (9) (704) (740)\n"
    "Payments arising from the acquisition of investments or businesses, net (810) (54)\n"
    "Proceeds arising from the sale of investments or businesses, net 2 3,060\n"
    "Proceeds (payments) arising from the sale (acquisition) of investments and loans (10) 5 (1,974)\n"
    "TOTAL II (2,195) (256)\n"
    "III. CASH FLOW FROM (USED IN) FINANCING ACTIVITIES\n"
    "Change in share capital – owners of the parent - -\n"
    "Change in share capital – non-controlling interests (9) (4)\n"
    "Acquisitions and disposals of treasury shares 18.b (38) (449)\n"
    "Repayment of borrowings and long-term debt 22 (73) (66)\n"
    "Increase in borrowings 22 682 1,058\n"
    "Change in repayable advances 21 (24) (25)\n"
    "Change in short-term borrowings 22 10 449\n"
    "Dividends and interim dividends paid to owners of the parent 18.e (612) (340)\n"
    "Dividends paid to non-controlling interests (30) (32)\n"
    "TOTAL III (94) 591\n"
    "CASH FLOW FROM OPERATING ACTIVITIES OF DISCONTINUED OPERATIONS TOTAL IV 171 14\n"
    "CASH FLOW USED IN INVESTING ACTIVITIES OF DISCONTINUED OPERATIONS TOTAL V (111) (52)\n"
    "CASH FLOW USED IN FINANCING ACTIVITIES OF DISCONTINUED OPERATIONS TOTAL VI (8) (198)\n"
    "EFFECT OF CHANGES IN FOREIGN EXCHANGE RATES TOTAL VII 15 (17)\n"
    "NET INCREASE IN CASH AND CASH EQUIVALENTS I+II+III+IV+V+VI+VII 261 2,808\n"
    "Cash and cash equivalents at beginning of period 1,659 1,926\n"
    "Cash and cash equivalents of discontinued operations at beginning of period 186 180\n"
    "Cash and cash equivalents at end of period 16 1,926 4,914\n"
    "Cash and cash equivalents of discontinued operations at end of period 180 -\n"
    "NET INCREASE IN CASH AND CASH EQUIVALENTS 261 2,808")

# finditer yields non-overlapping matches lazily, in left-to-right order.
matches = regex.finditer(test_str)

# Report every match with its 1-based ordinal and character span.
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # The pattern defines no capturing groups, so match.groups() is empty
    # and this loop is a no-op; it starts reporting as soon as a group is
    # added to the pattern.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: this code sample was generated automatically and is not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, see https://docs.python.org/3/library/re.html