import re
# Pattern: a literal "0x" followed by exactly eight hex digits, then any
# number of spaces/tabs, then one run of non-whitespace characters captured
# as group 1. Shorter hex literals in the text (e.g. "0x4020cc") do not
# match because of the {8} quantifier.
# NOTE: re.MULTILINE only alters the behaviour of ^ and $; this pattern uses
# neither, so the flag is kept solely to leave the compiled object unchanged.
regex = re.compile(r"0x[0-9a-fA-F]{8}[\t ]*(\S+)", flags=re.MULTILINE)

# Sample input the pattern is run against (appears to be a disassembly
# listing; its exact origin is not stated in this file).
test_str = (
    "/ 260: fcn.004020b0 (int32_t arg_4h, int32_t arg_8h);\n"
    "| ; var int32_t var_324h @ ebp-0x324\n"
    "| ; arg int32_t arg_4h @ ebp+0x4\n"
    "| ; arg int32_t arg_8h @ ebp+0x8\n"
    "| 0x004020b0 55 push ebp\n"
    "| 0x004020b1 8bec mov ebp, esp\n"
    "| 0x004020b3 81ec24030000 sub esp, 0x324\n"
    "| 0x004020b9 6a17 push 0x17 ; 23\n"
    "| 0x004020bb ff151c304000 call dword [sym.imp.KERNEL32.dll_IsProcessorFeaturePresent] ; 0x40301c\n"
    "| 0x004020c1 85c0 test eax, eax\n"
    "| ,=< 0x004020c3 7407 je 0x4020cc\n"
    "| | 0x004020c5 b902000000 mov ecx, 2\n"
    "| | 0x004020ca cd29 int 0x29\n"
    "| | ; CODE XREF from fcn.004020b0 @ 0x4020c3\n"
    "| `-> 0x004020cc a340744000 mov dword [0x407440], eax ; [0x407440:4]=0\n"
    "| 0x004020d1 890d3c744000 mov dword [0x40743c], ecx ; [0x40743c:4]=0\n"
    "| 0x004020d7 891538744000 mov dword [0x407438], edx ; [0x407438:4]=0"
)

# finditer returns a lazy, single-pass iterator of non-overlapping matches;
# it is exhausted by the loop below.
matches = regex.finditer(test_str)

for match_num, match in enumerate(matches, start=1):
    # Report the full match together with its character offsets in test_str.
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")

    # Report every capture group of this match (group numbers are 1-based),
    # so this loop must stay nested inside the per-match loop.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html