import re
# Pattern for one three-line entry of the sample text:
#   "<digits> <word> <word> <rest of line>"
#   "• <sentence>"
#   "<digits> | <rest of line>"
# Groups: 1 = leading digits, 2 = first word, 3 = second word,
# 4 = text following the bullet. The remainder of the header line, the
# trailing "<digits> | ..." line, and any whitespace after it are consumed
# by the match but not captured, so the substitution drops them.
regex = re.compile(
    r"([0-9]+)\s+(\w+)\s+(\w+).*?\r?\n•\s+(.*?)\s+[0-9]+\s\|.*\s*"
)

# Sample input with three entries separated by blank lines.
test_str = (
    "1 el art the\n"
    "• esa mujer era la mujer que yo quería ser\n"
    "100 | 2037803\n\n"
    "2 de prep of, from\n"
    "• el hijo de un hermano mío\n"
    "100 | 1319834\n\n"
    "3 que conj that, which\n"
    "• dice que no\n"
    "100 | 662653"
)

# Replacement template: the four captured groups, each followed by a tab,
# then a newline. The \t and \n escapes are expanded by re.sub itself, so a
# raw string yields the same template as the doubled-backslash spelling.
subst = r"\1\t\2\t\3\t\4\t\n"

# Rewrite every matched entry as one tab-separated line.
result = regex.sub(subst, test_str)

# Only print when the substitution produced non-empty text.
if result:
    print(result)
# Please keep in mind that these code samples are automatically generated
# and are not guaranteed to work. If you find any syntax errors, feel free
# to submit a bug report. For a full regex reference for Python, please
# visit: https://docs.python.org/3/library/re.html