import re
# Pattern for checkbox-style task entries such as `[ ] ! Some task ^abcdef`,
# whether they appear in a list item or inside a pipe-delimited table cell.
#
# Capture groups (numbering is stable; names allow access by meaning):
#   1              opening bracket `[`
#   2  `symbol`    the single character between the brackets
#   3              closing bracket `]`
#   4  `modifier`  `!`, `?`, or `*` directly after the brackets
#   5  `textmod`   task text when a modifier is present
#   6  `text`      task text when no modifier is present
#
# The two brackets stay unnamed: `re` does not allow one group name to be used
# for two groups in the same pattern.
#
# The text stops before a ` ^id` suffix (six or more lowercase alphanumerics),
# before a ` |` column separator, or at the end of the line.
regex = re.compile(r"""
    # Only consider a bracket that is preceded by whitespace.
    (?<= \s )
    # Match the opening bracket (i.e., `[`).
    ( \[ )
    # Match any single character (e.g., `x`).
    (?P<symbol> . )
    # Match the closing bracket (i.e., `]`).
    ( \] )
    # Exclude entries without text (i.e., incl. in tables).
    (?!
        \s* [?!*]? \s* \|
        |
        \s* [?!*]? \s* $
    )
    # Match the text (i.e., capture based on modifier presence).
    (?:
        # Match modifier (i.e., `!`, `?`, or `*`) and the text that follows.
        \s* (?P<modifier> [!?*] ) \s* (?P<textmod> .*? )
        |
        # Match the text that does not follow a modifier.
        \s* (?! [!?*]) \s* (?P<text> .*? )
    )
    # Match until either of the stops that follow are met.
    (?= \s+ \^[a-z0-9]{6,} | \s+ \| | \s*$)
""", flags=re.MULTILINE | re.VERBOSE)
# Sample document the pattern is run against. Its own headings divide it into
# three parts: entries that should match (list items and table rows, with and
# without a `!` modifier, a `| [[link]]` column, or a `^abcdef` suffix),
# entries that should not match (brackets followed by no text), and a
# description of the intended capture groups.
# Whitespace inside the literals is significant to the pattern; keep it as is.
test_str = ("# Should match\n\n"
"- [ ] Some task\n"
"- [ ] Some task | [[link]]\n"
"- [ ] Some task ^abcdef\n"
"- [ ] Some task | [[link]] ^abcdef\n"
"- [ ] ! Some task\n"
"- [ ] ! Some task | [[link]]\n"
"- [ ] ! Some task ^abcdef\n"
"- [ ] ! Some task | [[link]] ^abcdef\n"
"- [ ] Task one | [ ] ! Task two | [ ] Task three ^abcdef\n\n"
"| Tracker | Task | Backlog |\n"
"| ----------: | :---------------------- | :------- |\n"
"| 00:00-00:00 | [ ] Task item | [[linK]] |\n"
"| 00:00-00:00 | [ ] Task item ^abcdef | [[link]] |\n"
"| 00:00-00:00 | [ ] [[task-item]] | [[link]] |\n"
"| 00:00-00:00 | [ ] ! Task item | [[linK]] |\n"
"| 00:00-00:00 | [ ] ! Task item ^abcdef | [[link]] |\n"
"| 00:00-00:00 | [ ] ! [[task-item]] | [[link]] |\n\n"
"# Should not match\n\n"
"- [ ] \n"
"- [ ]\n"
"- [ ] \n"
"- [ ] ! \n"
"- [ ] !\n"
"- [ ] ! \n\n"
"| Tracker | Task | Backlog |\n"
"| ----------: | :---------------------- | :------- |\n"
"| 00:00-00:00 | [ ] | [[linK]] |\n"
"| 00:00-00:00 | [ ] ! | [[linK]] |\n\n"
"# Desired outcome\n\n"
"- group `notation`:\n"
" - match: `[` and `]`\n\n"
"- group `symbol`: \n"
" - match: any single character (e.g., `\\s`) between `[` and `]`\n\n"
"- group `modifier`:\n"
" - match: `!`, `?`, or `*` that follows after `[ ]`\n\n"
"- group `text`: \n"
" - match: task text after `[ ]` without modifier present\n\n"
"- group `textmod`: \n"
" - match: task text after `[ ] !` with modifier present\n")
# Print every match with its span, then each of its capture groups.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        if group is None:
            # A group on the alternation branch that was not taken has no
            # span: start()/end() return -1 for it, so report that explicitly
            # instead of printing a bogus "-1--1" range.
            print(f"Group {group_num} did not participate in the match")
            continue
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html