use strict;
# Sample Markdown input the pattern is run against: task-list items and table
# rows that are expected to match, empty checkboxes that are not, and a
# description of the desired capture groups. The literal is single-quoted, so
# `$` and `@` are not interpolated.
my $str = '# Should match
- [ ] Some task
- [ ] Some task | [[link]]
- [ ] Some task ^abcdef
- [ ] Some task | [[link]] ^abcdef
- [ ] ! Some task
- [ ] ! Some task | [[link]]
- [ ] ! Some task ^abcdef
- [ ] ! Some task | [[link]] ^abcdef
- [ ] Task one | [ ] ! Task two | [ ] Task three ^abcdef
| Tracker | Task | Backlog |
| ----------: | :---------------------- | :------- |
| 00:00-00:00 | [ ] Task item | [[linK]] |
| 00:00-00:00 | [ ] Task item ^abcdef | [[link]] |
| 00:00-00:00 | [ ] [[task-item]] | [[link]] |
| 00:00-00:00 | [ ] ! Task item | [[linK]] |
| 00:00-00:00 | [ ] ! Task item ^abcdef | [[link]] |
| 00:00-00:00 | [ ] ! [[task-item]] | [[link]] |
# Should not match
- [ ]
- [ ]
- [ ]
- [ ] !
- [ ] !
- [ ] !
| Tracker | Task | Backlog |
| ----------: | :---------------------- | :------- |
| 00:00-00:00 | [ ] | [[linK]] |
| 00:00-00:00 | [ ] ! | [[linK]] |
# Desired outcome
- group `notation`:
- match: `[` and `]`
- group `symbol`:
- match: any single character (e.g., `\\s`) between `[` and `]`
- group `modifier`:
- match: `!`, `?`, or `*` that follows after `[ ]`
- group `text`:
- match: task text after `[ ]` without modifier present
- group `textmod`:
- match: task text after `[ ] !` with modifier present
';
# Pattern for a Markdown checkbox (e.g. `[ ]` or `[x]`) plus the task text
# that follows it.
#
# Named captures:
#   g1 - the bracket characters; the name is used for both brackets, so
#        $+{g1} yields the opening `[` while $-{g1} holds both
#   g2 - the single character between the brackets
#   g3 - the modifier (`!`, `?` or `*`) when one follows the checkbox
#   g4 - the task text when a modifier is present
#   g5 - the task text when no modifier is present
#
# Flags: /m lets `$` match at every line end, /x permits the layout and the
# embedded comments, /p keeps ${^PREMATCH}, ${^MATCH} and ${^POSTMATCH}.
#
# A former lookahead made only of optional parts (it could never fail) has
# been dropped; matching behaviour is unchanged.
my $regex = qr/
    # Require whitespace immediately before the checkbox.
    (?<= \s )
    # Match opening bracket (i.e., `[`).
    (?<g1> \[ )
    # Match any single character (e.g., `x`).
    (?<g2> . )
    # Match closing bracket (i.e., `]`).
    (?<g1> \] )
    # Exclude entries without text (i.e., incl. in tables).
    (?!
        \s* [?!*]? \s* \|
      |
        \s* [?!*]? \s* $
    )
    # Match the text (i.e., capture based on modifier presence).
    (?:
        # Match modifier (i.e., `!`, `?`, or `*`) and the text that follows.
        \s* (?<g3> [!?*] ) \s* (?<g4> .*? )
      |
        # Match the text that does not follow a modifier.
        \s* (?! [!?*] ) \s* (?<g5> .*? )
    )
    # Match until either of the stops that follow are met.
    (?=
        \s+ \^[a-z0-9]{6,}
      |
        \s+ \|
      |
        \s* $
    )
/mxp;
# Report every match of $regex in $str. In scalar context a /g match resumes
# from pos($str) after each success, so a `while` loop visits all matches;
# the previous `if` evaluated the match once and printed only the first one.
while ( $str =~ /$regex/g ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # Numbered captures follow the same scheme, e.g. $1 with $-[1] and $+[1],
    # $2 with $-[2] and $+[2], and so on.
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'.
# Named capture groups can be read via $+{name}.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html