import re
# Pattern that finds JNDI lookup strings of the form ``${jndi:<scheme>:/<rest>``,
# tolerating whitespace between tokens and percent-encoded variants of each
# token.  ``%(25){0,}XX`` matches ``%XX`` as well as repeatedly encoded forms
# (``%25XX``, ``%2525XX``, ...), because ``%25`` is the percent-encoding of
# ``%`` itself.
#
# The pattern is assembled from adjacent raw-string pieces so each token can be
# annotated; the capture groups (30 in total) keep their original numbering.
regex = re.compile(
    r"(?i)"                     # case-insensitive
    r"(%(25){0,}20|\s)*"        # any number of (encoded) spaces
    r"(%(25){0,}24|\$)"         # $
    r"(%(25){0,}20|\s)*"
    r"(%(25){0,}7B|{){0,1}"     # { zero or one time
    r"(%(25){0,}20|\s)*"
    r"(%(25){0,}(6A|4A)|J)"     # J
    r"(%(25){0,}(6E|4E)|N)"     # N
    r"(%(25){0,}(64|44)|D)"     # D
    r"(%(25){0,}(69|49)|I)"     # I
    r"(%(25){0,}20|\s)*"
    r"(%(25){0,}3A|:)"          # :
    r"[\w\%]+"                  # scheme name, url encoded or not
    # The next two tokens previously used {1,}, which rejected the singly
    # encoded forms %3A / %2F while accepting %253A / %252F.  {0,} matches the
    # rest of the pattern and the breakdown documented in test_str below.
    r"(%(25){0,}3A|:)"          # :
    r"(%(25){0,}2F|\/)"         # /
    r"[^\n]+",                  # until end of line
    flags=re.MULTILINE,
)

# Sample input: one candidate string per line, followed by a plain-text
# breakdown of the pattern (which is itself part of the searched text).
test_str = ("${jndi:ldap://\n"
            "${jndi:ldaps:/}\n"
            "${jndi:rmi:/blarg\n"
            "${jndi:dns://\n"
            "${jndi:nis://\n"
            "${jndi:iiop://\n"
            "${jndi:corba://\n"
            "${jndi:nds://\n"
            "${jndi:http://\n"
            "$jndi:https://\n"
            " $ { JNDI :ANYPROTOCOL://\n"
            " $ { JNDI :ANYPROTOCOL://\n"
            "${jNDi:l%252564ap:/ \n\n"
            "Breakdown:\n"
            "(?i) = case-insensitive\n"
            "(%(25){0,}20|\\s)* = any number of spaces\n"
            "(%(25){0,}24|\\$) = $\n"
            "(%(25){0,}20|\\s)*\n"
            "(%(25){0,}7B|{){0,1} = { zero or one time *Updated condition\n"
            "(%(25){0,}20|\\s)*\n"
            "(%(25){0,}(6A|4A)|J) = J\n"
            "(%(25){0,}(6E|4E)|N) = N\n"
            "(%(25){0,}(64|44)|D) = D\n"
            "(%(25){0,}(69|49)|I) =I\n"
            "(%(25){0,}20|\\s)*\n"
            "(%(25){0,}3A|:) = :\n"
            "[\\w\\%]+ = any number of any letters, url encoded or not\n"
            "(%(25){0,}3A|:)\n"
            "(%(25){0,}2F|\\/) = /\n"
            "[^\\n]+ = until end of line\n\n")

# Report every match and, for each match, the span and text of every capture
# group.  Groups that did not participate print as None with span -1--1.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html