# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Verbose-mode pattern for spotting links to Reddit posts in free text.
# It relies on re.VERBOSE (passed by the finditer call further down), so the
# line breaks and the "#" comments inside the literal are not part of the
# expression itself.
#
# Named groups:
#   everything - wraps the entire expression, so it equals the whole match,
#                including the leading delimiter character when there is one
#   subreddit  - "r/<name>" with an optional leading "/", or the literal "/tb"
#   comments   - the optional "/comments" path segment (lazily optional, so
#                the group may not participate in a match)
#   postID     - "/" followed by 2-7 word characters up to a word boundary
#
# Negative lookbehinds reject a subreddit segment that ends in
# "/pcmasterrace" and a post-ID segment that ends in "/46ijrl" or "/wiki".
regex = r"""
(?P<everything>
(?:^|[\ \t\f!\"\#$%&'()*+,:;<=>?@\[\]^_`{|}~])(?!\/\/)[\w\.-]*?
# URL path
(?P<subreddit>
# </r/subreddit>
(?:\/?(?<!\w)
r\/[\w-]+\b(?<!\/pcmasterrace))
|
# </tb>
(?:\/tb)
)
# </comments>/POSTID
(?P<comments>
(?:\/comments)?)??
# Start the Post ID detection, and testing for whether or not it should match a shortlink
(?P<postID>\/\w{2,7}\b(?<!\/46ijrl)(?<!\/wiki))
# Match rest of URL for debugging
[^\s\r\n\)]*
)
"""
# Sample text that the pattern is run against. The text is split into sections
# headed by "MATCH" / "DO NOT MATCH" labels, which presumably state the
# intended outcome for the Reddit link forms listed under each label (bare
# paths, full URLs, subdomains, markdown links, short links, toolbar links,
# image links, and links embedded in ordinary prose).
# The adjacent string literals inside the parentheses are joined by Python
# into a single string.
test_str = ("DO NOT MATCH\n"
"](/fds \n\n"
"[fdasf](/sw\n"
"fjsdalr/CHAN5/comments/2qjq7j/test_post/\n\n"
"MATCH\n"
"http://reddit.com/r/CHAN5/comments/2qjq7j/test_post/\n"
"np.reddit.com/r/CHAN5/comments/2qjq7j/test_post/\n"
"reddit.com/r/CHAN5/comments/2qjq7j/test_post/\n"
"/r/CHAN5/comments/2qjq7j/test_post/\n"
"r/CHAN5/comments/2qjq7j/test_post/\n"
"r/CHAN5/comments/2qjq7j/test_post/\n\n"
"[r/CHAN5/comments/2qjq7j/test_post/\n"
"<r/CHAN5/comments/2qjq7j/test_post/\n"
":/r/CHAN5/comments/2qjq7j/test_post/\n\n"
"DO NOT MATCH\n"
"/2qjq7j\n\n"
"MATCH\n"
"[Full link](http://reddit.com/r/CHAN5/comments/2qjq7j/test_post/)\n"
"[Full link](https://reddit.com/r/CHAN5/comments/2qjq7j/test_post/)\n"
"[Full with subdomain](//np.reddit.com/r/CHAN5/comments/2qjq7j/test_post/)\n"
"[Full with two-part subdomain](//np-dk.reddit.com/r/CHAN5/comments/2qjq7j/test_post/)\n"
"[Full without www](//reddit.com/r/CHAN5/comments/2qjq7j/test_post/)\n"
"[Without reddit.com](/r/CHAN5/comments/2qjq7j/test_post/)\n"
"[Without subreddit](/comments/2qjq7j)\n"
"[Ultrashortlink](/2qjq7j)\n"
"[Subreddit and post ID (no /comments)](/r/CHAN5/2qjq7j)\n"
"[Redd.it](//redd.it/2qjq7j)\n"
"[Reddit.com/tb](//reddit.com/tb/2qjq7j)\n"
"[Redd.it full link](//redd.it/r/CHAN5/comments/2qjq7j)\n"
"[subreddit without comments](/r/chan5/2qjq7j)\n"
"[weird other syntax](https://www.reddit.com/comments/3ovgmw/_/cw1h7jn)\n"
"[weird other syntax](https://www.reddit.com/comments/3ovgmw/)\n"
"[weird other syntax](https://www.reddit.com/comments/3ovgmw/ \"test\")\n"
"[weird other syntax](https://www.reddit.com/comments/3ovgmw?context=3)\n"
"[Ultrashortlink w/ alt text](/2qjq7j \"fdsfa\")\n\n"
"[Deprecated toolbar link that still works for some reason](https://www.reddit.com/tb/1y70ej)\n\n"
"https://www.reddit.com/r/SubredditSimulatorMeta/comments/4ksecc/the_anteaters_tongue_is_fully_extended_it_can_be/\n"
"DO NOT MATCH\n"
"[PCMR](https://reddit.com/r/pcmasterrace)\n"
"https://www.reddit.com/r/pcmasterrace/comments/4d9a43/campaign_to_bring_rock_band_4_to_pc_enters_its/\n"
"https://www.reddit.com/r/pcmasterrace/comments/4d9a43/campaign_to_bring_rock_band_4_to_pc_enters_its/d1owcop\n"
"https://www.reddit.com/r/pcmasterrace/comments/4d9a43/campaign_to_bring_rock_band_4_to_pc_enters_its/d1oz6if?context=10000\n"
"https://i.redd.it/vp2hsd4za4zw.jpg \n"
"[Image link](//i.redd.it/vp2hsd4za4zw.jpg)\n"
"https://www.reddit.com/gold/about/\n"
"https://www.reddit.com/r/SubredditSimulator/comments/4ksecc/the_anteaters_tongue_is_fully_extended_it_can_be/\n\n"
"MATCH (not the first or the non-Reddit link)\n"
"If you're looking for more information about this campaign, here are some useful resources.\n\n"
"- [The Harmonix AMA in /r/PCMasterRace](https://www.reddit.com/r/pcmasterrace/comments/4cqxz9/we_are_harmonix_and_only_you_can_help_us_bring/) Tons of questions and answers here.\n"
"- [A recent interview with HMXJosh that covers many of the questions asked in the AMA in different levels of detail](https://www.reddit.com/r/Rockband/comments/4co7ta/my_interview_with_josh_from_harmonix_about_rock/)\n"
"- [The original announcement thread in /r/RockBand](https://www.reddit.com/r/Rockband/comments/48heg7/rock_band_4_on_pc_gasp_crowdfunding_campaign_is/)\n"
"- [A really informative Update on the Fig page specifically abou tthe quality of the PC port](https://www.fig.co/campaigns/rock-band-4-pc?update=97#updates)\n"
"- [A collection of HMXJosh posts from the last few days](https://www.reddit.com/r/Rockband/comments/4cr3u3/wondering_about_exports_or_other_hot_topics_heres/)\n\n\n\n"
"The biggest draw, in my opinion, is the return of the Rock Band Network. Basically this means user-generated content for sale, and (most likely) free customs as well. RBN on the Xbox 360 brought all kinds of lesser known artists and genres to the game and before long there were many people (myself included) who almost exclusively purchased DLC from there instead of the official store. RBN offered double bass charts for drummers, which is something we've never seen in official DLC either. So the prospect of getting this back is, for me, pretty huge.\n\n"
"On top of this, PC as a platform offers better peripheral support, more powerful graphics options, and an opportunity for a really efficient UI in places where controllers are sub-optimal (eg, searching your song list). ")
# Lazily scan the sample text for every non-overlapping occurrence of the
# pattern. MULTILINE lets "^" anchor at the start of each line, IGNORECASE
# makes letter matching case-insensitive, and VERBOSE enables the
# whitespace/comment layout used inside ``regex``.
matches = re.finditer(regex, test_str, re.MULTILINE | re.IGNORECASE | re.VERBOSE)

# Report every match, followed by one line per capture group.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Group 0 is the whole match (already printed above), so capture groups
    # are numbered from 1. A group that took no part in the match reports
    # -1 for both offsets and None for its text.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html