import re
# Pattern for block-header lines. It is an alternation of three capture
# groups, so the group index tells which alternative produced a match:
#   group 1 - "if", "for" or "while", whitespace, then the shortest run of
#             non-colon characters that ends in a colon + newline
#   group 2 - "else", optional whitespace, colon + newline
#   group 3 - "else", whitespace, "if", then the shortest run of non-colon
#             characters that ends in a colon + newline
# [^:] also matches newlines, so groups 1 and 3 can span several lines.
_IF_FOR_WHILE_HEADER = r"((?:if|for|while)\s+?[^:]*?:\n)"
_ELSE_HEADER = r"(else\s*?:\n)"
_ELSE_IF_HEADER = r"(else\s+?if[^:]*?:\n)"
regex = re.compile("|".join([_IF_FOR_WHILE_HEADER, _ELSE_HEADER, _ELSE_IF_HEADER]))
# Sample text that `regex` is run against, built as one string by implicit
# concatenation of adjacent literals. It holds the source of a Django models
# module defining the classes Snipt, SniptLogEntry, SniptSecureView and
# Favorite.
# NOTE(review): the "else if asdgh:" / "asd" lines inside Snipt.save are not
# valid Python; presumably they were inserted by hand to exercise the
# "else if" alternative of the pattern - confirm before reusing this fixture.
test_str = ("import datetime\n"
"import difflib\n"
"import hashlib\n"
"import random\n"
"import re\n\n"
"from annoying.functions import get_object_or_None\n"
"from django.conf import settings\n"
"from django.contrib.auth.models import User\n"
"from django.db import models\n"
"from markdown_deux import markdown\n"
"from pygments import highlight\n"
"from pygments.formatters import HtmlFormatter\n"
"from pygments.lexers import get_lexer_by_name\n"
"from pygments.util import ClassNotFound\n"
"from snipts.utils import slugify_uniquely\n"
"from taggit.managers import TaggableManager\n"
"from taggit.utils import edit_string_for_tags\n"
"from teams.models import Team\n\n\n"
"class Snipt(models.Model):\n"
" \"\"\"An individual Snipt.\"\"\"\n\n"
" user = models.ForeignKey(User, blank=True, null=True, on_delete=models.CASCADE)\n"
" last_user_saved = models.ForeignKey(\n"
" User,\n"
" blank=True,\n"
" null=True,\n"
" related_name=\"last_user_saved\",\n"
" on_delete=models.CASCADE,\n"
" )\n\n"
" title = models.CharField(max_length=255, blank=True, null=True, default=\"Untitled\")\n"
" slug = models.SlugField(max_length=255, blank=True)\n"
" custom_slug = models.SlugField(max_length=255, blank=True)\n"
" tags = TaggableManager()\n\n"
" lexer = models.CharField(max_length=50)\n"
" code = models.TextField()\n"
" meta = models.TextField(blank=True, null=True)\n"
" description = models.TextField(blank=True, null=True)\n"
" stylized = models.TextField(blank=True, null=True)\n"
" stylized_min = models.TextField(blank=True, null=True)\n"
" embedded = models.TextField(blank=True, null=True)\n"
" line_count = models.IntegerField(blank=True, null=True, default=None)\n\n"
" key = models.CharField(max_length=100, blank=True, null=True)\n"
" public = models.BooleanField(default=False)\n"
" secure = models.BooleanField(default=False)\n"
" blog_post = models.BooleanField(default=False)\n\n"
" views = models.IntegerField(default=0)\n\n"
" created = models.DateTimeField(auto_now_add=True, editable=False)\n"
" modified = models.DateTimeField(auto_now=True, editable=False)\n"
" publish_date = models.DateTimeField(blank=True, null=True)\n\n"
" def _unidiff_output(self, expected, actual):\n"
" expected = expected.splitlines(1)\n"
" actual = actual.splitlines(1)\n\n"
" diff = difflib.unified_diff(expected, actual)\n\n"
" return \"\".join(diff)\n\n"
" def __init__(self, *args, **kwargs):\n"
" super(Snipt, self).__init__(*args, **kwargs)\n"
" self.original_code = self.code\n\n"
" def save(self, *args, **kwargs):\n\n"
" if not self.slug:\n"
" self.slug = slugify_uniquely(self.title, Snipt)\n\n"
" if not self.key:\n"
" self.key = hashlib.md5(\n"
" (\n"
" self.slug + str(datetime.datetime.now()) + str(random.random())\n"
" ).encode(\"utf-8\")\n"
" ).hexdigest()\n\n"
" if self.lexer == \"markdown\":\n"
" self.stylized = markdown(self.code, \"default\")\n\n"
" # Snipt embeds\n"
" for match in re.findall('\\[\\[(\\w{32})\\]\\]', self.stylized):\n"
" self.stylized = self.stylized.replace('[[' + str(match) + ']]',\n"
" \"\"\"\n"
" <script type=\"text/javascript\"\n"
" src=\"https://snipt.net/embed/{}/?snipt\">\n"
" </script>\n"
" <div id=\"snipt-embed-{}\"></div>\"\"\".format(\n"
" match, match\n"
" ),\n"
" )\n\n"
" # YouTube embeds\n"
" for match in re.findall(\n"
" \"\\[\\[youtube-(\\w{11})\\-(\\d+)x(\\d+)\\]\\]\", self.stylized\n"
" ):\n"
" self.stylized = self.stylized.replace(\n"
" \"[[youtube-{}-{}x{}]]\".format(\n"
" str(match[0]), str(match[1]), str(match[2])\n"
" ),\n"
" \"\"\"<iframe width=\"{}\" height=\"{}\"\n"
" src=\"https://www.youtube.com/embed/{}\"\n"
" frameborder=\"0\" allowfullscreen></iframe>\"\"\".format(\n"
" match[1], match[2], match[0]\n"
" ),\n"
" )\n\n"
" # Vimeo embeds\n"
" for match in re.findall(\"\\[\\[vimeo-(\\d+)\\-(\\d+)x(\\d+)\\]\\]\", self.stylized):\n"
" self.stylized = self.stylized.replace(\n"
" \"[[vimeo-{}-{}x{}]]\".format(\n"
" str(match[0]), str(match[1]), str(match[2])\n"
" ),\n"
" \"\"\"<iframe src=\"https://player.vimeo.com/video/{}\"\n"
" width=\"{}\" height=\"{}\" frameborder=\"0\"\n"
" webkitAllowFullScreen mozallowfullscreen\n"
" allowFullScreen></iframe>\"\"\".format(\n"
" match[0], match[1], match[2]\n"
" ),\n"
" )\n\n"
" # Tweet embeds\n"
" for match in re.findall(\"\\[\\[tweet-(\\d+)\\]\\]\", self.stylized):\n"
" self.stylized = self.stylized.replace(\n"
" \"[[tweet-{}]]\".format(str(match)),\n"
" '<div class=\"embedded-tweet\" data-tweet-id=\"{}\"></div>'.format(\n"
" str(match)\n"
" ),\n"
" )\n\n"
" # Parse Snipt usernames\n"
" for match in re.findall('@(\\w+) ', self.stylized):\n\n"
" # Try and get the Snipt user by username.\n"
" user = get_object_or_None(User, username=match)\n\n"
" if user:\n"
" url = user.profile.get_user_profile_url()\n"
" self.stylized = self.stylized.replace(\n"
" \"@{} \".format(str(match)),\n"
" '<a href=\"{}\">@{}</a> '.format(url, match),\n"
" )\n\n"
" else:\n"
" self.stylized = highlight(\n"
" self.code,\n"
" get_lexer_by_name(self.lexer, encoding=\"UTF-8\"),\n"
" HtmlFormatter(\n"
" linenos=\"table\", anchorlinenos=True, lineanchors=\"L\", linespans=\"L\"\n"
" ),\n"
" )\n"
" self.line_count = len(self.code.split(\"\\n\"))\n\n"
" if self.lexer == \"markdown\":\n"
" lexer_for_embedded = \"text\"\n"
" else if asdgh:\n"
" asd\n"
" else:\n"
" lexer_for_embedded = self.lexer\n\n"
" embedded = highlight(\n"
" self.code,\n"
" get_lexer_by_name(lexer_for_embedded, encoding=\"UTF-8\"),\n"
" HtmlFormatter(\n"
" style=\"native\",\n"
" noclasses=True,\n"
" prestyles=\"\"\"\n"
" background-color: #1C1C1C;\n"
" border-radius: 5px;\n"
" color: #D0D0D0;\n"
" display: block;\n"
" font: 11px Monaco, monospace;\n"
" margin: 0;\n"
" overflow: auto;\n"
" padding: 15px;\n"
" -webkit-border-radius: 5px;\n"
" -moz-border-radius: 5px;\n"
" \"\"\",\n"
" ),\n"
" )\n"
" embedded = (\n"
" embedded.replace('\\\\\"', '\\\\\\\\\"')\n"
" .replace(\"'\", \"\\\\'\")\n"
" .replace(\"\\\\\", \"\\\\\\\\\")\n"
" .replace(\"background: #202020\", \"\")\n"
" )\n"
" self.embedded = embedded\n\n"
" snipt = super(Snipt, self).save(*args, **kwargs)\n\n"
" diff = self._unidiff_output(self.original_code or \"\", self.code)\n\n"
" if diff != \"\":\n"
" log_entry = SniptLogEntry(\n"
" user=self.last_user_saved, snipt=self, code=self.code, diff=diff\n"
" )\n"
" log_entry.save()\n\n"
" return snipt\n\n"
" def __unicode__(self):\n"
" return self.title\n\n"
" def favs(self):\n"
" return Favorite.objects.filter(snipt=self).count()\n\n"
" def get_stylized_min(self):\n"
" if self.stylized_min is None:\n"
" if self.lexer == \"markdown\":\n"
" self.stylized_min = markdown(self.code[:1000], \"default\")\n"
" else:\n"
" self.stylized_min = highlight(\n"
" self.code[:1000],\n"
" get_lexer_by_name(self.lexer, encoding=\"UTF-8\"),\n"
" HtmlFormatter(linenos=\"table\", linenospecial=1, lineanchors=\"line\"),\n"
" )\n"
" return self.stylized_min\n\n"
" def get_absolute_url(self):\n\n"
" if self.blog_post:\n"
" if self.user.profile.blog_domain:\n"
" return u\"http://{}/{}/\".format(\n"
" self.user.profile.blog_domain.split(\" \")[0], self.slug\n"
" )\n"
" else:\n"
" return u'https://{}.snipt.net/{}/'.format(\n"
" self.user.username.replace('_', '-'), self.slug)\n\n"
" if self.custom_slug:\n"
" return u\"/{}/\".format(self.custom_slug)\n\n"
" if self.public:\n"
" return u\"/{}/{}/\".format(self.user.username, self.slug)\n"
" else:\n"
" return u\"/{}/{}/?key={}\".format(self.user.username, self.slug, self.key)\n\n"
" def get_full_absolute_url(self):\n\n"
" if self.blog_post:\n"
" if self.user.profile.blog_domain:\n"
" return u\"http://{}/{}/\".format(\n"
" self.user.profile.blog_domain.split(\" \")[0], self.slug\n"
" )\n"
" else:\n"
" return u'https://{}.snipt.net/{}/'.format(\n"
" self.user.username, self.slug)\n\n"
" if self.public:\n"
" return u\"/{}/{}/\".format(self.user.username, self.slug)\n"
" else:\n"
" return u\"/{}/{}/?key={}\".format(self.user.username, self.slug, self.key)\n\n"
" def get_download_url(self):\n\n"
" try:\n"
" lexer_obj = get_lexer_by_name(self.lexer)\n"
" except ClassNotFound:\n"
" lexer_obj = None\n\n"
" if lexer_obj and lexer_obj.filenames:\n"
" filename = lexer_obj.filenames[0].replace(\"*\", self.slug)\n"
" else:\n"
" if self.lexer == \"markdown\":\n"
" filename = u\"{}.md\".format(self.slug)\n"
" else:\n"
" filename = u\"{}.txt\".format(self.slug)\n\n"
" return u\"/download/{}/{}\".format(self.key, filename)\n\n"
" def get_embed_url(self):\n\n"
" if settings.DEBUG:\n"
" root = 'http://local.snipt.net'\n"
" else:\n"
" root = 'https://snipt.net'\n\n"
" return \"{}/embed/{}/\".format(root, self.key)\n\n"
" def get_raw_url(self):\n"
" return \"/raw/{}/\".format(self.key)\n\n"
" @property\n"
" def sorted_tags(self):\n"
" return self.tags.all().order_by(\"name\")\n\n"
" @property\n"
" def tags_list(self):\n"
" return edit_string_for_tags(self.tags.all())\n\n"
" @property\n"
" def lexer_name(self):\n"
" if self.lexer == \"markdown\":\n"
" return \"Markdown\"\n"
" else:\n"
" return get_lexer_by_name(self.lexer).name\n\n"
" def is_authorized_user(self, user):\n"
" if self.user == user:\n"
" return True\n"
" if self.user.profile.is_a_team:\n"
" team = Team.objects.get(user=self.user, disabled=False)\n"
" return team.user_is_member(user)\n"
" return False\n\n\n"
"class SniptLogEntry(models.Model):\n"
" \"\"\"An individual log entry for a Snipt changeset.\"\"\"\n\n"
" user = models.ForeignKey(User, on_delete=models.CASCADE)\n"
" snipt = models.ForeignKey(Snipt, on_delete=models.CASCADE)\n\n"
" code = models.TextField()\n"
" diff = models.TextField()\n\n"
" created = models.DateTimeField(auto_now_add=True, editable=False)\n"
" modified = models.DateTimeField(auto_now=True, editable=False)\n\n"
" @property\n"
" def snipt_name(self):\n"
" return self.snipt.title or \"Untitled\"\n\n\n"
"class SniptSecureView(models.Model):\n"
" \"\"\"A single view to a secure snipt.\"\"\"\n\n"
" user = models.ForeignKey(User, on_delete=models.CASCADE)\n"
" snipt = models.ForeignKey(Snipt, on_delete=models.CASCADE)\n\n"
" created = models.DateTimeField(auto_now_add=True, editable=False)\n"
" modified = models.DateTimeField(auto_now=True, editable=False)\n\n"
" @property\n"
" def snipt_name(self):\n"
" return self.snipt.title or \"Untitled\"\n\n\n"
"class Favorite(models.Model):\n"
" snipt = models.ForeignKey(Snipt, on_delete=models.CASCADE)\n"
" user = models.ForeignKey(User, on_delete=models.CASCADE)\n\n"
" created = models.DateTimeField(auto_now_add=True, editable=False)\n"
" modified = models.DateTimeField(auto_now=True, editable=False)\n\n"
" def __unicode__(self):\n"
" return u\"{} favorited by {}\".format(self.snipt.title, self.user.username)\n\n")
# Print every match of `regex` in `test_str` with its span, followed by the
# span and text of each capture group that took part in that match.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        # A group that did not participate in the match is reported by
        # match.groups() as None, and match.start()/match.end() return -1 for
        # it. Skip it rather than print a misleading "-1--1: None" line.
        if group is None:
            continue
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html