Regular Expressions 101

Community Patterns

Add missing ClassVar type annotations to arbitrarily nested Python class attributes with existing annotations.

0

Regular Expression
Python

r"
(?P<preceeding>^(?P<whitespace>(?:[ ]{4}|\t)+)(?!self\.)(?P<attrprefix>[a-z0-9_\.]{1,})?(?P<clsattrname>[A-Z_]{1,}): )(?P<annotation>(?!ClassVar)(?:[A-Za-z0-9_]+\.*)+(?:(?:(?:\[?[A-Za-z0-9_]+(?:, | \| )?)+(?:\.{3})?)*(?:\]*))*)+(?P<remainder>.*?$[\r\n])
"
gm

Description

Assuming class variables are defined in SCREAMING_SNAKE_CASE, this regex searches for class attributes which do not yet have a ClassVar annotation from the typing module. This can be useful for adding missing class variables by replacing the group named 'annotation' with ClassVar[${annotation}]. I.e. ${preceeding}ClassVar[${annotation}]${remainder}

WARNINGS - Top-level functions and local constants are captured as well if they are written in SCREAMING_SNAKE style and have a preceding indent! - It doesn't cover all edge cases by any means, so save/stash your work before mass-applying, or otherwise apply one-by-one (not ideal but faster than replacing by hand). Notes: - Unannotated variables are ignored - In PyCharm IDE regex replace; replace occurrences of ?P< with ?<

Assumes: - Class variables are named according to SCREAMING_SNAKE convention. - Functions and attributes are named according to underscored_lowercase convention. - Indent -> 4 spaces or 1 tab per level.

Example:

# Regex:
regex_str = r"(?P<preceeding>^(?P<whitespace>(?:[ ]{4}|\t)+)(?!self\.)(?P<attrprefix>[a-z0-9_\.]{1,})?(?P<clsattrname>[A-Z_]{1,}): )(?P<annotation>(?!ClassVar)(?:[A-Za-z0-9_]+\.*)+(?:(?:(?:\[?[A-Za-z0-9_]+(?:, | \| )?)+(?:\.{3})?)*(?:\]*))*)+(?P<remainder>.*?$[\r\n])"

#pycharm_compatible_version = (?<preceeding>^(?<whitespace>(?:[ ]{4}|\t)+)(?!self\.)(?<attrprefix>[a-z0-9_\.]{1,})?(?<clsattrname>[A-Z_]{1,}): )(?<annotation>(?!ClassVar)(?:[A-Za-z0-9_]+\.*)+(?:(?:(?:\[?[A-Za-z0-9_]+(?:, | \| )?)+(?:\.{3})?)*(?:\]*))*)+(?<remainder>.*?$[\r\n])

# WARNINGS
# - Top-level functions and local constants are captured as well if they are written in SCREAMING_SNAKE style!
#Assumes:
# - Class variables are named according to SCREAMING_SNAKE convention.
# - Functions and attributes are named according to underscored_lowercase convention.
# - Indent -> 4 spaces or 1 tab per level.
# Notes:
# - Unannotated variables are skipped
# - In PyCharm IDE regex replace; replace occurrences of ?P< with ?<
from __future__ import annotations
import pathlib, os
from typing import Union, Sequence
class ParentClass:
    """Empty placeholder class; serves as a base of SomeClass in this sample."""
class SomeMixin:
    """Empty placeholder mixin; serves as a second base of SomeClass in this sample."""

# Module-level constant: not matched because the pattern requires at least one
# level of indentation (4 spaces or a tab) in front of the attribute name.
TOPLEVEL_CONSTANT: str = 'nomatch' # No match

class SomeClass(ParentClass, SomeMixin):
    """Sample input for the regex, shown before any replacement is applied.

    The trailing comment on each line states whether the pattern is expected to match it.
    """

    FOO_CLS_ATTR: Union[str, int] = 'foo'  # Regex should match
    BAR_CLS_ATTR: Sequence[tuple[int, ...]] = ((1, 2), (3, 4)) # Regex should match
    # Path is qualified with its module because only `pathlib` itself is imported.
    ANNOTATION_AS_ATTR_ATTR: pathlib.Path = pathlib.Path(os.getenv("HOME")) # Regex should match
    FIZZ_CLS_ATTR: ClassVar[bool] = True  # No match
    BUZZ_CLS_ATTR = 10 # No match (not annotated)
    instance_attribute: float = 1337.420 # No match

    class NestedClass:
        """Nested class showing that the pattern also applies at deeper indentation."""

        NESTED_ATTR: str = 'nested_attribute' # Regex should match

        @classmethod
        def nested_method(cls):
            """Hold annotated assignments that use dotted attribute prefixes."""
            cls.NESTED_ATTR: str = 'should_match' # Regex should match
            print("Nested lvl 1!")
            def arbitrarilyy_nested_func(obj):
                """Hold an annotated assignment behind a long dotted prefix."""
                obj.a.b.c.d.ARBITRARILY_NESTED_ATTR: str = 'nested' # Regex should match

    @classmethod
    def some_method(cls):
        """Hold annotated assignments to class attributes inside a classmethod."""
        cls.FOO_CLS_ATTR: str = 'foo' # Regex should match
        cls.instance_attribute.OTHER_CLASATTR: str = 'foo' # Regex should match
        print(cls.FOO_CLS_ATTR) # No Match

def toplevel_func(arg1: int, CONST_ARG: int = 10) -> int: # No match
    """Return ``arg1`` multiplied by the local ``MULTIPLIER`` constant.

    ``CONST_ARG`` is accepted but not used in the body.
    """
    MULTIPLIER: float = 6.9
    # ^^ Warning! Constants in top-level functions are captured as well!
    # ^^ Python doesn't support subpatterns AFAIK.
    # NOTE(review): the product with a float is a float although the signature declares -> int.
    return arg1*MULTIPLIER

### --> after replacing by ${preceeding}ClassVar[${annotation}]${remainder} ###

class SomeClass(ParentClass, SomeMixin):
    """The sample class after substituting ClassVar[...] into every matched line.

    Each matched annotation is wrapped unchanged, so the original annotation and
    assigned value of every line are preserved.
    """

    FOO_CLS_ATTR: ClassVar[Union[str, int]] = 'foo'  # Regex matches
    BAR_CLS_ATTR: ClassVar[Sequence[tuple[int, ...]]] = ((1, 2), (3, 4))
    # Path is qualified with its module because only `pathlib` itself is imported.
    ANNOTATION_AS_ATTR_ATTR: ClassVar[pathlib.Path] = pathlib.Path(os.getenv("HOME"))
    FIZZ_CLS_ATTR: ClassVar[bool] = True
    BUZZ_CLS_ATTR = 10 # No match (not annotated)
    instance_attribute: float = 1337.420

    class NestedClass:
        """Nested class after the replacement has been applied."""

        NESTED_ATTR: ClassVar[str] = 'nested_attribute'

        @classmethod
        def nested_method(cls):
            """Hold the rewritten annotated assignments with dotted prefixes."""
            cls.NESTED_ATTR: ClassVar[str] = 'should_match'
            print("Nested lvl 1!")
            def arbitrarilyy_nested_func(obj):
                """Hold the rewritten assignment behind a long dotted prefix."""
                obj.a.b.c.d.ARBITRARILY_NESTED_ATTR: ClassVar[str] = 'nested'

    @classmethod
    def some_method(cls):
        """Hold the rewritten annotated assignments inside a classmethod."""
        cls.FOO_CLS_ATTR: ClassVar[str] = 'foo' # Regex matches
        cls.instance_attribute.OTHER_CLASATTR: ClassVar[str] = 'foo'
        print(cls.FOO_CLS_ATTR) # No Match

def toplevel_func(arg1: int, CONST_ARG: int = 10) -> int:
    """Return ``arg1`` multiplied by the local ``MULTIPLIER`` constant.

    Shown after the replacement: the local constant was wrapped in ClassVar as
    well, which is the false positive described in the warnings.
    ``CONST_ARG`` is accepted but not used in the body.
    """
    MULTIPLIER: ClassVar[float] = 6.9
    # ^^ ERRONEOUSLY ADJUSTED...
    # ^^ Warning! Constants in top-level functions are captured as well!
    # ^^ Python doesn't support subpatterns AFAIK.
    return arg1*MULTIPLIER
Submitted by jrbergen - 3 years ago