import re
# Pattern for the "man" emoji and its skin-tone / hair-style variants.
#   group 1: U+1F468 MAN
#   group 2: optional skin-tone modifier, U+1F3FB..U+1F3FF
#   group 3: optional ZERO WIDTH JOINER (U+200D) followed by a hair component,
#            U+1F9B0..U+1F9B3 (red hair, curly hair, bald, white hair)
# An optional VARIATION SELECTOR-16 (U+FE0F) may close the sequence.
#
# Python's `re` does not accept the PCRE-style \x{HHHH} escape; \uXXXX and
# \UXXXXXXXX are the supported forms. The code points are spelled as escapes
# so the pattern survives a file-encoding round-trip unchanged.
regex = re.compile(
    r"(\U0001F468)"
    r"(?:([\U0001F3FB-\U0001F3FF])?(\u200D[\U0001F9B0-\U0001F9B3])?\uFE0F?)?",
    flags=re.MULTILINE,
)

# Sample text the pattern is run against. The invisible zero-width joiner in
# the hair-style sequences is written as \u200d so it stays visible in source.
test_str = (
    "Codepoints\n\n"
    " 👨 U+1F468\n\n"
    "Shortcodes\n\n"
    " :man:\n\n"
    "Related\n\n"
    " 👨 Man\n"
    " 👨🏻 Man: Light Skin Tone\n"
    " 👨🏼 Man: Medium-Light Skin Tone\n"
    " 👨🏽 Man: Medium Skin Tone\n"
    " 👨🏾 Man: Medium-Dark Skin Tone\n"
    " 👨🏿 Man: Dark Skin Tone\n"
    " 👩 Woman\n"
    " 👩🏻 Woman: Light Skin Tone\n"
    " 👩🏼 Woman: Medium-Light Skin Tone\n"
    " 👩🏽 Woman: Medium Skin Tone\n"
    " 👩🏾 Woman: Medium-Dark Skin Tone\n"
    " 👩🏿 Woman: Dark Skin Tone\n\n\n"
    "See also\n\n"
    " 🧔 Man: Beard\n"
    " 🧔🏻 Man: Light Skin Tone, Beard\n"
    " 🧔🏼 Man: Medium-Light Skin Tone, Beard\n"
    " 🧔🏽 Man: Medium Skin Tone, Beard\n"
    " 🧔🏾 Man: Medium-Dark Skin Tone, Beard\n"
    " 🧔🏿 Man: Dark Skin Tone, Beard\n\n"
    " 👨\u200d🦳 Man: White Hair\n"
    " 👨🏻\u200d🦳 Man: Light Skin Tone, White Hair\n"
    " 👨🏼\u200d🦳 Man: Medium-Light Skin Tone, White Hair\n"
    " 👨🏽\u200d🦳 Man: Medium Skin Tone, White Hair\n"
    " 👨🏾\u200d🦳 Man: Medium-Dark Skin Tone, White Hair\n"
    " 👨🏿\u200d🦳 Man: Dark Skin Tone, White Hair\n"
    " 👩\u200d🦳 Woman: White Hair\n"
    " 👩🏻\u200d🦳 Woman: Light Skin Tone, White Hair\n"
    " 👩🏼\u200d🦳 Woman: Medium-Light Skin Tone, White Hair\n"
    " 👩🏽\u200d🦳 Woman: Medium Skin Tone, White Hair\n"
    " 👩🏾\u200d🦳 Woman: Medium-Dark Skin Tone, White Hair\n"
    " 👩🏿\u200d🦳 Woman: Dark Skin Tone, White Hair\n\n"
    " 👨\u200d🦰 Man: Red Hair\n"
    " 👨🏻\u200d🦰 Man: Light Skin Tone, Red Hair\n"
    " 👨🏼\u200d🦰 Man: Medium-Light Skin Tone, Red Hair\n"
    " 👨🏽\u200d🦰 Man: Medium Skin Tone, Red Hair\n"
    " 👨🏾\u200d🦰 Man: Medium-Dark Skin Tone, Red Hair\n"
    " 👨🏿\u200d🦰 Man: Dark Skin Tone, Red Hair\n"
    " 👩\u200d🦰 Woman: Red Hair\n"
    " 👩🏻\u200d🦰 Woman: Light Skin Tone, Red Hair\n"
    " 👩🏼\u200d🦰 Woman: Medium-Light Skin Tone, Red Hair\n"
    " 👩🏽\u200d🦰 Woman: Medium Skin Tone, Red Hair\n"
    " 👩🏾\u200d🦰 Woman: Medium-Dark Skin Tone, Red Hair\n"
    " 👩🏿\u200d🦰 Woman: Dark Skin Tone, Red Hair\n\n"
    " 👦 Boy\n"
    " ♂ Male Sign\n"
    " 🕺 Man Dancing\n"
    " 👲 Man With Chinese Cap\n"
    " 🕴 Man in Suit Levitating\n"
    " 🤵 Man in Tuxedo\n"
    " 👴 Old Man\n"
    " 👵 Old Woman\n"
    " 🎅 Santa Claus"
)

# Report every match together with the span and text of each capture group.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # A group that did not participate in the match reports a span of -1--1
    # and a value of None.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html