import re
# Demo script: replace every line of a sample corpus that is a recognised
# YouTube video URL with one fixed URL, then print the rewritten corpus.
#
# The pattern is assembled from adjacent raw-string pieces (implicit literal
# concatenation), so the compiled pattern is a single string; the pieces only
# exist so that each part can carry a comment. Capture group 1 is the
# 11-character ID made of word characters and hyphens.
regex = re.compile(
    # Line start + optional scheme: "https://", "http://" or bare "//".
    r"^(?:(?:https?:)?\/\/)?"
    # Host: youtu.be or youtube.com, optionally prefixed by www. / m. / music.
    r"(?:(?:(?:www|m(?:usic)?)\.)?youtu(?:\.be|be\.com)\/"
    # Optional path in front of the ID: shorts/, live/, v/, e/, embed/,
    # watch/ or watch?...v= (other query parameters may precede v=).
    r"(?:shorts\/|live\/|v\/|e(?:mbed)?\/|watch(?:\/|\?(?:\S+=\S+&)*v=)"
    # oembed?url=<percent-encoded watch URL>
    r"|oembed\?url=https?%3A\/\/(?:www|m(?:usic)?)\.youtube\.com\/watch\?(?:\S+=\S+&)*v%3D"
    # attribution_link?...u=<percent-encoded /watch?v=>
    r"|attribution_link\?(?:\S+=\S+&)*u=(?:\/|%2F)watch(?:\?|%3F)v(?:=|%3D))?"
    # Alternative host: only www.youtube-nocookie.com/embed/ is accepted.
    r"|www\.youtube-nocookie\.com\/embed\/)"
    # Group 1: the 11-character ID.
    r"([\w-]{11})"
    # Optional trailing query/fragment, then end of line (no whitespace).
    r"[\?&#]?\S*$",
    # MULTILINE makes ^ and $ anchor at every line of the corpus, not just
    # at the start and end of the whole string.
    flags=re.MULTILINE,
)

# Sample corpus: URLs expected to match, followed by ones expected not to.
test_str = (
    " various protocols (and lack of one):\n"
    "https://www.youtube.com/watch?v=U9t-slLl30E\n"
    "http://www.youtube.com/watch?v=U9t-slLl30E\n"
    "//www.youtube.com/watch?v=U9t-slLl30E\n"
    "www.youtube.com/watch?v=U9t-slLl30E\n\n"
    " and for each protocol\n"
    " various domains:\n"
    "www.youtube.com/watch?v=U9t-slLl30E\n"
    "m.youtube.com/watch?v=U9t-slLl30E\n"
    "music.youtube.com/watch?v=OD3F7J2PeYU\n"
    "youtube.com/watch?v=U9t-slLl30E\n"
    "www.youtube-nocookie.com/embed/U9t-slLl30E\n"
    "youtu.be/U9t-slLl30E\n\n"
    " and for each domain (despite -nocookie)\n"
    " various paths:\n"
    "youtube.com/watch?v=U9t-slLl30E\n"
    "youtube.com/watch/U9t-slLl30E\n"
    "youtube.com/v/U9t-slLl30E\n"
    "youtube.com/embed/U9t-slLl30E\n"
    "youtube.com/e/U9t-slLl30E\n"
    "youtube.com/live/9UMxZofMNbA\n"
    "youtube.com/shorts/gOcxEMJSksg\n"
    "youtube.com/oembed?url=http%3A//www.youtube.com/watch?v%3DU9t-slLl30E&format=json\n"
    "youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DU9t-slLl30E%26feature%3Dshare\n"
    "youtube.com/attribution_link?a=8g8kPrPIi-ecwIsS&u=/watch%3Fv%3DU9t-slLl30E%26feature%3Dem-uploademail\n\n"
    " and for each path\n"
    " various parameters:\n"
    "youtube.com/watch?v=U9t-slLl30E\n"
    "youtube.com/watch?v=U9t-slLl30E&feature=shared\n"
    "youtube.com/watch?v=U9t-slLl30E&t=1m02s\n"
    "youtube.com/watch?v=U9t-slLl30E&lc=UgyYsn3aIQWSA19Esi54AaABAg\n"
    "youtube.com/watch?v=Lo2qQmj0_h4&list=PLmXxqSJJq-yVWpRFGImHYZBQTuBGLjG4t&index=5&pp=iAQB8AUB\n"
    " in various order:\n"
    "youtube.com/watch?feature=shared&v=U9t-slLl30E\n\n"
    " but not these:\n"
    "(wrong ID)\n"
    "youtube.com/watch?v=U$t-slLl30E\n"
    "(too short ID)\n"
    "youtube.com/watch?v=U9t-slLl30&t=10\n"
    "(wrong or deprecated paths)\n"
    "youtube.com/GitHub?v=U9t-slLl30E\n"
    "youtube.com/?v=U9t-slLl30E\n"
    "youtube.com/?vi=U9t-slLl30E\n"
    "youtube.com/?feature=player_embedded&v=U9t-slLl30E\n"
    "youtube.com/watch?vi=U9t-slLl30E\n"
    "youtube.com/vi/U9t-slLl30E\n"
    "(www.youtube-nocookie.com/embed/ only!)\n"
    "youtube-nocookie.com/embed/U9t-slLl30E\n"
    "www.youtube-nocookie.com/watch?v=U9t-slLl30E\n"
    "http://www.youtube-nocookie.com/v/U9t-slLl30E?version=3&hl=en_US&rel=0\n"
    "(playlist)\n"
    "youtube.com/playlist?list=PLmXxqSJJq-yVWpRFGImHYZBQTuBGLjG4t"
)

# Replacement text substituted for every matching line.
subst = "https://www.youtube.com/watch?v=XGkm-6yCIio"

# Pattern.sub replaces all non-overlapping matches; non-matching lines are
# left untouched.
result = regex.sub(subst, test_str)

# Skip printing when the substitution produced an empty string.
if result:
    print(result)
# Please keep in mind that this code sample was automatically generated and is
# not guaranteed to work. If you find any syntax errors, feel free to submit a
# bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html