import re
# Pattern that finds YouTube links (youtube.com/watch... or youtu.be/...),
# whether bare or wrapped in [embed]...[/embed] tags (with or without one
# whitespace character of padding inside the tags).
#
# Capturing groups (referenced by `subst` below):
#   1: scheme            "http://" or "https://"
#   2: optional "www."   (None when absent)
#   3: host + path head  "youtube.com/watch" or "youtu.be/"
#   4: remainder         query string / video id
#
# Python's `re` only supports fixed-width look-behind, so the three
# alternatives (7 chars, 8 chars, empty) cannot live inside one `(?<=...)`;
# each gets its own look-behind inside a non-capturing alternation.
# Both look-arounds keep an empty alternative, so they never reject a match
# and never consume the [embed] tags.
regex = re.compile(
    r"(?:(?<=\[embed\])|(?<=\[embed\]\s)|)"
    r"(https?://)"
    r"(www\.)?"
    r"(youtube\.com/watch|youtu\.be/)"
    r"([\w?=&]+)"
    r"(?=\[/embed\]|\s\[/embed\]|)",
    # MULTILINE / DOTALL were dropped: the pattern has no ^, $ or bare "."
    # for them to act on.
    flags=re.IGNORECASE,
)

# Sample input: one YouTube URL per line, in bare, [embed]-wrapped and
# HTML-attribute forms.
test_str = (
    "bla https://www.youtube.com/watch?v=Vpg9yizPP_g\n"
    "http://www.youtube.com/watch?v=Vpg9yizPP_g bla\n"
    "[embed]https://www.youtube.com/watch?v=Vpg9yizPP_g[/embed] bla\n"
    "[embed] https://www.youtube.com/watch?v=Vpg9yizPP_g [/embed]\n"
    "[embed]http://www.youtube.com/watch?v=Vpg9yizPP_g[/embed]\n"
    "[embed] http://www.youtube.com/watch?v=Vpg9yizPP_g [/embed]\n"
    "bla https://youtube.com/watch?v=Vpg9yizPP_g bla\n"
    "http://youtube.com/watch?v=Vpg9yizPP_g\n"
    "[embed]https://youtube.com/watch?v=Vpg9yizPP_g[/embed]bla\n"
    "bla[embed] https://youtube.com/watch?v=Vpg9yizPP_g [/embed]\n"
    "[embed]http://youtube.com/watch?v=Vpg9yizPP_g[/embed]\n"
    "[embed] http://youtube.com/watch?v=Vpg9yizPP_g [/embed]\n"
    "https://www.youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g\n"
    "http://www.youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g\n"
    "[embed]https://www.youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g[/embed]\n"
    "[embed] https://www.youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g [/embed]\n"
    "[embed]http://www.youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g[/embed]\n"
    "bla[embed] http://www.youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g [/embed]bla\n"
    "https://youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g bla\n"
    "bla http://youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g\n"
    "[embed]https://youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g[/embed]\n"
    "[embed] https://youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g [/embed]\n"
    "[embed]http://youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g[/embed]\n"
    "[embed] http://youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g [/embed]\n"
    "https://youtu.be/Vpg9yizPP_g\n"
    "http://youtu.be/Vpg9yizPP_g bla\n"
    "[embed]https://youtu.be/Vpg9yizPP_g[/embed]\n"
    "[embed] https://youtu.be/Vpg9yizPP_g [/embed]\n"
    "[embed]http://youtu.be/Vpg9yizPP_g[/embed]\n"
    "[embed] http://youtu.be/Vpg9yizPP_g [/embed]\n"
    "<a href src=\"http://youtu.be/Vpg9yizPP_g\">vid</a>\n"
    "<a href src=\"http://youtube.com/watch?feature=player_embedded&v=Vpg9yizPP_g\">vid</a>\n"
    "<a href src=\"https://youtube.com/watch?v=Vpg9yizPP_g\">vid</a>"
)

# Replacement template: re-emits the four captured groups in order, so each
# matched URL is rebuilt from its parts. Group 2 is optional; an unmatched
# group is substituted as an empty string.
subst = r"\1\2\3\4"

result = regex.sub(subst, test_str)

# Only print when the substitution produced non-empty text.
if result:
    print(result)
# Note: this code sample was automatically generated and is not guaranteed
# to work. If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, see:
# https://docs.python.org/3/library/re.html