/*
 * Splits each line of $str into URI components (scheme, authority, path,
 * query, fragment) and prints them as a small structured report.
 *
 * References for the grammar encoded below:
 *   - URI character classes: http://es5.github.io/#A.6
 *   - Overall layout: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier
 *   - Host, IPv6address, path rules: RFC 3986
 *   - TODO: file URI scheme (RFC 8089), https://en.wikipedia.org/wiki/File_URI_scheme
 *
 * Authoring rules for the pattern literal:
 *   - "/" is the delimiter, so every "/" inside the pattern, comments
 *     included, must be written as "\/" or left out. PHP ends the pattern at
 *     the first unescaped delimiter; this is why the reference URLs live in
 *     this PHP comment and not in (?# ... ) comments.
 *   - The literal is single-quoted, so an apostrophe must be written as \'.
 *   - Flag x: whitespace outside character classes is ignored.
 *   - Flag m: ^ and $ anchor at every line of the subject.
 */
$re = '/(?# PCRE 8.x )
^
(?<scheme> (?i) [a-z0-9+.-]+ (?-i) )
:
(?: \/\/
    (?<authority>
        (?<userinfo> (?<username> (?&uriUnreserved)+ ) (?: : (?<password> (?&uriUnreserved)+ )? )? @ )?
        (?# An empty host is allowed and is tried first )
        (?<host>
            | (?\'ipv4\' (?&_ipv4) )
            | \[ (?\'ipv6\' (?&_ipv6) ) \]
            | (?<hostname> (?: (?&hostLabel) \. )* (?&hostLabel) )
        )
        (?: : (?<port> (?&port_number) ) )?
    )
)?
(?# If an authority component is present, then the path component must either be empty or begin with a slash. If an authority component is absent, then the path cannot begin with an empty segment, that is with two slashes as the following characters would be interpreted as an authority component. )
(?# TODO: exception for the file URI scheme, RFC 8089 )
(?<path> (?(authority) (?= [\/?#] | $ ) | (?! \/\/ ) ) (?&pathSequence) )
(?: \? (?\'query\' (?&_query) ) )?
(?: \# (?<fragment> (?&_fragment) ) )?
$
(?(DEFINE)
    (?# URI character classes after the ECMAScript 5 URI grammar, annex A.6 )
    (?<uriCharacter> (?&uriReserved) | (?&uriUnescaped) | (?&uriEscaped) )
    (?<uriReserved> [;\/?:@&=+$,] )
    (?<uriUnescaped> [[:alpha:]\d] | (?&uriMark) )
    (?<uriEscaped> %[[:xdigit:]]{2} )
    (?<uriMark> [-_.!~*\'()] )
    (?<uriUnreserved> (?&uriUnescaped) | (?&uriEscaped) )

    (?# Host label characters per RFC 952 and RFC 1123. Both letter cases are spelled out because options such as i are fixed where a group is defined, not where it is called. )
    (?<hostLabel> [a-zA-Z0-9-]{1,63} )

    (?# Dotted quad with every octet limited to 0 through 255 )
    (?<_ipv4> (?: (?&octet) \. ){3} (?&octet) )
    (?<octet> (?&_250_255) | (?&_200_249) | (?&_0_199) )
    (?<_250_255> 25[0-5] )
    (?<_200_249> 2[0-4]\d )
    (?<_0_199> 1? (?&_0_99) )
    (?<_0_99> [1-9]? \d )

    (?# IPv6address grammar of RFC 3986 section 3.2.2, one alternative per rule )
    (?<_ipv6>
                                                              (?: (?&hextet) : ){6} (?&ls32)
        |                                                  :: (?: (?&hextet) : ){5} (?&ls32)
        | (?&hextet)?                                      :: (?: (?&hextet) : ){4} (?&ls32)
        | (?: (?: (?&hextet) : ){0,1} (?&hextet) )?        :: (?: (?&hextet) : ){3} (?&ls32)
        | (?: (?: (?&hextet) : ){0,2} (?&hextet) )?        :: (?: (?&hextet) : ){2} (?&ls32)
        | (?: (?: (?&hextet) : ){0,3} (?&hextet) )?        :: (?&hextet) : (?&ls32)
        | (?: (?: (?&hextet) : ){0,4} (?&hextet) )?        :: (?&ls32)
        | (?: (?: (?&hextet) : ){0,5} (?&hextet) )?        :: (?&hextet)
        | (?: (?: (?&hextet) : ){0,6} (?&hextet) )?        ::
    )
    (?<ls32> (?&hextet) : (?&hextet) | (?&_ipv4) )
    (?<hextet> [[:xdigit:]]{1,4} )

    (?# Decimal port limited to 0 through 65535 )
    (?<port_number> (?&_65530_65535) | (?&_65500_65529) | (?&_65000_65499) | (?&_60000_64999) | (?&_10000_59999) | (?&_1000_9999) | (?&_100_999) | (?&_0_99) )
    (?<_65530_65535> 6553[0-5] )
    (?<_65500_65529> 655[0-2]\d )
    (?<_65000_65499> 65[0-4]\d{2} )
    (?<_60000_64999> 6[0-4]\d{3} )
    (?<_10000_59999> [1-5]\d{4} )
    (?<_1000_9999> [1-9]\d{3} )
    (?<_100_999> [1-9]\d{2} )

    (?# Path, query and fragment: unreserved or escaped characters plus the listed delimiters )
    (?<pathSequence> (?: (?&pathSegment) \/ )* (?&pathSegment)? )
    (?<pathSegment> (?: (?&uriUnreserved) | [:@!$&\'()*+,;=] )* )
    (?<_query> (?: (?&uriUnreserved) | [:@!$&\'()*+,;=?\/] )* )
    (?<_fragment> (?: (?&uriUnreserved) | [:@!$&\'()*+,;=?\/] )* )
)
/mx';

// Sample subject: one URI per line, terminated by a trailing newline.
$str = implode("\n", [
    'https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top',
    'ldap://[2001:db8::7]/c=GB?objectClass?one',
    'http://myuser:inadvisiblepassword@this.test.com:80/dir/filename.xml?var1=foo&var2=bar#zap',
    'mailto:John.Doe@example.com',
    'news:comp.infosystems.www.servers.unix',
    'tel:+1-816-555-1212',
    'telnet://192.0.2.16:80/',
    'urn:oasis:names:specification:docbook:dtd:xml:4.1.2',
    '',
]);

/*
 * Replacement callback: renders one match as a structured report.
 *
 * preg_replace() understands neither named references nor the \L...\E case
 * operators in its replacement string, so the report is built here from the
 * named groups, and the host name and IPv6 literal are lower-cased with
 * strtolower().
 *
 * @param array $match Match array handed over by preg_replace_callback().
 * @return string The report text that replaces the matched URI.
 */
$subst = static function (array $match): string {
    // Optional groups that did not take part in the match can be missing
    // from the array, so every lookup falls back to an empty string.
    $group = static function (string $name) use ($match): string {
        return $match[$name] ?? '';
    };

    return sprintf(
        "{\n\tscheme: \"%s\",\n\tauthority: {\n\t\tuserinfo: {\n\t\t\tusername: \"%s\",\n\t\t\tpassword: \"%s\"\n\t\t},\n\t\thost: {\n\t\t\thostname: \"%s\",\n\t\t\tipv4: \"%s\",\n\t\t\tipv6: \"%s\"\n\t\t},\n\t\tport: \"%s\"\n\t},\n\tpath: \"%s\",\n\tquery: \"%s\",\n\tfragment: \"%s\"\n}\n\n",
        $group('scheme'),
        $group('username'),
        $group('password'),
        strtolower($group('hostname')),
        $group('ipv4'),
        strtolower($group('ipv6')),
        $group('port'),
        $group('path'),
        $group('query'),
        $group('fragment')
    );
};

$result = preg_replace_callback($re, $subst, $str);

// A null result means PCRE rejected the pattern or aborted the match; fail
// loudly instead of printing an empty report.
if ($result === null) {
    throw new \RuntimeException('URI pattern could not be applied, PCRE error code ' . preg_last_error());
}

echo 'The result of the substitution is ' . $result;
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for PHP, please visit: http://php.net/manual/en/ref.pcre.php