// URI splitter, ported from a PCRE 8.x pattern to plain JavaScript.
//
// JavaScript regular expressions have no free-spacing mode, no `(?#...)`
// comments, no `(?(DEFINE)...)` / `(?&name)` subroutines, no conditionals,
// no inline `(?i)` modifiers and no POSIX classes, so the pattern is assembled
// from named string fragments and compiled with `new RegExp` instead.
//
// Behavioral fixes relative to the PCRE original:
//   * percent-escapes use a real hex-digit class (`[[:xdigit]]` was a typo);
//   * IPv4 octets 100-109 are accepted (`1?[1-9]?\d` could not match them);
//   * uriMark contains the apostrophe required by the cited ES5 grammar
//     instead of a backtick;
//   * IPv6 literals follow the full RFC 3986 `IPv6address` grammar rather than
//     the single `h:h::...` form the original had wired in.

// --- Character sets (bodies of character classes; '-' is appended last) ------

// ES5 §A.6 uriUnescaped: alphanumerics plus uriMark  - _ . ! ~ * ' ( )
const UNESCAPED_SET = String.raw`A-Za-z\d_.!~*'()`;
// Characters additionally allowed inside a path segment.
const SEGMENT_SET = `${UNESCAPED_SET}:@$&+,;=`;
// Query and fragment additionally allow '?' and '/'.
const QUERY_SET = `${SEGMENT_SET}?/`;

// ES5 §A.6 uriEscaped: '%' followed by exactly two hex digits.
const URI_ESCAPED = '%[0-9A-Fa-f]{2}';
const URI_UNRESERVED = `(?:[${UNESCAPED_SET}-]|${URI_ESCAPED})`;

// Each repeated group below has mutually exclusive alternatives (one class,
// or a '%' escape). Keeping them disjoint avoids exponential backtracking on
// lines that ultimately fail to match.
const PATH = `(?:[${SEGMENT_SET}/-]|${URI_ESCAPED})*`;
const QUERY_OR_FRAGMENT = `(?:[${QUERY_SET}-]|${URI_ESCAPED})*`;

// --- Host ------------------------------------------------------------------

// Allowed host label characters per RFC 952 / RFC 1123 (case-insensitive).
const HOST_LABEL = '[A-Za-z0-9-]{1,63}';
const HOSTNAME = String.raw`(?:${HOST_LABEL}\.)*${HOST_LABEL}`;

// Decimal 0-255.
const OCTET = String.raw`(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)`;
const IPV4 = String.raw`(?:${OCTET}\.){3}${OCTET}`;

// RFC 3986 §3.2.2 IPv6address, one alternative per ABNF production.
const H16 = '[0-9A-Fa-f]{1,4}';
const LS32 = `(?:${H16}:${H16}|${IPV4})`;
const IPV6 = [
  `(?:${H16}:){6}${LS32}`,
  `::(?:${H16}:){5}${LS32}`,
  `(?:${H16})?::(?:${H16}:){4}${LS32}`,
  `(?:(?:${H16}:){0,1}${H16})?::(?:${H16}:){3}${LS32}`,
  `(?:(?:${H16}:){0,2}${H16})?::(?:${H16}:){2}${LS32}`,
  `(?:(?:${H16}:){0,3}${H16})?::${H16}:${LS32}`,
  `(?:(?:${H16}:){0,4}${H16})?::${LS32}`,
  `(?:(?:${H16}:){0,5}${H16})?::${H16}`,
  `(?:(?:${H16}:){0,6}${H16})?::`,
].join('|');

// Decimal 0-65535, longest alternatives first.
const PORT_NUMBER = [
  String.raw`6553[0-5]`,
  String.raw`655[0-2]\d`,
  String.raw`65[0-4]\d{2}`,
  String.raw`6[0-4]\d{3}`,
  String.raw`[1-5]\d{4}`,
  String.raw`[1-9]\d{3}`,
  String.raw`[1-9]\d{2}`,
  String.raw`[1-9]?\d`,
].join('|');

// --- Authority ---------------------------------------------------------------

const USERINFO =
  `(?<userinfo>(?<username>${URI_UNRESERVED}+)` +
  `(?::(?<password>${URI_UNRESERVED}+)?)?@)?`;

// The leading empty alternative keeps the original's support for an empty
// host (e.g. "file:///path").
const HOST =
  `(?<host>|(?<ipv4>${IPV4})` +
  String.raw`|\[(?<ipv6>${IPV6})\]` +
  `|(?<hostname>${HOSTNAME}))`;

const AUTHORITY = `(?<authority>${USERINFO}${HOST}(?::(?<port>${PORT_NUMBER}))?)`;

// --- Whole URI, one per line -------------------------------------------------

// If an authority component is present, the path must be empty or begin with
// a slash. If it is absent, the path cannot begin with "//", because those
// characters would be interpreted as the start of an authority component.
// The PCRE conditional `(?(authority)...|...)` is expressed as two
// alternatives: "//authority followed by [/?#] or end of line" or "not //".
// TODO: Exception: file URI scheme RFC 8089 https://en.wikipedia.org/wiki/File_URI_scheme
const regex = new RegExp(
  [
    '^',
    '(?<scheme>[A-Za-z0-9+.-]+)',
    ':',
    `(?://${AUTHORITY}(?=[/?#]|$)|(?!//))`,
    `(?<path>${PATH})`,
    String.raw`(?:\?(?<query>${QUERY_OR_FRAGMENT}))?`,
    `(?:#(?<fragment>${QUERY_OR_FRAGMENT}))?`,
    '$',
  ].join(''),
  'gm',
);

// Sample input: one URI per line.
const str = `https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top
ldap://[2001:db8::7]/c=GB?objectClass?one
http://myuser:inadvisiblepassword@this.test.com:80/dir/filename.xml?var1=foo&var2=bar#zap
mailto:John.Doe@example.com
news:comp.infosystems.www.servers.unix
tel:+1-816-555-1212
telnet://192.0.2.16:80/
urn:oasis:names:specification:docbook:dtd:xml:4.1.2
`;

/**
 * Replacer for `String.prototype.replace`: renders one matched URI as a
 * structured text block.
 *
 * JavaScript replacement strings support neither PCRE's `${name}` references
 * nor `\L...\E` case folding, so the formatting is done in a function. Groups
 * that did not participate in the match are rendered as empty strings, and
 * `hostname` / `ipv6` are lower-cased.
 *
 * @param {...*} args - Arguments supplied by `replace`; the last one is the
 *   object of named capture groups.
 * @returns {string} The formatted block, terminated by a blank line.
 */
const subst = (...args) => {
  const groups = args[args.length - 1];
  const field = (name) => groups[name] ?? '';
  const lines = [
    '{',
    `\tscheme: "${field('scheme')}",`,
    '\tauthority: {',
    '\t\tuserinfo: {',
    `\t\t\tusername: "${field('username')}",`,
    `\t\t\tpassword: "${field('password')}"`,
    '\t\t},',
    '\t\thost: {',
    `\t\t\thostname: "${field('hostname').toLowerCase()}",`,
    `\t\t\tipv4: "${field('ipv4')}",`,
    `\t\t\tipv6: "${field('ipv6').toLowerCase()}"`,
    '\t\t},',
    `\t\tport: "${field('port')}"`,
    '\t},',
    `\tpath: "${field('path')}",`,
    `\tquery: "${field('query')}",`,
    `\tfragment: "${field('fragment')}"`,
    '}',
  ];
  return `${lines.join('\n')}\n\n`;
};

// The substituted value will be contained in the result variable
const result = str.replace(regex, subst);
console.log('Substitution result: ', result);
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for JavaScript, please visit: https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions