import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Runs a URL-matching regular expression over a block of sample text and
 * prints every match together with each of its capture groups.
 *
 * <p>Groups are printed by index, in the order their opening parentheses
 * appear in the pattern. A group that did not participate in a match is
 * printed as {@code null}.
 */
public class Example {

    /**
     * URL pattern, split into its logical parts for readability; the pieces
     * are concatenated into a single compile-time constant.
     *
     * <p>Group names use camelCase ({@code subDomain}, {@code ipv4InIpv6})
     * because {@code java.util.regex} only permits ASCII letters and digits in
     * named-group identifiers; an underscore in a group name makes
     * {@link Pattern#compile(String, int)} throw a
     * {@link java.util.regex.PatternSyntaxException}.
     */
    private static final String REGEX =
            // scheme followed by "://"
            "(?i)(?<scheme>http|https|ftp|sftp|sip|sips|file):\\/\\/"
            // optional "username[:password]@"
            + "(?:(?<username>[^`!@#$^&*()+=,:;'\"\\{\\}\\|\\[\\]\\s\\/\\\\]+)(?::(?<password>[^`!@#$^&*()+=,:;'\"\\{\\}\\|\\[\\]\\s\\/\\\\]+))?@)?"
            // host alternative 1: dotted IPv4 address
            + "(?:(?<ipv4>((?:(?:25[0-5]|2[0-4]\\d|1?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|1?\\d\\d?)))"
            // host alternative 2: bracketed IPv6 address, optionally ending in an IPv4 part
            + "|\\[(?<ipv6>(?i)(?:[\\da-f]{0,4}:){1,7}(?:(?<ipv4InIpv6>(?:(?:25[0-5]|2[0-4]\\d|1?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|1?\\d\\d?))|[\\da-f]{0,4}))\\]"
            // host alternative 3: [sub-domains.]domain.tld
            + "|(?:(?<subDomain>[^\\s~`!@#$%^&*()_+=,.?:;'\"\\{\\}\\|\\[\\]\\/\\\\]+\\.)*(?<domain>[^\\s~`!@#$%^&*()_+=,.?:;'\"\\{\\}\\|\\[\\]\\/\\\\]+)(?<tld>\\.[^\\s~`!@#$%^&*()\\-_+=,.?:;'\"\\{\\}\\|\\[\\]\\/\\\\0-9]{2,})))+"
            // optional ":port"
            + "(?<port>:\\d+)?"
            // optional "/path/file?query#fragment"
            + "(?:\\/(?<path>\\/?[^\\s`@#$^&=.?\"\\{\\}\\\\]+\\/)*(?<file>[^\\s`@#$^&=?\"\\{\\}\\/\\\\]+)?(?<query>\\?[^\\s`#$^\"\\{\\}\\\\]+)*(?<fragment>#[^\\s`$^&=?\"\\{\\}\\/\\\\]+)?)?";

    /** Compiled once at class initialization; {@link Pattern} instances are immutable and reusable. */
    private static final Pattern URL_PATTERN = Pattern.compile(REGEX, Pattern.MULTILINE);

    /** Sample input the pattern is run against. */
    private static final String SAMPLE_TEXT = "# Quick Example (full)\n"
            + "https://uSer:pass@sub1.sub-2.EXamPle.uk.com:80/PATH1/paTh2?qUeRy=1&q2=2#ancHor\n"
            + "https://úser@suß.öctúvt.tìd/file-path/FILE_%82%8A.php?query=value&query2=value2#somethig\n"
            + "HTTP://دامنه.ایران/زیردامنه/زیردامنه۲/فایل.پسوند؟درخواست=جواب\n"
            + "http://blog.sergeys.us/beer?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed:+SergeySus+(Sergey+Sus+Photography+%C2%BB+Blog)&utm_content=Google+Reader\n"
            + "http://test.site/wp-admin/post.php?t=1347548645469?t=1347548651124?t=1347548656685?t=1347548662469?t=1347548672300?t=1347548681615?#sdfsdf\n"
            + "http://example.com/wp-admin/load-scripts.php?c=1&load[]=swfobject,jquery,utils&ver=3.5\n"
            + "http://video.google.co.uk:80/videoplay?docid=-7246927612831078230&hl=en#hello\n"
            + "https://example.com/path/resource.txt#fragment\n\n\n"
            + "# Basic\n"
            + "www.example.com\n"
            + "http://example.com\n"
            + "https://example.com\n"
            + "https://example.ac.eu\n"
            + "https://example.gov.ir\n"
            + "ftp://example.com\n"
            + "sftp://example.com\n\n"
            + "# With Path\n"
            + "www.example.com/path1/path2/\n"
            + "http://example.com/path1/path2/\n"
            + "https://example.com/path1/path2\n\n"
            + "# With Path & File\n"
            + "www.example.com/path1/path2/file.pdf\n"
            + "http://example.com/path1/path2/file.c\n"
            + "https://example.com/path1/path2/file.txt\n\n"
            + "# With Path & Query\n"
            + "http://www.example.com/?query=some-path/path1/path2/\n"
            + "http://example.com/path1/path2/?query=something\n"
            + "https://example.com/path1/path2?query=something\n\n"
            + "# With Subdomain\n"
            + "sub1.sub2.example.com\n"
            + "www.example.com\n"
            + "http://www.example.com\n"
            + "ftp://sub1.example.com\n\n"
            + "# With Port\n"
            + "www.example.com:80\n"
            + "http://example.com:8080\n"
            + "ftp://example.com:21\n"
            + "ftp://sub1.example.com:21\n"
            + "http://example.com:8080/path1/path2/\n"
            + "http://example.com:80/path1/path2/file.c\n"
            + "http://example.com:443/path1/path2/?query=something\n\n"
            + "# With Username\n"
            + "user@example.com\n"
            + "http://user@www.example.com\n"
            + "ftp://user@example.com\n"
            + "ftp://user@sub1.example.com:21\n"
            + "http://user@example.com:8080/path1/path2/\n"
            + "http://user@example.com/path1/path2/file.c\n"
            + "http://user@example.com/path1/path2/?query=something\n\n"
            + "# Some Crazy Examples\n"
            + "http://ヒキワリ.ナットウ.ニホン\n"
            + "sip://ßàÁâãóôþüúðæåïçèõöÿýòäœêëìíøù.îûñé\n"
            + "HTTP://دامنه.ایران/زیردامنه/زیردامنه۲/فایل.پسوند؟درخواست=جواب\n"
            + "http://example.com/wp-admin/load-scripts.php?c=1&load[]=swfobject,jquery,utils&ver=3.5\n"
            + "https://úser@sub1.suß2.sub3.öctúvt.tìd.tìd/V3/file-path/P----29BSx_A_%82%8A_D_M1n_a.php?query=value#somethig\n"
            + "http://www.go.com.au/ersdfs?dfd=dfgd@s=1\n"
            + "https://uSer@EXamPle.uk.com:80/?qyery?qUeRy=1&q2=2#ancHor\n"
            + "http://example.com/%E5%BC%95%E3%81%8D%E5%89%B2%E3%82%8A.html\n"
            + "hTTp://eXamPLE.CoM\n"
            + "https://belgië.be\n"
            + "https://belgië.be.gov\n"
            + "http://user@[2001:db8::1]/\n"
            + "https://user@111.222.33.44:49171/Cda-cgi/clientcgi?action=start\n"
            + "http://[2001:db8::1]:80\n"
            + "a2d.dd4d.d5d.www.go.com.au\n"
            + "a.b.c.d.e.go.com.co.uk\n"
            + "go.com.uk\n"
            + "go.go.com\n"
            + "1go-co.com\n"
            + "1-2-3-4--5-go.com\n"
            + "www.go.com.au\n\n"
            + "# These Should not be Seen as Domain\n"
            + "example.c\n"
            + "example..com\n"
            + "123.b\n"
            + "go.12\n"
            + "1.2.3.4\n"
            + "ff:ff:ff:ff\n"
            + "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe\n"
            + "hTTps://111.222.533.44:49171/cda-cgi/clientcgi?action=start\n\n\n"
            + "# These Should be Seen Partially\n"
            + "//example.com.c\n"
            + "example.com.c\n"
            + "http://example.com.com.\n\n"
            + "# It Should Find URLs in a Text too\n"
            + "create Share Links to send your expressions to co-workers or link to them on Twitter or your blog [ex. http://RegExr.com?2rjl655] \n"
            + "Built by gskinner.com with Flex 3 [adobe.com/go/flex] and Spelling Plus Library for text highlighting[gskinner.com/products/spl].\n\n\n"
            + "# Resources\n"
            + "- https://en.wikipedia.org/wiki/Uniform_Resource_Identifie\n"
            + "- https://en.wikipedia.org/wiki/List_of_URI_schemes\n"
            + "- \n"
            + "- https://zencoder.support.brightcove.com/general-information/special-characters-usernames-and-passwords.html\n"
            + "- https://support.microsoft.com/en-us/topic/certain-special-characters-are-not-allowed-in-the-url-entered-into-the-address-bar-of-internet-explorer-a8e2a966-19d6-27af-06cc-e720f25e8b02\n"
            + "- https://perishablepress.com/stop-using-unsafe-characters-in-urls/\n"
            + "- https://help.dragonmetrics.com/en/articles/213986-invalid-characters-in-url";

    /**
     * Scans the sample text and writes each match to standard output: first a
     * "Full match" line, then one "Group N" line per capture group.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        final Matcher matcher = URL_PATTERN.matcher(SAMPLE_TEXT);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            // Group 0 is the whole match, so the capture groups start at index 1.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html