# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern that finds Windows-style paths (drive, UNC, device-prefixed,
# %VARIABLE%-rooted, optionally quoted) inside free text.
#
# The pattern was originally written with PCRE-only syntax -- `(?<name>...)`
# named groups and `(?&name)` subroutine calls -- which Python's `re` module
# rejects with `re.error`. It is rebuilt here from fragments: named groups use
# `(?P<name>...)`, and every former subroutine call is replaced by a
# non-capturing copy of the referenced sub-pattern (none of the calls was
# recursive, so plain inlining is possible). Group layout is unchanged:
#   1 = whole match, 2 = opening, 3 = UNCPrefix, 4 = montage.
# Groups 2-4 are only set by the unquoted alternative, never by the quoted one.

# Device prefix: two separators, `?` or `.`, a separator, then optional `UNC` + separator.
_UNC_PREFIX = r"[\/\\][\/\\][?.][\/\\](?:UNC[\/\\])?"
# Drive root: a letter at a word boundary, a colon and one separator (`C:\`, `c:/`).
_DRIVE_ROOT = r"\b[a-zA-Z]:[\/\\]"
# Remaining ways a path may start: a bare device prefix, a double separator
# not directly followed by a drive root, or a `%NAME%` variable with an
# optional separator.
_OPENING_FALLBACKS = "".join((
    r"|(?:(?:", _UNC_PREFIX, r")|(?:[\/\\][\/\\](?!", _DRIVE_ROOT, r")))",
    r"|(?:%\w+%[\/\\]?)",
))
# Capturing form of the path start, used by the unquoted alternative.
_OPENING_CAPTURING = "".join((
    r"(?P<opening>",
    r"(?P<UNCPrefix>", _UNC_PREFIX, r")?",
    r"(?P<montage>", _DRIVE_ROOT, r")",
    _OPENING_FALLBACKS,
    r")",
))
# Non-capturing copy of the same sub-pattern, used where the original
# pattern called `(?&opening)`.
_OPENING_PLAIN = "".join((
    r"(?:",
    r"(?:", _UNC_PREFIX, r")?",
    r"(?:", _DRIVE_ROOT, r")",
    _OPENING_FALLBACKS,
    r")",
))

regex = "".join((
    r"(",
    # --- Alternative 1: path without surrounding quotes ---
    r"(?:",
    _OPENING_CAPTURING,
    # Zero or more directory components, each ending in a separator; a
    # component may contain spaces but neither starts with one nor ends with one.
    r"(?:[^\/\\<>:\"|?\n\r ][^\/\\<>:\"|?\n\r]*(?<![ ])[\/\\])*",
    r"(?:",
    # Only look for a final name if the next character is path-friendly.
    r"(?=[^\/\\<>:\"'|?\n\r;, ])",
    # File or directory name: stops before `.`, `,`, `;`; a single space is
    # kept only when followed by a word character or `-`; never runs into
    # the start of another drive root.
    r"(?:(?:[^\/\\<>:\"|?\n\r;, .](?: (?=[\w\-]))?(?:\*(?!= ))?(?!", _DRIVE_ROOT, r"))+)?",
    # Zero or more `.ext` suffixes.
    r"(?:\.\w+)*",
    r")",
    r")",
    r"|",
    # --- Alternative 2: path wrapped in "..." or '...' ---
    r"(?:",
    r"[\"']", _OPENING_PLAIN,
    # Require a closing quote later on the line, then lazily consume
    # components up to the first quote character.
    r"(?=.*?[\"'])(?:[^\/\\<>:'\"|?\n\r]+[\/\\]?)+?[\"']",
    r")",
    r")",
))
# Sample input for the pattern: one multi-line string assembled from adjacent
# string literals (implicit concatenation inside the parentheses). It mixes
# lines that contain Windows-style paths (drive letters, `\\server\share` UNC
# forms, `\\?\` and `\\.\` device prefixes, quoted paths, `%tmp%`-style
# variables, `file://` URLs) with lines that contain none, several of them
# embedded in French log/prose text.
# Backslashes are doubled because these are ordinary (non-raw) string literals.
test_str = ("\\\\LOCALHOST\\c$\\temp\\test-file.txt\"\n"
"@\"c:\\temp\\test-file.txt\",\n"
"@\"\\\\127.0.0.1\\c$\\temp\\test-file.txt\",\n"
"@\"\\\\LOCALHOST\\c$\\ temp\\test-file.txt\",\n"
"@\"\\\\.\\c:\\temp\\test-file.txt\",\n"
"@\"\\\\?\\c:\\temp\\est-file.txt\",\n"
"@\"\\\\.\\UNC\\LOCALHOST\\c$\\temp\\test-file.txt\",\n"
"@\"\\\\127.0.0.1\\c$\\temp\\test-file.txt\"\n\n"
"---------------------------\n"
"Tentative d'accès à C:\\Users\\tpgz4017\\App - Data\\Local\\Temp\\tempShapeFile_CrossWave Calibration Zones - Atoll CrossWave Model.shx après sa fin.\n"
"---------------------------\n\n"
"file:///C:/Users/Downloads/20220516_32289275_1049383.pdf\n"
"file://p-eco2.rd.fr/vol_H0037_01$/599/livraison/20220516_32289275_1049383.pdf\n"
" --> couper les path apres le '\\ ', meme que on est arrivé au fichier\n"
"c:\\temp\\test-file.txt\",\n"
"\\\\127.0.0.1\\c$\\temp\\test-file.txt\",\n"
"\\\\LOCALHOST\\c$\\ temp\\test-file.txt\",\n"
"\\\\LOCALHOST\\c$ \\temp\\test-file.txt\",\n"
"\\\\.\\c:\\temp\\t\\est-file.txt\",\n"
"\\\\?\\c:\\temp\\test-file.txt\",\n"
"\\\\.\\UNC\\LOCALHOST\\c$\\temp\\test-file.txt\",\n"
"\\\\?\\UNC\\ServerName\\ temp\\test-file.txt\",\n"
"\\\\127.0.0.1\\c$\\temp\\test-file.txt\"\n"
"\\\\?\\server1\\e:\\utilities\\\\filecomparer\\\n\n"
"C:/test\\gvk.hv/fgfjgjj bdsdfus(fg)\n"
"C:/test\\gvkhv/../fgfjgjjb/yfigfi\n"
"C:/test\\gvk.hv/fgfj.gjjb/uhloext C:/test\\gvk.hv/fgfj.gjjb/uhloext.ds\n"
"C:/test\\gvk.hv/fgfj.gjjb/uhloext C:/UNC/test\\gvk.hv/fgfj.gjjb/uhloext.ds\n"
"Site0 / 3: - Warning . See log file 'C:\\ProgramData\\InfoVista\\Planet 7.4\\7.4\\RPE\\Log\\Plugins\\Universal_Model_masked\\log_Universal_Model.txt' for details\n"
"C:/test\\gvk.hv/fgfj.gjjb/uhloext.ds.yi : gf\n"
"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau hgtfhyt \"C:/te-st.html\" \"C:/te-st.html\" gd\"dhbcsk \"C:/te/dsst.ikpo fdsf \"C:\\test\" \"C:// test.html\" gd\n"
"\"//te s t/e, llo.html \n"
"C:/test\\f/uhlo/. \n"
"C://te?st.html\n"
"b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
"; dfsdf \"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
"; dfsdf \"\\\\\n"
"Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Building.* C: is invalid\n"
"Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo NetAct Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Building.TAB, is invalid\n"
"Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Buildi*.*ng.*, is invalid\n"
"C:/test/../hjgbkl C:/test/../hjgbkl.gfgdfgrdgfdgr C:/test/../hjgbkl\n"
"C:/test.html\n"
"C://test/ .h/hel,lo.html//test/./hello.html\n"
"C:/test//hello.html\n"
"//test\n"
"//hello.html\n"
"/test\n"
"\"%tmp%/fsdfs\"\n"
"%tmp%/fsdfs\n"
"ERROR 8/31/2021 - 6:45:39 PM HighResClutter .RasterFile : \\\\b-ren ice\\sauv egardes\\B-HIER\\GEO%dsq%\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil : Le fichier spécifié est introuvable. \n"
"\\\\b-ren ice\\sauv egardes\\..\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil C:\\b-ren ice\\sauv egardes\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil \\\\b-ren ice\\sauv egardes\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JER SE.Y_New_York_2 m_Z18N_0_DTM_02_06.bil. \n"
"//test.html\n"
"\\\\10.1.1.107\n"
"//10.1.1.107/test.html\n"
"//10.1.1.107/te st/hello.html\n"
"//10.1.1.107/test/hello\n"
"//test/hello.txt\n"
"//test/hello.txt.\n"
"/test/html\n"
"/tes?t/html\n"
"/test.html\n"
"test.html\n"
"//\n"
"/\n"
"\\\\\\rged\\bfg\n"
"\\\\\n"
"/t!esrtr\n"
"C:/hel**o\n\n"
"\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt \n"
"\\\\?\\UNC\\Server\\Share\\Test\\Foo.txt\n\n"
"Pour les chemins UNC de périphérique, la partie serveur/partage forme le volume. Par exemple, dans \\\\?\\server1\\e:\\utilities\\\\filecomparer\\ , la partie serveur/partage est server1\\utilities . Ceci est important quand\n")
# Scan the sample text and report every match together with its capture groups.
# `re.finditer` yields match objects lazily, left to right, non-overlapping.
matches = re.finditer(regex, test_str)
for matchNum, match in enumerate(matches, start=1):
    # Whole-match span and text (group 0).
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
    # Capture groups are numbered from 1. For a group that did not take part
    # in the match, start()/end() return -1 and group() returns None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html