import re
regex = re.compile((r"(?############ Let's catch paths without \"\" or '' ############################\n"
r")(?<opening>(?# First, catch the starting path, the <opening> ###################\n"
r" )\b(?<montage>[a-zA-Z]:[\/\\])(?# montage = 'C:/'\n"
r" )|[\/\\][\/\\](?>(?#\n"
r" )[?.][\/\\](?:[^\/\\<>:\"|?\n\r ]+[\/\\])?(?# '//[?or.]/xxxxx' or '//[?or.]/server/'\n"
r" )(?&montage)?(?# '//[?or.]/c:/' or '//[?or.]/server/c:/'\n"
r" )|(?!(?&montage)))(?# '//[addressIP/ or serverName/ but not C:/]'\n"
r" )|%\w+%[\/\\]?(?# '%EnvVariable%[/]'\n"
r"))(?# So, <opening> catch : \n"
r" 'C:/' or\n"
r" '//[?or.]/[UNC/]C:/' or\n"
r" '//[?or.]/[UNC/]' or\n"
r" '//[next characters must be something other than C:/]' or\n"
r" '%EnvironementVariable%[/]'\n"
r")(?:(?# now, we catch each directory name wich is between [/] ########################\n"
r")[^\/\\<>:\"|?\n\r ,'](?# the first character should not be [ ,']\n"
r")[^\/\\<>:\"|?\n\r]*(?# Any pathFrendly character\n"
r")(?<![ ,'])(?# The last directory name's character must not be [ ,']\n"
r")[\/\\](?# End of directory name - who are between '/' -\n"
r"))*(?# Catch most 'directoryName/' as possible\n"
r")(?:(?# Lets catch the End path. There is a file ? a directory ? or just a useless '/' ?\n"
r")(?=[^\/\\<>:\"'|?\n\r;, ])(?#if next character is not pathFriendly or ' ' or [,'], we have reach the end of the path => we don't catch the last '/' and the the Regex end now.\n"
r"You can't catch fileName who begin by [,'] because they are probably a delimiter between 2 path. but '.' is allowed\n"
r")(?:(?#If we are here, that mean there is a fileName or directoryName to catch\n"
r"###### We will catch the last directoryName or the fileName without the extention ######\n"
r" )(?:[^\/\\<>:\"|?\n\r;, .](?# catch any character pathFriendly exept ' ' or [,.]\n"
r" )(?: (?=[\w\-]))?(?# If we find a ' ', we catch him if next charcter is not a delimiter. I see '-' after an ' ' not like a delimiter.\n"
r" )(?:\*(?!= ))?(?# If we find a '*', we stop the catch if next character is an ' '\n"
r" )(?!(?&montage))(?# If we find a string who look like 'C:/', we stop the catch\n"
r"))+(?# We catch theses word delimited by ' ' as much as possible\n"
r"))?(?# it's possible the fileName have no name, but just an extention\n"
r")(?:\.\w+(?# #### an extention begin by '.' and at least one none delimiter chracter\n"
r"))*(?# we can add more extention until the first none '.' delimiter character. So, after the first '.' character inside a fileName, we cannot catch any ' ' character\n"
r"If we don't find one extention, so the filename is a directory name, and we stop the catch.\n"
r"))(?# ############# END OF PATH CATCHING WITHOUT QUOTE \"\" and '' #######################\n"
r")|(?:(?# ######### Catching path quoted '' ###########################\n"
r"Path quoted '' is difficult because ['] is also a pathFrendly character\n"
r")'(?&opening)(?# We catch .* between quote only if string start with an <opening>\n"
r")(?=.*'\W|.*'$)(?# We catch .* between quote only if we are sure we will find end quote. End quote must be ['] and delimiter character or ['] and end string\n"
r")(?:[^\/\\<>:'\"|?\n\r]+(?# We take any pathFriendly character exept quote [']\n"
r")(?:'(?=\w))?(?# we catch quote ['] if next character is not a delimiter\n"
r")[\/\\]?)*(?# Path quoted must respect this patern until end quote character [']\n"
r")')(?# end quoted '' path\n"
r")|(?# ######### Catching path quoted \"\" ###########################\n"
r")\"(?&opening)(?# We catch .* between quote only if string start with an <opening>\n"
r")(?=.*\")(?# We catch .* between quote only if we are sure we will find end quote [\"]\n"
r")(?:[^\/\\<>:\"|?\n\r]+(?# We take any pathFriendly character\n"
r")[\/\\]?(?# pathFriendly characters can be is delimited by '\'\n"
r"))*(?# Path quoted must respect this patern until end quote character\n"
r")\"(?# end quoted path\n"
r")"))
# Sample corpus for the path pattern. The adjacent string literals are
# concatenated into one multi-line string; most lines hold a candidate path
# (or a deliberate near-miss) followed by a short note describing where the
# match is expected to stop or why the line should not match.
test_str = ("C:/testOk\\dot.Dirname/.nameFileBeginByDot first space after a dot in file name stop the match\n"
"C:/testOk\\_.._AsDirName/../file name.ext1.ext2 first space after a dot stop the match\n"
"C:/testOk\\lastNameWithDouble.. stop the match\n"
"C:/testOk\\lastNameSimple. stop the match like an end sentence.\n"
"C:/testOk\\LastNameIs/DirName C:/testOk\\2Paths_ _separated/f.ext space after extention stop match\n"
"C:/testOK\\Last_/_isNotmatched/fgfj.gjjb/uhloext/ and [ ,] after '\\' stop match\n"
"\\\\127.0.0.1/this_, _isOkInMidDirName\\butSimple_,_stop match in last Name or Name\n"
"\\\\.\\c:/this exotic path begining work\\and\\ space after _\\_ stop the match\n"
"\\\\?\\c:/this exotic path begining work too\\and \\space before _\\_ stop the match too\n"
"\\\\testOk/this' - 'is ok in dirName/and - in lastName .space before dot stop the match\n"
"\\\\?\\server1\\e:\\utilities\\\\filecomparer\\ this double \\\\ is interpretated as new path\n\n"
"@\"c:\\testOk\\double quote character is more permissive/ '' , ; .txt, .ext2\",\n"
"@\"\\\\127.0.0.1\\c$\\temp\\t'est-file.txt, if end double quote is missing, we use unquote match\n"
"@\"\\c:\\LOCALHOST\\c$\\ thisIsNotMatched\" \"temp\\test-file.txt\", quoted path must have a right opening to be matched\n"
"@'\\\\.\\c:\\temp\\te'st-file.txt' simple quoted is ok \n"
"'c:\\simpleQuoteInsideStill'Match\\but' stopMatch if next is space character,\n"
"'c:\\simpleQuoteInsideStill'Match\\but\\'stopMatch if is fisrt character after \\\n"
"'c:\\simpleQuoteInsideStill'Match\\but''stopMatch if he is double\n"
"@\"\\\\?\\c:\\te ' mp\\est-file.txt\",\n"
"@\"\\\\.\\UNC\\LOCALHOST\\c$\\temp\\test-file.txt\",\n"
"@\"\\\\127.0.0.1\\c$\\temp\\test-file.txt\"\n\n"
"/\\serverName\\mix/and\\still match\" double quote character stop match\n"
"\\\\\\IfMoreThan2_\\_we take only the 2 lasts.ext first space after ext stop the match\n"
"/testNotMatch/html\n"
"/testNotMatch.html\n"
"testNotMatch.html\n"
"// -> this simple // or \\\\ is not matched, but this //isMatched !\n"
"/ -> this simple / is not matched, and this /notMatchedToo\n"
"b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
"\"b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
"\"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n\n"
"error Message test:\n"
"---------------------------\n"
"Tentative d'accès à C:\\Users\\tpgz4017\\App - Data\\Local\\Temp\\tempShapeFile_CrossWave Calibration Zones - Atoll CrossWave Model.shx après sa fin.\n"
"---------------------------\n\n"
"local url path :\n"
"file://C:/Users/Downloads/20220516_32289275_1049383.pdf\n"
"urlPath :\n"
"file://p-eco2.rd.fr/vol_H0037_01$/599/livraison/20220516_32289275_1049383.pdf\n\n"
"c:\\temp\\test-file.txt\",\n"
"\\\\127.0.0.1\\c$\\temp\\test-file.txt\",\n"
"\\\\LOCALHOST\\c$\\ temp\\test-file.txt\",\n"
"\\\\LOCALHOST\\c$ \\temp\\test-file.txt\",\n"
"\\\\.\\c:\\temp\\t\\est-file.txt\",\n"
"\\\\?\\c:\\temp\\test-file.txt\",\n"
"\\\\.\\UNC\\LOCALHOST\\c$\\temp\\test-file.txt\",\n"
"\\\\?\\UNC\\ServerName\\ temp\\test-file.txt\",\n"
"\\\\127.0.0.1\\c$\\temp\\test -file.txt\"\n\n\n"
"error Message test:\n"
"Site0 / 3: - Warning . See log file 'C:\\ProgramData\\InfoVista\\Planet 7.4\\7.4\\RPE\\Log\\Plugins\\Universal_Model_masked\\log_Universal_Model.txt' for details\n\n"
"C:/test\\gvk.hv/fgfj.gjjb/uhloext : some random text\n\n"
"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau hgtfhyt \"C:/te-st.html\" \"C:/te-st.html\" gd\"dhbcsk \"C:/te/dsst.ikpo fdsf \"C:\\test\" \"C:// test.html\" gd\n"
"\"//te s t/e, llo.html \n"
"C:/test\\f/uhlo/. \n"
"C://te?st.html\n"
"b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
"; dfsdf \"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
"; dfsdf \"\\\\\n"
"\"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Building.* C: is invalid\n"
"Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo NetAct Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Building.TAB, is invalid\n"
"Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Buildi*.*ng.*, is invalid\n"
"C:/test/../hjgbkl C:/test/../hjgbkl.gfgdfgrdgfdgr C:/test/../hjgbkl\n"
"C:/test.html\n"
"C://test/ .h/hel,lo.html//test/./hello.html\n"
"C:/test//hello.html\n"
"//test\n"
"//hello.html\n"
"/test\n"
"\"%tmp%/fsdfs\"\n"
"%tmp%/fsdfs\n"
"ERROR 8/31/2021 - 6:45:39 PM HighResClutter .RasterFile : \\\\b-ren ice\\sauv egardes\\B-HIER\\GEO%dsq%\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil : Le fichier spécifié est introuvable. \n"
"\\\\b-ren ice\\sauv egardes\\..\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil C:\\b-ren ice\\sauv egardes\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil \\\\b-ren ice\\sauv egardes\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JER SE.Y_New_York_2 m_Z18N_0_DTM_02_06.bil. \n"
"//test.html\n"
"\\\\10.1.1.107\n"
"//10.1.1.107/test.html\n"
"//10.1.1.107/te st/hello.html\n"
"//10.1.1.107/test/hello\n"
"//test/hello.txt\n"
"//test/hello.txt.\n\n"
"\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt \n"
"\\\\?\\UNC\\Server\\Share\\Test\\Foo.txt\n\n"
"Pour les chemins UNC de périphérique, la partie serveur/partage forme le volume. Par exemple, dans \\\\?\\server1\\e:\\utilities\\filecomparer\\ , la partie serveur/partage est server1\\utilities . Ceci est important quand\n"
"'\\\\127.0.0.1\\c$\\temp\\test-fi'le.txt'")
# Report every path found in the sample corpus, followed by the span and text
# of each capture group of that match.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # A group that did not take part in the match is reported with span
    # -1--1 and the value None.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html