# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern matching the key/value pairs NOT worth keeping: every pair whose key
# is neither "jobID" nor starts with "exec". It must be compiled with
# re.VERBOSE (it contains layout whitespace and inline comments).
#
# The pattern was originally written for PCRE and relied on the (*SKIP) verb
# and on (?2)/(?3) subroutine recursion, which the standard-library `re`
# module rejects with re.error. It is rebuilt here from `re`-compatible
# pieces:
#   * (*SKIP) only sped up failed attempts, so it is simply dropped;
#   * possessive quantifiers / atomic groups (Python 3.11+ only) are replaced
#     by their plain forms -- every alternative starts with a distinct
#     delimiter, so the set of matches is unchanged;
#   * recursion is unrolled to a fixed depth (_MAX_NESTING), because `re`
#     cannot express unbounded nesting. Values nested deeper than that are
#     not matched.
# Capture groups are unchanged: 1 = leading comma, 2 = whole pair, 3 = value.
# As before, only strings, objects and arrays are recognised as values; bare
# numbers/booleans are not.

# How many levels of nested objects/arrays a discarded value may contain.
# The unrolled pattern grows roughly 4x per level, so keep this small.
_MAX_NESTING = 4

# A key we do not like (anything but "jobID" or a key beginning with "exec"),
# followed by the colon and the whitespace around it.
_UNWANTED_KEY = r'"(?!jobID"|exec)[^"]+"\s*:\s*'

# String literals, boring.
_STRING = r'"[^"]*"'


def _value_pattern(depth):
    """Return a pattern for a value nested at most `depth` levels deep.

    The result is a bare alternation (string | object | array), so callers
    must wrap it in a group before concatenating anything to it.
    """
    if depth == 0:
        return _STRING
    inner_value = "(?:" + _value_pattern(depth - 1) + ")"
    inner_pair = _UNWANTED_KEY + inner_value
    # An object storing some key-value pairs we don't care about.
    obj = r"\{(?:" + inner_pair + r")?(?:,\s*" + inner_pair + r")*\}"
    # An array storing some values we don't care about.
    arr = r"\[" + inner_value + r"?(?:,\s*" + inner_value + r")*\]"
    return _STRING + "|" + obj + "|" + arr


regex = (
    r"""
(,\s*)?
# Attempts to match a comma and the whitespace after it.
# Key - Value pairs not worthy of keeping.
(
""" + _UNWANTED_KEY + r""" # Check if we like this key, then the colon.
( # The value: a string, or a (bounded-depth) object / array.
""" + _value_pattern(_MAX_NESTING) + r"""
)
)
(?(1)|,?)
# Balance the comma (so the result string is still valid JSON):
# only eat a trailing comma when no leading comma was consumed.
"""
)
# Sample input the pattern is run against: a fragment of JSON-like text in
# which the key "2597401" maps to an array of four job records. It is a
# fragment, not a complete JSON document (it starts at a key and ends with a
# trailing comma), and its whitespace is deliberately irregular.
test_str = (" \"2597401\":[{\"jobID\":\"2597401\",\n"
" \"account\":\"TG-CCR120014\",\n"
" \"user\":\"charngda\",\n"
" \"pkgT\":{\"pgi/7.2- 5\":{\"libA\":[\"libpgc.so\"],\n"
" \"flavor\":[\"default\"]}}, \n"
" \"startEpoch\":\"1338497979\",\n"
" \"runTime\":\"1022\",\n"
" \"execType\":\"user:binary\", \n"
" \"exec\":\"ft.D.64\",\n"
" \"numNodes\":\"4\",\n"
" \"sha1\":\"5a79879235aa31b6a46e73b43879428e2a175db5\",\n"
" \"execEpoch\":1336766742,\n"
" \"execModify\":\"Fri May 11 15:05:42 2012\",\n"
" \"startTime\":\"Thu May 31 15:59:39 2012\",\n"
" \"numCores\":\"64\",\n"
" \"sizeT\":{\"bss\":\"1881400168\",\"text\":\"239574\",\"data\":\"22504\"}}, \n"
" {\"jobID\":\"2597401\",\n"
" \"account\":\"TG-CCR120014\",\n"
" \"user\":\"charngda\",\n"
" \"pkgT\":{\"pgi/7.2-5\":{\"libA\":[\"libpgc.so\"],\n"
" \"flavor\":[\"default\"]}},\n"
" \"startEpoch\":\"1338497946\",\n"
# NOTE(review): the next line has no comma (and no newline) between the
# "runTime" and "execType" pairs -- confirm whether this malformed spot is an
# intentional part of the fixture.
" \"runTime\":\"33\" \"execType\":\"user:binary\",\n"
" \"exec\":\"cg.C.64\",\n"
" \"numNodes\":\"4\",\n"
" \"sha1\":\"caf415e011e28b7e4e5b050fb61cbf71a62a9789\",\n"
" \"execEpoch\":1336766735,\n"
" \"execModify\":\"Fri May 11 15:05:35 2012\",\n"
" \"startTime\":\"Thu May 31 15:59:06 2012\",\n"
" \"numCores\":\"64\",\n"
" \"sizeT\":{\"bss\":\"29630984\",\"text\":\"225749\",\"data\":\"20360\"}},\n"
" {\"jobID\":\"2597401\",\n"
" \"account\":\"TG-CCR120014\",\n"
" \"user\":\"charngda\",\n"
" \"pkgT\":{\"pgi/7.2-5\": {\"libA\":[\"libpgc.so\"],\n"
" \"flavor\":[\"default\"]}},\n"
" \"startEpoch\":\"1338500447\",\n"
" \"runTime\":\"145\",\n"
" \"execType\":\"user:binary\",\n"
" \"exec\":\"mg.D.64\",\n"
" \"numNodes\":\"4\",\n"
" \"sha1\":\"173de32e1514ad097b1c051ec49c4eb240f2001f\",\n"
" \"execEpoch\":1336766756,\n"
" \"execModify\":\"Fri May 11 15:05:56 2012\",\n"
" \"startTime\":\"Thu May 31 16:40:47 2012\",\n"
" \"numCores\":\"64\",\n"
" \"sizeT\":{\"bss\":\"456954120\",\"text\":\"426186\",\"data\":\"22184\"}},{\"jobID\":\"2597401\",\n"
" \"account\":\"TG-CCR120014\",\n"
" \"user\":\"charngda\",\n"
" \"pkgT\":{\"pgi/7.2-5\":{\"libA\":[\"libpgc.so\"],\n"
" \"flavor\":[\"default\"]}},\n"
" \"startEpoch\":\"1338499002\",\n"
" \"runTime\":\"1444\",\n"
" \"execType\":\"user:binary\",\n"
" \"exec\":\"lu.D.64\",\n"
" \"numNodes\":\"4\",\n"
" \"sha1\":\"c6dc16d25c2f23d2a3321d4feed16ab7e10c2cc1\",\n"
" \"execEpoch\":1336766748,\n"
" \"execModify\":\"Fri May 11 15:05:48 2012\",\n"
" \"startTime\":\"Thu May 31 16:16:42 2012\",\n"
" \"numCores\":\"64\",\n"
" \"sizeT\":{\"bss\":\"199850984\",\"text\":\"474218\",\"data\":\"27064\"}}],")
# Scan the sample text with the verbose-mode pattern and report every match,
# followed by the span and text of each of its capture groups.
matches = re.finditer(regex, test_str, re.VERBOSE)
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))
    # Group 0 is the whole match (already printed above), so number the
    # capture groups from 1. A group that did not take part in the match is
    # reported by `re` with start/end -1 and text None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html