import re
regex = re.compile(r"(\"Data\": \[{\".*})\",")
test_str = ("[so much data exists in the same single line ] ,\"Comments\": \"New alert\", \"Data\": [{\"etype\":\"MalwareFamily\",\"at\":\"2024-06-21T11:34:07.0000000Z\",\"md\":\"2024-06-21T11:34:07.0000000Z\",\"Investigations\":[{\"$id\":\"1\",\"Id\":\"urn:ZappedUrlInvestigation:2cc87ae3\",\"InvestigationStatus\":\"Running\"}],\"InvestigationIds\":[\"urn:ZappedUrlInvestigation:2cc8782d063\"],\"Intent\":\"Probing\",\"ResourceIdentifiers\":[{\"$id\":\"2\",\"AadTenantId\":\"2dfb29-729c918\",\"Type\":\"AAD\"}],\"AzureResourceId\":null,\"WorkspaceId\":null,\"Metadata\":{\"CustomApps\":null,\"GenericInfo\":null},\"Entities\":[{\"$id\":\"3\",\"MailboxPrimaryAddress\":\"abc@gmail.com\",\"Upn\":\"abc@gmail.com\",\"AadId\":\"6eac3b76357\",\"RiskLevel\":\"None\",\"Type\":\"mailbox\",\"Urn\":\"urn:UserEntity:10338af2b6c\",\"Source\":\"TP\",\"FirstSeen\":\"0001-01-01T00:00:00\"}, \"StartTimeUtc\": \"2024-06-21T10:12:37\", \"Status\": \"Investigation Started\"}\", \"EntityType\": \"MalwareFamily\", [so much data exists in the same single line ]\n")
subst = "$1],"
result = regex.sub(subst, test_str)
if result:
print(result)
Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html