# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern used to pull the text that follows an e-mail-like token out of the
# sample below:
#   (?s)                          - let "." match newlines as well
#   \S+@\S+?                      - non-whitespace, "@", then (lazily) more
#                                   non-whitespace
#   ((?:says?|:)?\s.*?)           - group 1: an optional "say"/"says"/":",
#                                   one whitespace character, then as little
#                                   text as possible
#   (?:\s+Message ID|\* +\* +\*)  - stop at whitespace followed by
#                                   "Message ID", or at three asterisks
#                                   separated by spaces
regex = r"(?s)\S+@\S+?((?:says?|:)?\s.*?)(?:\s+Message ID|\* +\* +\*)"
# Sample input: several records that each begin with a "Message ID:" line,
# followed by a "* * *" separated footer and a disclaimer paragraph.
test_str = ("Message ID: SM9MatRNTnMAYaylR0QgOH///qUUveBCbw== \n"
" 2021-07-10T20:48:23.997Z john s (X Y Bank) -\n"
" john.s@xy.com: \n"
" [EVENT] 347376954900491 (john.s@xy.com) created room\n"
" (roomName='CSTest' roomDescription='CS Test Chat Room' COPY_DISABLED=false\n"
" READ_ONLY=false DISCOVERABLE=false MEMBER_ADD_USER_ENABLED=false\n"
" roomType=PRIVATE conversationScope=internal owningCompany=X Y\n"
" Bank)\n"
" \n"
" Message ID: nsabNaqeXfuEj9mBEhvS0n///qUUveAhbw== \n"
" 2021-07-10T20:48:23.997Z john s (X Y Bank) -\n"
" john.s@xy.comsays \n"
" [EVENT] 347376954900491 (john.s@xy.com) invited 347376954900486\n"
" (kerren.n@xy.com) to room (CSTest|john s|16091907435583)\n"
" \n"
" Message ID: Nu/EYTkTQ5qdbqzZ0Rig8n///qUUvQ42dA== \n"
" 2021-07-10T20:48:23.997Z john s (X Y Bank) -\n"
" john.s@xy.comsays \n"
" \n"
" Catchyou later\n"
" \n"
" \n"
" \n"
" Message ID: dy2yaByqhm+n88Gd3VQOhH///qUUrz8odA== \n"
" 2021-07-10T20:48:23.997Z kerren n (X Y Bank) -\n"
" nancy.n@xy.comsays \n"
" \n"
" KeywordContent_ Cricket is a bat-and-ball game played between two teams of\n"
" eleven players on a field at the centre of which is a 20-metre (22-yard) pitch\n"
" with a wicket at each end, each comprising two bails balanced on three stumps.\n"
" The batting side scores runs by striking the ball bowled at the wicket with\n"
" the bat, while the bowling and fielding side tries to prevent this and dismiss\n"
" each player (so they are \"out\").\n"
" \n"
" \n"
" \n"
" * * *\n"
" \n"
" Generated by Content Export Service | Stream Type: SymphonyPost |\n"
" Stream ID: ZZo5pRRPFC18uzlonFjya3///qUUveBHdA== | Room Type: Private |\n"
" Conversation Scope: internal | Owning Company: X Y Bank | File\n"
" Generated Date: 2021-07-10T20:48:23.997Z | Content Start Date:\n"
" 2021-07-10T20:48:23.997Z | Content Stop Date: 2021-07-10T20:48:23.997Z \n"
" \n"
" * * *\n"
" \n"
" *** (780787) Disclaimer: \n"
" (incorporated in paris with Ref. No. ZC18, is authorised by Prudential Regulation\n"
" Authority (PRA) and regulated by Financial Conduct Authority and PRA. oyp and\n"
" its affiliates (We) monitor this confidential message meant for your\n"
" information only. We make no recommendation or offer. You should get\n"
" independent advice. We accept no liability for loss caused hereby. See market\n"
" commentary disclaimers (\n"
" http://wholesalebanking.com/en/utility/Pages/d-mkt.aspx ),\n"
" Dodd-Frank and EMIR disclosures (\n"
" http://wholesalebanking.com/en/capabilities/financialmarkets/Pages/default.aspx\n"
" ) ")
# Report every match of `regex` in `test_str`, followed by the span and text
# of each of its capture groups.
matches = re.finditer(regex, test_str, re.MULTILINE | re.DOTALL)
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))
    # Capture groups are numbered from 1; group 0 is the whole match, which
    # was already printed above.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html