# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for two consecutive capitalized words: each word is one ASCII
# uppercase letter followed by one or more ASCII lowercase letters, and the
# two words are separated by one or more whitespace characters.
# Because \s also matches "\n", a match can straddle a line break. All-caps
# words and words made of non-ASCII letters (e.g. the Cyrillic lines below)
# do not match.
regex = r"\b[A-Z][a-z]+\b\s+\b[A-Z][a-z]+\b"
# Sample input text, written as adjacent string literals that Python joins
# into a single string. It holds six numbered entries (54 to 59) made of
# labelled lines (Function, DOB, POB, Nationality, Gender, Associated
# entities), followed by two single-line, semicolon-delimited entries.
test_str = ("54. Mikhail Stepanovich ZUSKO\n"
"(Михаил Степанович ЗУСЬКО)\n"
"Function: Russian military leader – Lieutenant- General\n"
"Chief of staff of the “West” grouping\n"
"Former Commander of the 58th Combined Arms Army of the Southern Military District\n"
"DOB: 24.5.1972\n"
"POB: Vetly, Volyn region, former Ukrainian SSR (now Ukraine)\n"
"Nationality: Russian\n"
"Gender: male\n"
"Associated entities: Ministry of Defence of the Russian Federation\n\n"
"55. Mikhail Yuryevich TEPLINSKIY\n"
"(Михаил Юрьевич ТЕПЛИНСКИЙ) \n"
"Function: Russian military leader – Colonel-General\n"
"Commander of Airborne Forces of the Armed Forces of the Russian Federation\n"
"DOB: 9.1.1969\n"
"POB: Mospino, Donetsk oblast, former Ukrainian SSR (now Ukraine)\n"
"Nationality: Russian\n"
"Gender: male\n"
"Associated entities: Ministry of Defence of the Russian Federation\n\n"
"56. Nikolay Mikhailovich PARSHIN\n"
"(Николай Михайлович ПАРШИН) \n"
"Function: Russian military leader – Lieutenant- General\n"
"Head of the Main Rocket and Artillery Directorate of the Ministry of Defense of the Russian Federation\n"
"DOB: 20.12.1962\n"
"POB: The village of Mordovskoye- Kolomasovo, former Mordovian ASSR (now Russian Federation)\n"
"Nationality: Russian\n"
"Gender: male\n"
"Associated entities: Ministry of Defence of the Russian Federation\n\n"
"57. Oleg Yurievich TSOKOV\n"
"(Олег Юрьевич ЦОКОВ) \n"
"Function: Russian military leader – Major-General\n"
"Commander of the 144th Motor Rifle Division of the 20th Combined Arms Army of the Armed Forces of the Russian Federation\n"
"DOB: 23.9.1971\n"
"POB: former USSR (now Russian Federation)\n"
"Nationality: Russian\n"
"Gender: male\n"
"Associated entities: Ministry of Defence of the Russian Federation\n\n"
"58. Ramil Rakhmatulovich IBATULLIN\n"
"(Рамиль Рахматуллович ИБАТУЛЛИН)\n"
"Function: Russian military leader – Major-General\n"
"Commander of the 90th Guards Tank Division\n"
"DOB: 22.10.1976\n"
"POB: Bagishevo, Apastovsky District, Tatarstan, former USSR (now Russian Federation)\n"
"Nationality: Russian\n"
"Gender: male\n"
"Associated entities: Ministry of Defence of the Russian Federation\n\n"
"59. Sergey Viktorovich KARAKAEV\n"
"(Сергей Викторович КАРАКАЕВ) \n"
"Function: Russian military leader – Colonel-General\n"
"Commander of the Strategic Missile Forces\n"
"DOB: 4.6.1961\n"
"POB: The village of Ivano-Slyusarevka, Krasnodar Territory, former USSR (now Russian Federation)\n"
"Nationality: Russian\n"
"Gender: male\n"
"Associated entities: Ministry of Defence of the Russian Federation\n\n"
"SHEN, Weisheng, Zhejiang, China; (a.k.a \"Shen shen\") DOB 01 Nov 1957; POB Haimen, China; nationality China; Additional Sanctions Information - Subject to Secondary Sanctions; Gender Male; Passport G23381737 (China) issued 13 Jun 2007 expires 12 Jun 2017; National ID No. 330103195711011317 (China) (individual) [NPWMD] [IFSR] (Linked To: ZHEJIANG QINGJI IND. CO., LTD).\n\n"
"WEI, Zunyi (a.k.a. WEI, Zun Yi; a.k.a. \"WEI, David\"), Beijing, China; DOB 20 Dec 1975; POB Shandong, China; nationality China; Additional Sanctions Information - Subject to Secondary Sanctions; Gender Male; Passport EE1650028 (China) issued 28 Aug 2018 expires 27 Aug 2028; National ID No. 370922197512201811 (China) (individual) [NPWMD] [IFSR] (Linked To: HONG KONG KE.DO INTERNATIONAL TRADE CO., LIMITED).\n\n")
# Scan test_str for every non-overlapping occurrence of the pattern and print
# each match's index, character span, and text. re.MULTILINE only changes how
# ^ and $ behave; it is kept so the call matches the generated original.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1 (group 0 is the whole match), so
    # walk 1..N directly instead of bumping a zero-based counter. When the
    # pattern defines no capture groups this loop does not run.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html