# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for one record of the sample text. A record spans consecutive lines:
#   <index>        digits only, not captured
#   <name>         capture group 1
#   <description>  capture group 2; optional, empty when the next line is
#                  absent or starts with a digit (the next record's index)
# `\d+` rather than `\d*` keeps a blank line from being taken as an index
# line, and `[^\d\n]` rather than `[^\d]` keeps the description from starting
# on a newline and swallowing the following line.
# Meant to be applied with re.MULTILINE so `^` anchors at every line start.
regex = r"^\d+\n(.*)\n([^\d\n].*|)"
# Sample input: numbered records, each an index line, a "Name (Arabic name)"
# line and, for most records, a one-line description. Records 21 and 22 have
# no description line. Built with implicit string-literal concatenation; every
# line except the last ends in "\n".
# Entries that had been stored as mis-decoded UTF-8 (mojibake) are written
# here with their proper characters.
test_str = ("1\n"
"Mecca (مكة)\n"
"Trading hub and sanctuary in pre-Islamic Arabia; holy city to Muslims; current capital of Makkah Province, Saudi Arabia\n"
"2\n"
"Medina (المدينة)\n"
"Political seat of Muhammad, and first capital of the Rashidun Caliphate; current capital of Al Madinah Province, Saudi Arabia\n"
"3\n"
"Damascus (دمشق)\n"
"Capital of the Umayyad dynasty; current capital of Syria\n"
"4\n"
"Baghdad (بغداد)\n"
"Second capital of the Abbasid dynasty, and actual seat of Harun al-Rashid; current capital of Iraq\n"
"5\n"
"Najran (نجران)\n"
"Christian center in 5th-7th century Arabia; current capital of Najran Province, Saudi Arabia \n"
"6\n"
"Kufah (الكوفة)\n"
"Second capital of the Rashidun Caliphate under Ali's rule; first capital of the Abbasid dynasty; city in modern-day Iraq\n"
"7\n"
"Basra (البصرة)\n"
"City in Iraq\n"
"8\n"
"Khurasan (خراسان)\n"
"Region corresponding to modern Afghanistan and northeast Iran\n"
"9\n"
"Anjar (عنجر)\n"
"City in Lebanon\n"
"10\n"
"Fustat (الفسطاط)\n"
"Old city of Cairo\n"
"11\n"
"Aden (عدن)\n"
"City in Yemen\n"
"12\n"
"Yamama (اليمامة)\n"
"Modern Najd region of Saudi Arabia\n"
"13\n"
"Muscat (مسقط)\n"
"Capital of Oman\n"
"14\n"
"Mansura (المنصورة)\n"
"City in Egypt\n"
"15\n"
"Bukhara (بخارى)\n"
"Modern day Buxoro, capital of Buxoro Province, Uzbekistan\n"
"16\n"
"Fez (فاس)\n"
"Shared with Morocco, not buildable if they are in the game\n"
"17\n"
"Shiraz (شيراز)\n"
"Capital of Fārs Province, Iran\n"
"18\n"
"Merw (ميرف)\n"
"Modern day Mary; capital of Mary Province, Turkmenistan\n"
"19\n"
"Balkh (بلخ)\n"
"City in Afghanistan\n"
"20\n"
"Mosul (الموصل)\n"
"City in Iraq\n"
"21\n"
"Aydab (؟؟؟؟؟)\n"
"22\n"
"Bayt Ras (؟؟؟؟؟؟)\n"
"23\n"
"Suhar (صحار)\n"
"City in Oman\n"
"24\n"
"Taif (طائف)\n"
"City in Saudi Arabia\n"
"25\n"
"Hama (حماة)\n"
"Capital of Hamāh Governorate, Syria\n"
"26\n"
"Tabuk (تبوك)\n"
"Capital of Tabūk Province, Saudi Arabia\n"
"27\n"
"Sana'a (صنعاء)\n"
"Capital of Yemen\n"
"28\n"
"Shihr (الشحر)\n"
"City in Yemen\n"
"29\n"
"Tripoli (طرابلس)\n"
"Capital of Libya\n"
"30\n"
"Tunis (تونس)\n"
"Capital of Tunisia\n"
"31\n"
"Kairouan (القيروان)\n"
"City in Tunisia\n"
"32\n"
"Algiers (الجزائر)\n"
"Capital of Algeria\n"
"33\n"
"Oran (وهران)\n"
"City in Algeria\n"
"34\n"
"Tangier (طنجة)\n"
"Shared with Morocco, not buildable if they are in the game\n"
"35\n"
"Casablanca (الدار البيضاء)\n"
"Shared with Morocco, not buildable if they are in the game\n"
"36\n"
"Marrakech (مراكش)\n"
"Shared with Morocco, not buildable if they are in the game")
# Scan test_str for every occurrence of the pattern. re.MULTILINE makes `^`
# match at the start of each line instead of only at the start of the string.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report each match (numbered from 1) with its start/end offsets in test_str,
# followed by one line per capture group.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1; group 0 is the whole match, which
    # was already printed above.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html