import re
# Compiled pattern that picks out runs of Chinese numeral characters, together
# with an optional neighbouring ASCII letter or unit/date character.
# NOTE(review): the intended downstream use (for example converting spoken-form
# numbers to digits) is not stated in this file -- confirm with the author.
#
# Flags: re.VERBOSE means the line breaks inside the pattern are ignored (the
# "[ ]" class is how a literal space is still expressed); re.IGNORECASE makes
# every letter class match both cases.
#
# Capture groups, numbered by their opening parenthesis:
#   1  optional prefix: one ASCII letter plus any whitespace after it.
#   2  everything that follows the optional prefix.
#   3  one numeral element, repeated one or more times: a numeral / 点 / 比
#      character, a numeral followed by one literal space, a 年/月/日/号 that
#      directly follows one of 一二两三四五六七八九十, or 分之 (group 4).
#   5  optional suffix: a letter or one of 年月日号个只分万亿秒 directly after
#      one of 一二两三四五六七八九十, or a letter after such a numeral and
#      exactly one whitespace character.
#   6  extra numeral character or 分之 (group 7); unlike group 3 its class
#      also accepts 亿.
#
# The trailing conditionals tie the parts together: if group 1 matched nothing
# more is required; otherwise, if group 5 matched nothing more is required;
# otherwise group 6 must match at least once. In effect a single numeral
# element with neither a letter prefix nor a suffix does not match on its own.
regex = re.compile(r"""
([a-z]\s*)?
(
(
[零幺一二两三四五六七八九十百千万点比]
|[零一二三四五六七八九十][ ]
|(?<=[一二两三四五六七八九十])[年月日号]
|(分之)
)+
(
(?<=[一二两三四五六七八九十])[a-zA-Z年月日号个只分万亿秒]
|(?<=[一二两三四五六七八九十]\s)[a-zA-Z]
)?
(?(1)
|(?(5)
|(
[零幺一二两三四五六七八九十百千万亿点比]
|(分之)
)
)+
)
)
""", flags=re.VERBOSE | re.IGNORECASE)
# Sample input scanned by the pattern: one sentence per line, built into a
# single string by implicit concatenation of adjacent literals. Every entry
# ends with "\n"; the first entries also keep a space before the newline, and
# that space is part of the data.
# NOTE(review): the sentences appear to cover product names mixed with Latin
# letters, plain numbers, dates and times, fractions and percentages, ratios,
# decimals, digit strings and ordinary phrases that merely contain numeral
# characters -- confirm the expected matches with the author.
test_str = ("压缩软件七 zip \n"
"这是一部 iPhone 四 s \n"
"这是一部苹果四 s\n"
"我要玩 gta五\n"
"索尼相机 mark 五\n"
"三星 s 七\n"
"数字测试一万三千七百零二\n"
"不管三七二十一\n"
"九九八十一难\n"
"时间测试二零一八年五月二十三号上午十点十分\n"
"一万零七十六\n"
"百分之三十二点六八\n"
"比分测试十七比十六\n"
"比分测试三十七比十六\n"
"一点一\n"
"一点一滴\n"
"八九十\n"
"一个人一定要\n"
"一五八七三六九零\n"
"二七九三零一二二\n"
"八五三零七四九九\n"
"二六一四九七八七\n"
"一五九六四八六二\n"
"四五六九八七二三\n"
"四八六一五九六四\n"
"一七八六四五八九\n"
"一二三加四五六\n"
"九七八六加三三八四\n"
"发电站每天发电三万零二十九度电\n"
"银行月交易总额七千九百零三亿元\n"
"深圳每月平均工资在一万三千元\n"
"每月房租要交一千伍佰元\n"
"我每月交通费用在四百元左右\n"
"本月开销费用是五万一千三百二十八元\n"
"如果你中了五千万元奖金会分我一半吗\n"
"这个月工资我发了三千五百二十九元\n"
"学会了这个技能你至少可以涨薪五千元\n"
"我们的会议时间定在九点二十五分开始\n"
"上课时间是八点十五分请不要迟到\n"
"昨天你九点二十一分才到教室\n"
"今天是二零一九年一月三十一号\n"
"今年的除夕夜是二零一九年二月四号\n"
"这根水管的长度不超过三十五米\n"
"四百米是最短的长跑距离\n"
"最高的撑杆跳为十一米\n"
"等会请在十二点零五分请通知我\n"
"二十三点十五分开始\n"
"你生日那天我会送你九百九十九朵玫瑰\n"
"给我一双鞋我可以跳九十六米远\n"
"虽然我们的身高相差三百五十六毫米也不影响我们交往\n"
"我们班的最高总分为五百八十三分\n"
"今天考试老师多扣了我二十一分\n"
"我量过这张桌子总长为一点三七米\n"
"乘务员身高必须超过一百八十五公分\n"
"这台电脑分辨率为一零二四\n"
"手机价格不超过一千五百元\n"
"一百零一点二三\n"
"一百二十三点一一六\n"
"四百五十六点一四七\n"
"零点一五九四\n"
"三点一四一五\n"
"零点一一二二三三\n"
"零点一\n"
"四万零一点九八七\n"
"五十六点八七八\n"
"零点零零一二三\n"
"零点零零零一\n"
"零点九二零一五\n"
"九百九十九点零零零一\n"
"一万点一二三\n"
"六百六十六点五五五\n"
"四百四十四点七八九\n"
"七百八十九点六六六\n"
"零点一二三四五\n"
"一点零五六四九\n"
"环比上调百分之一点八六\n"
"环比分别下跌百分之三点四六及微涨百分之零点七零\n"
"单价在三万元的二手房购房个案当中\n"
"六月仍有百分之七单价在三万元的房源\n"
"最终也只是以总积分一分之差屈居第二\n"
"中新网八月二十九日电今日\n"
"自六月底呼和浩特市率先宣布取消限购后\n"
"仅一个多月的时间里\n"
"除了北京上海广州深圳四个一线城市和三亚之外\n"
"四十六个限购城市当中\n"
"四十一个已正式取消或变相放松了限购\n"
"其中包括对拥有一套住房并已结清相应购房贷款的家庭\n"
"这个后来被称为九三零新政策的措施\n"
"今年有望超三百亿美元\n"
"就连一向看多的任志强\n"
"近期也一反常态地发表看空言论\n"
"这块黄金重达三百二十四点七五克\n"
"她出生于八六年八月十八日她弟弟出生于一九九五年三月一日\n"
"电影中梁朝伟扮演的陈永仁的编号二七一四九\n"
"现场有十二分之七的观众投出了赞成票\n"
"随便来几个价格十二块五和三十四点五元和二十点一万\n"
"明天有百分之六十二的概率降雨\n"
"这是固话零四二一三三四四一一二二\n"
"这是手机八六一八五四四一三九一二一\n")
# Scan the sample text and report every match together with its capture
# groups. finditer() yields the non-overlapping matches lazily, left to right.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        if group is None:
            # Most groups in the pattern are optional. A group that took no
            # part in the match has no span: Match.start()/end() return -1,
            # so printing "found at -1--1: None" would be misleading.
            print(f"Group {group_num} did not participate in the match")
            continue
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html