# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern applied to test_str below. Reading it left to right:
#   Term\sFreq\sVector:\s<br>   the literal header "Term Freq Vector: <br>"
#   (?: ... )+                  one or more "term:count, weight" entries, where
#       (([a-zA-Z0-9_\.]+))     groups 1 and 2 both capture the term (the parentheses are doubled)
#       :([0-9]+)               group 3 captures the integer count after the colon
#       ,\s(?:[0-9\.]+)\s?      the decimal weight is matched but not captured
#   (:?.*)                      group 4: an optional ':' followed by the rest of the line
# The whole expression is wrapped in an outer (?: ... )+ as well.
# Because the entry group is repeated, groups 1-3 only retain the LAST entry
# matched; earlier term/count pairs are overwritten.
# NOTE(review): "(:?" looks like a typo for the non-capturing "(?:" -- confirm
# before changing it, since that would remove group 4 from the output.
regex = r"(?:Term\sFreq\sVector:\s<br>(?:(([a-zA-Z0-9_\.]+)):([0-9]+),\s(?:[0-9\.]+)\s?)+(:?.*))+"
# Sample input for the regex: an HTML result list containing two documents,
# each with a "Term Freq Vector: <br>" section followed by a snippet.
# A plain "..." literal cannot span physical lines, so the text is written as
# adjacent string literals that Python concatenates at compile time. The
# pieces join with no separator; the only newline in the value is the final
# "\n" escape.
test_str = (
    "<ul><li><div class=\"ResultURLStyle\"><a href=\"DocumentViewer?id=147522\" target=\"_blank\">FR941206-0-00005</a></div><div class=\"ResultURLStyle\">Term Freq Vector: <br>bone:39, 169.98874 meat:24, 126.80792 separ:13, 17.930304 remov:12, 20.017244 tissu:10, 449.1114 fsi:9, 2708.487 mechan:9, 30.070314 muscl:9, 196.8524 debon:8, 8658.278 hand:7, 9.698391 ms:7, 60.36747 high:5, 5.665258 knive:5, 1002.19165 machineri:5, 108.53987 product:5, 6.3933544 result:5, 5.424931 health:4, 16.835772 includ:4, 3.6064584 livestock:4, 264.0775 manufactur:4, 14.436775 safeti:4, 26.27114 advanc:3, 16.430393 aspect:3, 35.604355 conform:3, 88.36456 crush:3, 159.37085 determin:3, 9.746535 differ:3, 18.565628 emerg:3, 14.869228 finish:3, 26.112677 machin:3, 41.111153 natur:3, 13.428124 oper:3, 4.980621 physic:3, 43.387413 process:3, 9.041892 pulver:3, 3971.0903 recoveri:3, 24.429 regul:3, 11.099191 report:3, 3.6940117 skelet:3, 2445.162 speed:3, 36.789845 surfac:3, 50.190536 system:3, 6.8322706 action:2, 7.82278 cfr:2, 23.517454 consist:2, 19.387527 deriv:2, 68.228264 descript:2, 65.22043 e.g:2, 113.28936 equip:2, 17.05211 essenti:2, 32.713223 expect:2, 5.1537876 grind:2, 448.72983 held:2, 8.619702 inspect:2, 28.522709 intact:2, 240.72699 intend:2, 16.388079 knife:2, 354.94287 marrow:2, 1814.9656 materi:2, 16.067507 miner:2, 88.914986 obtain:2, 19.321566 occur:2, 23.650143 particl:2, 528.155 past:2, 8.308373 press:2, 12.855492 pressur:2, 13.348708 recogn:2, 40.658585 scrape:2, 615.56525 shave:2, 516.2805 siev:2, 3181.6567 techniqu:2, 69.439255 tradit:2, 19.159653 wizard:2, 1281.9296 address:1, 14.368437 agenc:1, 9.194739 amount:1, 11.925466 appli:1, 14.673825 applic:1, 13.707986 attach:1, 52.235683 background:1, 41.210598 believ:1, 11.743041 boneless:1, 4126.211 broken:1, 63.51834 classifi:1, 130.11949 clerk:1, 191.49927 close:1, 6.44453 comminut:1, 11237.341 commonli:1, 226.57872 connect:1, 22.29631 consequ:1, 42.92896 constitu:1, 81.44256 consum:1, 18.714964 content:1, "
    "38.61347 contrast:1, 42.265926 copi:1, 26.827602 counter:1, 49.46661 cover:1, 13.109487 data:1, 20.2957 decad:1, 24.477684 detail:1, 16.240429 develop:1, 4.924017 devic:1, 61.527843 difficulti:1, 26.50316 docket:1, 59.840813 effect:1, 6.4812245 effici:1, 29.289873 emploi:1, 15.682959 emul:1, 500.1468 entitl:1, 51.23739 establish:1, 8.490146 exerci:1, 34.371666 fed:1, 103.094864 final:1, 7.307171 forc:1, 6.1383395 fr:1, 19.05596 fragment:1, 177.5311 fundament:1, 41.453182 good:1, 6.0453844 ground:1, 17.304644 hard:1, 14.797988 ii:1, 25.01326 incorpor:1, 49.666634 involv:1, 8.61141 led:1, 12.976781 linkag:1, 504.92828 loin:1, 3827.2102 longstand:1, 469.8888 made:1, 3.9924936 maintain:1, 12.610849 manner:1, 37.199253 minut:1, 20.582813 modern:1, 32.888412 monitor:1, 33.31788 normal:1, 30.871815 occurr:1, 348.61716 offic:1, 5.2373466 open:1, 6.2924285 panel:1, 62.253063 passag:1, 29.371315 percept:1, 124.85934 piec:1, 38.94087 possibl:1, 16.787077 potenti:1, 14.408812 practic:1, 12.41987 pre:1, 18.724916 preci:1, 45.311855 procedur:1, 18.64559 public:1, 4.9878173 qualiti:1, 18.36997 recommend:1, 20.264551 render:1, 114.96626 resist:1, 36.608788 rib:1, 532.95154 safe:1, 43.083042 screen:1, 59.530544 select:1, 21.929705 shape:1, 45.339085 sight:1, 80.49916 similar:1, 15.7118845 small:1, 9.254025 soft:1, 88.9749 stride:1, 322.43896 structur:1, 17.082443 supermarket:1, 152.42569 support:1, 5.8038373 term:1, 6.77817 touch:1, 39.420437 tremend:1, 113.143745 type:1, 6.134703 unavoid:1, 386.92673 volum:1, 23.309868 wholesom:1, 1496.189</div><div class=\"ResultSnippetStyle\">past decad fsi monitor tremend stride modern meat <B>bone</B> separ machineri fsi determin meat <B>bone</B> separ skelet muscl tissu <B>bone</B> remov muscl tissu livestock <B>bone</B> machin classifi meat <B>bone</B> separ develop emul physic action hand held high speed knive remov skelet muscl tissu <B>bone</B> e.g wizard knife advanc meat <B>bone</B> separ led recoveri system separ meat <B>bone</B> crush "
    "grind pulver <B>bone</B> meat remov shave press scrape muscl tissu <B>bone</B> surfac similar action hand held high speed knive meat obtain manner obtain tradit</div></li><li><div class=\"ResultURLStyle\"><a href=\"DocumentViewer?id=142466\" target=\"_blank\">FR940303-1-00058</a></div><div class=\"ResultURLStyle\">Term Freq Vector: <br>bone:36, 169.98874 meat:23, 126.80792 mechan:12, 30.070314 tissu:11, 449.1114 debon:10, 8658.278 separ:9, 17.930304 ms:8, 60.36747 muscl:8, 196.8524 advanc:7, 16.430393 machineri:7, 108.53987 process:7, 9.041892 materi:6, 16.067507 recoveri:6, 24.429 regul:6, 11.099191 remov:6, 20.017244 skelet:6, 2445.162 adher:5, 111.75518 fsi:5, 2708.487 livestock:5, 264.0775 result:5, 5.424931 system:5, 6.8322706 carcass:4, 1509.0143 crush:4, 159.37085 hand:4, 9.698391 health:4, 16.835772 product:4, 6.3933544 pulver:4, 3971.0903 report:4, 3.6940117 safeti:4, 26.27114 soft:4, 88.9749 tradit:4, 19.159653 appli:3, 14.673825 aspect:3, 35.604355 deriv:3, 68.228264 equip:3, 17.05211 grind:3, 448.72983 hard:3, 14.797988 high:3, 5.665258 includ:3, 3.6064584 oper:3, 4.980621 pressur:3, 13.348708 size:3, 20.973513 type:3, 6.134703 amount:2, 11.925466 applic:2, 13.707986 attach:2, 52.235683 believ:2, 11.743041 broken:2, 63.51834 cfr:2, 23.517454 consist:2, 19.387527 differ:2, 18.565628 emerg:2, 14.869228 final:2, 7.307171 finish:2, 26.112677 fr:2, 19.05596 gener:2, 4.0578303 inspect:2, 28.522709 intact:2, 240.72699 intend:2, 16.388079 knive:2, 1002.19165 machin:2, 41.111153 manufactur:2, 14.436775 marrow:2, 1814.9656 part:2, 4.197703 particl:2, 528.155 raw:2, 78.95874 siev:2, 3181.6567 small:2, 9.254025 start:2, 6.998397 techniqu:2, 69.439255 accur:1, 81.00537 address:1, 14.368437 agenc:1, 9.194739 anatom:1, 4632.9385 apertur:1, 3162.6047 background:1, 41.210598 button:1, 304.23676 capabl:1, 33.452938 characterist:1, 67.45274 classif:1, 232.36032 clerk:1, 191.49927 compar:1, 11.829044 compat:1, 171.4789 composit:1, 60.471146 conform:1, 88.36456 consequ:1, "
    "42.92896 content:1, 38.61347 contrast:1, 42.265926 convent:1, 42.03048 copi:1, 26.827602 cover:1, 13.109487 current:1, 9.162041 data:1, 20.2957 depend:1, 18.96904 detail:1, 16.240429 determin:1, 9.746535 devic:1, 61.527843 due:1, 11.871586 e.g:1, 113.28936 effect:1, 6.4812245 effici:1, 29.289873 emploi:1, 15.682959 entitl:1, 51.23739 essenti:1, 32.713223 fact:1, 10.2055 feather:1, 544.4897 fed:1, 103.094864 feder:1, 7.2581663 forc:1, 6.1383395 fragment:1, 177.5311 handl:1, 25.016815 hear:1, 20.273108 ii:1, 25.01326 incorpor:1, 49.666634 interconnect:1, 575.3322 involv:1, 8.61141 label:1, 72.55873 limit:1, 10.105716 link:1, 17.927868 loin:1, 3827.2102 manner:1, 37.199253 medium:1, 45.703964 method:1, 25.768686 miner:1, 88.914986 natur:1, 13.428124 occur:1, 23.650143 offic:1, 5.2373466 open:1, 6.2924285 origin:1, 16.382486 panel:1, 62.253063 pass:1, 15.116921 passag:1, 29.371315 past:1, 8.308373 physic:1, 43.387413 piec:1, 38.94087 potenti:1, 14.408812 powder:1, 273.23074 pre:1, 18.724916 press:1, 12.855492 public:1, 4.9878173 push:1, 24.280756 qualiti:1, 18.36997 recommend:1, 20.264551 reflect:1, 15.122981 relev:1, 43.739544 resist:1, 36.608788 rib:1, 532.95154 scrape:1, 615.56525 screen:1, 59.530544 select:1, 21.929705 shave:1, 516.2805 smaller:1, 33.1714 suffici:1, 30.671022 support:1, 5.8038373 surfac:1, 50.190536 technolog:1, 17.411894 term:1, 6.77817 typic:1, 48.848965 understand:1, 17.468332 vari:1, 63.16888 volum:1, 23.309868 warrant:1, 71.410</div><div class=\"ResultSnippetStyle\">contrast mechan tradit mechan debon machin ms result involv mechan separ remov <B>bone</B> attach skelet muscl livestock applic high pressur crush grind pulver <B>bone</B> meat remov high pressur forc result past siev separ <B>bone</B> particl fragment result crush pulver <B>bone</B> process due mechan machineri manufactur ms <B>bone</B> <B>bone</B> particl includ <B>bone</B> marrow incorpor finish product regul ms 9 cfr 319.5 type equip separ remov <B>bone</B> intend cover product "
    "manufactur machineri oper differ resist hard <B>bone</B> soft tissu passag</div></li></ul>\n"
)
# Lazily find every non-overlapping match of `regex` in `test_str`,
# ignoring letter case.
matches = re.finditer(regex, test_str, re.IGNORECASE)

# Report each match (numbered from 1) with its span and text, followed by
# one line per capture group.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum, start=match.start(), end=match.end(),
        match=match.group()))

    # Group 0 is the whole match (already printed above), so numbering
    # starts at 1. A group that did not take part in the match reports a
    # span of -1 and a value of None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum, start=match.start(groupNum),
            end=match.end(groupNum), group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html