# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern that picks out ``name=value`` pairs such as HTML attributes.
#   (\S+)=   group 1: the run of non-whitespace characters right before "="
#   ["']?    an optional opening quote
#   (...)    group 2: the value -- one or more characters, none of which is
#            directly followed by the start of another ``name=`` pair
#            (optional quote, whitespace, non-whitespace run, "=") or by one
#            of > " ' -- plus one final character of any kind, so a value is
#            always at least two characters long
#   ["']?    an optional closing quote
regex = r"(\S+)=[\"']?((?:.(?![\"']?\s+(?:\S+)=|[>\"']))+.)[\"']?"

# Sample HTML fragment the pattern is run against.
test_str = ("<article>\n"
    " <main>\n"
    " <p>##유비벨록스##가52주 신고가를 경신했다.전일 대비2.24%상승한8,680원에 거래되고 있다.</p>\n"
    " <img src=\"http://chart.finance.dunamu.com/images/kr/candle/d/A089850.png \" >\n"
    " <h2>유비벨록스 연관 테마</h2>\n"
    " <div>\n"
    " <img align=\"left\" src=\"https://s3-ap-northeast-2.amazonaws.com/stockplus-seoul/themes/images/000/000/260/medium/%EC%8A%A4%EB%A7%88%ED%8A%B8%EC%B9%B4_%EB%B6%80%ED%92%88_%EC%88%98%EC%A0%95.jpg?1459836459\" />\n"
    " <div>\n"
    " <p>##유니트론텍##,##삼화콘덴서##,##모바일어플라이언스##,##텔레칩스##,##모트렉스##,##만도##,##THE MIDONG##,##유니퀘스트##,##에이스테크##,##캠시스##,##이에스브이##,##유비벨록스##,##팅크웨어##,##픽셀플러스##,##한컴MDS##,##인포뱅크##,##켐트로닉스##,##삼보모터스##,##세종공업##,##엔지스테크널러지##,##앤씨앤##,##라닉스##,##코리아에프티##</p>\n"
    " </div>\n"
    " </div>\n"
    " <p>※ 토픽 데이터 출처: 증권플러스</p>\n"
    " <p>유비벨록스의 최근 1주일간 외국인?기관 매매내역을 살펴보면 외국인은 15,024주를순매수했으며,기관은 79주를순매도했다. 같은 기간 주가는0.71상승했다.</p>\n"
    " <section class=\"table\">\n"
    " <p>(단위: 원)</p>\n"
    " <div>\n"
    " <div>날짜</div>\n"
    " <div>주가</div>\n"
    " <div>등락률</div>\n"
    " </div>\n"
    " <div>\n"
    " <div>10월 18일</div>\n"
    " <div>8,490</div>\n"
    " <div>\n"
    " +3.54%</div>\n"
    " </div>\n"
    " <div>\n"
    " <div>10월 17일</div>\n"
    " <div>8,200</div>\n"
    " <div>\n"
    " -2.96%</div>\n"
    " </div>\n"
    " <div>\n"
    " <div>10월 16일</div>\n"
    " <div>8,450</div>\n"
    " <div>\n"
    " +0.36%</div>\n"
    " </div>\n"
    " <div>\n"
    " <div>10월 15일</div>\n"
    " <div>8,420</div>\n"
    " <div>\n"
    " -0.12%</div>\n"
    " </div>\n"
    " <div>\n"
    " <div>10월 14일</div>\n"
    " <div>8,430</div>\n"
    " <div>\n"
    " +0.60%</div>\n"
    " </div>\n"
    " </section>\n"
    " </main>\n"
    " <p>[이 기사는 조선비즈와 증권플러스(두나무)가 자체 개발한 로봇 기자인 'C-Biz봇'이 실시간으로 작성했습니다.]</p>\n"
    "</article>")

# The pattern contains no ^ or $ anchors, so no re.MULTILINE flag is needed.
matches = re.finditer(regex, test_str)

# Report every match, then each of its capture groups, with their offsets
# into test_str.
for match_num, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=match_num,
        start=match.start(),
        end=match.end(),
        match=match.group()))

    # Capture groups are numbered from 1; group 0 is the whole match, which
    # was already printed above.
    for group_num in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=group_num,
            start=match.start(group_num),
            end=match.end(group_num),
            group=match.group(group_num)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html