# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern, read with re.MULTILINE so that ^ and $ anchor at line boundaries:
#   ^\s+$                   a line consisting only of whitespace
#   \n^([ \t]+)Summary.*    the next line: indentation (captured as group 1)
#                           followed by "Summary" and the rest of that line
#   (?:\n\1[ \t]*\S.*)+     one or more further lines that start with the same
#                           indentation and contain a non-whitespace character
# A whitespace-only line therefore ends the matched paragraph.
regex = r"^\s+$\n^([ \t]+)Summary.*(?:\n\1[ \t]*\S.*)+"
test_str = ("\n"
"COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The\n"
" reference sequence was derived from AC105339.9 and FJ695193.1.\n"
" This sequence is a reference standard in the RefSeqGene project.\n"
" \n"
" Summary: Adaptor protein complex 3 (AP-3 complex) is a\n"
" heterotrimeric protein complex involved in the formation of\n"
" clathrin-coated synaptic vesicles. The protein encoded by this gene\n"
" represents the beta subunit of the neuron-specific AP-3 complex and\n"
" was first identified as the target antigen in human paraneoplastic\n"
" neurologic disorders. The encoded subunit binds clathrin and is\n"
" phosphorylated by a casein kinase-like protein, which mediates\n"
" synaptic vesicle coat assembly. Defects in this gene are a cause of\n"
" early-onset epileptic encephalopathy. [provided by RefSeq, Feb\n"
" 2017].\n"
" \n"
" Another paragrph\n\n"
"PRIMARY REFSEQ_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP\n"
" 1-35060 AC105339.9 88079-123138\n"
" 35061-35259 FJ695193.1 1-199 c\n"
" 35260-57628 AC105339.9 123337-145705\n")
# An empty replacement deletes every match of the pattern.
subst = ""
# `count` limits the number of replacements; 0 means "replace all matches".
# count and flags are passed by keyword: passing them positionally is
# deprecated since Python 3.13 and makes it easy to confuse the two.
result = re.sub(regex, subst, test_str, count=0, flags=re.MULTILINE)
# Print only when something is left after the substitution.
if result:
    print(result)
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html