# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern: one or more dash-like characters at the start of a line
# (combine with re.MULTILINE to anchor at every line start).
#
# The original PCRE-style pattern used ``\p{Pd}`` and ``\x{2212}``. Python's
# built-in ``re`` module supports neither (``\p`` is a bad escape and ``\x``
# takes exactly two hex digits), so the Unicode "Dash_Punctuation" (Pd)
# category is spelled out explicitly here.
regex = (
    r"^["
    # Unicode general category Pd (Dash_Punctuation)
    r"\-\u058A\u05BE\u1400\u1806\u2010-\u2015"
    r"\u2E17\u2E1A\u2E3A\u2E3B\u2E40\u2E5D"
    r"\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D"
    r"\U00010EAD"
    r"\xAD"    # U+00AD SOFT HYPHEN
    r"\u2212"  # U+2212 MINUS SIGN
    r"]+"
)
# Sample text the pattern is searched in. It is a single string assembled by
# implicit concatenation of adjacent literals, one literal per line of text
# (each ends in "\n" except the last). The content reads like command-line
# option documentation (it mentions "Rsubread featureCounts"). Some option
# lines begin with an ASCII hyphen ("-s", "-t", ...), others with a doubled
# non-ASCII dash ("−−byReadGroup", ...) -- presumably U+2212 MINUS SIGN, given
# that the pattern lists it explicitly; verify if the exact code point matters.
test_str = ("-s < intorstring >\n"
"-t < string >\n"
"(GTF.featureType)\n"
"Specify the feature type. Only rows which have the matched\n"
"feature type in the provided GTF annotation file will be included for read counting. ‘exon’ by default.\n"
"-T < int >\n"
"(nthreads)\n"
"Number of the threads. The value should be between 1 and\n"
"32. 1 by default.\n"
"-v Output version of the program.\n"
"−−byReadGroup\n"
"(byReadGroup)\n"
"Count reads by read group. Read group information is identified from the header of BAM/SAM input files and the generated count table will include counts for each group in each\n"
"library.\n"
"−−donotsort\n"
"(autosort)\n"
"If specified, paired end reads will not be re-ordered even if\n"
"reads from the same pair were found not to be next to each\n"
"other in the input.\n"
"−−extraAttributes\n"
"< string >\n"
"(GTF.attrType.extra)\n"
"Extract extra attribute types from the provided GTF annotation and include them in the counting output. These attribute\n"
"types will not be used to group features. If more than one attribute type is provided they should be separated by comma\n"
"(in Rsubread featureCounts its value is a character vector).\n"
"−−fraction\n"
"(fraction)\n"
"Assign fractional counts to features. This option must be used\n"
"together with ‘-M’ or ‘-O’ or both. When ‘-M’ is specified,\n"
"each reported alignment from a multi-mapping read (identified via ‘NH’ tag) will carry a count of 1/x, instead of 1 (one),\n"
"where x is the total number of alignments reported for the\n"
"same read. When ‘-O’ is specified, each overlapping feature\n"
"will receive a count of 1/y, where y is the total number of\n"
"features overlapping with the read. When both ‘-M’ and ‘-O’\n"
"are specified, each alignment will carry a count of 1/(x*y)")
# Find every match of the pattern in the sample text. re.MULTILINE makes "^"
# anchor at the start of each line rather than only at the start of the string.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report each match (numbered from 1) with its span and matched text, followed
# by the span and text of each capturing group the pattern defines, if any.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Group 0 is the whole match (already printed above), so start at 1.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html