#include <StringConstants.au3> ; to declare the Constants of StringRegExp
#include <Array.au3> ; UDF needed for _ArrayDisplay and _ArrayConcatenate
; Pattern for one "( <digits>, <digits>, <token> )" record per line.
;   (?m)  ^ and $ anchor at every line boundary, so each line is matched on its own.
;   (?x)  extended mode: whitespace in the pattern (the leading blanks and the
;         @CRLF separators below) is layout only; blanks inside [...] stay literal.
; Capture groups: 1 = first number, 2 = second number, 3 = token (run of non-whitespace).
; The whole record group is optional, so an empty line also matches (with no captures).
; The generator's original option group was "(?mux)"; "u" is not an inline option
; accepted by the PCRE engine behind StringRegExp and makes the pattern fail to
; compile (@error = 2), so only "m" and "x" are kept.
Local $sRegex = "(?mx)^" & @CRLF & _
"(?:" & @CRLF & _
" [ \t]*[(][ \t]*" & @CRLF & _
" (\d+)" & @CRLF & _
" [ \t]*,[ \t]*" & @CRLF & _
" (\d+)" & @CRLF & _
" [ \t]*,[ \t]*" & @CRLF & _
" (\S+)" & @CRLF & _
" [ \t]*[)][ \t]*" & @CRLF & _
")?" & @CRLF & _
"$"
; Sample subject text: one "(start, end, token)" record per line, grouped into
; four sentences. Sentences are separated by an empty line and every line except
; the very last one is terminated with @CRLF.
Local $sString = ""

; Sentence 1 (deliberately irregular spacing around the separators).
$sString &= "( 0, 12, Tokenization ) " & @CRLF & _
        "( 13 , 15 , is ) " & @CRLF & _
        " ( 16, 22, widely)" & @CRLF & _
        "( 23, 31, regarded )" & @CRLF & _
        "(32, 34, as )" & @CRLF & _
        "(35, 36, a) " & @CRLF & _
        "(37, 43, solved ) " & @CRLF & _
        "(44, 51, problem)" & @CRLF & _
        "(52, 55, due)" & @CRLF & _
        "(56, 58, to)" & @CRLF & _
        "(59, 62, the)" & @CRLF & _
        "(63, 67, high)" & @CRLF & _
        "(68, 76, accuracy)" & @CRLF & _
        "(77, 81, that)" & @CRLF & _
        "(82, 91, rulebased)" & @CRLF & _
        "(92, 102, tokenizers)" & @CRLF & _
        "(103, 110, achieve)" & @CRLF & _
        "(110, 111, .)" & @CRLF
; Empty line between sentences.
$sString &= @CRLF

; Sentence 2.
$sString &= "(0, 3, But)" & @CRLF & _
        "(4, 14, rule-based)" & @CRLF & _
        "(15, 25, tokenizers)" & @CRLF & _
        "(26, 29, are)" & @CRLF & _
        "(30, 34, hard)" & @CRLF & _
        "(35, 37, to)" & @CRLF & _
        "(38, 46, maintain)" & @CRLF & _
        "(47, 50, and)" & @CRLF & _
        "(51, 56, their)" & @CRLF & _
        "(57, 62, rules)" & @CRLF & _
        "(63, 71, language)" & @CRLF & _
        "(72, 80, specific)" & @CRLF & _
        "(80, 81, .)" & @CRLF
; Empty line between sentences.
$sString &= @CRLF

; Sentence 3.
$sString &= "(0, 2, We)" & @CRLF & _
        "(3, 7, show)" & @CRLF & _
        "(8, 12, that)" & @CRLF & _
        "(13, 17, high)" & @CRLF & _
        "(18, 26, accuracy)" & @CRLF & _
        "(27, 31, word)" & @CRLF & _
        "(32, 35, and)" & @CRLF & _
        "(36, 44, sentence)" & @CRLF & _
        "(45, 57, segmentation)" & @CRLF & _
        "(58, 61, can)" & @CRLF & _
        "(62, 64, be)" & @CRLF & _
        "(65, 73, achieved)" & @CRLF & _
        "(74, 76, by)" & @CRLF & _
        "(77, 82, using)" & @CRLF & _
        "(83, 93, supervised)" & @CRLF & _
        "(94, 102, sequence)" & @CRLF & _
        "(103, 111, labeling)" & @CRLF & _
        "(112, 114, on)" & @CRLF & _
        "(115, 118, the)" & @CRLF & _
        "(119, 128, character)" & @CRLF & _
        "(129, 134, level)" & @CRLF & _
        "(135, 143, combined)" & @CRLF & _
        "(144, 148, with)" & @CRLF & _
        "(149, 161, unsupervised)" & @CRLF & _
        "(162, 169, feature)" & @CRLF & _
        "(170, 178, learning)" & @CRLF & _
        "(178, 179, .)" & @CRLF
; Empty line between sentences.
$sString &= @CRLF

; Sentence 4 (includes tokens that are themselves parentheses); no trailing @CRLF.
$sString &= "(0, 2, We)" & @CRLF & _
        "(3, 12, evaluated)" & @CRLF & _
        "(13, 16, our)" & @CRLF & _
        "(17, 23, method)" & @CRLF & _
        "(24, 26, on)" & @CRLF & _
        "(27, 32, three)" & @CRLF & _
        "(33, 42, languages)" & @CRLF & _
        "(43, 46, and)" & @CRLF & _
        "(47, 55, obtained)" & @CRLF & _
        "(56, 61, error)" & @CRLF & _
        "(62, 67, rates)" & @CRLF & _
        "(68, 70, of)" & @CRLF & _
        "(71, 75, 0.27)" & @CRLF & _
        "(76, 77, ‰)" & @CRLF & _
        "(78, 79, ()" & @CRLF & _
        "(79, 86, English)" & @CRLF & _
        "(86, 87, ))" & @CRLF & _
        "(87, 88, ,)" & @CRLF & _
        "(89, 93, 0.35)" & @CRLF & _
        "(94, 95, ‰)" & @CRLF & _
        "(96, 97, ( )" & @CRLF & _
        "(97, 102, Dutch)" & @CRLF & _
        "(102, 103, ) )" & @CRLF & _
        "(104, 107, and)" & @CRLF & _
        "(108, 112, 0.76)" & @CRLF & _
        "(113, 114, ‰)" & @CRLF & _
        "(115, 116, ()" & @CRLF & _
        "(116, 123, Italian)" & @CRLF & _
        "(123, 124, ))" & @CRLF & _
        "(125, 128, for)" & @CRLF & _
        "(129, 132, our)" & @CRLF & _
        "(133, 137, best)" & @CRLF & _
        "(138, 144, models)" & @CRLF & _
        "(144, 145, .)"
; Run the pattern over the whole subject. With $STR_REGEXPARRAYGLOBALFULLMATCH the
; result is an array with one entry per match, each entry being itself an array
; (full match followed by the captured groups).
Local $aArray = StringRegExp($sString, $sRegex, $STR_REGEXPARRAYGLOBALFULLMATCH)
; Capture the status immediately: the next function call overwrites @error/@extended.
Local $iRegexError = @error, $iRegexExtended = @extended
Switch $iRegexError
	Case 1
		ConsoleWriteError("StringRegExp: no matches found." & @CRLF)
	Case 2
		; For a bad pattern @extended holds the offset of the error within the pattern.
		ConsoleWriteError("StringRegExp: invalid pattern, error at offset " & $iRegexExtended & "." & @CRLF)
EndSwitch

; Flatten the per-match arrays into a single 1D array for display.
Local $aFullArray[0]
If IsArray($aArray) Then
	For $i = 0 To UBound($aArray) - 1
		_ArrayConcatenate($aFullArray, $aArray[$i])
	Next
EndIf
$aArray = $aFullArray
; Present the entire match result (an empty list if the match failed).
_ArrayDisplay($aArray, "Result")
; NOTE: Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report.
; For a full regex reference for AutoIt, please visit: https://www.autoitscript.com/autoit3/docs/functions/StringRegExp.htm