use strict;
use warnings;
use utf8;    # The literals below are UTF-8 text; without this pragma Perl sees
             # raw bytes and the bracketed character classes match single
             # bytes instead of whole accented letters.

# Matched text contains non-ASCII characters, so encode output explicitly
# rather than triggering "Wide character in print".
binmode STDOUT, ':encoding(UTF-8)' or die "Cannot set UTF-8 on STDOUT: $!";

# Sample input: one Vietnamese word per line.
# NOTE(review): the list looks like a mix of words with a misplaced tone mark
# followed by correctly written ones -- confirm the intended expectations.
my $str = 'hoạ
hoà
haọ
keó
meọ
coí
gían
gíong
gíêng
qúa
qúan
gìau
gìêng
chụyên
chuỵên
lúyên
luỵên
túê
tụê
hòang
hùynh
tòan
tươí
toaì
tóai
chuyện
nguyễn
hào
pháo
táo
tợn
huỳnh
hoàng
choáng
láng
ngáng';

# Six alternatives, each pairing a tone-marked vowel with neighbouring plain
# vowels/consonants.  /m lets the `$` in the second alternative match at the
# end of every line of $str; /p keeps ${^MATCH} and friends available.
# NOTE(review): presumably each alternative targets one kind of tone-mark
# misplacement -- verify against the intended spelling rules.
my $regex = qr/((g[íìỉĩị]|q[úùủũụứừửữự])[aăâeêoôơuưiy])[a-z]*|([bcdfhjklmnprstvxzđ]*[aăâeoôơuưiy][áàảãạắằẳẵặấầẩẫậéèẻẽẹóòỏõọốồổỗộúùủũụứừửữựýỳỷỹỵíìỉĩị])$|([a-zđ]*[áàảãạắằẳẵặấầẩẫậéèẻẽẹóòỏõọốồổỗộúùủũụứừửữựýỳỷỹỵíìỉĩị][aăâêeoôơuưiy][aăâeêoôơuưiybcdfghklmnpqrstvx]+)|([bcdfghjklmnpqrstvxzđ]*[aăâêeoôơuưiy][aăâêeoôơuưiy][áàảãạắằẳẵặấầẩẫậéèẻẽẹóòỏõọốồổỗộúùủũụứừửữựýỳỷỹỵíìỉĩị][bcdfghjklmnpqrstvxz]*)|([a-zăâôơưđ]*[áàảãạắằẳẵặấầẩẫậóòỏõọốồổỗộờớởỡợúùủũụứừửữựýỳỷỹỵíìỉĩị][a-z]*ê[a-zăâôơư]*)|([a-zăâôêưđ]*[áàảãạắằẳẵặấầẩẫậếềểễệóòỏõọốồổỗộúùủũụứừửữựýỳỷỹỵíìỉĩị][a-z]*ơ[a-zăâôơư]*)/mp;

# Report only the first match.  No /g here: in a one-shot boolean test it
# would merely leave pos($str) set as a side effect.
if ( $str =~ /$regex/p ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html