import re
# Token table for the lexer: (token name, pattern) pairs. Alternatives are
# tried in this order, so an earlier entry wins whenever several could match
# at the same position (keywords are listed before the generic T_WORD).
#
# The pattern this replaces was written for PCRE: it used \G, a (?|...)
# branch-reset group and (*MARK:NAME) verbs, none of which Python's `re`
# module accepts, so compiling it raised re.error. Each token is now a named
# group, and the token type of a match is available as `match.lastgroup`.
#
# Notes on the translation:
# * \G (match only where the previous match ended) is dropped: the final
#   T_UNKNOWN entry matches any single character, so finditer() already
#   yields contiguous matches with no gaps.
# * (?<=\b) is written as the equivalent \b.
# * Two typos are fixed: "primary\ske" -> "primary\skey", and "set\default"
#   (where \d meant "a digit") -> "set\sdefault".
_TOKEN_SPEC = (
    ("T_WHITESPACE", r"\s+"),
    ("T_COMMENT", r"//[^\n]*\n"),
    ("T_BOOL_TRUE", r"\btrue\b"),
    ("T_BOOL_FALSE", r"\bfalse\b"),
    ("T_NULL", r"\bnull\b"),
    ("T_PROJECT", r"\bProject\b"),
    ("T_TABLE", r"\bTable\b"),
    ("T_TABLE_ALIAS", r"\bas\b"),
    ("T_TABLE_INDEXES", r"\b(?:Indexes|indexes)\b"),
    # No word boundaries here, exactly as in the original pattern.
    ("T_TABLE_REF", r"(?:Ref|ref)"),
    ("T_TABLE_GROUP", r"\bTableGroup\b"),
    ("T_ENUM", r"\b(?:Enum|enum)\b"),
    ("T_TABLE_SETTING_PK", r"\b(?:primary\skey|pk)\b"),
    ("T_TABLE_SETTING_UNIQUE", r"\bunique\b"),
    ("T_TABLE_SETTING_INCREMENT", r"\bincrement\b"),
    ("T_TABLE_SETTING_DEFAULT", r"\bdefault\b"),
    # Same pattern as T_NULL above, which is tried first; kept so the token
    # list stays in step with the original.
    ("T_TABLE_SETTING_NULL", r"\bnull\b"),
    ("T_TABLE_SETTING_NOT_NULL", r"\bnot\snull\b"),
    ("T_REF_ACTION_CASCADE", r"\bcascade\b"),
    ("T_REF_ACTION_RESTRICT", r"\brestrict\b"),
    ("T_REF_ACTION_SET_NULL", r"\bset\snull\b"),
    ("T_REF_ACTION_SET_DEFAULT", r"\bset\sdefault\b"),
    ("T_REF_ACTION_NO_ACTION", r"\bno\saction\b"),
    ("T_REF_ACTION_DELETE", r"\bdelete\b"),
    ("T_REF_ACTION_UPDATE", r"\bupdate\b"),
    ("T_SETTING_NOTE", r"note:"),
    ("T_NOTE", r"\bNote\b"),
    ("T_FLOAT", r"[0-9]+\.[0-9]+"),
    ("T_INT", r"[0-9]+"),
    # The opening delimiter (''' or a single ' / ") is captured as `quote`
    # and must be repeated verbatim to close the string.
    ("T_QUOTED_STRING", r"(?P<quote>'{3}|[\"'])[^'\"][\s\S]*?(?P=quote)"),
    ("T_EXPRESSION", r"`[\s\S]+?`"),
    ("T_WORD", r"[a-zA-Z0-9_]+"),
    # Matches a literal backslash followed by "n", as the original did; real
    # newline characters are consumed by T_WHITESPACE.
    ("T_EOL", r"\\n"),
    ("T_LPAREN", r"\("),
    ("T_RPAREN", r"\)"),
    ("T_LBRACE", r"\{"),
    ("T_RBRACE", r"\}"),
    ("T_LBRACK", r"\["),
    ("T_RBRACK", r"\]"),
    ("T_GT", r">"),
    ("T_LT", r"<"),
    ("T_COMMA", r","),
    ("T_COLON", r":"),
    ("T_MINUS", r"-"),
    ("T_DOT", r"\."),
    # Catch-all: lazily matches one character (any character, under DOTALL).
    ("T_UNKNOWN", r".+?"),
)
regex = re.compile(
    "|".join(f"(?P<{name}>{pattern})" for name, pattern in _TOKEN_SPEC),
    flags=re.DOTALL | re.UNICODE | re.MULTILINE,
)
# Sample schema-definition text (DBML-like syntax) that the script scans with
# `regex`. One tuple entry per line of the sample: "" is a blank line, and
# the final "" produces the trailing newline.
test_str = "\n".join((
    "Project test {",
    " database_type: 'PostgreSQL'",
    " Note: 'Description of the project'",
    "}",
    "",
    "//// -- LEVEL 1",
    "//// -- Tables and References",
    "",
    "// Creating tables",
    "Table users as U {",
    " id int [pk, unique, increment] // auto-increment",
    " full_name varchar [not null, unique, default: 1]",
    " created_at timestamp",
    " country_code int",
    " type int",
    " note int",
    " Note: 'khong hieu duoc'",
    "}",
    "",
    "Table merchants {",
    " id int",
    "}",
    "",
    "Table countries {",
    " code int [pk]",
    " name varchar",
    " continent_name varchar",
    "}",
    "",
    "// Creating references",
    "// You can also define relationship separately",
    "// > many-to-one; < one-to-many; - one-to-one",
    "Ref{",
    " U.country_code > countries.code",
    " merchants.country_code > countries.code",
    "}",
    "",
    "//----------------------------------------------//",
    "",
    "//// -- LEVEL 2",
    "//// -- Adding column settings",
    "",
    "Table order_items {",
    " order_id int [ref: > orders.id]",
    " product_id int",
    " quantity int [default: 1] // default value",
    "}",
    "",
    "Ref: order_items.product_id > products.id",
    "",
    "Table orders {",
    " id int [pk] // primary key",
    " user_id int [not null, unique]",
    " status varchar",
    " created_at varchar [note: '''When order created'''] // add column note",
    "}",
    "",
    "Table int {",
    " id int",
    "}",
    "",
    "//----------------------------------------------//",
    "",
    "//// -- Level 3",
    "//// -- Enum, Indexes",
    "",
    "// Enum for 'products' table below",
    "Enum products_status {",
    " out_of_stock",
    " in_stock",
    " running_low [note: 'less than 20'] // add column note",
    "}",
    "",
    "// Indexes: You can define a single or multi-column index",
    "Table products {",
    " id int [pk]",
    " name varchar",
    " merchant_id int [not null]",
    " price int",
    " status products_status",
    " created_at datetime [default: `now()`]",
    "",
    " Indexes {",
    " (merchant_id, status) [name:'product_status', type: hash]",
    " id [unique]",
    " }",
    "}",
    "",
    "Ref: products.merchant_id > merchants.id // many-to-one",
    "",
    "TableGroup hello_world {",
    " just_test",
    " just_a_test",
    "}",
    "",
))
# Scan the sample text and report every match in order: first the span and
# text of the whole match, then the span and text of each capturing group.
# A group that took no part in a match is printed as None with a -1--1 span,
# because Match.start()/end() return -1 for such groups.
matches = regex.finditer(test_str)
for match_num, match in enumerate(matches, start=1):
    print(f"Match {match_num} was found at {match.start()}-{match.end()}: {match.group()}")
    for group_num, group in enumerate(match.groups(), start=1):
        print(f"Group {group_num} found at {match.start(group_num)}-{match.end(group_num)}: {group}")
# Note: this code sample was automatically generated and is not guaranteed to work. If you find any syntax errors, please submit a bug report. For a full regex reference for Python, see https://docs.python.org/3/library/re.html