# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Free-spacing (verbose) pattern; its own first line labels it as a PCRE
# "chess game validation regex". The `#` lines inside the literal are part of
# the pattern text and only act as comments when the pattern is compiled in
# verbose/extended mode.
#
# The pattern relies on PCRE constructs, including:
#   - numbered, relative and named subroutine calls: (?3), (?-4), (?&E)
#   - branch-reset groups: (?|...)
#   - relative back-references and conditions: \g{-1}, (?(-1)...), (?(+1)...)
#   - lookaround conditionals: (?(?=...)...|...)
#   - \X and \PM
#   - PCRE-style named groups: (?<E>...), \k<m>
# NOTE(review): none of these appear in the syntax documented for Python's
# standard `re` module, so compiling this literal with `re` is expected to
# raise `re.error` -- confirm which engine this sample is meant to run on.
regex = r"""
# Chess game validation regex (PCRE)
(?!^(?!rnbqkbnr\np{8}(?:\n-{8}){4}\nP{8}\nRNBQKBNR\n\n)) # assert initial state of board, if this is the first move
(?>
(\n)? # \1 = capture \n if black moves next (ensuring that white moves first), or unset/NPCG if white moves next
| # subroutines:
((.|\n(?=.)){2}) # (?3) = for moving within the board, without wrapping to the next board, (?2) = (?3){2}
((?= # (?4) = assert that position of just-consumed piece is vacated on the next turn
(\X{72}) # (?5) = skip to the position of the just-consumed piece on the next turn
-))
((?! # (?6) = assert that the piece at the current position belongs to the current player
((?=(?(1)[-A-Z]|[-a-z]))) # (?7) = assert that the piece at the current position belongs to the current player's opponent or is empty
))
((?5)(?(?=(.*\n)\n)[qnrb]|p)) # (?8) = black pawn that might be promoted, (?9) = .*\n
( # (?10) = assert that the just-consumed piece never moved in all past turns
(?<=(.))
((?<!(?!_|\11(?12))\PM{74}))
)
((?5)(?(?=(?9){8}\n)[QNRB]|P)) # (?13) = white pawn that might be promoted
)
( # check if we're in check; \14 will be captured and empty if we're not in check, or unset if we are; call (?14) to assert we're not in check on the next turn
(?!
\X{,70}
(?:
# pawns (capture diagonally)
(?(1)k|p)(?=(?3){7}(?2)?(?(1)P|K)) |
# bishops, rooks, queens, or knights
(?i:
(?<E>(?6)K)? # decide between scanning forward (<E> is unset) or backwards (<E> is captured)
(?:
(?(E)|((?7)[BQ])) (?<B>()?((?(-1)-)(?3){7}(?(-2)(?2)))+) (?(E)(?-4)) | # bishops or queens
(?(E)|((?7)[RQ])) (?<R>-*|((?(-1)-)(?3){8})+) (?(E)(?-3)) | # rooks or queens
(?(E)|((?7) N )) (?<N>(?<=..)(?2){3}|(?=.)(?2){5}|(?2){8}(?2)?) (?(E)(?-2)) # knights
)
(?(E)|(?&E)) |
K(?<K>(?3){7,9})?K # kings
)
)
)
)??
(?>
(?>
# Handle squares that don't change (empty->empty or pieces that doesn't move)
(.)(?=(?5)\g{-1}) |
# Handle a piece that moves (and optionally captures an enemy piece)
(?(m)$) # allow only one move to be made per turn
(?>
(?(1)
(?:p(?4)(?: # black pawn
(?=(?3){8}((?3){9})? - (?8))(?(-1)(?=(?9){7}\n)) | # move 1 or 2 spaces forward
(?=(?3){7}(?2)? [A-Z](?8)) ) | # capture diagonally
(?=\X{73}--(?9){4}\n)(?|P(?=(p))|pP())(?<=(?=(?:-\X{8}){2}P)\PM{74}(?=\X{8}-(?5)p))\g{-1} ) # en passant
|
(?: # white pawn
- (?=(?13))(?=(?3){8}((?3){9})?P(?4))(?(-1)(?=(?9){4}\n)) | # move 1 or 2 spaces forward
[a-z](?=(?13))(?=(?3){7}(?2)? P(?4)) | # capture diagonally
(?=\X{73}--(?9){5}\n)(?|p(?=(P))|Pp())(?<=(?=p(?:\X{8}-){2})\PM{82}(?=-(?5)P)\PM{10})\g{-1} ) # en passant
) |
# Castling - must be tried before rook moves unless we give up atomicity
# Leaves the rightmost changed square unconsumed, so that "only allow matching one of these per turn" below can't silently consume an illegal move
(?6)([KkRr])(?10)
(?!
(?:--)?-
# Verify that we aren't castling through check
# There's no need to check for kings, because they can't threaten check at this spot without our king already being in check
()? # decide between bishops/queens or rooks/queens
()? # for bishops/queens, decide between one diagonal or the other
(?(1)
(?|\X{7}(?2)?P # pawns (capture diagonally)
|(\X{6}|\X{10}|\X{16}(?2)?)N # knights
|((?(-1)-)\X{7}(?(-2)(?2)))+[BQ] # bishops or queens
|((?(-1)-)\X{8} )+[RQ]) # rooks or queens (only look downward, because left/right are impossible in this context)
| (?<=p\PM{8}|p\PM{10} # pawns (capture diagonally)
|(?=(?:|(?2)|\X{8}|\X{12})n)\PM{20} # knights
|((?<= # bishops/queens or rooks/queens
(?!.*\n\n)
(?=
(?(-4)
\X # rooks or queens (only look upward, because left/right are impossible in this context)
|
(?(-3)(?2)) # bishops or queens
)
(?:(?(-4)[rq]|[bq])|-(?-1))
)
\PM{11}
)))
)
)
(?=(?5)-(-[Kk]|[Rr])\g{-6}-)--(?=\g{-1}(?10))-?\14 |
# bishops, rooks, queens, knights, or kings
(?<e>(?7).)? # decide between scanning forward (<e> is unset) or backwards (<e> is captured)
(?=
(?i:
(?|
(?(e)|(B|Q)) (?&B) (?(e)(B|Q)) | # bishops or queens
(?(e)|(R|Q)) (?&R) (?(e)(R|Q)) | # rooks or queens
(?(e)|(N )) (?&N) (?(e)(N )) | # knights
(?(e)|(K )) (?&K)? (?(e)(K )) # kings
)
)
(?(e)(?<=(?!(?7)).)(?4)|(?7).(?5)\g{-2}) # verify that the piece moved, and optionally captured piece, are of the correct color
)
(?(e)(?=(?5)\g{-1})|(?6).(?4)) # verify that the piece moved is the same type and color at its destination in the next turn's board position
)(?<m>) |
(?(+1)$)(.) # handle the destination/source square that a piece moved to/from (only allow matching one of these per turn)
)+\n
)+
\k<m> # assert that a move has taken place
(?=\n(?14)) # don't allow moving into check
\1? # if white moved last, black moves next, and vice-versa
"""
# Sample subject text for the pattern: a sequence of board snapshots, each
# written as eight 8-character rows and followed by a blank line, ending with a
# result line. Written as one triple-quoted literal; the backslash right after
# the opening quotes suppresses the leading newline, and the final line has no
# trailing newline.
test_str = """\
rnbqkbnr
pppppppp
--------
--------
--------
--------
PPPPPPPP
RNBQKBNR

rnbqkbnr
pppppppp
--------
--------
----P---
--------
PPPP-PPP
RNBQKBNR

rnbqkbnr
pppp-ppp
----p---
--------
----P---
--------
PPPP-PPP
RNBQKBNR

rnbqkbnr
pppp-ppp
----p---
--------
---PP---
--------
PPP--PPP
RNBQKBNR

rnbqkbnr
ppp--ppp
----p---
---p----
---PP---
--------
PPP--PPP
RNBQKBNR

rnbqkbnr
ppp--ppp
----p---
---pP---
---P----
--------
PPP--PPP
RNBQKBNR

rnbqkbnr
pp---ppp
----p---
--ppP---
---P----
--------
PPP--PPP
RNBQKBNR

rnbqkbnr
pp---ppp
----p---
--ppP---
---P----
--P-----
PP---PPP
RNBQKBNR

rnbqkbnr
pp---ppp
----p---
---pP---
---p----
--P-----
PP---PPP
RNBQKBNR

rnbqkbnr
pp---ppp
----p---
---pP---
---P----
--------
PP---PPP
RNBQKBNR

rnbqk-nr
pp---ppp
----p---
---pP---
-b-P----
--------
PP---PPP
RNBQKBNR

rnbqk-nr
pp---ppp
----p---
---pP---
-b-P----
--N-----
PP---PPP
R-BQKBNR

r-bqk-nr
pp---ppp
--n-p---
---pP---
-b-P----
--N-----
PP---PPP
R-BQKBNR

r-bqk-nr
pp---ppp
--n-p---
---pP---
-b-P----
--N--N--
PP---PPP
R-BQKB-R

r-bqk--r
pp--nppp
--n-p---
---pP---
-b-P----
--N--N--
PP---PPP
R-BQKB-R

r-bqk--r
pp--nppp
--n-p---
---pP---
-b-P----
--NB-N--
PP---PPP
R-BQK--R

r-bq-rk-
pp--nppp
--n-p---
---pP---
-b-P----
--NB-N--
PP---PPP
R-BQK--R

r-bq-rk-
pp--nppB
--n-p---
---pP---
-b-P----
--N--N--
PP---PPP
R-BQK--R

r-bq-r--
pp--nppk
--n-p---
---pP---
-b-P----
--N--N--
PP---PPP
R-BQK--R

r-bq-r--
pp--nppk
--n-p---
---pP-N-
-b-P----
--N-----
PP---PPP
R-BQK--R

r-bq-r--
pp--npp-
--n-p-k-
---pP-N-
-b-P----
--N-----
PP---PPP
R-BQK--R

r-bq-r--
pp--npp-
--n-p-k-
---pP-N-
-b-P---P
--N-----
PP---PP-
R-BQK--R

r-bq-r--
pp--npp-
----p-k-
---pP-N-
-b-n---P
--N-----
PP---PP-
R-BQK--R

r-bq-r--
pp--npp-
----p-k-
---pP-N-
-b-n--QP
--N-----
PP---PP-
R-B-K--R

r-bq-r--
pp--n-p-
----p-k-
---pPpN-
-b-n--QP
--N-----
PP---PP-
R-B-K--R

r-bq-r--
pp--n-p-
----p-k-
---pPpNP
-b-n--Q-
--N-----
PP---PP-
R-B-K--R

r-bq-r--
pp--n-p-
----p--k
---pPpNP
-b-n--Q-
--N-----
PP---PP-
R-B-K--R

r-bq-r--
pp--n-p-
----N--k
---pPp-P
-b-n--Q-
--N-----
PP---PP-
R-B-K--R

r-bq-r--
pp--n---
----N--k
---pPppP
-b-n--Q-
--N-----
PP---PP-
R-B-K--R

r-bq-r--
pp--n---
----N-Pk
---pPp--
-b-n--Q-
--N-----
PP---PP-
R-B-K--R

WIN BY CHECKMATE"""
# Compile explicitly so that a pattern the `re` engine cannot parse is reported
# as a one-line diagnostic rather than an uncaught traceback from finditer().
try:
    compiled = re.compile(regex, re.VERBOSE)
except re.error as compile_error:
    raise SystemExit(
        "The pattern could not be compiled by Python's re module: {error}".format(
            error=compile_error
        )
    )

# Report every match, followed by the span and text of each capture group.
matches = compiled.finditer(test_str)
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
    # Capture groups are numbered from 1; group 0 is the whole match printed above.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html