– Albert Einstein
from IPython.display import HTML
# Markup injected into the rendered notebook: a script that hides every
# `div.input` element once the document is ready, and a button that flips
# the inputs between hidden and shown on each click. It relies on `$`
# being available in the page.
TOGGLE_CODE_MARKUP = '''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''
HTML(TOGGLE_CODE_MARKUP)
import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import pandas_profiling
import plotly.graph_objects as go
sns.set_style('darkgrid')

# Load the games table; every later cell works off `chess`.
chess = pd.read_csv('chessgames.csv')
chess.head(2)

# Derived rating features. They are created here, before the pair plot
# selects them: previously they were only added further down, so running the
# notebook top to bottom on a fresh kernel failed with a KeyError.
chess['mean_rating'] = (chess.white_rating + chess.black_rating) / 2
chess['rating_diff'] = abs(chess.white_rating - chess.black_rating)

# Pairwise scatter plots of the numeric columns, coloured by how the game ended.
chess_corr = chess[['turns', 'white_rating', 'black_rating', 'opening_ply',
                    'mean_rating', 'rating_diff', 'winner', 'victory_status']]
sns.pairplot(chess_corr, hue='victory_status', kind='scatter', palette='Set2')
plt.show()
chess.describe()

# NOTE(review): after this filter `chess` holds rated games only, so any later
# selection with `chess.rated == False` is empty -- confirm that is intended.
chess = chess[chess.rated]  # only rated games
chess.info()
# Number of games per value of the `rated` flag.
plt.figure(1, figsize=(12, 6))
sns.countplot(data=chess, x='rated')
plt.show()

# Smoothed distribution of the per-game mean rating.
plt.figure(1, figsize=(12, 6))
sns.kdeplot(chess['mean_rating'], color='orange', shade=True)
plt.title('distribution of ratings in games played on lichess')
plt.xlabel('mean rating')
plt.show()
# Donut chart of how many games each outcome in `winner` accounts for.
winner = chess['winner'].value_counts()
winner_pie = go.Pie(labels=winner.index, values=winner, hole=0.2)
fig = go.Figure(data=[winner_pie])
fig.show()
# Victory-status counts per mean-rating band.
# The bands now cover every game: the old strict comparisons (<1500,
# >1500 & <2000, >2000) silently dropped games whose mean rating was exactly
# 1500 or 2000. Both edges now belong to the middle band, which matches the
# panel titles ("Under 1500", "between 1500 - 2000", "above 2000").
game_mean_rating = chess.mean_rating
victory_status = chess[game_mean_rating < 1500].victory_status.value_counts()
victory_status1 = chess[(game_mean_rating >= 1500) & (game_mean_rating <= 2000)].victory_status.value_counts()
victory_status2 = chess[game_mean_rating > 2000].victory_status.value_counts()

colors = ['darkorange', 'yellow', 'skyblue', 'cyan']
# Offsets the second wedge of each pie; value_counts sorts by frequency, so
# that is the second most common status of the band being drawn.
explode = (0, 0.1, 0, 0)

# (subplot position, counts, title, percentage format, start angle)
pie_panels = [
    (131, victory_status, 'Rating Under 1500', '%1.1f%%', 150),
    (132, victory_status1, 'Rating between 1500 - 2000', '%1.1f%%', 150),
    (133, victory_status2, 'Rating above 2000', '%1.2f%%', 120),
]

plt.figure(1, figsize=(20, 8))
for position, counts, panel_title, pct_format, start_angle in pie_panels:
    plt.subplot(position)
    plt.pie(counts, explode=explode, colors=colors, labels=counts.index,
            autopct=pct_format, shadow=True, startangle=start_angle)
    plt.axis('equal')
    plt.title(panel_title)
plt.show()
# Games per winner.
plt.figure(1, figsize=(12, 6))
sns.countplot(data=chess, x='winner')

# Games per `rated` flag, split by victory status.
plt.figure(1, figsize=(18, 8))
sns.countplot(data=chess, x='rated', hue='victory_status')

# Games per winner, split by victory status.
plt.figure(1, figsize=(18, 8))
sns.countplot(data=chess, x='winner', hue='victory_status')
def _plot_winner_by_rated(turn_mask):
    """Draw winner counts, coloured by victory status, for one turn filter.

    The left panel uses the rows of ``chess`` selected by ``turn_mask`` with
    ``rated == True``; the right panel uses those with ``rated == False``.

    Parameters
    ----------
    turn_mask : boolean Series aligned with ``chess``
        Row selection applied before the rated/unrated split.
    """
    plt.figure(1, figsize=(20, 8))
    panels = ((121, True, 'Rated players'), (122, False, 'Unrated players'))
    for position, rated_flag, panel_title in panels:
        plt.subplot(position)
        selected_games = chess[turn_mask & (chess.rated == rated_flag)]
        sns.countplot(x='winner', data=selected_games, hue='victory_status')
        plt.title(panel_title)
    plt.show()


# Very short games, then long and very long ones.
_plot_winner_by_rated(chess.turns < 15)
_plot_winner_by_rated(chess.turns > 150)
_plot_winner_by_rated(chess.turns > 200)
chess.head(2)

# One frame per outcome; the plots below in this file reuse these three frames.
dfwhite = chess[chess.winner == 'white']
dfblack = chess[chess.winner == 'black']
dfdraw = chess[chess.winner == 'draw']

# Rating spread: the winner's rating for decisive games, both ratings for draws.
# (subplot position, rating column, games, colour)
rating_box_panels = (
    (141, 'white_rating', dfwhite, 'red'),
    (142, 'black_rating', dfblack, 'green'),
    (143, 'white_rating', dfdraw, 'orange'),
    (144, 'black_rating', dfdraw, 'violet'),
)

plt.figure(1, figsize=(18, 8))
for position, rating_column, games, box_color in rating_box_panels:
    plt.subplot(position)
    sns.boxplot(x='winner', y=rating_column, data=games, color=box_color)
    plt.ylim(750, 2800)  # shared y-range so the four panels are comparable
plt.show()
# Mean (left) and standard deviation (right) of game length per outcome.
plt.figure(1, figsize=(18, 6))

plt.subplot(121)
sns.barplot(data=chess, x='winner', y='turns', estimator=np.mean)
plt.title("avrage games lasted about 62 moves")

plt.subplot(122)
sns.barplot(data=chess, x='winner', y='turns', estimator=np.std)
plt.ylim(28, 49)
plt.title("standard deviation of black,white")

plt.show()

# The same figures as numbers: overall mean, mean over decisive games only,
# and the per-outcome standard deviation.
chess['turns'].mean()
decisive_mask = chess.winner.isin(['white', 'black'])
chess[decisive_mask].turns.mean()
king = chess.groupby('winner')
king.turns.std()
# Game length (turns) per outcome, one box plot per outcome frame.
turn_box_panels = ((131, dfwhite, 'red'), (132, dfblack, 'green'), (133, dfdraw, 'orange'))

plt.figure(1, figsize=(18, 8))
for position, games, box_color in turn_box_panels:
    plt.subplot(position)
    sns.boxplot(x='winner', y='turns', data=games, color=box_color)
    plt.ylim(0, 380)  # shared y-range across the three panels
plt.show()
# Violin plots of ratings: the winner's rating for decisive games, both
# ratings for draws.
# (subplot position, rating column, games, colour)
rating_violin_panels = (
    (141, 'white_rating', dfwhite, 'red'),
    (142, 'black_rating', dfblack, 'green'),
    (143, 'white_rating', dfdraw, 'orange'),
    (144, 'black_rating', dfdraw, 'violet'),
)

plt.figure(1, figsize=(18, 8))
for position, rating_column, games, violin_color in rating_violin_panels:
    plt.subplot(position)
    sns.violinplot(x='winner', y=rating_column, data=games, color=violin_color)
    plt.ylim(750, 2800)
plt.show()
# Violin plots of game length (turns), one per outcome frame.
turn_violin_panels = ((131, dfwhite, 'red'), (132, dfblack, 'green'), (133, dfdraw, 'orange'))

plt.figure(1, figsize=(18, 8))
for position, games, violin_color in turn_violin_panels:
    plt.subplot(position)
    sns.violinplot(x='winner', y='turns', data=games, color=violin_color)
    plt.ylim(0, 380)
plt.show()
# Games won by either side (draws excluded); shared by both rating plots below.
decisive_games = chess[(chess.winner == 'white') | (chess.winner == 'black')]

# Interactive 2-D density of white vs black rating with marginal distributions.
fig1 = px.density_heatmap(
    decisive_games,
    x="white_rating",
    y="black_rating",
    marginal_x="violin",
    marginal_y="histogram",
    title='Density heatmap of White and Black rating where Game ended in a Win',
)
fig1.show()

# Static KDE version of the same relationship.
sns.jointplot(x='white_rating', y='black_rating', height=8, data=decisive_games, kind='kde')
plt.show()

# Decisive games that ended in mate with a `turns` value of exactly 4.
is_mate = chess.victory_status == 'mate'
is_four_turns = chess.turns == 4
is_decisive = (chess.winner == 'white') | (chess.winner == 'black')
chess[is_mate & is_four_turns & is_decisive]
# An "upset" is a game won by the lower-rated player.
white_upsets = chess[(chess.winner == 'white') & (chess.white_rating < chess.black_rating)]
black_upsets = chess[(chess.winner == 'black') & (chess.black_rating < chess.white_rating)]
upsets = pd.concat([white_upsets, black_upsets])

# Rating-gap thresholds: Start, 2*Start, ... strictly below End.
End = 900
Start = 50

u_percentages = []
print(f'Ratings difference : Percentage of wins by weak players')
for i in range(0 + Start, End, Start):
    th_upsets = upsets[upsets.rating_diff > i]
    th_chess = chess[chess.rating_diff > i]
    if th_chess.empty:
        # No game has a gap above this threshold, so none will above a larger
        # one either; stop instead of dividing by zero.
        break
    # Share of all games with a gap above the threshold that were upsets.
    upsets_percentage = (th_upsets.shape[0] / th_chess.shape[0]) * 100
    u_percentages.append([i, upsets_percentage])
    print(f'{str(i).ljust(18)}: {upsets_percentage:.2f}%')

# Split the (threshold, percentage) pairs into plot-ready sequences.
rat = [threshold for threshold, _ in u_percentages]
Up = [percentage for _, percentage in u_percentages]
line_dict = dict(zip(rat, Up))

plt.figure(figsize=(10, 6))
# x/y are passed by keyword: current seaborn no longer accepts them positionally.
sns.lineplot(x=rat, y=Up, color='red')
plt.fill_between(rat, Up, color='orange')
plt.xlabel('Rating difference')
plt.ylabel('Upsets percentage')
plt.title('Plot of Upsets percentage vs rating diffrence')
plt.show()
import re

# Matches one board coordinate such as "e4".
p = re.compile('([a-h][1-8])')

# Tally how often each square is the destination of a move. Castling moves
# carry no coordinate and are tallied under 'O-O' / 'O-O-O'.
squares = {}
for moves in chess.moves:
    for move in moves.split():
        coordinates = p.findall(move)
        if coordinates:
            # The destination is the LAST coordinate in the move text; a fully
            # disambiguated move (e.g. "Qh4e1") names the origin square first.
            square = coordinates[-1]
        else:  # castling
            # Drop check and mate markers so "O-O+" and "O-O#" both count as "O-O".
            square = move.rstrip('+#')
        squares[square] = squares.get(square, 0) + 1
# One row per tallied key (board squares plus the two castling notations).
squares_df = pd.DataFrame.from_dict(squares, orient='index', columns=['count'])

# add castling
# Read the scalar cell directly; calling int() on a one-element Series is deprecated.
total_shorts = int(squares_df.loc['O-O', 'count'])
total_longs = int(squares_df.loc['O-O-O', 'count'])

# `squares` holds a single combined tally per castle type, so each total is
# split evenly between white and black (an odd remainder is dropped by //).
half_shorts = total_shorts // 2
half_longs = total_longs // 2

# Short castling credited to f1/g1 (white) and f8/g8 (black).
squares_df.loc[['f1', 'g1', 'f8', 'g8'], 'count'] += half_shorts
# Long castling credited to c1/d1 (white) and c8/d8 (black).
squares_df.loc[['c1', 'd1', 'c8', 'd8'], 'count'] += half_longs

# The castling rows are now folded into real squares; remove them.
squares_df = squares_df.drop(index=['O-O', 'O-O-O'])

total_castles = total_shorts + total_longs
print(f'Short: {(total_shorts/total_castles)*100:.2f}%')
print(f'Long: {(total_longs/total_castles)*100:.2f}%')
# Share of short vs long castling. The wedges are sized from the counted
# totals rather than from hand-copied percentages, so the chart stays correct
# when the data changes; `autopct` turns the counts into percentages.
labels = ['Short Castle', 'Long Castle']
values = [total_shorts, total_longs]

plt.figure(1, figsize=(15, 8))
explode = (0, 0.1)  # pull the long-castle wedge out slightly
colors = ('orange', 'skyblue')
plt.pie(values, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=150, colors=colors)
plt.axis('equal')
plt.title('Short castling vs Long castling')
plt.show()
# Reshape the per-square tallies into an 8x8 board (ranks as rows, files as
# columns). The 'letter' and 'number' columns the pivot needs are derived here
# from the square names in the index; they were never created before, and
# pivot's arguments must be passed by keyword on current pandas.
board_squares = squares_df[squares_df.index.str.match(r'^[a-h][1-8]$')]  # keep real squares only
squares_df = (
    board_squares
    .assign(
        letter=board_squares.index.str[0],
        number=board_squares.index.str[1].astype(int),
    )
    .pivot(index='number', columns='letter', values='count')
)
squares_df.sort_index(level=0, ascending=False, inplace=True)  # to get right chessboard orientation
squares_df
# Annotated heatmap of the `squares_df` table.
plt.figure(1, figsize=(18, 10))
heatmap_options = dict(
    cmap="YlGnBu",
    annot=True,
    linewidth=0.8,
    cbar_kws={'label': 'Square Occupation'},
)
sns.heatmap(squares_df, **heatmap_options)
plt.title("Heatmap of chess games where the majority action takes place")
plt.show()
If you control the center you will generally also be able to exert more control over the rest of the board.
The occupancy heatmap of the chessboard above supports the importance of this strategy.
# Correlation table. `chess_corr` may still carry the text columns 'winner'
# and 'victory_status' at this point; restrict to numeric columns explicitly,
# because current pandas raises on non-numeric data instead of skipping it.
chess_corr.select_dtypes(include='number').corr()

# Same correlations as an annotated heatmap, on a numeric-only selection.
plt.figure(1, figsize=(10, 8))
chess_corr = chess[['turns', 'white_rating', 'black_rating', 'opening_ply', 'mean_rating', 'rating_diff']]
sns.heatmap(chess_corr.corr(), cmap="YlGnBu", annot=True, linewidth=0.8)
plt.show()
# Number of distinct opening names (the misspelled `opeing` raised NameError).
opening = list(chess.opening_name.unique())
len(opening)

# Number of distinct ECO codes.
opening_eco = list(chess.opening_eco.unique())
len(opening_eco)

# Most frequent ECO code and opening name overall ...
chess.opening_eco.mode()
chess.opening_name.mode()

# ... and among games won by each side.
chess[chess.winner == 'black'].opening_eco.mode()
chess[chess.winner == 'white'].opening_eco.mode()
chess[chess.winner == 'white'].opening_name.mode()
- e4 d5
- In the Scandinavian Defense, Black meets 1.e4 by immediately putting the question to the e4 pawn, attacking it with 1..d5.
- This opening often leads to tricky, scrappy play by Black.

#### Pros:
# Most frequent opening name among games won by black.
black_wins = chess[chess.winner == 'black']
black_wins.opening_name.mode()

# Most frequent ECO code for rated games (printed) and for unrated games.
rated_games = chess[chess.rated == True]
print(rated_games.opening_eco.mode())
unrated_games = chess[chess.rated == False]
unrated_games.opening_eco.mode()
# The ten most frequent opening names over all of `chess`; used as the bar
# order for both charts below.
top_opening_names = chess['opening_name'].value_counts().iloc[:10].index

# All games.
plt.figure(figsize=(16, 8))
plot = sns.countplot(data=chess, y="opening_name", order=top_opening_names, palette="Set1")
plt.xlim(175, 380)  # the x-axis does not start at zero

# Rated games only.
plt.figure(figsize=(16, 8))
plot = sns.countplot(data=chess[chess.rated == True], y="opening_name",
                     order=top_opening_names, palette="Set3")
plt.title('rated')
plt.xlim(155, 310)
The Sicilian is the most popular and best-scoring response to White's first move 1.e4.
1.d4 is a statistically more successful opening for White due to the high success rate of the Sicilian defence against 1.e4.
Grandmaster John Nunn attributes the Sicilian Defence's popularity to its combative nature.
Over 75% of games beginning with 1.e4 c5 continue with 2.Nf3.
# Unrated games, with bars ordered by the ten most frequent opening names
# over all of `chess`.
top_opening_names = chess['opening_name'].value_counts().iloc[:10].index
unrated_games = chess[chess.rated == False]

plt.figure(figsize=(16, 8))
plot = sns.countplot(data=unrated_games, y="opening_name", order=top_opening_names, palette="Set2")
plt.title('unrated')
plt.xlim(15, 80)
Sicilian Defense
The Sicilian is one of the major answers to 1.e4. Black takes control of the d4 square with a pawn from the side - thus he imbalances the position and avoids giving White a central target.
# Opening counts per winning side, with bars ordered by the ten most frequent
# opening names over all of `chess`.
top_opening_names = chess['opening_name'].value_counts().iloc[:10].index

# (winner value, chart title, x-axis limits)
winner_panels = (
    ('white', 'winner white', (75, 170)),
    ('black', 'winner black', (75, 230)),
)
for winning_side, chart_title, x_limits in winner_panels:
    plt.figure(figsize=(16, 8))
    plot = sns.countplot(data=chess[chess.winner == winning_side], y="opening_name",
                         order=top_opening_names, palette="Set2")
    plt.title(chart_title)
    plt.xlim(*x_limits)
# ECO-code counts for all games, ordered by the ten most frequent codes
# over all of `chess`.
plt.figure(figsize=(16, 8))
sns.set_style('darkgrid')
top_eco_codes = chess['opening_eco'].value_counts().iloc[:10].index
plot = sns.countplot(data=chess, y="opening_eco", order=top_eco_codes)
plt.xlim(500, 1020)
plt.title('most common opening eco')

# The same chart for games won by white, keeping the overall top-ten order.
plt.figure(figsize=(16, 8))
sns.set_style('darkgrid')
white_wins = chess[chess.winner == 'white']
plot = sns.countplot(data=white_wins, y="opening_eco",
                     order=chess['opening_eco'].value_counts().iloc[:10].index)
plt.xlim(200, 450)
plt.title('most common opening eco where winner is white')
- e4 e6: The French Defense meets 1.e4 with 1...e6, preparing to counter the e4 pawn with 2...d5. Black blocks in their light-squared bishop, but gains a solid pawn chain and counter-attacking possibilities. The French Defense is named after an 1834 correspondence game between the cities of London and Paris, in which the French Defense was utilized.
Famous Practitioners: Viktor Korchnoi, Aron Nimzowitsch