Pandas
data:image/s3,"s3://crabby-images/c27c0/c27c0062db1c9b63a7396163e2d9c9f12d7d2cba" alt="Cross Tabulation in Pandas Cross Tabulation in Pandas"
Cross Tabulation in Pandas
The data
Importing the Library and loading the data
import pandas as pd
path = 'path to data'
df = pd.read_csv(path)
df.head()
data:image/s3,"s3://crabby-images/422ab/422ab7bea4b6faca3d9bd1d7c5d6d7f5ebf948b7" alt=""
df.columns
data:image/s3,"s3://crabby-images/e19b4/e19b4c5decf652a4032c73c04fafbb7bfdf3c604" alt=""
Crosstab Basic
#Single index and single column
pd.crosstab(index=df.Country,columns=df.Gender)
data:image/s3,"s3://crabby-images/57116/57116b7ada23a995cea1bd9815dee4fc439cb1d7" alt=""
#Different ways of selecting index and columns
pd.crosstab(index=df['Country'],columns=df['Gender'])
pd.crosstab(df['Country'],df['Gender'])
pd.crosstab(df.Country,df.Gender)
#Multiple index single column
pd.crosstab([df.Country,df.City],df.Gender)
data:image/s3,"s3://crabby-images/3e646/3e6464df4fbe7000fbf4febbb01339e8ced46d94" alt=""
#Single index multiple columns
pd.crosstab([df.Country],[df.Gender,df.Cheese])
data:image/s3,"s3://crabby-images/e645c/e645c6cfd1b8d1fe68f4d7dd6df2c40baa1dbf71" alt=""
#Multipe index multiple columns
pd.crosstab([df.Country,df.City],[df.Gender,df.Cheese])
data:image/s3,"s3://crabby-images/71486/71486157791d7457fc2d9e65fa682b93bb4db1d6" alt=""
Selection with .loc and .iloc
pd.crosstab(df.Country,df.Gender).loc['Germany']
data:image/s3,"s3://crabby-images/ac86a/ac86a0461a6a663dba239ff54f4e0edb63dafd94" alt=""
pd.crosstab(df.Country,df.Gender).loc[['Germany']]
data:image/s3,"s3://crabby-images/1c7e5/1c7e5db6e2f965b21da7ffcbf29bd6e71d8c9fb6" alt=""
pd.crosstab([df.Country,df.City],df.Gender).loc['Germany']
data:image/s3,"s3://crabby-images/438b3/438b3cbc162f24f5f3401df2aed30faeeb6a8830" alt=""
pd.crosstab([df.Country,df.City],df.Gender).loc['Germany','Berlin']
data:image/s3,"s3://crabby-images/fc444/fc4443ff1ccb74fb37b8d4c7ae5a6a67d4d42e05" alt=""
pd.crosstab([df.Country],df.Gender).loc[['Germany','Somalia']]
data:image/s3,"s3://crabby-images/52a23/52a23bd536f4fb02304fc35dcf5c8e60e0c63337" alt=""
pd.crosstab([df.Country,df.City],[df.Gender,df.Cheese]).loc[['Germany','Peru'],['F']]
data:image/s3,"s3://crabby-images/c259a/c259a965955ef9e4864f39d94d566546beb15cdd" alt=""
pd.crosstab([df.Country,df.City],df.Gender).loc[[('Germany','Berlin'),('Afganisthan','Kabul')]]
data:image/s3,"s3://crabby-images/254d8/254d8d87d55a9cc171ab0bf0a0fa99368fcbc285" alt=""
tuple1 = ('Germany','Berlin')
tuple2 = ('Peru','Lima')
tuple3 = ('Afganisthan','Kabul')
tuple4 = ('Afganisthan','Kandhar')
list_of_selection = [tuple1,tuple2,tuple3,tuple3]
pd.crosstab([df.Country,df.City],df.Gender).loc[list_of_selection]
data:image/s3,"s3://crabby-images/deca8/deca8ba1e5598cf6031b85966bab8983fa7c072e" alt=""
pd.crosstab([df.Country,df.City],df.Gender).loc[[tuple1,tuple2,tuple3]]
data:image/s3,"s3://crabby-images/a73ab/a73ab38faceeef31b0669ad47c5abff35e098519" alt=""
pd.crosstab(df.Country,df.Gender).iloc[0]
data:image/s3,"s3://crabby-images/8a70d/8a70d9a4c12101133e27cf761f4394d0d7d192c8" alt=""
pd.crosstab([df.Country,df.City],df.Gender).iloc[0]
data:image/s3,"s3://crabby-images/bfc3a/bfc3a2d493222706d4b892d3a2bc58cd0a551dcd" alt=""
Using basic function with cross tab
pd.crosstab([df.City],[df.Gender]).loc[['Kabul','Lima','Marka']]
data:image/s3,"s3://crabby-images/4c8b1/4c8b1fcadfa953820fb8ea938b31eb47cd5fbe96" alt=""
pd.crosstab([df.City],[df.Gender]).loc[['Kabul','Lima','Marka']].max()
data:image/s3,"s3://crabby-images/0b59a/0b59a6eec84ea77f0a2f6f173179e0bb532a9e15" alt=""
Margins are for total
pd.crosstab([df.Country,df.City],df.Gender,margins=True)
data:image/s3,"s3://crabby-images/11274/11274d0f3619738a157b7203b0f2d55aa9b80017" alt=""
#Renaming the margin
pd.crosstab([df.Country,df.City],df.Gender,margins=True,margins_name="Total")
data:image/s3,"s3://crabby-images/aaa18/aaa18143d0d268f35483fa82e2a0e16d92d4d1b7" alt=""
Normalizing Results
pd.crosstab(index=df.Country,columns=df.Gender,normalize=True)
data:image/s3,"s3://crabby-images/47e3b/47e3b360c0c3bb6ea9218b7fa64c2878de24331d" alt=""
#Percentage calculation
pd.crosstab(index=df.Country,columns=df.Gender,normalize=True)*100
data:image/s3,"s3://crabby-images/dea63/dea63109fbf9221eaa8b20888b7524008c5b9a8b" alt=""
#Normalize by index
pd.crosstab(index=df.Country,columns=df.Gender,normalize='index')
data:image/s3,"s3://crabby-images/9f19d/9f19d599f937289f6814d9c1302aec2ad205cd39" alt=""
#Normalize by index : the case of multiple indices
pd.crosstab([df.Country,df.City],[df.Gender],normalize='index')
data:image/s3,"s3://crabby-images/5c869/5c869f98421aaad9b2e05b03a1b434f875a18056" alt=""
#Normalize by columns
pd.crosstab([df.Country,df.City],[df.Gender],normalize='columns')
data:image/s3,"s3://crabby-images/f4791/f479110b11adf7da2720af5a2c174dd0844856e4" alt=""
Aggregate function
pd.crosstab([df.Country],[df.Gender],values=df.ELO,aggfunc='sum')
data:image/s3,"s3://crabby-images/33436/334364bbe036ed3216a6c143c5d328bd3605f26b" alt=""
Sorting
pd.crosstab(df.Country.sort_values(ascending=True), df.ELO)
data:image/s3,"s3://crabby-images/78b22/78b22985914d55b709f8c5afe24e08611ea6dae0" alt=""
Unique values
pd.crosstab(df.Country,df.Movie).nunique(axis=1)
data:image/s3,"s3://crabby-images/6dfd0/6dfd0bfd9142f8a4257bd05cf6186fbe2104b60f" alt=""
Visualization
(pd.crosstab([df.Country],[df.Gender])).style.background_gradient(cmap='viridis',axis=None)
data:image/s3,"s3://crabby-images/71bca/71bcabe28f4c03d3353b8616968f3fade99363c2" alt=""
import seaborn as sns
sns.heatmap(pd.crosstab([df.Country], [df.Cheese]),
cmap="YlGnBu", annot=True, cbar=False)
data:image/s3,"s3://crabby-images/e09c2/e09c2ea3afa1d198f4b1c6366a11a61c9c9d94ed" alt=""
sns.set(rc = {'figure.figsize':(8,8)})
sns.heatmap(pd.crosstab([df.Country], [df.Language],values=df.Income,aggfunc='sum'),
cmap="YlGnBu", annot=True, cbar=False)
data:image/s3,"s3://crabby-images/df647/df647a6cc49786778e959c644f310d5bbff2fc59" alt=""
Plotting
df1 = pd.crosstab([df.Country],df.Gender)
df1.plot(kind='bar',figsize=(10,5))
data:image/s3,"s3://crabby-images/c255d/c255db2667dfb61230e9c60fd0ecc28239978bfc" alt=""
df1 = pd.crosstab([df.Country,df.City],df.Gender)
df1.plot(kind='bar',figsize=(10,5))
data:image/s3,"s3://crabby-images/8db19/8db19eedc377ea67d4b8efc019591929e9bca328" alt=""
df1 = pd.crosstab([df.Country,df.City],df.Gender)
df1.plot(kind='bar',figsize=(10,5),stacked=True)
data:image/s3,"s3://crabby-images/3abf9/3abf97e85f42a997675859b9f152b6d6955c9dee" alt=""
df1 = pd.crosstab([df.Country,df.City],[df.Gender,df.Language])
df1.plot(kind='bar',figsize=(10,5),stacked=True)
data:image/s3,"s3://crabby-images/e9bad/e9badc73246654b9f95f320b93d60577ee6f05a4" alt=""
pontu
0
Tags :