Pandas
data:image/s3,"s3://crabby-images/d449f/d449f3efc8d6a8d2ee03d912ba8239df12512d81" alt="Data Exploring And Analysis Data Exploring And Analysis"
Data Exploring And Analysis
Exploring and Analyzing a Series
import pandas as pd
import numpy as np
# Build a small integer Series and report its basic descriptive statistics.
S1 = pd.Series([5, 8, 9, 5, 4, 2])
series_mean = S1.mean()
series_max = S1.max()
series_min = S1.min()
series_std = S1.std()  # sample standard deviation (pandas default, ddof=1)
print("Series mean Value : ", series_mean)
print("Series max value : ", series_max)
print("Series min value : ", series_min)
print("Series standard deviation : ", series_std)
data:image/s3,"s3://crabby-images/10ca2/10ca276f976701b4b95e23d7f2e9adef08a3b26f" alt=""
Operation on a Series
# NOTE: `in` applied directly to a Series tests the index labels, not the
# stored values (`5 in S1` is True only because 5 is the last label of the
# default 0..5 index). Check membership of a value against `S1.values`.
print(5 in S1.values)
print(256 in S1.values)
# An element-wise comparison yields a boolean Series (a mask).
S2 = S1 <5
print(S2)
# Indexing with the mask keeps only the elements smaller than 5.
S3 = S1[S1<5]
print('\n',S3)
# Arithmetic is applied element-wise to the filtered values.
S4 = S1[S1<5]*10
print('\n',S4)
data:image/s3,"s3://crabby-images/46b74/46b743a75b6c7e51df42e2f6b0b92c818d25ddc4" alt=""
Exploring and Analyzing a Data Frame
# Creating a data frame with five attributes (columns); each list holds one
# value per person.
data = {'Age':[30,32,35,63],
'Salary':[25,36,52,89],
'Height':[150,180,172,175],
'Weight':[98,85,75,82],
'Gender':['Male','Female','Female','Male']}
# Re-bind `data` from the plain dict to a DataFrame whose row labels are the
# people's names.
data = pd.DataFrame(data,index=['Adlof','Kate','Neha','Bir'])
data
data:image/s3,"s3://crabby-images/cb2d9/cb2d96e01a56763757a71e99c21c9cfb3aeea329" alt=""
#Adding another row (or DataFrame with single Row)
df2 = pd.DataFrame([[23,32,145,55,'Female']],
                   columns=['Age','Salary','Height','Weight','Gender'],
                   index=['Mona'])
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat stacks the frames row-wise and keeps the 'Mona' row label.
data = pd.concat([data, df2])
data
data:image/s3,"s3://crabby-images/7a215/7a215399f026976ff5690aea72cf84fddce76b96" alt=""
data.describe()
data:image/s3,"s3://crabby-images/9cfc9/9cfc951dfc9cd18c250bdb17efe465a6dd4e7c43" alt=""
data.describe(include='all')
data:image/s3,"s3://crabby-images/2dc64/2dc646cd104c63df9fecd7d8fa22ad1fd42cc2d1" alt=""
data.Salary.describe()
data:image/s3,"s3://crabby-images/ab085/ab0859fb6edcd0cb4ce49c579d80939b9877145c" alt=""
data['Salary'].describe()
data:image/s3,"s3://crabby-images/fdc3f/fdc3f842ac5af56ce3689b7f03bce2caf89549de" alt=""
#Analyzing only numerical patterns
data.describe(include=[np.number])
data:image/s3,"s3://crabby-images/43cb0/43cb0b0a6bbaad828d6d859a35bc7c2a88eeab85" alt=""
#Analyzing Strings Patterns Only
# `np.object` was only an alias of the builtin `object` and was removed in
# NumPy 1.24, so the builtin is used to select the object (string) columns.
data.describe(include=[object])
data:image/s3,"s3://crabby-images/3010c/3010ce4bdc0b8b56fa615fec040e250eeaec6390" alt=""
data.describe(exclude=[np.number])
data:image/s3,"s3://crabby-images/3d264/3d2647fce1aed58df7b810874ff9509a592fa73e" alt=""
Optimal_Salary = data['Salary'] >= 35
Optimal_Salary
data:image/s3,"s3://crabby-images/7e7b1/7e7b12ade8394ef18af77f0790b8958bd01237a4" alt=""
#Correlation
# Correlation is only defined for numeric columns. 'Gender' holds strings and
# makes DataFrame.corr() raise in pandas >= 2.0, so the numeric columns are
# selected explicitly first (this works on older pandas versions as well).
data.select_dtypes(include=[np.number]).corr()
data:image/s3,"s3://crabby-images/2ebdc/2ebdcb0985640aebfeb93e26a80b010077551a1a" alt=""
data.count()
data:image/s3,"s3://crabby-images/99159/99159badeb8cd81975f1e667a87a6468f608d8c6" alt=""
data.min()
data:image/s3,"s3://crabby-images/8ecd2/8ecd21832ef73439e3191602da860e5bb1b146f4" alt=""
Data Grouping
# Ten sample people described by parallel lists: position i in every list
# belongs to the same person.
Number = list(range(1, 11))
Names = [
    'Ali', 'Aryan', 'Arya', 'Bipul', 'Vivek',
    'Sahil', 'Surya', 'Shere', 'Sahid', 'Reema',
]
City = [
    'Dubai', 'Karanchi', 'Paris', 'Perth', 'Oslo',
    'Lisbon', 'Berlin', 'Dubai', 'Oslo', 'Dubai',
]
Gender = ['Male', 'Male', 'Female'] + ['Male'] * 6 + ['Female']
Height = [120, 130, 150, 200, 180, 175, 178, 172, 168, 150]
Weight = [85, 95, 62, 54, 15, 96, 123, 41, 52, 65]
# Keyword order fixes the column order of the resulting frame.
dataset = pd.DataFrame(dict(
    Number=Number,
    Names=Names,
    City=City,
    Height=Height,
    Weight=Weight,
    Gender=Gender,
))
# Show the first five rows.
dataset.head()
data:image/s3,"s3://crabby-images/29821/298215da521308b30343c003e3b05c7a91f12536" alt=""
dataset.groupby('City').count()
data:image/s3,"s3://crabby-images/16bd5/16bd54a1f4e05806eec91fe6a63eab5d8a65fd80" alt=""
dataset.groupby(['City','Gender']).count()
data:image/s3,"s3://crabby-images/c42ab/c42abb21b3fb421bc6e6c4c6a30aeff7ae026d8b" alt=""
grouped = dataset.groupby('City')
print(grouped.get_group('Dubai'))
data:image/s3,"s3://crabby-images/fd13d/fd13d95e2954a3bb5186c253fef44cbceae41fae" alt=""
Data Aggregation
dataset
data:image/s3,"s3://crabby-images/957ed/957ede2acc34789a93ed7afb745e62372caf25cf" alt=""
dataset.set_index('Number')
data:image/s3,"s3://crabby-images/a43a9/a43a963320f453d743f6278e2349741c5fa8bfb5" alt=""
# Group the rows by gender and aggregate the numeric columns per group.
grouped = dataset.groupby('Gender')
# String aliases are used instead of NumPy callables: passing np.mean/np.sum/
# np.std to agg() is deprecated in recent pandas, and the aliases produce the
# same results and the same 'mean'/'sum'/'std' column labels.
print(grouped['Height'].agg('mean'))
print("\n")
print(grouped['Weight'].agg(['mean', 'sum', 'std']))
print("\n")
data:image/s3,"s3://crabby-images/b3db1/b3db15dc46dc08f71218a355a37c9b0ad4c75aa3" alt=""
#Transforming data
dataset = dataset.set_index('Number')


def score(x):
    """Return x standardised within its group (z-score) and scaled by 10."""
    return (x - x.mean()) / x.std() * 10


# Group the re-indexed frame and restrict the transform to the numeric
# measurement columns: the string columns (Names, City) cannot be
# standardised and make transform() raise in pandas >= 2.0.
print(dataset.groupby('Gender')[['Height', 'Weight']].transform(score))
data:image/s3,"s3://crabby-images/1a9cf/1a9cff9730a3ea24800b468e9f3acda582bd7b84" alt=""
Filtration
dataset.groupby('City').filter(lambda x :len(x)>=2)
data:image/s3,"s3://crabby-images/c1deb/c1debd8da82d0b328df7bbb7b1399df6481b0eba" alt=""
dataset.filter(['City','Height'])
data:image/s3,"s3://crabby-images/06cb6/06cb6d83601f983761bcc920db480892278a8360" alt=""
# Using a regular expression to select all
# columns that have the letter 'a' or 'A' in their name.
dataset.filter(regex ='[aA]')
data:image/s3,"s3://crabby-images/49411/49411d3ada7192e226da7fe3753b4274f0d3ff71" alt=""
dataset[(dataset.Height > 150) & (dataset.Weight < 90)]
data:image/s3,"s3://crabby-images/5408a/5408a75621a3f3adb6338c9c556637501becfd89" alt=""
dataset[(dataset.Height > 150) | (dataset.Weight > 90)]
data:image/s3,"s3://crabby-images/121df/121df9ad92d4a112509a43a609120187423560e3" alt=""
dataset[dataset.City.str.startswith('K')]
data:image/s3,"s3://crabby-images/715e2/715e20d6301b095bdcf71caf75df3ca46eb03063" alt=""
dataset[dataset.City.str.contains('i')]
data:image/s3,"s3://crabby-images/836d9/836d9f2c3f68e2821b72edcd9570d3d6c041ff88" alt=""
# Rows whose name contains 'i' or 'a'. The second positional argument of
# str.contains is `case`, not another pattern, so alternatives have to be
# expressed inside a single regular expression.
dataset[dataset.Names.str.contains('i|a')]
data:image/s3,"s3://crabby-images/7c48c/7c48cd595756cc6b2f9977192a99334b20a59ad9" alt=""
dataset[~dataset.City.str.startswith('K')]
data:image/s3,"s3://crabby-images/68cd2/68cd2f4ee218deceb92a9d6297c0725d5f8bb87b" alt=""
dataset.query('City == "Dubai" and Height > 120')
data:image/s3,"s3://crabby-images/13adf/13adfe057bdb9fbd4ecd227d5313f51fc839ff15" alt=""
#Selecting row with 3 largest values in column Height
dataset.nlargest(3, 'Height')
data:image/s3,"s3://crabby-images/06886/068867b554499f6e050acbe81a0c1509b1d56180" alt=""
#Selecting row with 2 smallest values in column Height
dataset.nsmallest(2, 'Height')
data:image/s3,"s3://crabby-images/8d53c/8d53cf052a920a7629e87907104602d930388c95" alt=""
dataset.iloc[3:5, :] #rows 3 and 4, all columns
data:image/s3,"s3://crabby-images/7723f/7723f01e3ccee08eb41207e63d0c6720fd9ecc1e" alt=""
# loc is label-based and includes the end of the slice: this selects the rows
# labelled 3, 4 and 5 of the 'Number' index, all columns
dataset.loc[3:5,:]
data:image/s3,"s3://crabby-images/0b84e/0b84e4521be17cede00261348567506cf1651ab6" alt=""
#Changing index of dataset to show difference between loc and iloc
dataset.index = ['a','b','c','d','e','f','g','h','i','j']
# loc is label-based: the index now holds strings, so slicing with the
# integers 3 and 5 will generate an error
dataset.loc[3:5,:]
# iloc is position-based and ignores the labels:
# rows at positions 3 and 4, all columns
dataset.iloc[3:5,:]
data:image/s3,"s3://crabby-images/d5474/d54747d7bf9163cb60782162dcff47aad02c0749" alt=""
# iloc only accepts integer positions, so slicing it with the labels
# 'c' and 'e' will generate an error
dataset.iloc['c':'e',:]
# loc slices by label and includes both ends:
# rows 'c', 'd' and 'e', all columns
dataset.loc['c':'e',:]
data:image/s3,"s3://crabby-images/d44d4/d44d4d23c2776e2314137d173c311f26064f4545" alt=""
ponru
0