# -*- coding: utf-8 -*-
"""
Introductory pandas walkthrough.

Builds a small DataFrame by hand, then demonstrates column, row and cell
selection, descriptive statistics, boolean filtering, a histogram, value
counts and pivot tables. Finally it loads a CSV file and pivots it.

Created on Wed Dec 14 11:41:52 2022

@author: user
"""
import pandas as pd

# Location of the CSV file pivoted at the end of the script. A raw string
# keeps the Windows backslashes literal without doubling them.
VGSALES_CSV = r'D:\python\vgsales.csv'

# Data as a list of lists: one inner list per row.
data = [
    ['David', 23, 1.92, 95, 'male'],
    ['Sam', 22, 1.76, 80, 'male'],
    ['Gabriele', 23, 1.76, 67, 'male'],
    ['Victoria', 52, 1.70, 61, 'female'],
]

# Create the DataFrame and name its columns in one step.
# The row index keeps its default labels 0, 1, 2, 3.
df = pd.DataFrame(data, columns=['name', 'age', 'height', 'weight', 'gender'])

# Two ways to reference a column: by key and by attribute.
print(df['age'])
print(df.name)
# Use the Series' own reduction rather than the builtin max().
print('The max age is', df.age.max())

# Reference the first row (positional slice, result is still a DataFrame).
print(df.iloc[0:1])

# Reference a single cell by row label and column name. `.at` is a direct
# scalar lookup and avoids chained indexing such as df['age'][1].
print(df.at[1, 'age'])
# The same cell using loc with the index label and column name.
print(df.loc[1, 'age'])
# Only the row with index label 0.
print(df.loc[0])
# Rows with index labels 0, 1, 2 and the columns from age to weight.
# Label slices in loc include both end points.
print(df.loc[0:2, 'age':'weight'])

# Descriptive statistics of the numeric columns.
stats = df.describe()
# Find the median, i.e. the 50% percentile, from the describe() output...
print(stats.loc['50%'])
# ...or compute it directly; kept in a variable so the result is not lost.
numeric_medians = df.loc[:, 'age':'weight'].median()

# Filtering: names of the people older than 50.
names_over_50 = df.loc[df.age > 50, 'name']
# Average age of the people younger than 50.
print('Average is', df.loc[df.age < 50, 'age'].mean())

# Plot a histogram of the ages.
# NOTE: pandas plotting needs matplotlib installed; when this file is run as
# a plain script the figure may not be displayed unless the environment
# shows figures automatically.
df.age.plot.hist()

# Distinct counts: how many times each age occurs.
print(df['age'].value_counts())

# Pivoting: sum of the ages per gender.
age_sum_by_gender = df.pivot_table(values='age', index='gender', aggfunc='sum')

# Load the CSV file; the pivot below reads its Global_Sales, Year and Genre
# columns.
vgames = pd.read_csv(VGSALES_CSV)
# Total Global_Sales per Year (rows) and Genre (columns). margins is a
# boolean switch: True appends the totals row/column, labelled 'All' by
# default.
test = vgames.pivot_table(
    values='Global_Sales',
    index='Year',
    columns='Genre',
    aggfunc='sum',
    margins=True,
)