# -*- coding: utf-8 -*-
"""
Created on Wed Dec 14 11:41:52 2022

@author: user
"""
import pandas as pd

# Sample records as a list of lists; each inner list holds, in order:
# name, age, height, weight, gender.
data = [
    ['David', 23, 1.92, 95, 'male'],
    ['Sam', 22, 1.76, 80, 'male'],
    ['Gabriele', 23, 1.76, 67, 'male'],
    ['Victoria', 52, 1.70, 61, 'female'],
]
# Build the DataFrame and label its columns in a single constructor call.
# No index is supplied, so the rows keep the default labels 0, 1, 2, 3.
df = pd.DataFrame(data, columns=['name', 'age', 'height', 'weight', 'gender'])

# Selecting a column: attribute access and bracket access both return a Series.
print(df.age)
print(df['name'])
print('The max age is', df.age.max())
# First row only, kept as a one-row DataFrame (positional slice).
print(df.iloc[:1])
# A single cell: pick the column first, then the row label.
print(df['age'].loc[1])
# The same cell addressed through loc with (row label, column name).
print(df.loc[1, 'age'])
# Only the row labelled 0, returned as a Series.
print(df.loc[0])
# Rows labelled 0 through 2 and columns 'age' through 'weight';
# label slices with loc include both endpoints.
print(df.loc[:2, 'age':'weight'])

# Descriptive statistics for the numeric columns.
stats = df.describe()
# The '50%' row of describe() is the 50th percentile, i.e. the median.
print(stats.loc['50%'])
# Cross-check by computing the medians directly. The result is printed
# because a bare expression is not echoed when the file runs as a script.
print(df.loc[:, 'age':'weight'].median())

# Filtering with a boolean mask: names of the rows whose age is above 50.
# Printed explicitly so the result is visible when the file runs as a script.
print(df.loc[df.age > 50, 'name'])
# Average age over the rows whose age is below 50.
print('Average is', df.loc[df.age < 50, 'age'].mean())

# Plot a histogram of the age column through the pandas plotting accessor.
# NOTE(review): nothing here shows or saves the figure, so whether it becomes
# visible presumably depends on the environment running the script - confirm.
df['age'].plot.hist()
# Number of occurrences of each distinct age.
print(df.age.value_counts())

# Pivoting: sum of the ages for each gender.
# Printed explicitly so the table is visible when the file runs as a script.
print(df.pivot_table(values='age', index='gender', aggfunc='sum'))

# The CSV file is expected at D:\python\vgsales.csv (raw string, so the
# backslashes need no escaping).
vgames = pd.read_csv(r'D:\python\vgsales.csv')

# Sum of Global_Sales for every Year (rows) and Genre (columns).
# margins is a boolean switch that appends the row/column of totals; the
# label of those totals is controlled separately by margins_name, whose
# default is 'All'. A string passed to margins only worked because
# non-empty strings are truthy.
test = vgames.pivot_table(
    values='Global_Sales',
    index='Year',
    columns='Genre',
    aggfunc='sum',
    margins=True,
)