###################################################################### # SAS tasks by python # 10-19-2021 ###################################################################### import pandas as pd """################################################################### filename fname url "http://www.auburn.edu/~zengpen/teaching/datasets/grades.csv"; proc import out = grades datafile = fname dbms = csv replace; run; ###################################################################""" filename = 'http://www.auburn.edu/~zengpen/teaching/datasets/grades.csv' grades = pd.read_csv(filename) """################################################################### proc sql; create table grades_updated as select *, count(name) as dept_num, mean(exam1) as dept_mean from grades group by department; quit; ###################################################################""" grades['dept_num'] = grades.groupby(['department'])['name'].transform('count') grades['dept_mean'] = grades.groupby(['department'])['exam1'].transform('mean') """################################################################### proc sql; create table grades_summary as select department, mean(exam1) as dept_mean, sum(exam2) as dept_sum, mean(exam1 / (exam1 + exam2)) as ratio from grades group by department; quit; ###################################################################""" a = grades.groupby(['department'], as_index = False). \ agg(dept_mean = ('exam1', 'mean'), dept_sum = ('exam2', 'sum')) b = grades.eval('ratio = exam1 / (exam1 + exam2)'). \ groupby(['department'], as_index = False).agg({'ratio': 'mean'}) a.merge(b, on = 'department') ###################################################################### # THE END ######################################################################