#S5600-6600_SummStat_Graphs.r
#R Code to compute summary statistics and plot graphs

#R Example Code: summary statistics and graphs
# Reading the data into a temporary file called "my.datafile":

my.datafile <- tempfile()
cat(file=my.datafile, "  
Brocas    
Anomic    
Anomic    
Conduction
Brocas    
Conduction
Conduction
Anomic    
Conduction
Anomic    
Conduction
Brocas    
Anomic    
Brocas    
Anomic    
Anomic    
Anomic    
Conduction
Brocas    
Anomic    
Conduction
Anomic    
", sep=" ")

options(scipen = 999) # suppressing scientific notation

# Name the data set and give the variable (column) a name:
aphas <- read.table(my.datafile, header = FALSE, col.names = c("type"))

# The temporary file is only needed until the data frame has been built:
unlink(my.datafile)

# Alternatively, could type:
# aphas <- read.table("C:/Users/ezc0066/Documents/AURelated/AUAcademic/STAT5600_6600-F2021/STAT5600_6600-RCode/aphasia.txt", header=FALSE, col.names=c("type"))

# Extract the column as an ordinary vector instead of attach()-ing the data
# frame: attach() leaves "aphas" on the search path and adds another copy
# every time the script is re-run. The vector is still called "type", so the
# calls below (and the labels they produce) are the same as before.
type <- aphas[["type"]]

# Simple frequency counts (computed once and reused for both charts):
type.counts <- table(type)
type.counts

# Making a pie chart for the aphasia data:
pie(type.counts)

# Making a bar graph for the aphasia data:
barplot(type.counts)

###################################################################################

# Reading the data into a temporary file called "my.datafile":
my.datafile <- tempfile()
cat(file=my.datafile, "  
 36.3
 41
 36.9
 37.1
 44.9
 36.8
 30
 37.2
 42.1
 36.7
 32.7
 37.3
 41.2
 36.6
 32.9
 36.5
 33.2
 37.4
 37.5
 33.6
 40.5
 36.5
 37.6
 33.9
 40.2
 36.4
 37.7
 37.7
 40
 34.2
 36.2
 37.9
 36
 37.9
 35.9
 38.2
 38.3
 35.7
 35.6
 35.1
 38.5
 39
 35.5
 34.8
 38.6
 39.4
 35.3
 34.4
 38.8
 39.7
 36.3
 36.8
 32.5
 36.4
 40.5
 36.6
 36.1
 38.2
 38.4
 39.3
 41
 31.8
 37.3
 33.1
 37
 37.6
 37
 38.7
 39
 35.8
 37
 37.2
 40.7
 37.4
 37.1
 37.8
 35.9
 35.6
 36.7
 34.5
 37.1
 40.3
 36.7
 37
 33.9
 40.1
 38
 35.2
 34.8
 39.5
 39.9
 36.9
 32.9
 33.8
 39.8
 34
 36.8
 35
 38.1
 36.9
", sep=" ")

options(scipen = 999) # suppressing scientific notation

# Name the data set and give the variable (column) a name:
gasdata <- read.table(my.datafile, header = FALSE, col.names = c("mileage"))

# The temporary file is only needed until the data frame has been built:
unlink(my.datafile)

# Alternatively, could type:
# gasdata <- read.table("C:/Users/ezc0066/Documents/AURelated/AUAcademic/STAT5600_6600-F2021/STAT5600_6600-RCode/gasdata.txt", header=FALSE, col.names=c("mileage"))

# Extract the column as an ordinary vector instead of attach()-ing the data
# frame: attach() leaves "gasdata" on the search path and adds another copy
# every time the script is re-run. The vector is still called "mileage", so
# the default plot titles and axis labels below are the same as before.
mileage <- gasdata[["mileage"]]

# Making a stem and leaf plot for the gas mileage data:
stem(mileage)

# Making a histogram for the gas mileage data:
hist(mileage)

# Making a boxplot for the gas data:
boxplot(mileage)

# Mean, median, standard deviation, variance, and interquartile range for the
# gas mileage data:
mean(mileage)
median(mileage)
sd(mileage)
var(mileage)
IQR(mileage)

# The 5-number summary for the gas mileage data:
fivenum(mileage)

# the 'summary' command gives the 5-number summary, plus the sample mean:
summary(mileage)

# Making a normal Q-Q plot for the gas mileage data.
# qqline() adds the reference line through the first and third quartiles of
# the data, replacing a hand-computed intercept and slope that would have to
# be re-derived whenever the data change.
qqnorm(mileage)
qqline(mileage, col = "steelblue", lwd = 2)

###################################################################################

# Reading the data into a temporary file called "my.datafile":

my.datafile <- tempfile()
cat(file=my.datafile, "  
7
7.2
7.8
7.8
11.5
13
14
14.6
15
15.2
16.2
17.2
17.4
20.7
22.9
24.7
25.9
29.1
30.2
30.2
30.6
30.8
31
31.4
31.7
32.5
33.4
34.4
35
35.1
35.9
35.9
36.1
36.2
36.2
37
37
37.6
38.7
38.8
38.9
39
39.9
40.2
40.2
40.8
40.8
41.8
42.5
42.5
42.6
42.7
42.8
43.1
43.4
44.7
45.5
46
46.4
48.2
48.3
48.5
49.1
49.2
54.5
54.7
56.8
59.2
59.8
67
", sep=" ")

options(scipen = 999) # suppressing scientific notation

# Name the data set and give the variable (column) a name:
raindata <- read.table(my.datafile, header = FALSE, col.names = c("rainfall"))

# The temporary file is only needed until the data frame has been built:
unlink(my.datafile)

# Alternatively, could type:
# raindata <- read.table("C:/Users/ezc0066/Documents/AURelated/AUAcademic/STAT5600_6600-F2021/STAT5600_6600-RCode/rainfall.txt", header=FALSE, col.names=c("rainfall"))

# Extract the column as an ordinary vector instead of attach()-ing the data
# frame: attach() leaves "raindata" on the search path and adds another copy
# every time the script is re-run. The vector is still called "rainfall", so
# the default plot titles and axis labels below are the same as before.
rainfall <- raindata[["rainfall"]]

# Making a stem and leaf plot for the rain data:
stem(rainfall)

# Making a histogram for the rain data:
hist(rainfall)

# Making a boxplot for the rain data:
boxplot(rainfall)

# Mean, median, standard deviation, variance, and interquartile range for the
# rain data:
mean(rainfall)
median(rainfall)
sd(rainfall)
var(rainfall)
IQR(rainfall)

# The 5-number summary for the rain data:
fivenum(rainfall)

# the 'summary' command gives the 5-number summary, plus the sample mean:
summary(rainfall)

# Making a normal Q-Q plot for the rain data:
qqnorm(rainfall)

################################################################################

# An example with an outlier:

# Here the data are listed all in one row, separated by spaces.
# The scan() function will allow R to read in such data:

# The values are passed to scan() through its 'text' argument rather than
# read from the console with scan(""): console input cannot run unattended,
# and when the script is pasted into the console scan("") reads the script
# lines that follow it as data.
emissions <- scan(text = "
3.3  4.2  5.6  5.6  5.7  5.7  6.2  6.3  7.0  7.6  8.0  8.1  8.3  8.6  8.7  9.4  9.7  9.9 10.3 10.3 10.4 11.3 12.7 13.1 24.5
")

summary(emissions)
print("sd:")
sd(emissions)
print("IQR:")
IQR(emissions)

boxplot(emissions)
# Note the boxplot: Luxembourg with 24.5 metric tons is an outlier.

# Let's delete Luxembourg.
# (24.5 is exactly representable in binary floating point, so the exact
# comparison below reliably matches the value that was read in.)
emiss.no.Lux <- emissions[emissions != 24.5]

# Summaries with and without the outlier, for side-by-side comparison:
summary(emissions)
summary(emiss.no.Lux)

print("sd:")
sd(emiss.no.Lux)
print("IQR:")
IQR(emiss.no.Lux)
# Which measure of center, and which measure of spread, was more affected by the outlier?
