import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import iqr
import statsmodels.api as sm
import io

#Python Code to compute summary statistics and plot graphs

#Python Example Code: summary statistics and graphs
# Each data set is stored in a multi-line string called "data_content" and read from there:

# Aphasia data: one diagnosis label per line (the blank first line is skipped
# by read_csv).
data_content = """
Brocas
Anomic
Anomic
Conduction
Brocas
Conduction
Conduction
Anomic
Conduction
Anomic
Conduction
Brocas
Anomic
Brocas
Anomic
Anomic
Anomic
Conduction
Brocas
Anomic
Conduction
Anomic
"""

# Wrap the string in an in-memory text buffer so pandas can treat it like a file.
data = io.StringIO(data_content)

# The text has no header row, so the single column is named explicitly.
aphas = pd.read_csv(data, header=None, names=["type"])

# Simple frequency counts (one row per aphasia type, most frequent first):
aphasia_types = aphas["type"]
type_counts = aphasia_types.value_counts()
print(type_counts)

# Making a pie chart for the aphasia data:
# A square figure keeps the pie circular; the Series plot draws on the
# current figure's axes.
plt.figure(figsize=(7, 7))
type_counts.plot.pie(autopct='%1.1f%%')
plt.title("Pie Chart for Aphasia Data")
plt.ylabel("")  # Remove the y-axis label pandas derives from the Series name
plt.show()

# Close the figure instead of clearing it: once plt.show() has returned on a
# GUI backend the figure is already gone, so plt.clf() would silently create
# a new blank figure; on non-interactive backends the 7x7 figure would be
# reused by the next plot. Closing gives the next plot a fresh default figure.
plt.close('all')

# Making a bar graph for the aphasia data:
type_counts.plot(kind='bar', color=['red', 'blue', 'green'])
plt.title("Bar Graph for Aphasia Data")
plt.xlabel("Type")
plt.ylabel("Frequency")
plt.show()

# Release the figure so later plots start from a clean slate.
plt.close('all')

###################################################################################
# Gas mileage data: one observation per line (the blank first line is skipped
# by read_csv).
data_content = """
 36.3
 41
 36.9
 37.1
 44.9
 36.8
 30
 37.2
 42.1
 36.7
 32.7
 37.3
 41.2
 36.6
 32.9
 36.5
 33.2
 37.4
 37.5
 33.6
 40.5
 36.5
 37.6
 33.9
 40.2
 36.4
 37.7
 37.7
 40
 34.2
 36.2
 37.9
 36
 37.9
 35.9
 38.2
 38.3
 35.7
 35.6
 35.1
 38.5
 39
 35.5
 34.8
 38.6
 39.4
 35.3
 34.4
 38.8
 39.7
 36.3
 36.8
 32.5
 36.4
 40.5
 36.6
 36.1
 38.2
 38.4
 39.3
 41
 31.8
 37.3
 33.1
 37
 37.6
 37
 38.7
 39
 35.8
 37
 37.2
 40.7
 37.4
 37.1
 37.8
 35.9
 35.6
 36.7
 34.5
 37.1
 40.3
 36.7
 37
 33.9
 40.1
 38
 35.2
 34.8
 39.5
 39.9
 36.9
 32.9
 33.8
 39.8
 34
 36.8
 35
 38.1
 36.9
"""

# Wrap the string in an in-memory text buffer so pandas can treat it like a file.
data = io.StringIO(data_content)

# The text has no header row, so the single column is named explicitly.
gasdata = pd.read_csv(data, header=None, names=["mileage"])

# Making a histogram for the gas mileage data:
plt.hist(gasdata['mileage'], bins=10, edgecolor='k', alpha=0.7)
plt.title("Histogram for Gas Mileage Data")
plt.xlabel("Mileage")
plt.ylabel("Frequency")
plt.show()

# Close the figure instead of clearing it: once plt.show() has returned on a
# GUI backend the figure is already gone, so plt.clf() would silently create
# a new blank figure. Closing lets the next plot start on a fresh one.
plt.close('all')

# Making a boxplot for the gas data:
gasdata.boxplot(column='mileage')
plt.title("Boxplot for Gas Mileage Data")
plt.show()

# Release the figure so later plots start from a clean slate.
plt.close('all')

# Mean, median, standard deviation, variance, and interquartile range for the
# gas mileage data, collected in one table and printed in that order:
mileage = gasdata["mileage"]
mileage_stats = {
    "Mean": mileage.mean(),
    "Median": mileage.median(),
    "Standard Deviation": mileage.std(),
    "Variance": mileage.var(),
    "Interquartile Range": iqr(mileage),
}
for stat_name, stat_value in mileage_stats.items():
    print(f"{stat_name}:", stat_value)

# The 5-number summary for the gas mileage data, picked out of describe():
five_number_rows = ['min', '25%', '50%', '75%', 'max']
print("5-number summary:", mileage.describe()[five_number_rows])

# Making a normal Q-Q plot for the gas mileage data:
# (statsmodels is already imported as `sm` at the top of the file.)
# sm.qqplot opens a brand-new figure unless it is given an Axes, which would
# leave any existing empty figure to pop up next to the Q-Q plot. Drawing on
# the current axes keeps it to a single window, like the other plots here.
# line='s' adds the standardized reference line.
sm.qqplot(gasdata['mileage'], line='s', ax=plt.gca())
plt.title("Q-Q Plot for Gas Mileage Data")
plt.show()

# Close (rather than clear) so no blank figure is left behind for later plots.
plt.close('all')

###################################################################################
# Rainfall data: one observation per line (the blank first line is skipped by
# read_csv).
data_content = """
7
7.2
7.8
7.8
11.5
13
14
14.6
15
15.2
16.2
17.2
17.4
20.7
22.9
24.7
25.9
29.1
30.2
30.2
30.6
30.8
31
31.4
31.7
32.5
33.4
34.4
35
35.1
35.9
35.9
36.1
36.2
36.2
37
37
37.6
38.7
38.8
38.9
39
39.9
40.2
40.2
40.8
40.8
41.8
42.5
42.5
42.6
42.7
42.8
43.1
43.4
44.7
45.5
46
46.4
48.2
48.3
48.5
49.1
49.2
54.5
54.7
56.8
59.2
59.8
67
"""

# Wrap the string in an in-memory text buffer so pandas can treat it like a file.
data = io.StringIO(data_content)

# The text has no header row, so the single column is named explicitly.
raindata = pd.read_csv(data, header=None, names=["rainfall"])

# Making a histogram for the rain data:
plt.hist(raindata['rainfall'], bins=10, edgecolor='k', alpha=0.7)
plt.title("Histogram for Rainfall Data")
plt.xlabel("Rainfall")
plt.ylabel("Frequency")
plt.show()

# Close the figure instead of clearing it: once plt.show() has returned on a
# GUI backend the figure is already gone, so plt.clf() would silently create
# a new blank figure. Closing lets the next plot start on a fresh one.
plt.close('all')

# Making a boxplot for the rain data:
raindata.boxplot(column='rainfall')
plt.title("Boxplot for Rainfall Data")
plt.show()

# Release the figure so later plots start from a clean slate.
plt.close('all')

# Mean, median, standard deviation, variance, and interquartile range for the
# rain data, collected in one table and printed in that order:
rainfall = raindata["rainfall"]
rainfall_stats = {
    "Mean": rainfall.mean(),
    "Median": rainfall.median(),
    "Standard Deviation": rainfall.std(),
    "Variance": rainfall.var(),
    "Interquartile Range": iqr(rainfall),
}
for stat_name, stat_value in rainfall_stats.items():
    print(f"{stat_name}:", stat_value)

# The 5-number summary for the rain data, picked out of describe():
five_number_rows = ['min', '25%', '50%', '75%', 'max']
print("5-number summary:", rainfall.describe()[five_number_rows])

# Making a normal Q-Q plot for the rain data:
# sm.qqplot opens a brand-new figure unless it is given an Axes, which would
# leave any existing empty figure to pop up next to the Q-Q plot. Drawing on
# the current axes keeps it to a single window, like the other plots here.
# line='s' adds the standardized reference line.
sm.qqplot(raindata['rainfall'], line='s', ax=plt.gca())
plt.title("Q-Q Plot for Rainfall Data")
plt.show()

# Close (rather than clear) so no blank figure is left behind for later plots.
plt.close('all')

################################################################################

# An example with an outlier:
# The final value (24.5) sits far above the rest of the list.
emissions = [
    3.3, 4.2, 5.6, 5.6, 5.7,
    5.7, 6.2, 6.3, 7.0, 7.6,
    8.0, 8.1, 8.3, 8.6, 8.7,
    9.4, 9.7, 9.9, 10.3, 10.3,
    10.4, 11.3, 12.7, 13.1, 24.5,
]

# A pandas Series gives describe()/std() for free
emissions_series = pd.Series(data=emissions)

# Count, mean, std, min, quartiles and max, followed by the spread measures
emissions_summary = emissions_series.describe()
print(emissions_summary)
print("sd:", emissions_series.std())
print("IQR:", iqr(emissions_series))

# Boxplot for emissions
plt.boxplot(emissions_series)
plt.title("Boxplot for Emissions")
plt.show()

# Close the figure instead of clearing it: once plt.show() has returned on a
# GUI backend the figure is already gone, so plt.clf() would silently create
# a new blank figure that is never used.
plt.close('all')

# Removing Luxembourg's data: mask the 24.5 observation and keep the rest
is_luxembourg = emissions_series == 24.5
emiss_no_Lux = emissions_series[~is_luxembourg]

# Same summary as before, now without the outlier
print("\nSummary after removing Luxembourg:")
summary_no_lux = emiss_no_Lux.describe()
print(summary_no_lux)
print("sd:", emiss_no_Lux.std())
print("IQR:", iqr(emiss_no_Lux))

# Observing the summary statistics, you can decide which measure of center (and which measure of spread) 
# was more affected by the outlier.
