### Code for Lecture 3
### how to build binary (logistic) models

### start with simple regression
datum=read.csv(file.choose())          ### first import some data
head(datum)                            ### see what we imported
plot(datum$Elevation,datum$Presence)   ### note there's not much point in plotting raw binary data

results=glm(Proportion~Elevation,data=datum,family=binomial)   ### runs a simple logistic regression
summary(results)   ### spits out the analysis
### note individual coefficient estimates and the intercept, as well as Wald tests of the estimates
### (a sketch at the end of this script shows how to turn these log-odds coefficients into probabilities)
### note it doesn't give an F-statistic for the full model; instead it gives deviance and AIC

anova(results,test='Chisq')   ### generates an ANOVA table for the logistic regression
### instead of an F-test, this does a 'partial likelihood-ratio test' (chi-square), which is appropriate for maximum likelihood
### (the end of this script shows the same test computed by hand)
### note you can plot residuals, but they're not very useful in logistic regression

### run a simple logistic regression on a categorical variable
results=glm(Proportion~PreyDens,data=datum,family=binomial)
summary(results)
### note the output only tells you the difference between each group and the reference group
### it doesn't tell you the difference between the two non-reference groups listed in the output
### to test for differences between those groups, you must combine them
### to combine them, you must use dummy-coded variables (a sketch at the end of this script shows one way to build them by hand)
results=glm(Proportion~Low+Medium+High,data=datum,family=binomial)
summary(results)
results2=glm(Proportion~Low+I(Medium+High),data=datum,family=binomial)   ### combines groups in dummy coding
### NOTE THAT THE PLUS BETWEEN TERMS DOESN'T REALLY MEAN ADD, IT JUST MEANS INCLUDE BOTH IN THE MODEL;
### inside I(), though, the plus really does add the two dummy variables, merging the groups
summary(results2)
anova(results2,results,test='Chisq')   ### compares the simpler model (combined groups) to the more complex model
### note you must use test='Chisq' when comparing glm's in order to get a p-value - a partial likelihood-ratio test
### if the p-value is significant, the complex model is better and the groups are different
### you can also remove any groups that are not different from the reference
results3=glm(Proportion~I(Medium+High),data=datum,family=binomial)
summary(results3)
anova(results3,results,test='Chisq')

### generalized linear modeling is a multi-variable procedure; it analyzes all variables at once
### reasons to do 'multiple regression' rather than univariate analyses:
###   faster, more efficient
###   accounts for collinearity among variables
###   allows one to test for interactions
###   allows one to account for pseudoreplication, random effects, and nested designs
results4=glm(Proportion~Elevation+Low+Medium+High,data=datum,family=binomial)   ### again, the plus just means include all these variables in the model
summary(results4)
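
### a minimal sketch of building the dummy variables by hand, in case the data
### file doesn't already contain Low/Medium/High columns; the level names
### 'Low', 'Medium', 'High' are assumptions about how PreyDens is coded
datum$Low=ifelse(datum$PreyDens=='Low',1,0)        ### 1 if prey density is low, 0 otherwise
datum$Medium=ifelse(datum$PreyDens=='Medium',1,0)  ### 1 if medium, 0 otherwise
datum$High=ifelse(datum$PreyDens=='High',1,0)      ### 1 if high, 0 otherwise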
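
### a minimal sketch of the partial likelihood-ratio test done by hand, to show
### what anova(...,test='Chisq') is computing: the drop in deviance between two
### nested models, compared against a chi-square distribution
dev.diff=deviance(results3)-deviance(results)        ### change in deviance (the simpler model has more)
df.diff=df.residual(results3)-df.residual(results)   ### change in degrees of freedom
1-pchisq(dev.diff,df.diff)                           ### p-value; should match the anova() output above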
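
### a minimal sketch of converting coefficients to probabilities; glm coefficients
### are on the log-odds (logit) scale, so use the inverse logit to get probabilities
### back out; 'results.elev' is just a name used here, and 1500 is a made-up
### example elevation, not a value from the lecture data
results.elev=glm(Proportion~Elevation,data=datum,family=binomial)   ### refit the simple elevation model (results was reused above)
exp(coef(results.elev)[2])                                          ### odds ratio: multiplicative change in odds per 1-unit increase in elevation
plogis(coef(results.elev)[1]+coef(results.elev)[2]*1500)            ### predicted probability at Elevation=1500, by hand
predict(results4,type='response')                                   ### fitted probabilities for every observation from the full model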