# lecturenotes8.r



# -----------------------------------------
# Beer  
# following Cesur & Kelly (2013), "Who Pays the Bar Tab? Beer Consumption and Economic Growth in the United States," Economic Inquiry. 
# -----------------------------------------
# of course, those authors are judged innocent of anything I do!

rm(list = ls(all = TRUE))

setwd("C:\\Users\\Kevin\\Documents\\CCNY\\data for classes\\beer_data")
# each csv has years 1997-2011 as columns, 51 states (incl DC) as rows
growth_rates <- as.matrix(read.csv("growth.csv", header = FALSE))
beertax <- as.matrix(read.csv("beertax.csv", header = FALSE))
gdp_pc <- as.matrix(read.csv("gdp_pc.csv", header = FALSE))
spirits_pc <- as.matrix(read.csv("spirits_pc.csv", header = FALSE))
wine_pc <- as.matrix(read.csv("wine_pc.csv", header = FALSE))
beer_pc <- as.matrix(read.csv("beer_pc.csv", header = FALSE))

# make lags - long coding but a bit more evident what's going on
gdp_L <- c(rep(999999,51),gdp_pc[,1],gdp_pc[,2],gdp_pc[,3],gdp_pc[,4],gdp_pc[,5],gdp_pc[,6],gdp_pc[,7],gdp_pc[,8],gdp_pc[,9],gdp_pc[,10],gdp_pc[,11],gdp_pc[,12],gdp_pc[,13],gdp_pc[,14])
is.na(gdp_L) <- (gdp_L == 999999)

spirits_L <- c(rep(999999,51),spirits_pc[,1],spirits_pc[,2],spirits_pc[,3],spirits_pc[,4],spirits_pc[,5],spirits_pc[,6],spirits_pc[,7],spirits_pc[,8],spirits_pc[,9],spirits_pc[,10],spirits_pc[,11],spirits_pc[,12],spirits_pc[,13],spirits_pc[,14])
is.na(spirits_L) <- (spirits_L == 999999)
wine_L <- c(rep(999999,51),wine_pc[,1],wine_pc[,2],wine_pc[,3],wine_pc[,4],wine_pc[,5],wine_pc[,6],wine_pc[,7],wine_pc[,8],wine_pc[,9],wine_pc[,10],wine_pc[,11],wine_pc[,12],wine_pc[,13],wine_pc[,14])
is.na(wine_L) <- (wine_L == 999999)
beer_L <- c(rep(999999,51),beer_pc[,1],beer_pc[,2],beer_pc[,3],beer_pc[,4],beer_pc[,5],beer_pc[,6],beer_pc[,7],beer_pc[,8],beer_pc[,9],beer_pc[,10],beer_pc[,11],beer_pc[,12],beer_pc[,13],beer_pc[,14])
is.na(beer_L) <- (beer_L == 999999)


NN_TT <- dim(growth_rates)
tot_N <- NN_TT[1]*NN_TT[2]

dim(growth_rates) <- c(tot_N,1)
dim(gdp_pc) <- c(tot_N,1)
dim(beertax) <- c(tot_N,1)
dim(spirits_pc) <- c(tot_N,1)
dim(wine_pc) <- c(tot_N,1)
dim(beer_pc) <- c(tot_N,1)

# state fixed effects
st_fixedeff <- rep(1:51,15)

frac_metal <- as.matrix(read.csv("frac_metal.csv", header = FALSE))
frac_glass <- as.matrix(read.csv("frac_glass.csv", header = FALSE))
frac_refill <- as.matrix(read.csv("frac_refill.csv", header = FALSE))
frac_draft <- as.matrix(read.csv("frac_draft.csv", header = FALSE))

# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# up to here, saved workspace

#beer_states <- data.frame(cbind(growth_rates, gdp_pc, beertax, spirits_pc, wine_pc, beer_pc, gdp_L, spirits_L, wine_L, beer_L, frac_metal, frac_glass, frac_refill, frac_draft))


regression1 <- lm(growth_rates ~ beer_pc + gdp_L + as.factor(st_fixedeff))
summary(regression1)
# but want instruments for beer consumption per capita (beer_pc), authors suggest beertax
# simple summary in http://www.r-bloggers.com/a-simple-instrumental-variables-problem/
iv_reg1 <- lm(beer_pc ~ beertax)
summary(iv_reg1)
pred_beer <- predict(iv_reg1)
iv_reg2 <- lm(growth_rates ~ pred_beer + gdp_L + as.factor(st_fixedeff))
summary(iv_reg2)

library(AER)
iv_reg_package1 <- ivreg(growth_rates ~ beer_pc + gdp_L + as.factor(st_fixedeff) | beertax + gdp_L + as.factor(st_fixedeff))
summary(iv_reg_package1)


# more details

# beer tax is weak instrument - state fixed effects explain 94% of var
reg1 <- lm(beertax ~ as.factor(st_fixedeff))
summary(reg1)

iv_reg3 <- lm(beer_pc ~ beertax + beer_L + spirits_L + wine_L)
summary(iv_reg3)
pred_beer3 <- predict(iv_reg3)
pred_beer4 <- c(rep(999999,51),pred_beer3)
is.na(pred_beer4) <- (pred_beer4 == 999999)

iv_reg4 <- lm(growth_rates ~ pred_beer4 + beer_L + spirits_L + wine_L + gdp_L + as.factor(st_fixedeff))
summary(iv_reg4)

# what if instrument instead for fraction sold on draft not cans or bottles?
iv_reg5 <- lm(frac_draft ~ beertax)
summary(iv_reg5)
pred_beer5 <- predict(iv_reg5)

iv_reg6 <- lm(growth_rates ~ pred_beer5 + gdp_L + as.factor(st_fixedeff))
summary(iv_reg6)

# try other variations: logs? interactions? lags of packaging? get data for other variables? More years?


# --------------------------------------------------------
# --------------------------------------------------------
# Multi Level Models
# --------------------------------------------------------
# need to install this package then open
library(multilevel)
rm(list = ls(all = TRUE))

# read SPSS data
library(foreign)
data1 <-read.spss("C:\\Users\\Kevin\\Documents\\CCNY\\data for classes\\wvs1981_2008_work.sav", use.value.labels=F, to.data.frame=T)
# obviously, change your directory as appropriate - just note double back-slash

str(data1)
head(data1)
summary(data1)

# c006 is satisfaction with financial situation, scale 0-10 (assume for now it's continuous); look at influenced by a189-a198 various things important to the person about job etc, also x001 (gender), x003 Age, x025 (Educ), x036 (Profession/Job), x047 (Scale of incomes), s003 (Country/Region)

data2<-na.exclude(data1[,c("c006", "a189", "a190",  "a191",  "a192",  "a193",  "a194",  "a195",  "a196",  "a197",  "a198", "x001", "x003", "x025", "x036", "x047", "s003")])


model1 <- lme(c006 ~ 1, random=~1|x025, data=data2)
summary(model1)
vc1 <- as.numeric(VarCorr(model1))
ICC_m1 <- vc1[1]/(vc1[1]+vc1[2])

# compare a simple regression
model1lm <- lm(c006 ~ factor(x025), data=data2)
summary(model1lm)

# find averages by ed_category
tmp1 <-aggregate(cbind(data2$c006,data2$x047),list(data2$x025),mean)
names(tmp1) <- c("x025","grp_c006","grp_x047") 
data3 <- merge(data2,tmp1, by = "x025")

model2 <- lme(c006 ~ x001 + x003, random=~1|x025, data=data3)
summary(model2)

# random slopes
model3 <- lme(c006 ~ x001 + x003, random=~x001|x025, data=data3)
summary(model3)