# lecturenotes8.r # ----------------------------------------- # Beer # following Cesur & Kelly (2013), "Who Pays the Bar Tab? Beer Consumption and Economic Growth in the United States," Economic Inquiry. # ----------------------------------------- # of course, those authors are judged innocent of anything I do! rm(list = ls(all = TRUE)) setwd("C:\\Users\\Kevin\\Documents\\CCNY\\data for classes\\beer_data") # each csv has years 1997-2011 as columns, 51 states (incl DC) as rows growth_rates <- as.matrix(read.csv("growth.csv", header = FALSE)) beertax <- as.matrix(read.csv("beertax.csv", header = FALSE)) gdp_pc <- as.matrix(read.csv("gdp_pc.csv", header = FALSE)) spirits_pc <- as.matrix(read.csv("spirits_pc.csv", header = FALSE)) wine_pc <- as.matrix(read.csv("wine_pc.csv", header = FALSE)) beer_pc <- as.matrix(read.csv("beer_pc.csv", header = FALSE)) # make lags - long coding but a bit more evident what's going on gdp_L <- c(rep(999999,51),gdp_pc[,1],gdp_pc[,2],gdp_pc[,3],gdp_pc[,4],gdp_pc[,5],gdp_pc[,6],gdp_pc[,7],gdp_pc[,8],gdp_pc[,9],gdp_pc[,10],gdp_pc[,11],gdp_pc[,12],gdp_pc[,13],gdp_pc[,14]) is.na(gdp_L) <- (gdp_L == 999999) spirits_L <- c(rep(999999,51),spirits_pc[,1],spirits_pc[,2],spirits_pc[,3],spirits_pc[,4],spirits_pc[,5],spirits_pc[,6],spirits_pc[,7],spirits_pc[,8],spirits_pc[,9],spirits_pc[,10],spirits_pc[,11],spirits_pc[,12],spirits_pc[,13],spirits_pc[,14]) is.na(spirits_L) <- (spirits_L == 999999) wine_L <- c(rep(999999,51),wine_pc[,1],wine_pc[,2],wine_pc[,3],wine_pc[,4],wine_pc[,5],wine_pc[,6],wine_pc[,7],wine_pc[,8],wine_pc[,9],wine_pc[,10],wine_pc[,11],wine_pc[,12],wine_pc[,13],wine_pc[,14]) is.na(wine_L) <- (wine_L == 999999) beer_L <- c(rep(999999,51),beer_pc[,1],beer_pc[,2],beer_pc[,3],beer_pc[,4],beer_pc[,5],beer_pc[,6],beer_pc[,7],beer_pc[,8],beer_pc[,9],beer_pc[,10],beer_pc[,11],beer_pc[,12],beer_pc[,13],beer_pc[,14]) is.na(beer_L) <- (beer_L == 999999) NN_TT <- dim(growth_rates) tot_N <- NN_TT[1]*NN_TT[2] dim(growth_rates) <- c(tot_N,1) dim(gdp_pc) <- c(tot_N,1) dim(beertax) <- c(tot_N,1) dim(spirits_pc) <- c(tot_N,1) dim(wine_pc) <- c(tot_N,1) dim(beer_pc) <- c(tot_N,1) # state fixed effects st_fixedeff <- rep(1:51,15) frac_metal <- as.matrix(read.csv("frac_metal.csv", header = FALSE)) frac_glass <- as.matrix(read.csv("frac_glass.csv", header = FALSE)) frac_refill <- as.matrix(read.csv("frac_refill.csv", header = FALSE)) frac_draft <- as.matrix(read.csv("frac_draft.csv", header = FALSE)) # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # up to here, saved workspace #beer_states <- data.frame(cbind(growth_rates, gdp_pc, beertax, spirits_pc, wine_pc, beer_pc, gdp_L, spirits_L, wine_L, beer_L, frac_metal, frac_glass, frac_refill, frac_draft)) regression1 <- lm(growth_rates ~ beer_pc + gdp_L + as.factor(st_fixedeff)) summary(regression1) # but want instruments for beer consumption per capita (beer_pc), authors suggest beertax # simple summary in http://www.r-bloggers.com/a-simple-instrumental-variables-problem/ iv_reg1 <- lm(beer_pc ~ beertax) summary(iv_reg1) pred_beer <- predict(iv_reg1) iv_reg2 <- lm(growth_rates ~ pred_beer + gdp_L + as.factor(st_fixedeff)) summary(iv_reg2) library(AER) iv_reg_package1 <- ivreg(growth_rates ~ beer_pc + gdp_L + as.factor(st_fixedeff) | beertax + gdp_L + as.factor(st_fixedeff)) summary(iv_reg_package1) # more details # beer tax is weak instrument - state fixed effects explain 94% of var reg1 <- lm(beertax ~ as.factor(st_fixedeff)) summary(reg1) iv_reg3 <- lm(beer_pc ~ beertax + beer_L + spirits_L + wine_L) summary(iv_reg3) pred_beer3 <- predict(iv_reg3) pred_beer4 <- c(rep(999999,51),pred_beer3) is.na(pred_beer4) <- (pred_beer4 == 999999) iv_reg4 <- lm(growth_rates ~ pred_beer4 + beer_L + spirits_L + wine_L + gdp_L + as.factor(st_fixedeff)) summary(iv_reg4) # what if instrument instead for fraction sold on draft not cans or bottles? iv_reg5 <- lm(frac_draft ~ beertax) summary(iv_reg5) pred_beer5 <- predict(iv_reg5) iv_reg6 <- lm(growth_rates ~ pred_beer5 + gdp_L + as.factor(st_fixedeff)) summary(iv_reg6) # try other variations: logs? interactions? lags of packaging? get data for other variables? More years? # -------------------------------------------------------- # -------------------------------------------------------- # Multi Level Models # -------------------------------------------------------- # need to install this package then open library(multilevel) rm(list = ls(all = TRUE)) # read SPSS data library(foreign) data1 <-read.spss("C:\\Users\\Kevin\\Documents\\CCNY\\data for classes\\wvs1981_2008_work.sav", use.value.labels=F, to.data.frame=T) # obviously, change your directory as appropriate - just note double back-slash str(data1) head(data1) summary(data1) # c006 is satisfaction with financial situation, scale 0-10 (assume for now it's continuous); look at influenced by a189-a198 various things important to the person about job etc, also x001 (gender), x003 Age, x025 (Educ), x036 (Profession/Job), x047 (Scale of incomes), s003 (Country/Region) data2<-na.exclude(data1[,c("c006", "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", "a198", "x001", "x003", "x025", "x036", "x047", "s003")]) model1 <- lme(c006 ~ 1, random=~1|x025, data=data2) summary(model1) vc1 <- as.numeric(VarCorr(model1)) ICC_m1 <- vc1[1]/(vc1[1]+vc1[2]) # compare a simple regression model1lm <- lm(c006 ~ factor(x025), data=data2) summary(model1lm) # find averages by ed_category tmp1 <-aggregate(cbind(data2$c006,data2$x047),list(data2$x025),mean) names(tmp1) <- c("x025","grp_c006","grp_x047") data3 <- merge(data2,tmp1, by = "x025") model2 <- lme(c006 ~ x001 + x003, random=~1|x025, data=data3) summary(model2) # random slopes model3 <- lme(c006 ~ x001 + x003, random=~x001|x025, data=data3) summary(model3)