# lecturenotes9.r rm(list = ls(all = TRUE)) setwd("C:\\Users\\Kevin\\Documents\\CCNY\\data for classes\\NHIS2009") # read SPSS data library(foreign) data1 <-read.spss("NHIS_2009_v1.sav", use.value.labels=T, to.data.frame=T) # obviously, change your directory as appropriate - just note double back-slash str(data1[1:47]) #head(data1) #summary(data1) # fugly but does the job dat_names1 <- names(data1) for(indx in 1:47){ print(dat_names1[indx]) print(mean(data1[[indx]], na.rm = TRUE)) } regn_logit1 <- glm(HI_private ~ AGE_P + I(AGE_P^2) + female + afam + Amindian + asian_in + asian_ch + asian_ph + asian_oth + race_oth + Hispanic + mexican + boricua + cuban + dominican + ed_hs + ed_collnd + ed_ASvoc + ed_ASacad + ed_coll + ed_adv + immig + marrd + divwidsp + veteran + region2 + region3 + region4, family = binomial, data = data1) summary(regn_logit1) regn_probit1 <- glm(HI_private ~ AGE_P + I(AGE_P^2) + female + afam + Amindian + asian_in + asian_ch + asian_ph + asian_oth + race_oth + Hispanic + mexican + boricua + cuban + dominican + ed_hs + ed_collnd + ed_ASvoc + ed_ASacad + ed_coll + ed_adv + immig + marrd + divwidsp + veteran + region2 + region3 + region4, family = binomial(link = 'probit'), data = data1) summary(regn_probit1) # ---------------------- # Quantile Regressions # ---------------------- # kernel density (nonparametric estimators) ? rm(list = ls(all = TRUE)) setwd("C:\\Users\\Kevin\\Documents\\CCNY\\data for classes\\CPS2010") data1 <-read.spss("cps_m2010_work3.sav", use.value.labels=T, to.data.frame=T) str(data1) data1$WEXP <- as.numeric(data1$WEXP) # restrict to prime-age 25-55 and IncomeWage > 0 and FT worker restrict1 <- ((data1$A_AGE >= 25) & (data1$A_AGE <= 55) & (data1$WSAL_VAL > 0) & ((data1$WEXP == 1) | (data1$WEXP == 2) | (data1$WEXP == 3) )) data2 <- subset(data1, restrict1) str(data2) #Note WEXP labels: # 0 'Not In Universe' # 1 'FT/50-52 weeks' # 2 'FT/48-49 weeks' # 3 'FT/40-47 weeks' # 4 'FT/27-39 weeks' # 5 'FT/14-26 weeks' # 6 'FT/13 weeks or less' # 7 'PT/50-52 weeks' # 8 'PT/48-49 weeks' # 9 'PT/40-47 weeks' # 10 'PT/27-39 weeks' # 11 'PT/14-26 weeks' # 12 'PT/13 weeks or less' # 13 'Nonworker' restrict2 <- as.logical(data2$ed_coll) data3 <- subset(data2, restrict2) Age25 <- data3$WSAL_VAL[data3$A_AGE == 25] Age30 <- data3$WSAL_VAL[data3$A_AGE == 30] Age35 <- data3$WSAL_VAL[data3$A_AGE == 35] Age40 <- data3$WSAL_VAL[data3$A_AGE == 40] Age45 <- data3$WSAL_VAL[data3$A_AGE == 45] Age50 <- data3$WSAL_VAL[data3$A_AGE == 50] p_tiles <- c(0.1, 0.25, 0.5, 0.75, 0.9) q_25 <- quantile(Age25, p_tiles) q_30 <- quantile(Age30, p_tiles) q_35 <- quantile(Age35, p_tiles) q_40 <- quantile(Age40, p_tiles) q_45 <- quantile(Age45, p_tiles) q_50 <- quantile(Age50, p_tiles) q_plot <- cbind(q_25,q_30,q_35,q_40,q_45,q_50) x_val <- as.matrix(c(25, 30, 35, 40, 45, 50)) matplot(x_val,t(q_plot), type = "l", xlab= "Age", ylab = "Wage of college grads at Percentiles 10 25 50 75 90") # so it would be sensible to look at quantiles in regression context regression1 <- lm(WSAL_VAL ~ A_AGE + I(A_AGE^2) + female + afam + asian+ Amindian + Hispanic + immig + imm2gen + ed_hs + ed_collnd + ed_ASvoc + ed_ASacad + ed_coll + ed_adv + union + veteran, data=data2) summary(regression1) regression2 <- lm(WSAL_VAL ~ A_AGE + I(A_AGE^2) + female + afam + asian+ Amindian + Hispanic + immig + imm2gen + ed_hs + ed_collnd + ed_ASvoc + ed_ASacad + ed_coll + ed_adv + union + veteran + as.factor(WEIND) + as.factor(POCCU2), data=data2) summary(regression2) library(quantreg) quantreg1 <- rq(WSAL_VAL ~ A_AGE + I(A_AGE^2) + female + afam + asian+ Amindian + Hispanic + immig + imm2gen + ed_hs + ed_collnd + ed_ASvoc + ed_ASacad + ed_coll + ed_adv + union + veteran, tau=p_tiles, data=data2) summary(quantreg1) plot(quantreg1)