# source("load_IPUMSdata.R", echo=TRUE) # load packages zoo, sandwich, lmtest # library("zoo") # library("sandwich") # library("lmtest") load_data_new <- 0 if(load_data_new){ rm(list = ls(all = TRUE)) dat1 = read.csv("ACS_2008_2011_NYC_med.csv") # first few lines: # AGE,female,educ_nohs,educ_hs,educ_somecoll,educ_collassoc,educ_coll,educ_adv,educ_indx,boro_bx,boro_m,boro_si,boro_bk,boro_qns,boroughs,commute_car,commute_bus,commute_subway,commute_other,commute_type,africanamerican,nativeamerican,asianamerican,Hispanic,raceother,kids_under5,has_kids,non_citizen,veteran,own_dwelling,rent_dwelling,anc_euro,anc_centamerica,anc_southamerica,anc_spanishcarib,anc_carib,anc_MENA,anc_africa,anc_asiaindia,anc_seasia,anc_asiachinese,anc_asiajapankorea,anc_asiapacific,anc_nativeindian,YEAR,DATANUM,SERIAL,NUMPREC,HHWT,HHTYPE,REGION,PUMA,PUMARES2MIG,PUMASUPR,GQ,OWNERSHP,OWNERSHPD,MORTGAGE,MORTGAG2,MORTAMT1,MORTAMT2,OWNCOST,RENT,RENTGRS,HHINCOME,FOODSTMP,ROOMS,BUILTYR2,UNITSSTR,BEDROOMS,VEHICLES,PERNUM,PERWT,SPLOC,NCHILD,NCHLT5,SEX,MARST,RACE,RACED,BPL,BPLD,ANCESTR1,ANCESTR2,CITIZEN,YRIMMIG,LANGUAGE,HISPAN,HISPAND,EDUC,EDUCD,EMPSTAT,EMPSTATD,OCC,IND,WKSWORK2,INCTOT,FTOTINC,INCWAGE,POVERTY,MIGRATE1,MIGRATE1D,MIGMET1,MIGPUMA1,VETSTAT,VETSTATD,VETOTHER,TRANWORK,TRANTIME,DEPARTS,ARRIVES # 94,1,1,0,0,0,0,0,1,0,0,0,0,1,5,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,2011,5,3801258,2,14,3,12,4106,41,36112,1,2,22,0,0,0,0,99999,559,559,19556,1,4,5,9,3,9,1,15,0,1,0,2,6,7,700,260,26010,275,999,2,1972,12,4,498,0,2,3,30,0,0,0,4242,19556,0,153,1,10,0,0,1,11,0,0,0,0,0 # 54,1,1,0,0,0,0,0,1,0,0,0,0,1,5,0,1,0,0,2,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,2011,5,3801258,2,14,3,12,4106,41,36112,1,2,22,0,0,0,0,99999,559,559,19556,1,4,5,9,3,9,2,27,0,0,0,2,3,7,700,260,26010,275,999,2,1972,12,4,498,0,2,1,10,9640,1770,6,15314,19556,15314,153,1,10,0,0,1,13,0,31,120,617,819 # 41,1,0,0,0,0,1,0,5,0,0,0,0,1,5,0,0,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,2011,5,3801260,4,36,1,12,4105,41,36113,1,1,13,3,1,2587,0,2932,0,0,391531,1,6,3,3,4,1,1,36,2,2,1,2,1,2,200,260,26057,335,999,2,1985,1,0,0,10,101,2,20,560,9590,0,517,391531,0,501,1,10,0,0,1,11,0,0,0,0,0 } Y <- dat1$INCWAGE X <- dat1$AGE summary(Y) summary(X) regression1 <- lm(Y ~ X) summary(regression1) coeftest(regression1, df = Inf, vcov = vcovHC(regression1, type = "const")) coeftest(regression1, df = Inf, vcov = vcovHC(regression1)) # alt version, where labels help a bit more summary(dat1$INCWAGE) summary(dat1$AGE) regression1 <- lm(dat1$INCWAGE ~ dat1$AGE) summary(regression1) coeftest(regression1, df = Inf, vcov = vcovHC(regression1)) # worry about zeros subgroup2 <- (dat1$INCWAGE > 0) summary(dat1$INCWAGE[subgroup2]) summary(dat1$AGE[subgroup2]) regression2 <- lm(dat1$INCWAGE[subgroup2] ~ dat1$AGE[subgroup2]) summary(regression2) coeftest(regression2, df = Inf, vcov = vcovHC(regression2)) # prime-age subgroup3 <- (dat1$INCWAGE > 0) & (dat1$AGE >= 25) & (dat1$AGE <= 55) summary(dat1$INCWAGE[subgroup3]) summary(dat1$AGE[subgroup3]) regression3 <- lm(dat1$INCWAGE[subgroup3] ~ dat1$AGE[subgroup3]) summary(regression3) coeftest(regression3, df = Inf, vcov = vcovHC(regression3))