* clicked on household, person, family variables with no restrictions except utah
* using the same name as CPS default in the read statement
* out of space? put "par 4000" (without the quotes) on the first line
* enlarge the workspace before any data are loaded (guards against "out of space")
par 4000
* data files named in later commands are looked for under e:\
file path e:\
* load 18 variables from the Utah 2002 extract; the names are the CPS defaults
* and the trailing "&" continues the READ command onto the next line
read (utah2002.txt) A_GRSWK PMHRUSLT A_MJIND A_MJOCC A_AGE A_HGA A_MARITL &
A_RACE A_SEX DIS_HP FOWNU6 GMSTCEN HUNDER15 FTOTVAL WSAL_VAL WKSWORK LKNONE HRSWK
* labor market variables include
* a_grswk=usual earnings per week (lots of missing observations)
* wsal_val=total wage/salary earnings of individual last year
* ftotval=total income amount of the family last year
* wkswork=how many weeks did you work last yr, include paid vaca, sick leave
* lknone=how many weeks did you spend looking for work last year
* hrswk=how many hours did you usually work per week
*HENCE, total hours worked last year=wkswork*hrswk
*other family variables include
* hunder15=number of persons under 15 yr old in the household
* fownu6=number of own children in family under 6 years of age
*DIS_HP=disability that prevents or limits work
* 0/1 demographic indicators built from the CPS codes
* disab: only DIS_HP codes 1 (-> 1) and 2 (-> 0) are assigned, so this stays
* an IF pair and observations with any other code are not touched here
if (dis_hp.eq.1) disab=1
if (dis_hp.eq.2) disab=0
* for the remaining indicators the two cases are exhaustive, so one logical
* GENR (1 where the expression is true, 0 where it is false) replaces each
* eq/ne IF pair
* male: a_sex code 1
genr male=(a_sex.eq.1)
* white: a_race code 1
genr white=(a_race.eq.1)
* married: a_maritl code 1 or 2
genr married=((a_maritl.eq.1).or.(a_maritl.eq.2))
* dummy variables for educational attainment follow, based on CPS codes
* the first one indicates to skip those who are children (A_HGA=0)
* the omitted category is those with graduate training (beyond college)
* shorter working names for the CPS education (A_HGA) and age (A_AGE) variables
rename a_hga school
rename a_age age
*skipif (school.eq.0)
* each education dummy is a logical GENR: 1 where the expression is true and
* 0 where it is false, i.e. the same values an eq/ne IF pair would assign
* no_hi_sc: school codes 31 through 38
genr no_hi_sc=((school.eq.31).or.(school.eq.32).or.(school.eq.33).or.(school.eq.34).or.&
(school.eq.35).or.(school.eq.36).or.(school.eq.37).or.(school.eq.38))
* high_sch: school code 39
genr high_sch=(school.eq.39)
* some_col: school codes 40, 41 and 42
genr some_col=((school.eq.40).or.(school.eq.41).or.(school.eq.42))
* college: school code 43
genr college=(school.eq.43)
******************EDUCATIONAL ATTAINMENT VARIABLES FOLLOW *****************
* educ_att: translate each CPS education code in "school" into a number of
* years of schooling, one IF per code (or pair of codes sharing a value)
* codes 31-38 are the ones flagged by the no_hi_sc dummy
if (school.eq.31) educ_att=0
if (school.eq.32) educ_att=4
if (school.eq.33) educ_att=6
if (school.eq.34) educ_att=8
if (school.eq.35) educ_att=9
if (school.eq.36) educ_att=10
if (school.eq.37) educ_att=11
* code 38 (still counted in no_hi_sc) gets the same 12 years as code 39 (high_sch)
if ((school.eq.38).or.(school.eq.39)) educ_att=12
* codes 40-42 are the some_col group
if (school.eq.40) educ_att=13
if ((school.eq.41).or.(school.eq.42)) educ_att=14
* code 43 is the college dummy
if (school.eq.43) educ_att=16
* codes 44-46 are the graduate-training group omitted from the dummies
if (school.eq.44) educ_att=18
* NOTE(review): code 45 maps to 22 but the higher code 46 maps to 21, so the
* scale is not increasing in the code here -- confirm this is intended
if (school.eq.45) educ_att=22
if (school.eq.46) educ_att=21
* NOTE(review): school=0 is not assigned a value by any line above
*the occupational controls follow. these are dummy variables based on A_MJOCC codes
* the first line indicates to skip those who are children or in the armed forces
* eq means equal; ne means not equal; omitted category=laborers, farmers, foresters
*skipif (a_mjocc.eq.14).or.(a_mjocc.eq.0)
* occupation dummies from A_MJOCC, each written as a logical GENR
* (1 where the expression is true, 0 where it is false)
* exec: major occupation codes 1-2
genr exec=((a_mjocc.eq.1).or.(a_mjocc.eq.2))
* tech_sal: codes 3-4
genr tech_sal=((a_mjocc.eq.3).or.(a_mjocc.eq.4))
* serv_occ: codes 5-8
genr serv_occ=((a_mjocc.eq.5).or.(a_mjocc.eq.6).or.(a_mjocc.eq.7).or.(a_mjocc.eq.8))
* oper_occ: codes 9-11
genr oper_occ=((a_mjocc.eq.9).or.(a_mjocc.eq.10).or.(a_mjocc.eq.11))
*the industry controls follow. these are dummy variables based on A_MJIND codes
* the first line indicates to skip those who are children or not employed
* eq means equal; ne means not equal; omitted category is utilities & services
*skipif (a_mjind.eq.0)
* industry dummies from A_MJIND, each written as a logical GENR
* (1 where the expression is true, 0 where it is false)
* ag_cnstr: major industry codes 1, 2, 3 and 21
genr ag_cnstr=((a_mjind.eq.1).or.(a_mjind.eq.2).or.(a_mjind.eq.3).or.(a_mjind.eq.21))
* manuf: codes 4-5
genr manuf=((a_mjind.eq.4).or.(a_mjind.eq.5))
* trade: codes 9-10
genr trade=((a_mjind.eq.9).or.(a_mjind.eq.10))
* pub_admn: code 22
genr pub_admn=(a_mjind.eq.22)
* now for a regression--always check the descriptive statistics first
* restrict the estimation sample before any statistics or regressions are run
* drop observations with zero or negative wage/salary earnings
skipif (wsal_val.le.0)
* drop major occupation codes 14 and 0 (children or armed forces); the whole
* condition sits inside one outer pair of parentheses, matching the
* SKIPIF (expression) form and the IF lines, so both clauses are evaluated
skipif ((a_mjocc.eq.14).or.(a_mjocc.eq.0))
* drop major industry code 0 (children or not employed)
skipif (a_mjind.eq.0)
* drop everyone aged 15 or younger
skipif (age.le.15)
* squared age, used as a regressor in the later OLS commands
genr agesq=age*age
* descriptive statistics first, to check the data before estimating anything
stat
* earnings in levels on demographics, education, occupation and industry
* dummies (linear in age)
ols wsal_val white male age educ_att exec tech_sal serv_occ &
oper_occ ag_cnstr manuf trade pub_admn
* same specification with agesq added
ols wsal_val white male age agesq educ_att exec tech_sal serv_occ &
oper_occ ag_cnstr manuf trade pub_admn
* use test statements to test for "joint significance" of two or more variables;
* the block from the bare "test" down to "end" is one joint test that all four
* occupation coefficients are zero, run on the OLS just above
test
test exec=0
test tech_sal=0
test serv_occ=0
test oper_occ=0
end
*sometimes the natural log transform of nonzero variables is useful
* log of annual wage/salary earnings; observations with wsal_val <= 0 were
* already excluded by the skipif on wsal_val
genr lnwsal=log(wsal_val)
* log-earnings version of the regression with agesq
ols lnwsal white male age agesq educ_att exec tech_sal serv_occ &
oper_occ ag_cnstr manuf trade pub_admn
* tests for heteroskedasticity, which is a problem if the prob signif is small
* the statement below must immediately follow the equation being tested
* ignore the "ARCH TEST" results; it's testing something different
diagnos / het
* the following "hetcov" option automatically controls for heteroskedasticity
* in the reported results; the regressors are the same as in the OLS above
ols lnwsal white male age agesq educ_att exec tech_sal serv_occ &
oper_occ ag_cnstr manuf trade pub_admn / hetcov
* end of the run
stop
end