BYU
Go to RouteY Brigham Young University

Richard Butler

SAS Program Example

*SAS wage_LM_tests.sas ******************************************************;
* in SAS every statement ends with a semicolon, so no delimiter command is needed;
* sentences that start like this with a * are non-executable, comment statements;
* comments can also be set off with the /* stuff here */ type of commenting;
/* this is example of this alternative way to comment */
/* DATA step one: read the raw text file and build the sex and race indicators.   */
/* SAS keeps data manipulation (DATA steps) apart from estimation (PROC steps);   */
/* unlike Stata the two cannot be intermingled, and each step should end with a   */
/* RUN statement. A single = sign serves for both assignment and equality tests.  */
data one;
    infile "D:\Documents and Settings\Richard Butler\My Documents\my2000docs\BYU\econ388\classrm_data\ut_ind_occ.txt";
    input wklywg mjind mjocc age school marital race sex stateid;

    /* male: 1 when sex is 1, 0 when sex is 2, left missing for any other value */
    select (sex);
        when (1) male = 1;
        when (2) male = 0;
        otherwise;
    end;

    /* white: missing when race is missing, 1 when race is 1, 0 for anything else */
    if race = . then white = .;
    else if race = 1 then white = 1;
    else white = 0;
run;
/* Summary statistics for the two indicator variables built in data set one. */
proc means data=one;
    var male white;
run;

/* OLS of the weekly wage on age and sex. The ACOV option additionally prints */
/* the heteroscedasticity-consistent covariance matrix of the estimates.      */
/* RUN executes the model and QUIT ends the interactive PROC REG step, so the */
/* step no longer depends on the next DATA statement to close it.             */
proc reg data=one;
    model wklywg = age sex / acov;
run;
quit;
/* DATA step two: start from data set one and add the education, occupation */
/* and industry dummy variables plus the log weekly wage used by the        */
/* regressions that follow.                                                 */
data two;
    set one;

    /* ---- education dummies, based on the CPS codes held in SCHOOL ----       */
    /* Omitted category: graduate training (beyond college).                    */
    /* NOTE(review): the original notes mention skipping children (A_HGA=0),    */
    /* but no record is dropped here; any non-missing code outside 31-43 gets 0 */
    /* on all four dummies and so lands in the omitted category - confirm that  */
    /* such records are absent from the input file.                             */
    if school in (31, 32, 33, 34, 35, 36, 37, 38) then no_hi_sc = 1;
    else no_hi_sc = 0;
    if school = 39 then high_sch = 1;
    else high_sch = 0;
    if school in (40, 41, 42) then some_col = 1;
    else some_col = 0;
    if school = 43 then college = 1;
    else college = 0;

    /* a missing SCHOOL code makes every education variable missing */
    if school = . then do;
        no_hi_sc = .; high_sch = .; some_col = .; college = .;
        educ_att = .; yrhs = .; yrcol = .;
    end;

    /* ---- educational attainment in years ----                                */
    /* NOTE(review): code 45 is assigned more years (22) than code 46 (21);     */
    /* this is kept exactly as written - confirm it is intended.                */
    if school = 31 then educ_att = 0;
    if school = 32 then educ_att = 4;
    if school = 33 then educ_att = 6;
    if school = 34 then educ_att = 8;
    if school = 35 then educ_att = 9;
    if school = 36 then educ_att = 10;
    if school = 37 then educ_att = 11;
    if school in (38, 39) then educ_att = 12;
    if school = 40 then educ_att = 13;
    if school in (41, 42) then educ_att = 14;
    if school = 43 then educ_att = 16;
    if school = 44 then educ_att = 18;
    if school = 45 then educ_att = 22;
    if school = 46 then educ_att = 21;

    /* ---- the same attainment split into school years and college years ---- */
    if school = 31 then yrhs = 0;
    if school = 32 then yrhs = 4;
    if school = 33 then yrhs = 6;
    if school = 34 then yrhs = 8;
    if school = 35 then yrhs = 9;
    if school = 36 then yrhs = 10;
    if school = 37 then yrhs = 11;
    if school in (38, 39) then yrhs = 12;
    if school = 40 then yrcol = 1;
    if school in (41, 42) then yrcol = 2;
    if school = 43 then yrcol = 4;
    if school = 44 then yrcol = 6;
    if school = 45 then yrcol = 10;
    if school = 46 then yrcol = 9;
    /* codes 31-39 have no college years; codes 40-46 have the full 12 school years */
    if (school >= 31) and (school <= 39) then yrcol = 0;
    if (school >= 40) and (school <= 46) then yrhs = 12;

    /* ---- occupation dummies, based on the A_MJOCC codes held in MJOCC ----   */
    /* Omitted category per the original notes: laborers, farmers, foresters.   */
    /* NOTE(review): the original notes mention skipping children and the armed */
    /* forces, but no record is dropped in this step.                           */
    if mjocc in (1, 2) then exec = 1;
    else exec = 0;
    if mjocc in (3, 4) then tech_sal = 1;
    else tech_sal = 0;
    if mjocc in (5, 6, 7, 8) then serv_occ = 1;
    else serv_occ = 0;
    if mjocc in (9, 10, 11) then oper_occ = 1;
    else oper_occ = 0;
    /* a missing occupation makes all four dummies missing; the original coded */
    /* oper_occ as the number 1 here (written 1.) instead of missing (.)       */
    if mjocc = . then do;
        exec = .; tech_sal = .; serv_occ = .; oper_occ = .;
    end;

    /* ---- industry dummies, based on the A_MJIND codes held in MJIND ----     */
    /* Omitted category: every other non-missing industry code.                 */
    /* NOTE(review): the original notes mention skipping children and the not   */
    /* employed, but no record is dropped in this step.                         */
    if mjind in (1, 2, 3, 21) then ag_cnstr = 1;
    else ag_cnstr = 0;
    if mjind in (4, 5) then manuf = 1;
    else manuf = 0;
    if mjind in (9, 10) then trade = 1;
    else trade = 0;
    if mjind = 22 then pub_admn = 1;
    else pub_admn = 0;
    if mjind = . then do;
        ag_cnstr = .; manuf = .; trade = .; pub_admn = .;
    end;

    /* log weekly wage; for a zero, negative or missing wage lnwage stays       */
    /* missing, which is what LOG would return, but without the invalid         */
    /* argument notes in the SAS log                                            */
    if wklywg > 0 then lnwage = log(wklywg);
run;
/* ------------------------------------------------------------------------ */
/* Test 1: the usual F-statistic approach to testing for education effects. */
/* Fit the full log-wage model, then jointly test that the coefficients on  */
/* the four education dummies are all zero.                                 */
/* ------------------------------------------------------------------------ */
proc reg data=two;
    model lnwage = age white male
                   no_hi_sc high_sch some_col college
                   exec tech_sal serv_occ oper_occ
                   ag_cnstr manuf trade pub_admn;
    test no_hi_sc = 0, high_sch = 0, some_col = 0, college = 0;
run;
/* ------------------------------------------------------------------------ */
/* Test 2: the LM approach to testing for education effects, in two stages. */
/* Stage 1: fit the model without the education dummies and save its        */
/* residuals as RESIDS in data set LNWG_RESIDS.                             */
/* ------------------------------------------------------------------------ */
proc reg data=two;
    model lnwage = age white male
                   exec tech_sal serv_occ oper_occ
                   ag_cnstr manuf trade pub_admn;
    output out=lnwg_resids residual=resids;
run;
/* Stage 2: regress the stage-1 residuals on the full regressor list.        */
/* RSQUARE on the PROC statement adds _P_ (parameters including the          */
/* intercept), _EDF_ (error degrees of freedom) and _RSQ_ (R-square) to the  */
/* OUTEST= data set EST.                                                      */
proc reg data=lnwg_resids outest=est rsquare;
    model resids = age white male
                   no_hi_sc high_sch some_col college
                   exec tech_sal serv_occ oper_occ
                   ag_cnstr manuf trade pub_admn / aic;
run;
quit;

/* LM statistic for the same hypothesis as test 1: N times the stage-2       */
/* R-square. N is the number of observations the stage-2 regression used,    */
/* recovered as _P_ + _EDF_. The statistic is compared with a chi-square     */
/* distribution with 4 degrees of freedom, one per education dummy.          */
/* This replaces a disabled draft that tried to combine EST with a PROC      */
/* MEANS count of the residuals.                                             */
/* NOTE(review): stage 1 may use more observations than stage 2, because     */
/* records with a missing SCHOOL code enter only stage 1 - confirm whether   */
/* both stages should be restricted to the same sample.                      */
data next;
    set est;
    nobs = _p_ + _edf_;
    lm = nobs * _rsq_;
    p_value = 1 - probchi(lm, 4);
run;

proc print data=next noobs;
    var nobs _rsq_ lm p_value;
run;