/***********************************************************************
 * Program: C:\NHANES\CleanRecode_MeansComparison.sas
 * Purpose: Check distributions and outliers.
 *          Compares the weighted mean of serum total cholesterol
 *          (lbxtc) by race/ethnicity with and without three
 *          high-cholesterol sample persons (SPs).
 ***********************************************************************/

/* Change the directory to the location where you saved the dataset
   downloaded from the sample code and dataset downloads module,
   for example "c:\nhanes\data". */
libname NH "C:\your_dir";

/* Analysis subset shared by both PROC MEANS runs, kept in one place so
   the two comparisons cannot drift apart.
   NOTE(review): presumably ridstatr=2 selects examined participants and
   ridageyr is age in years - confirm against the NHANES codebook. */
%let analysis_subset = ridstatr=2 and ridageyr>=20;

/* Working copy of the data without the 3 SPs that have high serum
   cholesterol; the permanent dataset NH.demo_BP2b is left untouched. */
data exclu_3SPs;
    set NH.demo_BP2b;
    if seqn in (10494, 13996, 17821) then delete;
run;

/* Display labels for the race/ethnicity codes in ridreth1. */
proc format;
    value race
        1 = 'Mexican American'
        2 = 'Other Hispanic'
        3 = 'Non-Hispanic white'
        4 = 'Non-Hispanic Black'
        5 = 'Other Race - Including Multi-Racial';
run;

/* Weighted mean and standard error of lbxtc by race/ethnicity,
   outliers included.
   NOTE(review): PROC MEANS with a WEIGHT statement does not use the
   survey strata/PSU design, so the standard errors here are only for a
   quick comparison - confirm whether a survey procedure is needed for
   reported estimates. */
proc means data=NH.demo_BP2b mean stderr maxdec=1;
    where &analysis_subset;
    class ridreth1;
    var lbxtc;
    weight wtmec4yr;
    format ridreth1 race.;
    title "Mean of serum total cholesterol - include outliers";
run;

/* Same analysis on the working copy with the three outliers removed. */
proc means data=exclu_3SPs mean stderr maxdec=1;
    where &analysis_subset;
    class ridreth1;
    var lbxtc;
    weight wtmec4yr;
    format ridreth1 race.;
    title "Mean of serum total cholesterol - exclude three outliers";
run;