****************************************************************************************************************; * Example SUDAAN code to replicate NCHS Data Brief No. 52, Figure 2 *; * Mean blood mercury and lead levels in pregnant women aged 18-49 years, by age group: United States 2003-2008 *; * *; * Jones L, Parker JD, Mendola P. Blood lead and mercury levels in pregnant women in the United States, *; * NCHS Data Brief. No 52. Hyattsville, MD: National Center for Health Statistics. 2010. *; * *; * Available at: https://www.cdc.gov/nchs/products/databriefs/db52.htm *; ****************************************************************************************************************; options nocenter nodate nonumber pagesize=100 linesize=150; OPTIONS FORMCHAR="|----|+|---+=|-/\<>*"; %put Run in SAS &sysver (maintenance release and release year: &sysvlong4) and SUDAAN Release 11.0.1 (SAS-Callable, 32 bit version); ** Macro To Download Data from NHANES website **; %macro CreateDS(myDS); %let i = 1; %let DS = %scan(&myDS, &i); %do %until(&DS = %nrstr()); %let Suffix = %lowcase(%substr(&DS, %eval(%length(&DS)-1))); %if (&Suffix = _j) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2017-2018/&DS..xpt"; %end; %else %if (&Suffix = _i) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2015-2016/&DS..xpt"; %end; %else %if (&Suffix = _h) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2013-2014/&DS..xpt"; %end; %else %if (&Suffix = _g) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2011-2012/&DS..xpt"; %end; %else %if (&Suffix = _f) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2009-2010/&DS..xpt"; %end; %else %if (&Suffix = _e) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2007-2008/&DS..xpt"; %end; %else %if (&Suffix = _d) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2005-2006/&DS..xpt"; %end; %else %if (&Suffix = _c) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2003-2004/&DS..xpt"; %end; %else %if (&Suffix = _b) %then %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/2001-2002/&DS..xpt"; %end; %else %do; filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/1999-2000/&DS..xpt"; %end; libname xptIn xport; data &DS; set xptIn.&DS; run; libname xptIn clear; filename xptIn clear; %let i = %eval(&i+1); %let DS = %scan(&myDS, &i); %end; %mend CreateDS; ** Call macro to import datafiles for multiple individual components **; * Demographic (DEMO) *; * Cadmium, Lead, & Total Mercury - Blood (named L06BMT_C for 2003-2004, PbCd_x for 2005-2006 and 2007-2008) *; %CreateDS(demo_c demo_d demo_e L06BMT_C PbCd_D PbCd_E ); ** Append data files from multiple survey cycles **; * Demographic (DEMO) *; data demo; set demo_c demo_d demo_e; run; * Cadmium, Lead, & Total Mercury - Blood (note file name change between 2003-2004 and 2005-2006 cycles) *; *LBXTHG - Mercury, total (ug/L) ; * LBXBPB - Lead (ug/dL) ; data PbCd (keep = seqn LBXTHG LBXBPB); set L06BMT_C PbCd_D PbCd_E ; run; data baseDS; merge demo (in = a) PbCd; by seqn; if a; * Create variable to define analysis population: pregnant adults age 18 - 49 (inclusive) with positive exam weight *; inAnalysis= (18<=ridageyr<=49 and RIDEXPRG=1 and wtmec2yr>0); * Create 6-year MEC exam weight *; wtmec6yr=wtmec2yr/3; * Create age categories *; if 18<=ridageyr<25 then ageCat=1; else if 25<=ridageyr<30 then ageCat=2; else if 30<=ridageyr<35 then ageCat=3; else if 35<=ridageyr<50 then ageCat=4; * Log-transform the outcome variables (for method 2 of calculating the geometric mean) *; LBXTHG_log=log(LBXTHG); LBXBPB_log=log(LBXBPB); run; * Define formats for labeling output *; proc format; value ageLabelF 1="18-24 years" 2="25-29 years" 3="30-34 years" 4="35 years or over" 0,-2=" " ; value varlabel 1="Mercury (ug/L)" 2="Lead (ug/dL)" ; run; * First sort the dataset by strata (sdmvstra) and PSU (sdmvpsu) before calling SUDAAN procedures*; PROC SORT DATA =baseDS; BY sdmvstra sdmvpsu ; RUN ; ******************************************************************************; ** Calculate geometric and arithmetic means of mercury and lead levels **; ** Data brief 52, Figure 2 **; ******************************************************************************; ** Method 1: Request the geometric mean and standard error in PROC DESCRIPT **; * Note that SUDAAN does not calculate the confidence intervals of the geometric mean *; PROC DESCRIPT DATA = baseDS FILETYPE = SAS DESIGN = WR nomarg geometric; * geometric option requests that geometric means be calculated *; NEST SDMVSTRA SDMVPSU / MISSUNIT; * specify survey design variables *; WEIGHT wtmec6yr; * specify sampling weight *; VAR LBXTHG LBXBPB; * specify variable(s) to analyze *; class ageCat / nofreq; * categorical variables *; SUBPOPX inAnalysis=1; * Specify the subpopulation of interest *; * Table(s) statement: specifies cross-tabulations for which estimates are requested. If a table statement is not present, a one-dimensional distribution is generated for each variable in the class statement.*; tables ageCat; rformat ageCat ageLabelF.; * print and output statements request geomean (geometric mean) and segeomean (standard error of the geometric mean) statistics *; print nsum geomean segeomean /STYLE=NCHS nohead notime nodate nsumfmt=f6.0 geomeanfmt=f6.2 segeomeanfmt=f6.2; OUTPUT nsum geomean segeomean / geomeanfmt=f6.2 segeomeanfmt=f6.2 FILENAME=fig2 FILETYPE=SAS REPLACE; rtitle "Mean blood mercury and lead levels in pregnant women aged 18-49 years, by age group: United States 2003-2008"; rtitle "Geometric mean from Method 1: Proc Descript"; run; * Display results in a table *; proc print data = fig2 noobs label; var variable ageCat nsum geomean segeomean ; format variable varlabel. geomean segeomean 5.2; title "Mean blood mercury and lead levels in pregnant women aged 18-49 years, by age group: United States 2003-2008"; title2 "Geometric mean from Method 1: Request the geometric mean and standard error in PROC DESCRIPT"; run; title; ** Method 2: Request the arithmetic mean of the log-transformed variables and then transform back to the original scale **; * This method allows the user to calculate a confidence interval for the geometric mean *; PROC DESCRIPT DATA = baseDS FILETYPE = SAS DESIGN = WR nomarg atlevel1=1 atlevel2=2 ; * atlevel1 and atlevel2 options request the number of unique strata and PSUs *; NEST SDMVSTRA SDMVPSU / MISSUNIT;* specify survey design variables *; WEIGHT wtmec6yr;* specify sampling weight *; VAR LBXTHG_log LBXBPB_log;* specify variable(s) to analyze *; class ageCat / nofreq;* categorical variables *; SUBPOPX inAnalysis=1; * Specify the subpopulation of interest *; * Table(s) statement: specifies cross-tabulations for which estimates are requested. If a table statement is not present, a one-dimensional distribution is generated for each variable in the class statement.*; tables ageCat; rformat ageCat ageLabelF.; rtitle "Arithmetic mean of the log-transformed variables"; print nsum mean semean atlev1 atlev2 /style=nchs; OUTPUT nsum mean semean atlev1 atlev2/ FILENAME=fig2_logtmp FILETYPE=SAS REPLACE; run; proc print data=fig2_logtmp noobs label; title "Intermediate results from method 2: arithmetic mean of the log-transformed variables"; run; title; * Transform back into original scale (exponentiate the mean of the log-transformed variables) *; data fig2_logMethod; set fig2_logtmp ; geomean=exp(mean); * approximate the standard error of the geometric mean (see Alf and Grossberg paper for details) *; segeomean= exp(mean) * semean; * calculate the confidence interval around the log-transformed variable, accounting for the degrees of freedom for each subgroup *; * note that the CIs calculated by Proc Descript do not account for the reduction in the degrees of freedom for subgroups *; * calculate degrees of freedom for each subgroup as (# of PSUs with records in the subgroup minus # of strata with records in the subgroup) *; df=atlev2-atlev1; ll=mean+tinv(.025 ,df)*semean; ul=mean+tinv(.975 ,df)*semean; * exponentiate to transform the confidence interval into the original scale *; llgeomean=exp(ll); ulgeomean=exp(ul); label ageCat="Age category" geomean = "Geometric mean" segeomean="SE geometric mean" llgeomean="Lower CL" ulgeomean="Upper CL" ; run; * Display results in a table *; proc print data = fig2_logMethod noobs label; var variable ageCat nsum geomean segeomean llgeomean ulgeomean; format variable varlabel. geomean segeomean llgeomean ulgeomean 5.2; title "Mean blood mercury and lead levels in pregnant women aged 18-49 years, by age group: United States 2003-2008"; title2 "Geometric mean from Method 2: calculated as the mean of the log-transformed values, then exponentiated"; footnote "Confidence intervals were calculated for log-transformed mercury and lead levels and exponentiated for presentation."; footnote2 "Consequently, they cannot be directly derived from standard errors shown in this table."; run; footnote; ********************************; ** References **; ** Alf EF, Grossberg JMJP, Psychophysics. The geometric mean: Confidence limits and significance tests. 1979 26(5):419-421. *;