****************************************************************************************************************************************;
* Description: Example SUDAAN code 
*             (1) Replicates NCHS publication Series 3, No. 46. Anthropometric Reference Data for Children and Adults: United States, 
*                 2015–2018. 44 pp
*             (2) Produces estimates for 2015-2018
* Input data: 2015-2018 Demographic Variables and Sample Weights (DEMO) 
*             2015-2018 Body Measures (BMX) 
* Available at: https://www.cdc.gov/nchs/data/series/sr_03/sr03-046-508.pdf
****************************************************************************************************************************************;

options nocenter nodate nonumber pagesize=100 linesize=150;
OPTIONS FORMCHAR="|----|+|---+=|-/\<>*";

%put Run in SAS &sysver (maintenance release and release year: &sysvlong4)
 and SUDAAN Release 11.0.1 (SAS-Callable, 32 bit version);

** Macro To Download Data from NHANES website **;
%macro CreateDS(myDS);
  %let i = 1;
  %let DS = %scan(&myDS, &i);

  %do %until(&DS = %nrstr());
    %let Suffix = %lowcase(%substr(&DS, %eval(%length(&DS)-1)));
    %if       (&Suffix = _j) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2017/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _i) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2015/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _h) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2013/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _g) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2011/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _f) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2009/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _e) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2007/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _d) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2005/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _c) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2003/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _b) %then %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2001/DataFiles/&DS..xpt"; %end;
    %else %do; filename xptIn url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/1999/DataFiles/&DS..xpt"; %end;
    libname xptIn xport;

    data &DS;
      set xptIn.&DS;
    run;

    libname xptIn clear;
    filename xptIn clear;

    %let i = %eval(&i+1);
    %let DS = %scan(&myDS, &i);
  %end;
%mend CreateDS;


*** Format of Selected Variables ;                                                                              
proc format;                                                                                     
  value sexfmt
    0 = "Total"
    1 = "Male"                         
    2 = "Female"                        
    ;
 
  value age019fmt
    1  = "01:Birth-2 months" 
    2  = "02:3-5 months"
    3  = "03:6-8 months"
    4  = "04:9-11 months" 
    5  = "05:1 year "
    6  = "06:2 years" 
    7  = "07:3 years" 
    8  = "08:4 years" 
    9  = "09:5 years" 
    10 = "10:6 years" 
    11 = "11:7 years" 
    12 = "12:8 years" 
    13 = "13:9 years" 
    14 = "14:10 years" 
    15 = "15:11 years" 
    16 = "16:12 years"
    17 = "17:13 years"
    18 = "18:14 years" 
    19 = "19:15 years" 
    20 = "20:16 years"
    21 = "21:17 years" 
    22 = "22:18 years" 
    23 = "23:19 years" 
    ;

  value age219fmt
    1  = "01:2-5 months"
    2  = "02:6-8 months"
    3  = "03:9-11 months" 
    4  = "04:1 year "
    5  = "05:2 years" 
    6  = "06:3 years" 
    7  = "07:4 years" 
    8  = "08:5 years" 
    9  = "09:6 years" 
    10 = "10:7 years" 
    11 = "11:8 years" 
    12 = "12:9 years" 
    13 = "13:10 years" 
    14 = "14:11 years" 
    15 = "15:12 years"
    16 = "16:13 years"
    17 = "17:14 years" 
    18 = "18:15 years" 
    19 = "19:16 years"
    20 = "20:17 years" 
    21 = "21:18 years" 
    22 = "22:19 years" 
    ;

  value agemos06fmt
    1  = "1:Birth-2 months" 
    2  = "2:3-5     months"
    3  = "3:6       months"
    ;

  value age2060pfmt
    0 = "0:20+   years"
    1 = "1:20-39 years" 
    2 = "2:40-59 years"
    3 = "3:60+   years"
    ;

  value age2080pfmt
    0 = "0:20+   years"
    1 = "1:20-29 years" 
    2 = "2:30-39 years"
    3 = "3:40-49 years"
    4 = "4:50-59 years"
    5 = "5:60-69 years"
    6 = "6:70-79 years"
    7 = "7:80+   years"
    ;

  value racemxfmt
    0="0;Total"
    1="1:Non-Hispanic White"
    2="2:Non-Hispanic Black"
    5="5:Mexican American"
    6="6:Other"
    ;

  value racehoafmt
    0="0:Total"
    1="1:Non-Hispanic White"
    2="2:Non-Hispanic Black"
    3="3:Non-Hispanic Asian"
    4="4:Hispanic"
    6="6:Other"
    ; 

  value racefmt
    1="1:Non-Hispanic White"
    2="2:Non-Hispanic Black"
    3="3:Non-Hispanic Asian"
    4="4:Hispanic"
    5="5:Mexican American"
    ; 

  value dat2yrfmt
    7 = "2011-2012"
    8 = "2013-2014"
    9 = "2015-2016"
    10 = "2017-2018"
    ;

  value dat4yrfmt
        0 = "Total"
    1 = "2011-2014"
    2 = "2015-2018"
    ;

  value pctlfmt
     5 = "5th"
    10 = "10th"
    15 = "15th"
    25 = "25th"
    50 = "50th"
    75 = "75th"
    85 = "85th"
    90 = "90th"
    95 = "95th"
    ;
run;

************************************************************
*** Read in Data Files ***
************************************************************;
%CreateDS(demo_i demo_j bmx_i bmx_j);


*****************************************************
*** Combine datasets from above ***
*** Create characteristics variables for analysis ***
*****************************************************;
*** demo;
data demo1518;
  set demo_i demo_j;
  *keep seqn sddsrvyr riagendr ridexprg ridexagm RIDEXAGM RIDAGEYR WTMEC2YR WTMEC4YR WTMEC2YR WTMEC4YR SDMVSTRA SDMVPSU;
run;


*** BMI;
data bmi1518;
  set bmx_i bmx_j;
run;


*** demo + bmi;
data demobmi_1518;
  merge demo1518 bmi1518;
  by seqn;

  sex = riagendr;

  * BMI calculation;
  ht_m=bmxht/100;
  bmical=round(bmxwt/(ht_m**2),.1);

  *CONVERSION FROM METRIC TO ENGLISH;
  wt_lb=(BMXWT*2.2046);
  ht_inch=(bmxht*0.3937);

  * Age groups 
  *************;
  *** Children;
  * Birth through 11 months: 1=0mos, 2=1mos, 3=2mos, 4=3mos, 5=4mos, 6=5mos, 7=6mos, 8=7mos, 9=8mos, 10=9mos, 11=10mos, 12=11mos;
  array am(12) _temporary_ (0 1 2 3 4 5 6 7 8 9 10 11);
  do i=1 to dim(am);
     if ridexagm = am[i] then agemos = i;
  end;
    drop i;

  * Birth through 23 months: 1=Birth–2 months, 2=3–5 months, 3=6–8 months, 4=9–11 months;
  if 0<= ridexagm <3  then agemos011=1; *Birth–2 months;
  if 3<= ridexagm <6  then agemos011=2; *3-5 months;
  if 6<= ridexagm <9  then agemos011=3; *6-8 months;
  if 9<= ridexagm <12 then agemos011=4; *9-11 months;

  * Children 1-19 years: 1 yr, 2 yrs, 3 yrs,..., 15 yrs, 16 yrs, 17 yrs, 18 yrs, 19 years;
  array agL(19) _temporary_ (12 24 36 48 60 72 84 96  108 120 132 144 156 168 180 192 204 216 228);
  array agU(19) _temporary_ (24 36 48 60 72 84 96 108 120 132 144 156 168 180 192 204 216 228 240);
  do i=1 to dim(agL);
     do j=1 to dim(agU);        
      if agL[i]<= ridexagm <= (agU[j]-1) then age119 = i; *1+ years;
     end;
  end;
    
  *** Combine age groups:
  * Birth - 19 years;
  if 0<= ridexagm <12 then age019 = agemos011;
  else age019 = age119 + 4;

  * 2months - 19 years;
  if 2<= ridexagm <12 then age219 = agemos211;
  else age219 = age119 + 3;


  *** Convert age in months to years;
  agyr = ridexagm/12;   


  *** Adults 20 and over: 
  * Age 20-80 and over: 0=20+ years, 1= 20–29 years, 2=30–39 years, 3=40–49 years, 4=50–59 years, 5=60–69 years, 6=70–79 years, 8=80 years and over;
  if 20<= ridageyr <30 then age2080p = 1;
  if 30<= ridageyr <40 then age2080p = 2;
  if 40<= ridageyr <50 then age2080p = 3;
  if 50<= ridageyr <60 then age2080p = 4;
  if 60<= ridageyr <70 then age2080p = 5;
  if 70<= ridageyr <80 then age2080p = 6;
  if 80<= ridageyr     then age2080p = 7;

  * Age 20-60 and over: 0=20+ years, 1= 20–39 years, 2=40–59 years, 3=50–59 years, 4=60 years and over;
  if 20<= ridageyr <40 then age2060p = 1;
  if 40<= ridageyr <60 then age2060p = 2;
  if 60<= ridageyr     then age2060p = 3;

  
  * Race and Hispanic origin with NH Asian;
  *---------------------------------------*;
  * From 1976 to 2006, the NHANES sample was designed to provide estimates specifically for persons of Mexican origin.;
  * Beginning in 2007, NHANES allows for reporting of both total Hispanics and Mexican Americans;
  * RIDRETH1: This is the race-ethnicity variable that can be linked to the previous NHANES race-ethnicity variable in 1999-2010.;

  * Including Mexican American (MA) - 1999-2018: 1=1999-200, 2=2001-2002, ..., 10=2017-2018;
  * 1=Non-Hispanic White, 2=Non-Hispanic Black, 5=Mexican American, 6=Other;
  if ridreth1=3 then racemx= 1;              
  else if ridreth1=4 then racemx= 2;          
  else if ridreth1=1 then racemx= 5;         
  else if ridreth1 in (2, 5) then racemx= 6; 

  * Including All Hispanic and NH Asian - 2011-2018: 7=2011-12, 8=2013-14, 9=2005-2016, 10=2017-2018;
  * 1=Non-Hispanic White, 2=Non-Hispanic Black, 3=Non-Hispanic Asian, 4=All Hispanic = 1:Mexican American + 2:Other Hispanic, 6=Other;
  if ridreth3 = 3 then racehoa=1;            
  else if ridreth3 = 4 then racehoa=2;       
  else if ridreth3 = 6 then racehoa=3;       
  else if ridreth3 in (1, 2) then racehoa=4; 
    else if ridreth3 = 7 then racehoa=6;     


  * Including All Hispanic and NH Asian - 2011-2018: 7=2011-12, 8=2013-14, 9=2005-2016, 10=2017-2018;
  * 1=Non-Hispanic White, 2=Non-Hispanic Black, 3=Non-Hispanic Asian, 4=All Hispanic = 1:Mexican American + 2:Other Hispanic, 5=Mexican American;

    if ridreth3 = 3 then race=1;            
  else if ridreth3 = 4 then race=2;       
  else if ridreth3 = 6 then race=3;       
  else if ridreth3 in (1, 2) then race=4; 
  else if ridreth3 = 1 then race=5; 

  * Create combined weight;
  ************************;
  * 2015-2018;
  if sddsrvyr in (9, 10) then do;
     wtmec4yr = 1/2 * WTMEC2YR;
     dat2yr = sddsrvyr;
     dat4yr = 2;
  end;

  * Subpopulation;
  ****************;
  if 12*0<= ridexagm <12*20 and ridexprg ne 1 and bmxwt ne . then sel019=1; *Birth-19 years;
  if 12*2<= ridexagm <12*20 and ridexprg ne 1 and (bmxwt ne . and bmical ne .) then sel219=1;* 20 years and over;
  if 2<= ridageyr le 19 and ridexprg ne 1 and bmxwt ne . then sel219=1; *2-19 years;
    if ridageyr ge 20 and ridexprg ne 1 and (bmxwt ne . and bmical ne .) then sel20=1;* 20 years and over;


  label racemx  = "R/E: 1=Non-Hispanic White, 2=Non-Hispanic Black, 5=Mexican American, 6=Other"
          racehoa = "R/E: 1=Non-Hispanic White, 2=Non-Hispanic Black, 3=Non-Hispanic Asian, 4=All Hispanic, 6=Other"
      wt_lb   = "Weight(pounds)"
      ht_inch = "Height(inches)"
      ; 
run;


proc sort data=demobmi_1518; 
by sdmvstra sdmvpsu; 
run;

****************************** ARITHMETIC MEANS*****************************;

***************************************************************;
*Tables 1,2,7,8,13*;
***************************************************************;

proc descript data=demobmi_1518 design=wr atlevel1=1 atlevel2=2 deft2 nomarg;
    * Nest statement: PSUs nested within Strata accounts for the design effects*;
  nest sdmvstra sdmvpsu/strlev=1 psulev=2 missunit;
  * Weight statement: specify appropriate weight, accounts for the unequal probability of sampling and non-response.*;
  weight wtmec4yr;
  * Subpopx statement: specify the subpopulation of interest (the inclusion criteria)*;
  subpopx sel019=1/ name="youth 0-19 years";
  * Class statement: specify categorical variable(s)*;
  class dat4yr riagendr age019 / nofreq;
  * Table(s) statement: specifies cross-tabulations for which estimates are requested. If a table statement is not present, 
    a one-dimensional distribution is generated.*;
  tables dat4yr*riagendr*age019 ;
  * Var statement: specify the analysis variable(s)*;
  var BMXWT wt_lb bmxht ht_inch bmical;
  print nsum="SampSz" mean="Mn" semean="SEMn"/ nohead notime style=nchs nsumfmt=F7.0 meanfmt=F7.2 semeanfmt=F7.2; 
  * output statement to output the number of observations (nsum), means, 
      standard error of the means (semean), number of strata (atlev1), 
      and number of PSUs (atlev2)**;
  output nsum mean semean atlev1 atlev2/ FILENAME=youth  FILETYPE=SAS meanfmt=f6.1 semeanfmt=f6.2 REPLACE;
  rtitle "Children Means: Selected Measurement/indeces, by age group and sex: NHANES 2015-2018";
  rformat riagendr sexfmt.;
  rformat age019 age019fmt.;
  rformat dat4yr dat4yrfmt.;
run;

**********************************************************;
*Tables 3,4,5,6,9,10,11,12,14,15*;
**********************************************************;

proc descript data=demobmi_1518 design=wr atlevel1=1 atlevel2=2 deft2 ;
    * Nest statement: PSUs nested within Strata accounts for the design effects*;
  nest sdmvstra sdmvpsu/strlev=1 psulev=2 missunit;
  * Weight statement: specify appropriate weight, accounts for the unequal probability of sampling and non-response.*;
  weight wtmec4yr;
  * Subpopx statement: specify the subpopulation of interest (the inclusion criteria)*;
  subpopx sel20=1/ name="Adults 20+ years";
  * Class statement: specify categorical variable(s)*;
  class dat4yr race riagendr age2080p age2060p / nofreq;
  * Table(s) statement: specifies cross-tabulations for which estimates are requested. If a table statement is not present, 
    a one-dimensional distribution is generated.*;
  tables dat4yr*age2080p dat4yr*race*riagendr*age2060p;
  * Var statement: specify the analysis variable(s)*;
  var BMXWT wt_lb bmxht ht_inch  bmical;
  print nsum="SampSz" mean="Mn" semean="SEMn"/ nohead notime style=nchs nsumfmt=F7.0 meanfmt=F7.2 semeanfmt=F7.2; 
  * output statement to output the number of observations (nsum), means, 
      standard error of the means (semean), number of strata (atlev1), 
      and number of PSUs (atlev2)**;
  output nsum mean semean atlev1 atlev2/ FILENAME=Adults  FILETYPE=SAS REPLACE meanfmt=f6.1 semeanfmt=f6.2;
  rtitle "Adult Means: Selected Measurement/indeces, by age group and race: NHANES 2015-2018";
  rformat race racefmt.;
  rformat riagendr sexfmt.;
  rformat age2060p age2060pfmt.;
  rformat age2080p age2080pfmt.;
  rformat dat4yr dat4yrfmt.;
run;


****************************** PERCENTILES ANALYSIS**********************;

***************************************************************;
*Tables 1,2,7,8,13*;
***************************************************************;

proc descript data=demobmi_1518 design=wr;
    * Nest statement: PSUs nested within Strata accounts for the design effects*;
  nest sdmvstra sdmvpsu/strlev=1 psulev=2 missunit;
  * Weight statement: specify appropriate weight, accounts for the unequal probability of sampling and non-response.*;
  weight wtmec4yr;
  * Subpopx statement: specify the subpopulation of interest (the inclusion criteria)*;
  subpopx sel019=1/ name="youth";
  * Class statement: specify categorical variable(s)*;
  class dat4yr riagendr age019 / nofreq;
  * Table(s) statement: specifies cross-tabulations for which estimates are requested. If a table statement is not present, 
    a one-dimensional distribution is generated.*;
  tables dat4yr*riagendr*age019;
  * Var statement: specify the analysis variable(s)*;
  var BMXWT wt_lb bmxht ht_inch bmical;
  * Percentile statement: Generates the specified percentiles*;
  percentile 5 10 15 25 50 75 85 90 95;
  print nsum="SampSz" qtile="Quantile"  seqtile="SE" / nohead notime style=nchs nsumfmt=F7.0 qtilefmt=F9.2 seqtilefmt=F7.3; 
    * output statement to output the number of observations (nsum), percentiles, 
    standard error of the percentiles (se)**;
    output nsum qtile seqtile / FILENAME=pctle_Youth FILETYPE=SAS REPLACE;
  rtitle "Children Percentiles: Selected Measurement/indeces, by age group and sex: NHANES 2015-2018";
  rformat riagendr sexfmt.;
  rformat age019 age019fmt.;
  rformat dat4yr dat4yrfmt.;
run;


**********************************************************;
*Tables 3,4,5,6,9,10,11,12,14,15*;
**********************************************************;

proc descript data=demobmi_1518 design=wr;
    * Nest statement: PSUs nested within Strata accounts for the design effects*;
  nest sdmvstra sdmvpsu/strlev=1 psulev=2 missunit;
  * Weight statement: specify appropriate weight, accounts for the unequal probability of sampling and non-response.*;
  weight wtmec4yr;
  * Subpopx statement: specify the subpopulation of interest (the inclusion criteria)*;
  subpopx sel20=1/ name="Adults";
  * Class statement: specify categorical variable(s)*;
  class dat4yr riagendr race age2060p age2080p/ nofreq;
  * Table(s) statement: specifies cross-tabulations for which estimates are requested. If a table statement is not present, 
    a one-dimensional distribution is generated.*;
  tables dat4yr*race*riagendr*age2060p dat4yr*riagendr*age2080p;
  * Var statement: specify the analysis variable(s)*;
  var BMXWT wt_lb bmxht ht_inch bmical;
  * Percentile statement: Generates the specified percentiles*;
  percentile 5 10 15 25 50 75 85 90 95;
  print nsum="SampSz" qtile="Quantile"  seqtile="SE" / nohead notime style=nchs nsumfmt=F7.0 qtilefmt=F9.2 seqtilefmt=F7.3; 
  * output statement to output the number of observations (nsum), percentiles, 
    standard error of the percentiles (se)**;
  output nsum qtile seqtile / FILENAME=pctle_Adults FILETYPE=SAS REPLACE;
  rtitle "Adult Percentiles: Selected Measurement/indeces, by age group, sex and race: NHANES 2015-2018";
  rformat race racefmt.;
  rformat riagendr sexfmt.;
  rformat age2060p age2060pfmt.;
  rformat age2080p age2080pfmt.;
  rformat dat4yr dat4yrfmt.;
run;