****************************************************************************************************************;
* Example SUDAAN code to replicate NCHS Data Brief No. 303, Figures 1                                          *;
* Prevalence of Depression Among Adults Aged 20 and Over: United States, 2013–2016                             *;
*                                                                                                              *;
* Brody DJ, Pratt LA, Hughes JP. Prevalence of Depression Among Adults Aged 20 and Over: United                *;
* States, 2013–2016. NCHS Data Brief. No 303. Hyattsville, MD: National Center for Health Statistics. 2018.    *;                *;
*                                                                                                              *;
* Available at: https://www.cdc.gov/nchs/products/databriefs/db303.htm                                         *;
****************************************************************************************************************;

options nocenter nodate nonumber pagesize=100 linesize=150;
OPTIONS FORMCHAR="|----|+|---+=|-/\<>*";

%put Run in SAS &sysver (maintenance release and release year: &sysvlong4)
 and SUDAAN Release 11.0.1 (SAS-Callable, 32 bit version);

** Macro To Download Data from NHANES website **;

%macro CreateDS(myDS);
  %let i = 1;
  %let DS = %scan(&myDS, &i);

  %do %until(&DS = %nrstr());
    %let Suffix = %lowcase(%substr(&DS, %eval(%length(&DS)-1)));
    %if (&Suffix = _j) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2017/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _i) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2015/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _h) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2013/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _g) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2011/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _f) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2009/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _e) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2007/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _d) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2005/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _c) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2003/DataFiles/&DS..xpt"; %end;
    %else %if (&Suffix = _b) %then %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2001/DataFiles/&DS..xpt"; %end;
    %else %do; filename &DS url "https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/1999/DataFiles/&DS..xpt"; %end;
    libname &DS xport;

    data &DS;
      set &DS..&DS;
    run;

    %let i = %eval(&i+1);
    %let DS = %scan(&myDS, &i);
  %end;
%mend CreateDS;

** HOW TO USE **;
%CreateDS(demo_e demo_f demo_g demo_h demo_i dpq_e dpq_f dpq_g dpq_h dpq_i);

* Read in SAS transport files and append across survey cycles - Demographic files *;
data demo;
  set demo_e(keep=seqn riagendr ridageyr sdmvstra sdmvpsu wtmec2yr sddsrvyr indfmpir)
      demo_f(keep=seqn riagendr ridageyr sdmvstra sdmvpsu wtmec2yr sddsrvyr indfmpir)
      demo_g(keep=seqn riagendr ridageyr sdmvstra sdmvpsu wtmec2yr sddsrvyr indfmpir)
      demo_h(keep=seqn riagendr ridageyr sdmvstra sdmvpsu wtmec2yr sddsrvyr indfmpir)
      demo_i(keep=seqn riagendr ridageyr sdmvstra sdmvpsu wtmec2yr sddsrvyr indfmpir);
run;

* Read in SAS transport files and append across survey cycles - Mental Health - Depression Screener files *;
data dpq;
  set dpq_e
      dpq_f
      dpq_g
      dpq_h
      dpq_i;

  ** Set Refused/Don't Know To Missing (for all variable names starting with "dpq") **;
  array _dpq dpq:;
  do over _dpq;
    if (_dpq >= 7) then call missing(_dpq);
  end;

  ** Create Depression Score (score will be missing if any of the items are missing) **;
  Depression_Score = dpq010+dpq020+dpq030+dpq040+dpq050+dpq060+dpq070+dpq080+dpq090;

  ** Create binary depression indicator as 0/100 variable, to calculate the prevalence of depression **; 
  if (0 <= Depression_Score < 10) then Depression_Indicator = 0;
  else if (Depression_Score >= 10) then Depression_Indicator = 100;

  keep seqn Depression_Score Depression_Indicator;
run;


* Define formats *; 
proc format;
  * format to categorize age *;
  value ageCatF
  low-<20=" "
  20-39="1"
  40-59="2"
  60-high="3"
  ;

 value pirF
  low-<1.0="1"
  1.0-<2.0="2"
  2.0-<4.0="3"
  4.0-high="4"
  ;
   *Labels for categorized variables *;
  value genf
    1='Men' 2='Women';
  value agef
    1='20-39' 2='40-59' 3='60 and over';
  value depf
    0='0-9' 1='10 or more';
  value fplf
    1='Less than 100% FPL' 2='100% to less than 200% FPL' 3='200% to less than 400% FPL' 4='At or above 400% FPL';
 value srvf
    5='2007-2008' 6='2009-2010' 7='2011-2012' 8='2013-2014' 9='2015-2016';

run;

* Merge component files to produce analysis dataset *;
data one;
  merge demo
        dpq;
  by seqn;
  ** Create Selection Variable For Subpopulation Of Interest **;
  if (ridageyr >= 20) then Select = 1;


  ** Categorize age (apply format, then convert to numeric variable) **;
  ageCat=input(put( ridageyr, ageCatF.), best8.); 

   ** Categorize income (apply format, then convert to numeric variable) **;
  pir=input(put( indfmpir, pirf.), best8.); 

  ** Calculate MEC weight for 10-year data *;
  ** use the MEC exam weights, per the analytic notes in the DPQ documentation file **;
  WTMEC4YR = 1/5 * WTMEC2YR;
run;


*************************************************************************************************;
                       * Subset 2013-14 to 2015-16 data*;
*************************************************************************************************;


data one2; 
set one; if sddsrvyr in (5,6,7) then delete; run;


*************************************************************************************************;
* Sort the analysis dataset by the survey design variables before running SUDAAN procedures *;
*************************************************************************************************;

proc sort data=one2;
  by sdmvstra sdmvpsu;
run;

************************************************************;
** CALCULATE PROPORTIONS - Figures 1 - 3                  **;
************************************************************;

proc descript data=one2 design=wr;
  nest sdmvstra sdmvpsu; * specify survey design variables *;
  weight WTMEC4YR; * specify sampling weight *;
  subpopx Select = 1; * analyze the subpopulation of interest *;
  class riagendr ageCat pir/nofreq;
  * var and catlevel statements: specify analysis variable(s), and category level for the analysis*;
  var Depression_Indicator;
  catlevel 100; 
  * Table(s) statement: specifies stratification for which estimates are requested. If a table statement is not present, 
  a one-dimensional estimates is generated.*;
  table riagendr*(ageCat pir);
  print nsum percent sepercent/style=nchs nohead notime nodate percentfmt=f8.1 sepercentfmt=f8.1;
  rtitle "Percentage of adults aged 20 and over with depression, by age and sex: United States, 2013–2016";
  rformat riagendr genf.;
  rformat ageCat agef.;
  rformat pir fplf.; 
run;


** Alternative example demonstrating the RECODE statement                                                        **;
** Note that SUDAAN does not allow use of an RFORMAT statement to categorize a continuous variable (i.e. age)    **;
** Instead, use the RECODE statement if you wish to categorize continuous variables within the DESCRIPT procedure *;

proc descript data=one2 design=wr;
  nest sdmvstra sdmvpsu; * specify survey design variables *;
  weight WTMEC4YR; * specify sampling weight *;
  subpopx Select = 1; * subpopulation of interest *;
  * Class statement: specify categorical variable(s)*;
  class riagendr ridageyr indfmpir/nofreq;
  * the recode statement replaces these variables with recoded (categorized) values before other statements are executed *;
  * recoded: ridageyr<20 = 0, 20<=ridageyr<40 = 1, 40<=ridageyr<60 = 2, ridageyr>=60 = 3 *;
  *          Depression_Score<10 = 0,  Depression_Score>=10 = 1 *;
  recode ridageyr = (20 40 60)
         Depression_Score = (10)
         indfmpir = (0 1 2 4);
  * var and catlevel statements: specify analysis variable(s), and category level for the analysis*;
  var Depression_Score;
  catlevel 1; 
  * Table(s) statement: specifies stratification for which estimates are requested. If a table statement is not present, 
  a one-dimensional estimates is generated.*;
  table riagendr*(ridageyr indfmpir);
  print nsum percent sepercent/style=nchs nohead notime nodate percentfmt=f8.1 sepercentfmt=f8.1;
  rtitle "Percentage of adults aged 20 and over with depression, by age and sex: United States, 2013–2016";
  rtitle "Alternative method demonstrating the RECODE statement";
  rformat riagendr genf.;
  rformat ridageyr agef.;
  rformat indfmpir fplf.; 
run;

****************************************************************************************************;
** T Tests                                                                                        **;
** Between Men And Women (among age groups 20-39, 40-59, 60 and over, and total aged 20 and over) **; 
** and by family income                                                                           **;
****************************************************************************************************;

proc descript data=one2 design=wr;
  nest sdmvstra sdmvpsu;* specify survey design variables *;
  weight WTMEC4YR;* specify sampling weight *;
  subpopx Select = 1;* subpopulation of interest *;
  * Class statement: specify categorical variable(s)*;
  class riagendr ageCat pir/nofreq;
  * var and catlevel statements: specify analysis variable(s), and category level for the analysis*;
  var Depression_Indicator;
  catlevel 100;
  * Table(s) statement: specifies stratification for which estimates are requested. If a table statement is not present, 
  a one-dimensional estimates is generated.*;
  tables ageCat pir;
  * Pairwise statement: Requests all pairwise contrasts*;
  pairwise riagendr;
  setenv labwidth=28;
  print nsum t_pct P_pct /style=nchs nohead notime nodate;
  rtitle "Test for differences between men and women: Percentage of adults aged 20 and over with depression, by age and sex: United States, 2013–2016";
  rformat riagendr genf.;
  rformat ageCat agef.;
  rformat pir fplf.; 
run;

****************************************************************************************************;
** T-Tests                                                                                        **;
** Between Age Groups (among men, women, and total)                                               **;
****************************************************************************************************;

proc descript data=one2 design=wr;
  nest sdmvstra sdmvpsu;* specify survey design variables *;
  weight WTMEC4YR;* specify sampling weight *;
  subpopx Select = 1;* subpopulation of interest *;
  * Class statement: specify categorical variable(s)*;
  class riagendr ageCat/nofreq;
  * var and catlevel statements: specify analysis variable(s), and category level for the analysis*;
  var Depression_Indicator;
  catlevel 100;
  * Table(s) statement: specifies stratification for which estimates are requested. If a table statement is not present, 
  a one-dimensional estimates is generated.*;
  table riagendr;
* Pairwise statement: Requests all pairwise contrasts*;
  pairwise ageCat;
  setenv labwidth=34;
  print nsum t_pct P_pct/style=nchs;
  rtitle "Test for differences between age groups: Percentage of adults aged 20 and over with depression, by age and sex: United States, 2013–2016";
  rformat riagendr genf.;
  rformat ageCat agef.;
run;


************************************************************************;
*FIGURE 3*;
** T-Tests and Analysis of trends**;
************************************************************************;


proc descript data=one2 design=wr;
nest sdmvstra sdmvpsu;* specify survey design variables *;
weight WTMEC4YR;* specify sampling weight *;
subpopx ridageyr >= 20 and Depression_Score >= 0 and indfmpir >= 0; * subpopulation of interest *;
 * Class statement: specify categorical variable(s)*;
class riagendr indfmpir/nofreq;
  * the recode statement replaces these variables with recoded (categorized) values before other statements are executed *;
  recode indfmpir = (0 1 2 4)
         Depression_Score = (10);
  * var and catlevel statements: specify analysis variable(s), and category level for the analysis*;
var Depression_Score;
catlevel 1;
  * Table(s) statement: specifies stratification for which estimates are requested. If a table statement is not present, 
  a one-dimensional estimates is generated.*;
table riagendr;
* Poly statement: Requests linear trends*;
poly indfmpir=1/name="";
/*Use table and pairwise statements below to obtain T-TESTS for sex*/
/*table indfmpir;
* Pairwise statement: Requests all pairwise contrasts*;
pairwise riagendr;*/
print nsum t_pct p_pct/style=nchs;
rformat riagendr genf.;
rformat indfmpir fplf.; 
run;


***********************************************************;
    *FIGURE 5*;
***********************************************************;
proc sort data=one;
  by sdmvstra sdmvpsu;
run;

**********************************;
*Prevalence by survey cycle*
**********************************;
proc descript data=one design=wr;
  nest sdmvstra sdmvpsu; * specify survey design variables *;
  weight WTMEC4YR; * specify sampling weight *;
subpopx ridageyr >= 20 and Depression_Score >= 0;* subpopulation of interest *;
  * Class statement: specify categorical variable(s)*;
  class riagendr sddsrvyr/nofreq;
    * the recode statement replaces the variable with recoded (categorized) values before other statements are executed *;
  recode Depression_Score = (10);
    * var and catlevel statements: specify analysis variable(s), and category level for the analysis*;
  var Depression_Score;
  catlevel 1; 
  * Table(s) statement: specifies stratification for which estimates are requested. If a table statement is not present, 
  a one-dimensional estimates is generated.*;
  table riagendr*sddsrvyr;
  print nsum percent sepercent/style=nchs nohead notime nodate percentfmt=f8.1 sepercentfmt=f8.1;
  rtitle "Percentage of adults aged 20 and over with depression, by age and sex: United States, 2013–2016";
  rtitle "Alternative method demonstrating the RECODE statement";
  rformat riagendr genf.;
  rformat ridageyr agef.;
  rformat sddsrvyr srvf.;
run;

*******************************************;
*Analysis of trends and T-Tests*
*******************************************;

proc descript data=one design=wr;
nest sdmvstra sdmvpsu;* specify survey design variables *;
weight wtmec2yr;* specify sampling weight *;
subpopx ridageyr >= 20 and Depression_Score >= 0;* subpopulation of interest *;
 * Class statement: specify categorical variable(s)*;
class riagendr sddsrvyr/nofreq;
  * the recode statement replaces the variable with recoded (categorized) values before other statements are executed *;
recode Depression_Score = (10);
  * var and catlevel statements: specify analysis variable(s), and category level for the analysis*;
var Depression_Score;
catlevel 1;
  * Table(s) statement: specifies stratification for which estimates are requested. If a table statement is not present, 
  a one-dimensional estimates is generated.*;
table riagendr;
* Poly statement: Requests linear and quadratic trends*;
poly sddsrvyr=2/name="";
/*Use table and pairwise statements below to obtain T-TESTS for sex*/
/*table sddsrvyr;
* Pairwise statement: Requests all pairwise contrasts*;
pairwise riagendr;*/
print nsum t_pct p_pct/style=nchs;
rformat riagendr genf.;
rformat sddsrvyr srvf.;
run;