*****************************************************************************************************************;
* Example SUDAAN code to replicate NCHS Data Brief No. 364, Figures 1 - 2                                      *;
* Hypertension Prevalence Among Adults Aged 18 and Over: United States, 2017-2018                               *;
*                                                                                                               *;
* Ostchega,Y, Fryar,CD, Nwankwo, T, Nguyen, DT. Hypertension Prevalence Among Adults Aged 18 and Over:          *; 
* United States, 2017-2018. NCHS Data Brief. No 364. Hyattsville, MD: National Center for Health Statistics.   *;
* 2020.                                                                                                         *; 
*                                                                                                               *;
* Available at: https://www.cdc.gov/nchs/products/databriefs/db364.htm                                          *;
*****************************************************************************************************************;

/* Session-wide listing options: left-aligned output, no date or page number,
   a 100-line by 150-column page, and plain-text characters for table borders. */
options nocenter nodate nonumber
        pagesize=100 linesize=150
        formchar="|----|+|---+=|-/\<>*";

/* Write the software versions to the log.
   NOTE(review): the message names SUDAAN, but the visible program only calls SAS survey
   procedures (SURVEYMEANS, SURVEYREG) - confirm whether the SUDAAN text is still wanted. */
%put Run in SAS &sysver (maintenance release and release year: &sysvlong4)
 and SUDAAN Release 11.0.1 (SAS-Callable, 32 bit version);

** Macro To Download Data from NHANES website **;

%macro CreateDS(myDS);
  /*-------------------------------------------------------------------------------------------
    Download NHANES transport (.xpt) files from the CDC web site and copy each one into a
    WORK data set of the same name.

    Parameter:
      myDS - list of NHANES file names (for example: demo_j bpq_j bpx_j), split into words
             with the default delimiters of the SCAN macro function (normally blanks).

    The last two characters of each name select the survey-cycle folder:
      _j 2017-2018   _i 2015-2016   _h 2013-2014   _g 2011-2012   _f 2009-2010
      _e 2007-2008   _d 2005-2006   _c 2003-2004   _b 2001-2002
    Any other ending (for example: demo) is read from the 1999-2000 folder.

    NOTE(review): the URL layout is hard-coded - confirm it still matches the NHANES web site.
  -------------------------------------------------------------------------------------------*/

  /* Keep the working variables local so that a call cannot overwrite macro variables
     of the same name in the calling environment. */
  %local i DS Suffix Cycle;

  %let i = 1;
  %let DS = %scan(&myDS, &i);

  /* Test before the first pass: an empty list is a no-op instead of a SUBSTR error. */
  %do %while(%length(&DS) > 0);
    %let Suffix = %lowcase(%substr(&DS, %eval(%length(&DS)-1)));

    %if       (&Suffix = _j) %then %let Cycle = 2017-2018;
    %else %if (&Suffix = _i) %then %let Cycle = 2015-2016;
    %else %if (&Suffix = _h) %then %let Cycle = 2013-2014;
    %else %if (&Suffix = _g) %then %let Cycle = 2011-2012;
    %else %if (&Suffix = _f) %then %let Cycle = 2009-2010;
    %else %if (&Suffix = _e) %then %let Cycle = 2007-2008;
    %else %if (&Suffix = _d) %then %let Cycle = 2005-2006;
    %else %if (&Suffix = _c) %then %let Cycle = 2003-2004;
    %else %if (&Suffix = _b) %then %let Cycle = 2001-2002;
    %else                          %let Cycle = 1999-2000;

    /* The XPORT libref has the same name as the fileref, so it reads the downloaded file. */
    filename &DS url "https://wwwn.cdc.gov/nchs/nhanes/&Cycle/&DS..xpt";
    libname &DS xport;

    data &DS;
      set &DS..&DS;
    run;

    /* The data now live in WORK; release the libref and fileref so they do not accumulate. */
    libname &DS clear;
    filename &DS clear;

    %let i = %eval(&i+1);
    %let DS = %scan(&myDS, &i);
  %end;
%mend CreateDS;


** HOW TO USE **;
/* One call per file family (demo, bpq, bpx); each call walks the ten survey cycles,
   from the unsuffixed 1999-2000 files through the _j (2017-2018) files. */
%CreateDS(demo demo_b demo_c demo_d demo_e demo_f demo_g demo_h demo_i demo_j);
%CreateDS(bpq bpq_b bpq_c bpq_d bpq_e bpq_f bpq_g bpq_h bpq_i bpq_j);
%CreateDS(bpx bpx_b bpx_c bpx_d bpx_e bpx_f bpx_g bpx_h bpx_i bpx_j);

/*-------------------------------------------------------------------------------------------
  MergeBP: build one per-cycle analysis file by merging, on seqn, the demo file with the
  selected columns of the bpq and bpx files.

  Parameters:
    out - name of the data set to create (for example: bp1718)
    sfx - file-name suffix of the survey cycle (for example: _j); blank for the
          unsuffixed 1999-2000 files

  MERGE with BY requires every input data set to already be in seqn order.
-------------------------------------------------------------------------------------------*/
%macro MergeBP(out=, sfx=);
  data &out;
    merge demo&sfx
          bpq&sfx(keep=seqn bpq020 bpq050a BPQ100D)
          bpx&sfx(keep=seqn bpxsy1-bpxsy4 bpxdi1-bpxdi4);
    by seqn;
  run;
%mend MergeBP;

%MergeBP(out=bp9900, sfx=);
%MergeBP(out=bp0102, sfx=_b);
%MergeBP(out=bp0304, sfx=_c);
%MergeBP(out=bp0506, sfx=_d);
%MergeBP(out=bp0708, sfx=_e);
%MergeBP(out=bp0910, sfx=_f);
%MergeBP(out=bp1112, sfx=_g);
%MergeBP(out=bp1314, sfx=_h);
%MergeBP(out=bp1516, sfx=_i);
%MergeBP(out=bp1718, sfx=_j);


/* Stack the ten per-cycle files into a single data set ... */
data hyper_9918;
  set bp9900 bp0102 bp0304 bp0506 bp0708
      bp0910 bp1112 bp1314 bp1516 bp1718;
run;

/* ... and put it in seqn order. */
proc sort data=hyper_9918;
  by seqn;
run;



/* Display formats for the derived analysis variables. */
proc format;
  value agefmt
    1 = "20-39"
    2 = "40-59"
    3 = "60+";

  /* agecat groups; 0 and -2 carry the all-ages label. */
  value agecatfmt
    1     = "18-39"
    2     = "40-59"
    3     = "60+"
    0, -2 = '18 and over';

  /* 0 and -2 carry the both-sexes label. */
  value sexfmt
    1     = 'Men'
    2     = 'Women'
    0, -2 = 'All';

  value FPLfmt
    1 = '<=130'
    2 = '130-<=350'
    3 = '>350';

  /* EDUC = 1 is coded as high-school diploma or less, so the label carries no
     leading less-than sign (it previously read as "less than HS grad or less"). */
  value EDUCfmt
    1 = 'HS grad or less'
    2 = 'Some College'
    3 = '>=College graduate';

  value race_et4fmt
    1 = 'NH white'
    2 = 'NH black'
    3 = 'Hispanic'
    4 = 'NH Asian'
    5 = 'other';

  value race4fmt
    1 = 'NH white'
    2 = 'NH black'
    3 = 'Hispanic'
    4 = 'NH Asian'
    5 = 'other';

  value hyper_newfmt
    1 = "New HTN"
    0 = "no hypertension";

  value hyper_oldfmt
    1 = "Old HTN"
    0 = "no hypertension";

  value controlfmt
    1 = "Controlled"
    0 = "not controlled";

  /* Survey cycle number to cycle years. */
  value sfmt
    1  = '1999-2000'
    2  = '2001-2002'
    3  = '2003-2004'
    4  = '2005-2006'
    5  = '2007-2008'
    6  = '2009-2010'
    7  = '2011-2012'
    8  = '2013-2014'
    9  = '2015-2016'
    10 = '2017-2018';

  value awarefmt
    1 = "aware"
    0 = "unaware";
run;


/* Derive the analysis variables (age group, race/ethnicity, income, education,
   blood-pressure summaries, hypertension flags, analysis domain) on the stacked file. */
data hyper9918;
  set hyper_9918;

  /* Age group: 1 = 18-39, 2 = 40-59, 3 = 60 and over; otherwise left missing. */
  if      18 <= ridageyr < 40 then agecat = 1;
  else if 40 <= ridageyr < 60 then agecat = 2;
  else if ridageyr >= 60      then agecat = 3;

  /* Three recodes of race/ethnicity; unlisted codes leave the target missing. */
  select (ridreth1);
    when (3)    race_et2 = 1;
    when (4)    race_et2 = 2;
    when (1)    race_et2 = 3;
    when (2, 5) race_et2 = 4;
    otherwise;
  end;

  select (ridreth1);
    when (3)    race_et3 = 1;
    when (4)    race_et3 = 2;
    when (1, 2) race_et3 = 3;
    when (5)    race_et3 = 4;
    otherwise;
  end;

  /* race_et4 is the recode used by the models and by race_et4fmt. */
  select (ridreth3);
    when (3)    race_et4 = 1;
    when (4)    race_et4 = 2;
    when (1, 2) race_et4 = 3;
    when (6)    race_et4 = 4;
    when (7)    race_et4 = 5;
    otherwise;
  end;

  /* Income: three bands of indfmpir with cut points at 1.30 and 3.50. */
  if      0.00 < indfmpir <= 1.30 then FPL = 1;
  else if 1.30 < indfmpir <= 3.50 then FPL = 2;
  else if indfmpir > 3.50         then FPL = 3;

  /* Education: ages 18-19 are coded from dmdeduc3 ... */
  if ridageyr in (18, 19) then do;
    if (0 <= dmdeduc3 < 15) or dmdeduc3 in (55, 66) then EDUC = 1;  /* HS diploma or less */
    else if dmdeduc3 = 15 then EDUC = 2;                            /* some college */
  end;

  /* ... and any record with a listed dmdeduc2 code is (re)coded from dmdeduc2. */
  select (dmdeduc2);
    when (1, 2, 3) EDUC = 1;  /* HS diploma or less */
    when (4)       EDUC = 2;  /* some college */
    when (5)       EDUC = 3;  /* college */
    otherwise;
  end;

  /* Count the nonmissing systolic and diastolic readings. This is done before the
     zero diastolic readings are blanked, so a reading of 0 still counts here. */
  n_sbp = n(of bpxsy1-bpxsy4);
  n_dbp = n(of bpxdi1-bpxdi4);

  /* Set diastolic readings of 0 to missing so they do not enter the average. */
  array _DBP {4} bpxdi1-bpxdi4;
  do _dbp_ix = 1 to 4;
    if _DBP{_dbp_ix} = 0 then _DBP{_dbp_ix} = .;
  end;
  drop _dbp_ix;  /* loop index only; not part of the output data set */

  /* Mean systolic and diastolic pressure over the available readings. */
  mean_sbp = mean(of bpxsy1-bpxsy4);
  mean_dbp = mean(of bpxdi1-bpxdi4);

  /* Hyper_new: thresholds 130 (systolic) / 80 (diastolic), or bpq050a = 1
     (presumably reported blood-pressure medication use - confirm against the codebook).
     Controlled = 0 when a mean is at or above threshold; Controlled = 1 when the flag
     comes from bpq050a alone and both pressures were measured. */
  if mean_sbp >= 130 or mean_dbp >= 80 then do;
    Hyper_new  = 1;
    Controlled = 0;
  end;
  else if bpq050a = 1 then do;
    Hyper_new = 1;
    if n_sbp > 0 and n_dbp > 0 then Controlled = 1;
  end;
  else if n_sbp > 0 and n_dbp > 0 then Hyper_new = 0;

  /* Same flag on a 0/100 scale, so that a mean reads as a percentage. */
  if      Hyper_new = 1 then Hyper_new1 = 100;
  else if Hyper_new = 0 then Hyper_new1 = 0;

  /* Hyper_old: same construction with thresholds 140 / 90. */
  if mean_sbp >= 140 or mean_dbp >= 90 then do;
    Hyper_old  = 1;
    Controlold = 0;
  end;
  else if bpq050a = 1 then do;
    Hyper_old = 1;
    if n_sbp > 0 and n_dbp > 0 then Controlold = 1;
  end;
  else if n_sbp > 0 and n_dbp > 0 then Hyper_old = 0;

  /* Awareness from bpq020: 1 -> aware, 2 -> unaware, anything else stays missing. */
  select (bpq020);
    when (1) aware = 1;
    when (2) aware = 0;
    otherwise;
  end;

  /* Sex is a straight copy of RIAGENDR. */
  sex = RIAGENDR;

  /* Analysis domain: age 18 and over, ridexprg not equal to 1 (presumably excludes
     pregnant respondents - confirm), and at least one blood-pressure reading.
     sela stays missing for everyone else. */
  if ridageyr >= 18 and ridexprg ne 1 and not (n_sbp = 0 and n_dbp = 0) then sela = 1;

  format agecat agecatfmt. sex sexfmt. race_et4 race_et4fmt.;
run;

*************************;
*Subset 2017-2018 data*;

/* Keep only the records with sddsrvyr = 10 (labelled 2017-2018 by sfmt). */
data hyper_1718;
  set hyper9918(where=(sddsrvyr = 10));
run;


**************************************************************************************;
*Figures 1 and 2. Unadjusted Prevalence of hypertension, for the age, sex, breakdowns*;

/* Unadjusted mean of hyper_new1 (a 0/100 flag, so the mean reads as a percentage),
   overall and by sex, by age group, and by sex within age group. */
proc surveymeans data=hyper_1718 mean nobs stderr plots=none;
  /* survey design variables */
  strata  sdmvstra;
  cluster sdmvpsu;
  weight  wtmec2yr;

  class   sex agecat;

  /* analysis variable */
  var     hyper_new1;

  /* subpopulations of interest */
  domain  sela sela*sex sela*agecat sela*sex*agecat;

  format  agecat AGEcatFMT. sex SexFMT.;

  /* match_all writes one data set per domain request; the next step reads them
     as unadj, unadj1, unadj2 and unadj3. */
  ods output domain(match_all)=unadj;
run;

/* Combine the four domain tables and keep the rows for the analysis domain (sela = 1). */
data stats;
  set unadj unadj1 unadj2 unadj3;
  if sela = 1;
run;

proc sort data=stats;
  by sex agecat;
run;

proc print data=stats;
  var sex agecat n mean stderr;
run;



*Figure 1. Adjusted Prevalence of hypertension, for "ALL";

title 'Age-standardized prevalence of persons 18 years and older with high blood pressure: NHANES 2017-2018';

/* Cell-means model (no intercept) of hyper_new1 on age group. The ESTIMATE statement
   combines the three age-group means with fixed weights that sum to 1
   (presumably the standard-population proportions - confirm against the data brief). */
proc surveyreg data=hyper_1718 nomcar;
  strata  sdmvstra;
  cluster sdmvpsu;
  weight  wtmec2yr;
  domain  sela;
  class   agecat;
  model   hyper_new1 = agecat / noint solution vadjust=none;
  estimate 'Total                     '
           agecat 0.4203 0.3572 0.2225;
  ods output estimates=ageadj_prev1a;
  *ods select ParameterEstimates Estimates;
run;


*Figure 1. Adjusted Prevalence of hypertension, for "By sex";

title 'Age-standardized prevalence of persons 18 years and older with high blood pressure: NHANES 2017-2018';

/* Sex by age-group model without intercept. Each ESTIMATE picks one sex and applies
   the age weights both to the agecat effect and to the three sex*agecat columns of
   that sex (the interaction columns run sex-major, agecat-minor, following the
   CLASS statement order). */
proc surveyreg data=hyper_1718 nomcar;
  strata  sdmvstra;
  cluster sdmvpsu;
  weight  wtmec2yr;
  domain  sela;
  class   sex agecat;
  model   hyper_new1 = sex agecat sex*agecat / noint solution vadjust=none;
  estimate 'Males            '
           sex        1 0
           agecat     0.4203 0.3572 0.2225
           sex*agecat 0.4203 0.3572 0.2225   0 0 0;
  estimate 'Females          '
           sex        0 1
           agecat     0.4203 0.3572 0.2225
           sex*agecat 0 0 0   0.4203 0.3572 0.2225;
  ods output estimates=ageadj_prev2a;
  *ods select ParameterEstimates Estimates;
run;


*Figure 2. Adjusted Prevalence of hypertension, for "ALL, by race";

**Use option order=internal to specify the order of class variables is by unformatted value rather than formatted value *;
* The coding on the ESTIMATE statements assumes NH White is first category variable race_et4 but by default PROC SURVEYREG parameterizes based on formatted value (alphabetical) *; 
* See: https://support.sas.com/documentation/cdl/en/statug/68162/HTML/default/statug_surveyreg_syntax01.htm#statug.surveyreg.orderopt ;
*PROC SURVEYREG data=hyper_1718 nomcar;

title 'Age-standardized prevalence of persons 18 years and older with high blood pressure: NHANES 2017-2018';

/* Race by age-group model without intercept. ORDER=INTERNAL keeps the class levels in
   unformatted order, which the coefficient positions below rely on. Each ESTIMATE picks
   one race_et4 level and weights its three race_et4*agecat columns; coefficients not
   listed at the end of a list are zero. */
proc surveyreg data=hyper_1718 nomcar order=internal;
  strata  sdmvstra;
  cluster sdmvpsu;
  weight  wtmec2yr;
  domain  sela;
  class   race_et4 agecat;
  model   hyper_new1 = race_et4 agecat race_et4*agecat / noint solution vadjust=none;
  estimate 'NH White      '
           race_et4        1 0 0 0
           agecat          0.4203 0.3572 0.2225
           race_et4*agecat 0.4203 0.3572 0.2225   0 0 0   0 0 0   0 0 0;
  estimate 'NH Black      '
           race_et4        0 1 0 0
           agecat          0.4203 0.3572 0.2225
           race_et4*agecat 0 0 0   0.4203 0.3572 0.2225   0 0 0   0 0 0;
  estimate 'Hispanic      '
           race_et4        0 0 1 0
           agecat          0.4203 0.3572 0.2225
           race_et4*agecat 0 0 0   0 0 0   0.4203 0.3572 0.2225   0 0 0;
  ods output estimates=ageadj_prev1;
  *ods select ParameterEstimates Estimates;
run;

*Figure 2. Adjusted Prevalence of hypertension, for "By race and sex";

/* Sex by race by age-group model without intercept, analysed in the sela domain.
   ORDER=INTERNAL keeps the class levels in unformatted order. The three-way
   interaction has 2 x 5 x 3 = 30 columns, ordered as in the CLASS statement
   (sex slowest, then race_et4, agecat fastest); each ESTIMATE places the age
   weights on the three columns of one sex and race_et4 combination.
   The title now says 18 years, matching the analysis population (agecat starts at 18
   and sela requires ridageyr >= 18); it previously said 20 years. */
PROC SURVEYREG data=hyper_1718 nomcar order=internal;
STRATA sdmvstra;
CLUSTER sdmvpsu;
CLASS sex race_et4 agecat;
WEIGHT wtmec2yr;
DOMAIN sela;
MODEL  hyper_new1=sex race_et4 agecat sex*race_et4*agecat /noint solution vadjust=none;
ESTIMATE 'NH White Men' sex 1 0  race_et4 1 0 0 0  agecat 0.4203 0.3572 0.2225
         sex*race_et4*agecat
           0.4203 0.3572 0.2225   0 0 0   0 0 0   0 0 0   0 0 0
           0 0 0   0 0 0   0 0 0   0 0 0   0 0 0;
ESTIMATE 'NH Black Men' sex 1 0  race_et4 0 1 0 0  agecat 0.4203 0.3572 0.2225
         sex*race_et4*agecat
           0 0 0   0.4203 0.3572 0.2225   0 0 0   0 0 0   0 0 0
           0 0 0   0 0 0   0 0 0   0 0 0   0 0 0;
ESTIMATE 'Hispanic men' sex 1 0  race_et4 0 0 1 0  agecat 0.4203 0.3572 0.2225
         sex*race_et4*agecat
           0 0 0   0 0 0   0.4203 0.3572 0.2225   0 0 0   0 0 0
           0 0 0   0 0 0   0 0 0   0 0 0   0 0 0;
ESTIMATE 'NH White Women' sex 0 1  race_et4 1 0 0 0  agecat 0.4203 0.3572 0.2225
         sex*race_et4*agecat
           0 0 0   0 0 0   0 0 0   0 0 0   0 0 0
           0.4203 0.3572 0.2225   0 0 0   0 0 0   0 0 0   0 0 0;
ESTIMATE 'NH Black Women' sex 0 1  race_et4 0 1 0 0  agecat 0.4203 0.3572 0.2225
         sex*race_et4*agecat
           0 0 0   0 0 0   0 0 0   0 0 0   0 0 0
           0 0 0   0.4203 0.3572 0.2225   0 0 0   0 0 0   0 0 0;
ESTIMATE 'Hispanic Women' sex 0 1  race_et4 0 0 1 0  agecat 0.4203 0.3572 0.2225
         sex*race_et4*agecat
           0 0 0   0 0 0   0 0 0   0 0 0   0 0 0
           0 0 0   0 0 0   0.4203 0.3572 0.2225   0 0 0   0 0 0;

ods OUTPUT  estimates=ageadj_prev2;
*ods select ParameterEstimates Estimates;
TITLE 'Age-standardized prevalence of persons 18 years and older with high blood pressure: NHANES 2017-2018';
run;


/* Stack the age-adjusted estimates from the four models. */
data all_ageadj;
  set ageadj_prev1a ageadj_prev1 ageadj_prev2a ageadj_prev2;
run;

title 'Age-standardized prevalence of persons 18 years and older with high blood pressure: NHANES 2017-2018';

/* Print only the rows whose domain value begins with sela=1, with estimates and
   standard errors shown to one decimal place as in the data brief. */
proc print data=all_ageadj(where=(domain =: "sela=1"));
  var    label estimate stderr;
  format estimate stderr f5.1;
run;


**********************************************************************************************;
* other option for sex and race is to specify race as a separate domain (interacted with sela) *;
/* Alternative to the three-way model: fit the sex by age-group model separately within
   each race_et4 level by crossing race_et4 with sela in the DOMAIN statement.
   The title now says 18 years, matching the analysis population (agecat starts at 18
   and sela requires ridageyr >= 18); it previously said 20 years. */
PROC SURVEYREG data=hyper_1718 nomcar order=internal;
STRATA sdmvstra;
CLUSTER sdmvpsu;
CLASS sex race_et4 agecat;
WEIGHT wtmec2yr;
DOMAIN sela*race_et4;
MODEL  hyper_new1=sex agecat sex*agecat /noint solution vadjust=none;
ESTIMATE 'Males            ' sex 1 0 agecat 0.4203  0.3572  0.2225 sex*agecat 0.4203  0.3572  0.2225 0 0 0;
ESTIMATE 'Females          ' sex 0 1 agecat 0.4203  0.3572  0.2225 sex*agecat 0 0 0 0.4203  0.3572  0.2225;
ods OUTPUT  estimates=ageadj_prev3;
*ods select ParameterEstimates Estimates;
TITLE 'Age-standardized prevalence of persons 18 years and older with high blood pressure: NHANES 2017-2018';
run;

/* Print the sela=1 domains, leaving out the NH Asian and other race groups. */
proc print data=ageadj_prev3(
             where=(domain =: "sela=1"
                    and domain not in ("sela=1 race_et4=NH Asian", "sela=1 race_et4=other")));
  var    domain label estimate stderr;
  format estimate stderr f5.1;
run;



/*
SOURCE: SAS 9.4 Documentation, SAS/STAT(R) 9.4 User's Guide

Note: NOMCAR requests that the procedure treat missing values in the variance computation as not missing completely
at random (NOMCAR) for Taylor series variance estimation. When you specify the NOMCAR option, PROC SURVEYREG computes
variance estimates by analyzing the nonmissing values as a domain or subpopulation, where the entire population includes
both nonmissing and missing domains. See the section Missing Values for more details.
By default, PROC SURVEYREG completely excludes an observation from analysis if that observation has a missing value,
unless you specify the MISSING option. Note that the NOMCAR option has no effect on a classification variable when you
specify the MISSING option, which treats missing values as a valid nonmissing level.
The NOMCAR option applies only to Taylor series variance estimation. The replication methods, which you request with the
VARMETHOD=BRR and VARMETHOD=JACKKNIFE options, do not use the NOMCAR option.

Note: When there is a CLASS statement, you need to use the SOLUTION option with the CLPARM option to
obtain the parameter estimates and their confidence limits.

VADJUST=DF | NONE
specifies whether to use the degrees-of-freedom adjustment (n-1)/(n-p) in the computation of the matrix G for the
variance estimation. If you do not specify the VADJUST= option, by default, PROC SURVEYREG uses the degrees-of-freedom
adjustment that is equivalent to the VADJUST=DF option. If you do not want to use this variance adjustment, you can
specify the VADJUST=NONE option.
*/