************************************************************************************************ %HDI to compute Health Disparity (HD) Indices PURPOSE: The %HDI macro calculates HD Indices such as Slope Inequality of Index (SII), Relative Index of Inequality (RII) -- including both RII(mean) and RII(ratio)--, Health Concentration Index (HCI), and their 95% CIs using Taylor Series Linearization and t-statistics. USAGE: the %HDI macro has the following parameters REQUIRED response = one binary response/outcome/dependent health variable factor = one ordinal predictor/explanatory/independent socioeconomic position (SEP) variable OPTIONAL inDS = input SAS dataset (default=_last_) strata = strata variable (optional) cluster = cluster variable, and it has to be numeric (optional) weight = weight variable if there is one, blank otherwise (optional) reorderX = specifies if the factor needs to be reordered from the lowest to highest SEP 1=Yes (if the factor's lowest value corresponds to highest SEP) 0=No (if the factor's lowest value corresponds to lowest SEP) (default=0, no) outDS = output SAS dataset to store all the HD indices and their 95% CIs (default=outHDI) Authors: Nancy F. Cheng, Stuart A. Gansky CONTACTS: nancy.cheng@ucsf.edu, stuart.gansky@ucsf.edu Citations: Cheng NF, Han PZ, Gansky SA (2008). Methods and software for estimating health disparities: The case of children's oral health, Am J Epidemiol Doi:10.1093/aje/kwn207. Cheng NF, Gansky SA: Computing Health Disparity (HD) Indices and their 95% confidence intervals - A SAS macro. CAN-DO website. Accessed date [http://www.ucsf.edu/cando/resources.html#software] REFERENCES: Brown MC (1994). "Using Gini-style indices to evaluate the spatial patterns of health practitioners: Theoretical considerations and an application based on Alberta data," Soc Sci Med 38(9):1243-56 Hayes LJ, Berry G (2002). "Sampling variability of the Kunst-Mackenbach relative index of inequality," J Epidemiol Community Health 56(10):762-5. Keppel K, Pamuk E, Lynch J, et al (2005). "Methodological issues in measuring health disparities," Vital Health Stat 2. 141:1-16. Keppel KG, Pearcy JN, Klein RJ (2004). "Measuring progress in healthy people 2010," Healthy People 2010 Stat Notes 25:1-16. Shah BV (1998). "Linearization methods of variance estimation," In: Armitage P and Colton T, eds.-in-chief. Encyclopedia of Biostatistics. New York: Wiley 3:2276-2279. Wolter KM (1985). Introduction to Variance Estimation. New York: Springer-Verlag. ************************************************************************************************; *set graphics options; symbol1 color=blue ci=blue interpol=join value=square height=1; symbol2 v=none i=join line=2 ci=red; *l=2, dash line; axis1 label=(angle=90) minor=none; axis2 minor=none origin=(0); %macro HDI(inDS=_last_, strata=, cluster=, weight=, response=, factor=, reorderX=0, outDS=outHDI); %if &strata eq %str() and &cluster eq %str() %then %do; %*for non-survey data; *generate _id_ to be used in proc crosstab for non-survey data; data wkdata; set &inDS; _id_=_n_; run; proc sort data=wkdata; by _id_; run; proc means data=wkdata order=internal mean SUMWGT sum ; class &factor; %if &weight ne %str() %then %str(weight &weight;); var &response; *save weighted mean (proportion) and sumed weights; output out=mout(drop=_type_ _freq_) mean=mean stderr=stderr sumwgt=sumwgt sum=sum; run; data stat dm; set mout; if _n_ =1 then output stat; else output dm; run; %let strata=_one_; %let cluster=_id_; %end; %else %do; %*for complex survey data; proc sort data=&inDS out=wkdata; by &strata &cluster; run; proc surveymeans data=wkdata order=internal mean SUMWGT sum; class &strata &cluster &factor; strata &strata; cluster &cluster; %if &weight ne %str() %then %str(weight &weight;); domain &factor; var &response; format &cluster 14.0; ods output Statistics=stat domain=dm; %*save weighted mean (proportion) and sumed weights; run; %end; %if &reorderX=1 %then %do; proc sort data=dm; by descending &factor; run; %end; data _null_; *save overall mean and its stderr; set stat; ybar=mean*100; seYbar=stderr*100; call symput('ybar', ybar); call symput('seYbar', seYbar); call symput('totalWgt', sumwgt); *save total weights; call symput('total1Y', sum); *save sum of y=1; run; data dm1; set dm end=last; totalWgt=&totalWgt; *get total weights for the population; pct1Y=mean*100; *percent of response=1 in each category of a factor (mean value of health variable among each group) ; propPop=sumWgt/totalWgt; * proportion of population for each category of a factor ; retain cumPropPop 0.0;* cumPct1YPop 0.0; cumPropPop+propPop; *cumulative proportion of population; lagCumPP=lag(cumPropPop); * lag of cumPropPop; if _n_=1 then lagCumPP=0; cumPropPopMP=lagCumPP + propPop/2; * calculate the cumulative midpoint for each category; *percent of response=1 in each category of a factor over the whole population; total1Y=&total1Y; prop1YPop=sum/total1Y; *sum is the weighted sum of health variable; retain cumProp1YPop 0.0; cumProp1YPop+prop1YPop; %** prepare to compute HCI **; * calculate HCI from concentration curve: HCI = 2 * (net area between the curve and the diagonal line) = 2 * (1/2 - AUC) where AUC is the area under the curve, AUC= 1/2 * sum(X(i+1) - Xi )(Y(i+1) + Yi) = 1 - 2 *AUC = 1- sum(X(i+1) - Xi )(Y(i+1) + Yi) = 1 - sum(deltaX * YplusLagY) = 1- sum(propPop * YplusLagY) = 1 - sum(AxB) = 1 - cumSumAxB where Yi: Cumulative proportion of the health variable in the ith factor category Xi: Cumulative proportion of the population variable in the ith factor category; lagY=lag(cumProp1YPop); if _n_=1 then do; l45=0.0; lagY=0.0; end; else do; l45=1.0; end; YplusLagY=cumProp1YPop + lagY; *=(Y(i+1) + Yi); AxB=propPop *YplusLagY; retain cumSumAxB 0.0; cumSumAxB+AxB; ** end of HCI preparation **; if last then do; hci=1-cumSumAxB; *calculate concentration index C; call symput('hci', hci); call symput('n', _n_); *total # of factor category; end; label Mean ='Proportion of response=1' Sum ='Weighted Sum of Health Variable' SumWgt ='Sum of Weights' propPop='Proportion of Population' prop1YPop='Proportion of Health Variable' cumPropPop='Cumulative Proportion of Population' cumProp1YPop='Cumulative Proportion of Health Variable' hci='Health Concentration Index'; run; *plot percent vs. cumulative proportion of the population for each factor category; proc reg data=dm1; model pct1Y=cumPropPopMP/clb; weight propPop; plot pct1Y*cumPropPopMP/vaxis=(0 to 100 by 20) haxis=(0 to 1 by 0.1); ods output ParameterEstimates=pe; %*save parameter estimates; label pct1Y='Percent' cumPropPopMP='Cumulative proportion of population ranked by SES'; *title "Percentages of population of &response by &factor level"; title ' '; run; quit; data dm2; *store the data for plot; set dm1; keep cumProp1YPop cumPropPop l45; run; data dmplot; *origin data for plots; cumProp1YPop=0; cumPropPop=0; l45=0; run; proc datasets; append base=dmplot data=dm2; run; legend1 label=none value=(tick=1 'Concentration curve' tick=2 'Line of equality') position=(bottom center outside); *plot concentration curve; proc gplot data=dmplot; plot cumProp1YPop*cumPropPop=1 l45*l45=2 /overlay vaxis=axis1 hminor = 0 vminor = 0 legend=legend1; title 'Concentration Curve'; label cumPropPop='Cumulative proportion of population ranked by SES' cumProp1YPop="Cumulative proportion in health variable"; run; quit; title ' '; data _null_; set pe; %*save intercept and slope to macro variables; if variable='Intercept' then do; call symput('alpha', estimate); call symput('seAlpha', stderr); end; else do; call symput('beta', estimate); call symput('seBeta', stderr); call symput('lclBeta', lowerCL); call symput('uclBeta', upperCL); end; run; *calculate std err of HCI using Taylor series linearization; %calSeHCI(inDS=wkdata, strata=&strata, cluster=&cluster, weight=&weight, response=&response, factor=&factor, n=&n, seOut=seout); data _null_; set seout; call symput('seCI', se); run; data &outDS; alpha=α beta=β seBeta=&seBeta; ybar=&ybar; %*note ybar=totalPct1YPop holds too; seYbar=&seYbar; xbar=0.5; c=tinv(0.975, &n-2); *n is the # of data points and p=2 is the # of regression parameters including intercept, df=n-p; SII=beta; seSII=seBeta; lclSII=&lclBeta; uclSII=&uclBeta; RIImean=SII/ybar; %*use Taylor series linealiztion to appoximate std err; *estimate std Err of RII mean with ignoring the variability in ybar; seRIIm=seSII/ybar; lclRIIm=RIImean - c*seRIIm; uclRIIm=RIImean + c*seRIIm; RIIratio=alpha/(alpha+beta); *1.estimate std Err of RII ratio with the variability in ybar included; seRIIr1= sqrt( (beta**2 * seYbar**2 + ybar**2 * seBeta**2)/(ybar + beta*(1-xbar))**4); lclRIIr1= RIIratio - c*seRIIr1; uclRIIr1= RIIratio + c*seRIIr1; *2. use the natural logarithm of RII ratio, this one is reported, method 1 and 3 can be used for comparison; varLogRIIr2=(beta**2 * seYbar**2 + ybar**2 * seBeta**2)/ ( (ybar-beta*xbar)**2 * (ybar + beta *(1-xbar))**2 ); seLogRIIr2 = sqrt(varLogRIIr2); lclRIIr2= exp(log(RIIratio) - c*seLogRIIr2); uclRIIr2= exp(log(RIIratio) + c*seLogRIIr2); *3. ignore variability of ybar in above 2; varLogRIIr3=(ybar**2 * seBeta**2)/ ( (ybar-beta*xbar)**2 * (ybar + beta *(1-xbar))**2 ); seLogRIIr3 = sqrt(varLogRIIr3); lclRIIr3= exp(log(RIIratio) - c*seLogRIIr3); uclRIIr3= exp(log(RIIratio) + c*seLogRIIr3); lclRIIr=lclRIIr2; uclRIIr=uclRIIr2; HCI = &hci; seHCI=&seCI; lclHCI= HCI - c*seHCI; uclHCI= HCI + c*seHCI; drop varLogRIIr2 varLogRIIr3 ; run; proc print data=&outDS; var SII lclSII uclSII RIImean lclRIIm uclRIIm RIIratio lclRIIr uclRIIr HCI lclHCI uclHCI; title 'HD indices and their 95% CIs'; run; %mend HDI; ************************************************************************************* %calSeHCI to calculate variance of HCI using first order Taylor series linearization (TSL) and compute se =sqrt(var). Parameters: inDS = input dataset strata = strata variable, cluster = cluster variable weight = weight variable response = response variable factor = factor variable n = the # of categories of a factor, n>=2 seOut = output dataset to std. err. of HCI Note: the %calSeHCI is called by %HDI ************************************************************************************; %macro calSeHCI(inDS=, strata=, cluster=, weight=, response=, factor=, n=, seOut=); proc crosstab data=&inDS; %*for non-survey data or clutered survey data; %if &strata eq %str() %then %str(nest _one_ &cluster/missunit;); %*for stratified survey data; %else %if &cluster eq %str() %then %str(nest &strata/missunit;); %else %str(nest &strata &cluster/missunit;); class &factor &response; %*if weight is blank, then use 1 for each obs; %if &weight eq %str() %then %str(weight _one_;); %else %str(weight &weight;); table &factor*&response; print COLPER SECOL COVCOL/secolfmt=f8.4 covcolfmt=f8.4; output COVCOL / FILENAME=out REPLACE covcolfmt=f8.4; run; data vc pct; set out01; *keep b004 b006 b007 b009 b010 b012 b013 b015; %keepSt(n=&n); if idnum=1 then output pct; %*save percent info for both factor and health variable; else if idnum=2 then do; if rownum<4 or mod(rownum-5, 3)=0 then delete; %*delete obs with health variable=0; output vc; *save variance-covariance matrix; *[(v(f1) cov(f1g1) ... cov(f1gn) cov(g1f1) v(g1) cov(g1gn) ................................ cov(gnf1) cov(gng1) ... v(gn) ]; end; run; *get proportions for both factor and health variable, and corresponding constants; data fgab; set pct; *array tmp1 b004 b007 b010 b013; *array tmp2 b006 b009 b012 b015; %arraySt(n=&n, arvar1=tmp1, arvar2=tmp2, pctXv=1); %arraySt(n=&n, arvar1=tmp1, arvar2=tmp2, pctXv=0); array f f1-f&n ; array g g1-g&n ; array a a1-a&n ; array b b1-b&n ; *get constant values for var of hci computation; do i=1 to &n; f(i)=tmp1(i)/100; * proportions for factor; g(i)=tmp2(i)/100; * proportions for health variable; sumg= -g(i); do j=1 to i-1; sumg + (-2)*g(j); end; a(i)=sumg; sumf= -f(i)-2; do k=1 to i; sumf + 2*f(k); end; b(i)= sumf; end; keep f1-f&n g1-g&n a1-a&n b1-b&n; run; data ab; %* generate constant matrix [(a1*a1 a1*b1 ... a1*bn),..., (an*a1 an*b1 ... an*bn)]; set fgab; *generate array ab a1 b1 a2 b2 ... an bn ; %arrayAB(n=&n); %let m=%eval(&n *2); array abi abi1-abi&m ; do i=1 to &m; do j=1 to &m; abi(j)=ab(i)*ab(j); end; output; end; drop f1-f&n g1-g&n i j a1-a&n b1-b&n; run; proc iml; use vc; read all into vcov ; %*create variance-covariance matrix from the dataset VC; *note that the scale vcov is percent, not proportion; use ab; read all into ab ; %*create constant matrix from the dataset ab; *print ab; *variance = sum of diagonal elements of the matrix; variance=sum(diag(ab*vcov)); *print variance; create vout var{variance}; append; quit; data &seOut; set vout; *change to proportion, and get the std err of HCI ; se=sqrt(variance)/100; run; %mend calSeHCI; %* %keepSt to generate keep statement according to the number of factor category to * extract variables from SUDAAN crosstab output Parameter: n= the # of category of a factor, n>=2 Note: the %keepSt is called by %calSeHCI; %macro keepSt(n=); %if &n=2 %then %do; %let kpst=%str(keep b04 b06 b07 b09)%str(;); &kpst; %end; %else %do; %let kpst=%str(keep b004 b006 b007 b009); %do i=3 %to &n; %let j=%eval(&i *3+1); %let k=%eval(&i *3+3); %let kpst=%str(&kpst)%str( )%str(b0&j)%str( )%str(b0&k); %if &i=&n %then %let kpst=%str(&kpst)%str(;); %end; &kpst; %end; %mend; * %arraySt to generate an array statement according to the number of factor category Parameters: n = # of category of a factor, n>=2 arvar1 = array name1 arvar2 = array name2 pctXv = indicator, 1 for percent factor variable and 0 for percent adverse health variable Note: the %arraySt is called by %calSeHCI; %macro arraySt(n=, arvar1=, arvar2=, pctXv=); %let arSt1=%str(array &arvar1 b04 b07); %let arSt2=%str(array &arvar2 b06 b09); %if &n>2 %then %do; %let arSt1=%str(array &arvar1 b004 b007); %let arSt2=%str(array &arvar2 b006 b009); %do i=3 %to &n; %let j=%eval(&i *3+1); %let arSt1=%str(&arSt1)%str( )%str(b0&j); %let k=%eval(&i *3+3); %let arSt2=%str(&arSt2)%str( )%str(b0&k); %if &i=&n %then %do; %let arSt1=%str(&arSt1)%str(;); %let arSt2=%str(&arSt2)%str(;); %end; %end; %end; %if &pctXv=1 %then &arSt1; %else &arSt2; %mend; %* %arrayAB to generate an array statement according to the number of factor category as follows: array ab a1 b1 a2 b2 ....an bn Parameter: n = # of category of a factor, n>=2 Note: the %arrayAB is called by %calSeHCI; %macro arrayAB(n=); %let arSt=%str(array ab); %do i=1 %to &n; %let arSt=%str(&arSt)%str( a&i b&i); %end; %let arSt=%str(&arSt)%str(;); &arSt; %mend; *test; libname ca 'S:\COHNAC06\Dataset'; %include 'S:\COHNAC06\code\formats.sas'; *for non suvey data; %HDI(inDS=ca.cohnac06All, weight=weight, response=untrt_any, factor=mealcut, reorderX=1, outDS=hdi); %HDI(inDS=ca.cohnac06All, response=untrt_any, factor=mealcut, reorderX=1, outDS=hdi); *for survey data; %HDI(inDS=ca.cohnac06All, strata=strata, cluster=schoolcoden, weight=weight, response=untrt_any, factor=mealcut, reorderX=1, outDS=hdi); %HDI(inDS=ca.cohnac06All, strata=strata, weight=weight, response=untrt_any, factor=mealcut, reorderX=1, outDS=hdi); %HDI(inDS=ca.cohnac06All, cluster=schoolcoden, weight=weight, response=untrt_any, factor=mealcut, reorderX=1, outDS=hdi);