**********************************************************************************;
**********************************************************************************;
** Macro NRI IDI {UCR} SAS Documentation                                		**;
**	                                                             				**;
** Calculates NRI and IDI measures  						 					**;
**																				**;
** Description 																	**;
** The macro NRI IDI calculates the NRI and IDI, which are measures that        **;
** compare discrimination ability between two logistic regression prediction	**; 
** models. The macro assumes a binary dependent variable and two sets of 		**;
** numerical and/or categorical covariates for the two models. For calculation  **;
** of NRI the number of risk categories (<= 10) and the cutoff probabilities    **;
** separating risk categories are input paramaters. Output are estimated NRI    **; 
** and IDI with standard errors and p values for test of the null hypotheses    **;
** that each measure in the population is zero. Optionally the reclassification **;
** table for NRI is printed.				                                    **;
**																				**;
** Usage																		**;
**																				**;
** %nriidi(ds = , y = , id = , model1n = , model1c = , model2n = , model2c = ,	**;
** nriskcat = , riskcutoffs = , printtable = )									**;
**																				**;
** Arguments																	**;
** ds   		Input data set													**;
** y   			Binary dependent variable (0 = controls, 1 = cases)				**;
** id			Subject identification variable									**;
** model1n 		Model 1: numerical covariates									**; 
** model1c 		Model 1: categorical covariates									**; 
** model2n 		Model 2: numerical covariates									**; 
** model2c 		Model 2: categorical covariates									**; 
** nriskcat 	Number of risk categories for NRI (< = 10)						**;
** riskcutoffs	Cutoff probabilities (in %) separating risk categories 			**;
**				(number of cutoffs = nriskcat - 1)   							**;
** printtable	Y/N = print/do not print classification table for NRI			**;

** Author    																	**;
** Lars Berglund <lars.berglund@ucr.uu.se> 										**;

** 																				**;
** 																				**;
** Reference: Pencina MJ, D' Agostino RB Sr, D' Agostino RB Jr, Vasan RS.		**;
**            Evaluating the added predictive ability of a new marker: From		**; 
** 			  area under the ROC curve to reclassification and beyond.			**;
**            Stat Med. 2008 Jan 30 27(2):157-72 discussion 207-12.        		**;


** 																				**;
** Example																		**;
** %nriidi(ds = a, y = death, id = patnr, model1n = sbp bmi, model1c = sex, 	**;	
** model2n = sbp bmi newriskfactor, model2c = sex, nriskcat = 4,				**;
** riskcutoffs = 6 15 30, printtable = Y)										**;
** 																				**;
** 																				**;
**********************************************************************************;
**********************************************************************************;

%macro nriidi(ds,y,id,model1n,model1c,model2n,model2c,nriskcat,riskcutoffs,printtable);

  proc logistic data=&ds descending noprint;
    class &model1c;
    model &y= &model1n &model1c;
    output out=out1 pred=pred1;

  run;

  proc logistic data=&ds descending noprint;
    class &model2c;
    model &y= &model2n &model2c;
    output out=out2 pred=pred2;

  run;

  proc sort data=out1;
    by &id;

  run;

  proc sort data=out2;
    by &id;

  run;

  data out;
    merge out1 out2;
      by &id;
    predcat1=.;
    predcat2=.;
	predcat1c='                            ';
	predcat2c='                            ';
	label predcat1c='Established risk factors';
	label predcat2c='Established risk factors + new risk factors';
	label &y='Cases(1)/Controls(0)=';
    riskcoh=.;
    riskcol=.;
	riskcohc='       ';
    riskcolc='       ';
	ic=' ';
	colon=':';
	to='-';
	one='1';
	lt='<';
	gt='>=';
    riskco=symget('riskcutoffs');
    do i= 1 to &nriskcat;
      j=i-1;
      if i=1 then do;
	    riskcoh=scan(riskco,i)/100;
		riskcohc=riskcoh;
		if pred1 ne . and pred1 lt riskcoh then call cats(predcat1c,one,colon,lt,riskcohc);
        if pred1 ne . and pred1 lt riskcoh then predcat1=1;
		if pred2 ne . and pred2 lt riskcoh then call cats(predcat2c,one,colon,lt,riskcohc);
	    if pred2 ne . and pred2 lt riskcoh then predcat2=1;
	  end;
      if i > 1 and i < &nriskcat then do;
        riskcoh=scan(riskco,i)/100;
		riskcohc=riskcoh;
	    riskcol=scan(riskco,j)/100;
		riskcolc=riskcol;
        ic=i;
        if pred1 ne . and pred1 ge riskcol  and pred1 lt riskcoh  then call cats(predcat1c,ic,colon,riskcolc,to,riskcohc);
 		if pred1 ne . and pred1 ge riskcol  and pred1 lt riskcoh  then predcat1=i;
		if pred2 ne . and pred2 ge riskcol  and pred2 lt riskcoh  then call cats(predcat2c,ic,colon,riskcolc,to,riskcohc);
        if pred2 ne . and pred2 ge riskcol  and pred2 lt riskcoh  then predcat2=i;
	  end;
      if i = &nriskcat then do;
        riskcol=scan(riskco,j)/100;
		ic=i;
        if pred1 ne . and pred1 ge riskcol  then predcat1c=ic || colon || gt || left(riskcolc);
		if pred1 ne . and pred1 ge riskcol  then predcat1=i;
		if pred2 ne . and pred2 ge riskcol  then predcat2c=ic || colon || gt || left(riskcolc);
        if pred2 ne . and pred2 ge riskcol  then predcat2=i;
	  end;
	end;
    diff_pred=pred2-pred1;

  run;

  data temp2;
    do &y=0 to 1;
      do predcat1=1 to &nriskcat;
	    do predcat2=1 to &nriskcat;
	      output;
        end;
      end;
    end;

  run;

  proc sort data=temp2;
    by &y predcat1 predcat2;

  run;

  proc freq data=out noprint;
    tables &y*predcat1*predcat2/nocol nopercent norow out=outfr sparse;

  run;

  %if %upcase(&printtable) = Y %then %do;

    proc sort data=out;
      by descending &y predcat1c predcat2c;

    run;

	proc tabulate data=out format=12.0 order=data;
	  class &y predcat1c predcat2c;
	  table &y, predcat1c ,predcat2c ;

	run;

  %end;

  proc sort data=outfr;
    by &y predcat1 predcat2;

  run;

  data outfr;
    merge outfr temp2;
      by &y predcat1 predcat2;

  run;

  data outfr;
    set outfr;
    drop &y percent; 

  run;

  proc transpose data=outfr out=outfr2;

  run;

  data outfr2;
    array col col1-col200;
    set outfr2;
      if _name_='COUNT';
	nrc=symget('nriskcat');
	istop=2*nrc**2;
    do i=1 to istop;
      if col{i}=. then col{i}=0;
    end;
    n_nonevents=0;
    n_events=0;
    n_up_nonevents=0;
    n_down_nonevents=0;
    n_up_events=0;
    n_down_events=0;
    do i=1 to istop;
      if i le istop/2 then n_nonevents=n_nonevents+col{i};
	  if i gt istop/2 then n_events=n_events+col{i};
    end;
    counter=0;
    do i=1 to nrc;
      do j= 1 to nrc;
	    counter=counter+1;
        if j gt i then n_up_nonevents= n_up_nonevents+col{counter};
	    if j lt i then n_down_nonevents= n_down_nonevents+col{counter};
      end;
    end;
    counter=0;
    do i=1 to nrc;
      do j= 1 to nrc;
	    counter=counter+1;
        if j gt i then n_up_events= n_up_events+col{istop/2+counter};
	    if j lt i then n_down_events= n_down_events+col{istop/2+counter};
      end;
    end;
    p_up_nonevents=  n_up_nonevents/n_nonevents;
    p_down_nonevents=  n_down_nonevents/n_nonevents;
    p_up_events=  n_up_events/n_events;
    p_down_events=  n_down_events/n_events;
    nri = ( p_up_events - p_down_events ) - ( p_up_nonevents - p_down_nonevents ) ;
    se_nri = ( ( p_up_events + p_down_events ) / n_events + ( p_up_nonevents + p_down_nonevents ) / n_nonevents ) **.5 ;
    z_nri = nri / se_nri ;
    p_nri= 2 * ( 1 - probnorm ( abs ( z_nri ) ) ) ;
    i=1;

  run;
 
  proc means data=out noprint;
    class &y;
    var pred1 pred2 diff_pred;
    output out=idi n=n n2 n3 mean=m_pred1 m_pred2 m_diff_pred stderr=a b se_diff_pred;

  run;

  data idi;
    set idi;
    if &y ne .;

  run;

  proc sort data=idi;
    by &y;

  run;

  data idi2;
    do i=1 to 2;
      set idi;
	  if &y=0 then do;
	    m_pred1_nonevents=m_pred1;
	    m_pred2_nonevents=m_pred2;
	    se_diff_nonevents=se_diff_pred;
	    n0=n;
	  end;
	  if &y=1 then do;
	    m_pred1_events=m_pred1;
	    m_pred2_events=m_pred2;
	    se_diff_events=se_diff_pred;
	    n1=n;
	  end;
    end;

  run;

  data idi2;
    set idi2;
    idi=(m_pred2_events-m_pred1_events)-(m_pred2_nonevents-m_pred1_nonevents);
    se_idi= ( se_diff_nonevents**2+se_diff_events**2) ** .5;
    z_idi=idi/se_idi;
    p_idi= 2 * ( 1 - probnorm ( abs ( z_idi) ) ) ;
    i=1;

  run;

  data nriidi;
    merge outfr2(keep=i nri se_nri p_nri) idi2(keep=n0 n1 i idi se_idi p_idi);
      by i;
    drop i;
    n=n0+n1;
    label n='n';

  run;

  proc print data=nriidi noobs label double;
    where nri ne .;
    var  n nri   se_nri    p_nri        idi  se_idi         p_idi;
  
  run;

%mend;

run;