##############################################################################################
# reclass function
# 
# Calculates NRI and IDI measures						
#										
# Description:									
# The reclass function calculates the NRI and IDI, which are measures that	
# compare discrimination ability between two logistic regression prediction	 
# models. The function assumes a binary dependent variable and two sets	
# of numerical and/or categorical covariates for the two models. For		
# calculation of NRI the cutoff	probabilities separating risk categories are 
# input paramaters. Output are estimated NRI and IDI with standard errors and 
# p values for test of the null	hypotheses that each measure in the population 
# is zero. Optional output is	the reclassification table for NRI and the c statistics.
# The Design package by Frank Harrell is required to run the function.						
#						
# Usage:
# reclass(f, data, lim, npred, tab)
#
# f         Model formula for logistic regression. Full model including new predictor(s). These 
#           should be placed at the end of the formula. If several new predictors are included 
#           in the model the number of new predictors should be given with argument npred. 
#           Example of model: Y ~ X1 + X2 + X3 + NEWPRED1 + NEWPRED2  (set npred to 2)
#
# data      Dataframe 
#
# lim       Vector containing limits (in the interval 0-1) for clinically relevant risk strata. 
#           Example: c(0.06, 0.20) to indicate strata <6%, 6-20%, >=20%
#
# npred     Number of new predictors in full model. Defaults to 1.
#
# tab       Set to TRUE to include printout of reclassification table and c statistics for
#           old and new model. Defaults to FALSE.
#
# Value:
# A list with the following named elements 
# reclass.table.with.outcome        Reclassification table for subjects who experienced the outcome
# reclass.table.without.outcome     Reclassification table for subjects who did not experience the outcome
# Cindex.old                        C index (area under ROC curve) for original model
# Cindex.new                        C index (area under ROC curve) for model with new predictor(s) added
# NRI                               Net reclassification improvement (NRI)
# SE.NRI                            Standard error for NRI
# pval.NRI                          P value for NRI
# IDI                               Integrated discrimination improvement (IDI)
# SE.IDI                            Standard error for IDI
# pval.IDI                          P value for IDI
#
# Author:
# Rolf Gedeborg, Dept of Surgical Sciences & UCR, Uppsala University
# (rolf.gedeborg@surgsci.uu.se) using R ver 2.9.2       Ver 2011-09-27
#
# Reference: 
# Pencina MJ, D' Agostino RB Sr, D' Agostino RB Jr, Vasan RS.	Evaluating the added predictive ability of a 
# new marker: From area under the ROC curve to reclassification and beyond. 
# Stat Med. 2008 Jan 30 27(2):157-72 discussion 207-12.
#             
##############################################################################################


reclass <- function(f, data, lim, npred = 1, tab = FALSE) {

# Create labels for classes
c.first <- paste("<", lim[1] * 100, "%", sep = "")
c.last <- paste(">=", lim[length(lim)] * 100, "%", sep = "")

if(length(lim) > 1) {
  c.lab <- character(length(lim) - 1)
  for(i in 1:(length(lim) - 1)) c.lab[i] <- paste(lim[i] * 100, "-", (lim[i+1] - 0.01) * 100, "%", sep = "")
  c.lab <- c(c.first, c.lab, c.last)  
  }

if(length(lim) == 1) {
  c.lab <- character(length(lim) - 1)
  for(i in 1:(length(lim) - 1)) c.lab[i] <- paste(lim[i] * 100, "-", (lim[i+1] - 0.01) * 100, "%", sep = "")
  c.lab <- c(c.first, c.last)  
  }
  
# Extract formula without new predictor(s)
f.nopred <- as.formula(paste(all.vars(f)[1], "~", paste(all.vars(f)[2:(length(all.vars(f)) - npred)], collapse ="+")))

require(rms)
# Fit basic model without new predictor(s)
fit.old <- glm(f.nopred, data = data, family = "binomial")
data$pred.old <- predict(fit.old, newdata = data, type = "response")
cind.old <- somers2(data$pred.old, data[, names(data)==all.vars(f)[1]])["C"]

# Fit new model with new predictor(s)
fit.new <- glm(f, data = data, family = "binomial")
data$pred.new <- predict(fit.new, newdata = data, type = "response")
cind.new <- somers2(data$pred.new, data[, names(data)==all.vars(f)[1]])["C"]

# Create new empty matrix
mx1 <- numeric(nrow(data)); mx2 <- numeric(nrow(data)); mx3 <- numeric(nrow(data)); mx4 <- numeric(nrow(data))

# Create matrix with reclassification results
for(i in 1:length(lim)) { 
  mx1 <- cbind(mx1, ifelse(data[, names(data)==all.vars(f)[1]] == 1 & data$pred.new > data$pred.old & rep(lim[i], nrow(data)) > data$pred.old & 
                          rep(lim[i], nrow(data)) <= data$pred.new, 1, 0))
  mx2 <- cbind(mx2, ifelse(data[, names(data)==all.vars(f)[1]] == 1 & data$pred.new < data$pred.old & rep(lim[i], nrow(data)) < data$pred.old & 
                          rep(lim[i], nrow(data)) >= data$pred.new, 1, 0))
  mx3 <- cbind(mx3, ifelse(data[, names(data)==all.vars(f)[1]] == 0 & data$pred.new > data$pred.old & rep(lim[i], nrow(data)) > data$pred.old & 
                          rep(lim[i], nrow(data)) <= data$pred.new, 1, 0))
  mx4 <- cbind(mx4, ifelse(data[, names(data)==all.vars(f)[1]] == 0 & data$pred.new < data$pred.old & rep(lim[i], nrow(data)) < data$pred.old & 
                          rep(lim[i], nrow(data)) >= data$pred.new, 1, 0))                                                                            
}

phat.up.ev <- sum(ifelse(rowSums(mx1) > 0, 1, 0)) / sum(data[, names(data)==all.vars(f)[1]])
phat.down.ev <- sum(ifelse(rowSums(mx2) > 0, 1, 0)) / sum(data[, names(data)==all.vars(f)[1]])
phat.up.nonev <- sum(ifelse(rowSums(mx3) > 0, 1, 0)) / (nrow(data) - sum(data[, names(data)==all.vars(f)[1]]))
phat.down.nonev <- sum(ifelse(rowSums(mx4) > 0, 1, 0)) / (nrow(data) - sum(data[, names(data)==all.vars(f)[1]]))

# Calculate NRI
NRI <- (phat.up.ev - phat.down.ev) - (phat.up.nonev - phat.down.nonev)      

# Test for NRI
se.nri <- sqrt(((phat.up.ev + phat.down.ev) / sum(data[, names(data)==all.vars(f)[1]])) + 
                ((phat.up.nonev + phat.down.nonev) / (nrow(data) - sum(data[, names(data)==all.vars(f)[1]]))))
z.nri <- NRI / se.nri

p.nri <- 2 * (1 - pnorm(abs(z.nri)))

# Create reclassification table...
data$old.class <- cut(data$pred.old, breaks = c(0, lim, 1), right = TRUE, include.lowest = TRUE, labels = c.lab)
data$new.class <- cut(data$pred.new, breaks = c(0, lim, 1), right = TRUE, include.lowest = TRUE, labels = c.lab)

label(data$old.class) <- "Old classification"
label(data$new.class) <- "New classification"

# ...for those with outcome var=1
data.1 <- subset(data, data[, names(data)==all.vars(f)[1]] == 1)
Old_classification <- data.1$old.class
New_classification <- data.1$new.class
t1 <- addmargins(table(Old_classification, New_classification))

# ...for those with outcome var=0
data.2 <- subset(data, data[, names(data)==all.vars(f)[1]] == 0)
Old_classification <- data.2$old.class
New_classification <- data.2$new.class
t2 <- addmargins(table(Old_classification, New_classification))

# Calculate IDI
IDI <- (mean(data$pred.new[data[, names(data)==all.vars(f)[1]] == 1]) - 
        mean(data$pred.old[data[, names(data)==all.vars(f)[1]] == 1])) - 
        (mean(data$pred.new[data[, names(data)==all.vars(f)[1]] == 0]) - 
        mean(data$pred.old[data[, names(data)==all.vars(f)[1]] == 0])); IDI

# Test for IDI
se.idi <- sqrt((sd(data$pred.new[data[, names(data)==all.vars(f)[1]] == 1] - 
        data$pred.old[data[, names(data)==all.vars(f)[1]] == 1])^2 / 
        nrow(data[data[, names(data)==all.vars(f)[1]] == 1, ])) + 
        (sd(data$pred.new[data[, names(data)==all.vars(f)[1]] == 0] - 
        data$pred.old[data[, names(data)==all.vars(f)[1]] == 0])^2 / 
        nrow(data[data[, names(data)==all.vars(f)[1]] == 0, ])))
z.idi <- IDI / se.idi
          
p.idi <- 2 * (1 - pnorm(abs(z.idi))); p.idi

result2 <- list(reclass.table.with.outcome=t1, reclass.table.without.outcome=t2, Cindex.old=cind.old,
              Cindex.new=cind.new, NRI=NRI, SE.NRI=se.nri, pval.NRI=p.nri, IDI=IDI, SE.IDI=se.idi, 
              pval.IDI=p.idi)
result3 <- list(NRI=NRI, SE.NRI=se.nri, pval.NRI=p.nri, IDI=IDI, SE.IDI=se.idi, pval.IDI=p.idi)

if(tab == TRUE) return(result2)  
if(tab == FALSE) return(result3)
}