nnet2sas() supports centering and scaling

nnet2sas() version 1 introduced a way to export a nnet() model trained in R to Base SAS through metaprogramming, and now nnet2sas() version 2 introduces support for variable centering and scaling as implemented in caret::train(). See the link for version 1 for more background on nnet2sas().

###
### prepare demonstration data
###
 
require(earth) # for etitantic data
data(etitanic)
mydata <- etitanic
mydata$survived <- as.factor(ifelse(etitanic$survived==1, 'T', 'F'))
summary(mydata)


 
###
### train the neural network
###
 
require(nnet)
require(caret)
require(pROC) # caret requires pROC with the ROC metric

fit <- train(
    survived ~ ., 
    data = mydata, 
    tuneLength = 2, 
    method = "nnet", 
    preProcess = c('center', 'scale'), 
    metric = "ROC",
    trControl = trainControl(summaryFunction = twoClassSummary, classProbs = T)
    )



###
### show information about the training and neural network
###

fit
fit$finalModel


###
### metaprogram SAS code
###

# nnet2sas version 2 
nnet2sas <- function(fit)
{
    if(!inherits(fit, c('train','nnet')))
        stop('nnet2sas requires an object returned by train() or nnet()')
    if(inherits(fit, 'train'))
    {
        if(!inherits(fit$finalModel, 'nnet'))
            stop('expecting train() was used with method="nnet"')
        if (any(!(fit$preProcess$methods %in% c('center','scale'))))
            stop('preprocessing support is limited to center and scale')
        nn <- fit$finalModel
        preprocess_methods <- fit$preProcess$methods
    } else {
        nn <- fit
        fit$preProcess$methods <- numeric(0)
    }

    sas <- paste("/* neural network size", paste(nn$n, collapse='-'), "*/\n")
    sas <- paste(sas, '/* inputs: ', paste(nn$coefnames, collapse=' '), ' */\n', sep='')
    sas <- paste(sas, '/* this macro handles extreme values */\n')
    sas <- paste(sas, '%macro logistic(z);\n')
    sas <- paste(sas, '1/(1+exp(min(max(-(&z),-500),500)))\n')
    sas <- paste(sas, '%mend;\n')
 
    # Define the input layer.
    # If there are factors, then in SAS you will have to manually change
    # something like 'pclass2nd' to 'pclass eq "2nd"'.
    # Also, this is the place to apply a range transformation (if any).
     
    for (input in 1:nn$n[1]){
        inputvar <- inputvar.org <- nn$coefnames[input]
        if ('center' %in% fit$preProcess$method)
        {
            inputvar <- paste('(',inputvar,'-',fit$preProcess$mean[[inputvar.org]],')')
        }
        if ('scale' %in% fit$preProcess$method)
        {
            inputvar <- paste(inputvar,'/',fit$preProcess$std[[inputvar.org]])
        }
        sas <- paste(sas, 'i', input,' = ', inputvar,';\n',sep='')
    }

    # notation:
    # zji refers to z^j_i where j is the layer and i is the unit
    # aji refers to a^j_i is the activation of g(z^j_i) where g is the sigmoid function
     
    # compute the hidden layer from the input layer
    for (h in 1:nn$n[2]) {
        unit.offset <- (nn$n[1]+1)*(h-1)+1
        z2 <- c()
        # bias unit (intercept)
        z2[1] <- paste('z2',h,' = ',nn$wts[unit.offset],sep='')
        # loop through input layer
        for (input in 1:nn$n[1]){
            z2[input+1] <- paste('(',nn$wts[unit.offset+input],' * i', input, ')', sep='')
        }
        sas <- paste(sas, paste(z2, collapse='+'),';\n',sep='')
        sas <- paste(sas, 'a2',h," = %logistic(z2",h,");\n", sep='')
    }
 
    # compute the output layer from the hidden layer
    output.offset <- (nn$n[1]+1)*(nn$n[2])
    z3<-c()
    # bias unit
    z3[1] <- paste('z31 = ',nn$wts[output.offset+1],sep='')
    # loop through the hidden layer
    for (h in 1:nn$n[2]) {
        z3[h+1] <- paste('(',nn$wts[output.offset + h + 1],' * a2', h, ')', sep='')
    }
     
    sas <- paste(sas, paste(z3, collapse='+'),';\n',sep='')
    sas <- paste(sas, "o = %logistic(z31);\n", sep='')
 
    # clean up temporary SAS variables
    sas <- paste(sas,
        paste('drop ',paste('i', 1:nn$n[1], collapse=' ', sep=''), ' ',
        paste('z2', 1:nn$n[2], collapse=' ', sep=''),' ',
        paste('a2', 1:nn$n[2], collapse=' ', sep=''),
        ' z31 ;\n', sep=''))
 
     
    return(sas)
}
 
# This is how to invoke the metaprogramming function with
# the fitted neural network as the input.
sascode <- nnet2sas(fit)
 
# Print SAS code to the R console.  In this case, you need something
# like NotePad++ to replace the \n line break with a real line break.
# Then you can paste it into SAS.
print(sascode)


# Export the predictors and predictions, so we can verify accuracy in Base SAS.
mydata <- cbind(mydata, predict(fit, etitanic, type="prob"))
write.csv(mydata, "etitanic.csv", row.names=F, quote=F)
About these ads
This entry was posted in Machine learning and data mining, SAS and tagged , , , , . Bookmark the permalink.

2 Responses to nnet2sas() supports centering and scaling

  1. Dan says:

    Hi Andrew, Thanks for this! To get more neurons for the hidden layer would we just change tunelength or modify createGrid in caret package? Thank you, Dan

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Connecting to %s