nnet2sas() version 1 introduced a way to export a nnet() model trained in R to Base SAS through metaprogramming, and now nnet2sas() version 2 introduces support for variable centering and scaling as implemented in caret::train(). See the link for version 1 for more background on nnet2sas().
###
### prepare demonstration data
###
require(earth) # for etitantic data
data(etitanic)
mydata <- etitanic
mydata$survived <- as.factor(ifelse(etitanic$survived==1, 'T', 'F'))
summary(mydata)
###
### train the neural network
###
require(nnet)
require(caret)
require(pROC) # caret requires pROC with the ROC metric
fit <- train(
survived ~ .,
data = mydata,
tuneLength = 2,
method = "nnet",
preProcess = c('center', 'scale'),
metric = "ROC",
trControl = trainControl(summaryFunction = twoClassSummary, classProbs = T)
)
###
### show information about the training and neural network
###
fit
fit$finalModel
###
### metaprogram SAS code
###
# nnet2sas version 2
nnet2sas <- function(fit)
{
if(!inherits(fit, c('train','nnet')))
stop('nnet2sas requires an object returned by train() or nnet()')
if(inherits(fit, 'train'))
{
if(!inherits(fit$finalModel, 'nnet'))
stop('expecting train() was used with method="nnet"')
if (any(!(fit$preProcess$methods %in% c('center','scale'))))
stop('preprocessing support is limited to center and scale')
nn <- fit$finalModel
preprocess_methods <- fit$preProcess$methods
} else {
nn <- fit
fit$preProcess$methods <- numeric(0)
}
sas <- paste("/* neural network size", paste(nn$n, collapse='-'), "*/\n")
sas <- paste(sas, '/* inputs: ', paste(nn$coefnames, collapse=' '), ' */\n', sep='')
sas <- paste(sas, '/* this macro handles extreme values */\n')
sas <- paste(sas, '%macro logistic(z);\n')
sas <- paste(sas, '1/(1+exp(min(max(-(&z),-500),500)))\n')
sas <- paste(sas, '%mend;\n')
# Define the input layer.
# If there are factors, then in SAS you will have to manually change
# something like 'pclass2nd' to 'pclass eq "2nd"'.
# Also, this is the place to apply a range transformation (if any).
for (input in 1:nn$n[1]){
inputvar <- inputvar.org <- nn$coefnames[input]
if ('center' %in% fit$preProcess$method)
{
inputvar <- paste('(',inputvar,'-',fit$preProcess$mean[[inputvar.org]],')')
}
if ('scale' %in% fit$preProcess$method)
{
inputvar <- paste(inputvar,'/',fit$preProcess$std[[inputvar.org]])
}
sas <- paste(sas, 'i', input,' = ', inputvar,';\n',sep='')
}
# notation:
# zji refers to z^j_i where j is the layer and i is the unit
# aji refers to a^j_i is the activation of g(z^j_i) where g is the sigmoid function
# compute the hidden layer from the input layer
for (h in 1:nn$n[2]) {
unit.offset <- (nn$n[1]+1)*(h-1)+1
z2 <- c()
# bias unit (intercept)
z2[1] <- paste('z2',h,' = ',nn$wts[unit.offset],sep='')
# loop through input layer
for (input in 1:nn$n[1]){
z2[input+1] <- paste('(',nn$wts[unit.offset+input],' * i', input, ')', sep='')
}
sas <- paste(sas, paste(z2, collapse='+'),';\n',sep='')
sas <- paste(sas, 'a2',h," = %logistic(z2",h,");\n", sep='')
}
# compute the output layer from the hidden layer
output.offset <- (nn$n[1]+1)*(nn$n[2])
z3<-c()
# bias unit
z3[1] <- paste('z31 = ',nn$wts[output.offset+1],sep='')
# loop through the hidden layer
for (h in 1:nn$n[2]) {
z3[h+1] <- paste('(',nn$wts[output.offset + h + 1],' * a2', h, ')', sep='')
}
sas <- paste(sas, paste(z3, collapse='+'),';\n',sep='')
sas <- paste(sas, "o = %logistic(z31);\n", sep='')
# clean up temporary SAS variables
sas <- paste(sas,
paste('drop ',paste('i', 1:nn$n[1], collapse=' ', sep=''), ' ',
paste('z2', 1:nn$n[2], collapse=' ', sep=''),' ',
paste('a2', 1:nn$n[2], collapse=' ', sep=''),
' z31 ;\n', sep=''))
return(sas)
}
# This is how to invoke the metaprogramming function with
# the fitted neural network as the input.
sascode <- nnet2sas(fit)
# Print SAS code to the R console. In this case, you need something
# like NotePad++ to replace the \n line break with a real line break.
# Then you can paste it into SAS.
print(sascode)
# Export the predictors and predictions, so we can verify accuracy in Base SAS.
mydata <- cbind(mydata, predict(fit, etitanic, type="prob"))
write.csv(mydata, "etitanic.csv", row.names=F, quote=F)
Hi Andrew, Thanks for this! To get more neurons for the hidden layer would we just change tunelength or modify createGrid in caret package? Thank you, Dan
Yes. I create a grid because tunelenth often gives me sub-optimal results.