Anda di halaman 1 dari 4

Untitled

# Load the dataset from CCDefault.csv in the current working directory.
default <- read.csv("CCDefault.csv")

# Drop the first column (the ID column).
default <- default[, -1]

# Convert the categorical columns, including the response
# default.payment.next.month, to factors. The conversion happens before the
# train/validation split, so both partitions share the same factor levels.
factor_cols <- c(
  "SEX", "EDUCATION", "MARRIAGE",
  "PAY_0", "PAY_2", "PAY_3", "PAY_4", "PAY_5", "PAY_6",
  "default.payment.next.month"
)
default[factor_cols] <- lapply(default[factor_cols], as.factor)

# Partition the data: a random 70% of the rows go to training and the
# remaining rows to validation. The seed is fixed so the split is repeatable.
# NOTE(review): the console output recorded in this script mentions R 3.4.x.
# R 3.6.0 changed the default algorithm used by sample(), so the same seed may
# give a different split on newer R unless RNGkind(sample.kind = "Rounding")
# is set first - confirm if the recorded counts need to be reproduced exactly.
set.seed(33)
n_rows <- nrow(default)
index <- sample(seq_len(n_rows), size = 0.7 * n_rows)
default.training <- default[index, ]
default.validation <- default[-index, ]

# Fit a naive Bayes classifier on the training partition, using every other
# column as a predictor of default.payment.next.month.
library(e1071)

## Warning: package 'e1071' was built under R version 3.4.2

nb_mod <- naiveBayes(
  default.payment.next.month ~ .,
  data = default.training
)

# Score the validation partition. type = "raw" returns the per-class
# probabilities (one column per class); type = "class" would return the
# predicted class labels instead.
val_pred.prob <- predict(
  nb_mod,
  newdata = default.validation,
  type = "raw"
)

# Build the ROC curve and AUC for the validation partition with ROCR.
library(ROCR)

## Warning: package 'ROCR' was built under R version 3.4.2
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.4.2
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
##     lowess

# Column 2 of val_pred.prob holds the probability of the second class level
# (level "1" in the confusion matrices recorded further down this script).
pred <- prediction(
  val_pred.prob[, 2],
  default.validation$default.payment.next.month
)

# True-positive rate against false-positive rate, i.e. the ROC curve.
perf <- performance(pred, "tpr", "fpr")

# Area under the curve, rounded to three decimals. perf1 first holds the
# performance object and is then overwritten with the numeric AUC; the value
# is stored but not printed by this section.
perf1 <- performance(pred, "auc")
perf1 <- round(as.numeric(perf1@y.values), 3)

# TRUE is spelled out because T is an ordinary variable that can be reassigned.
plot(perf, main = "ROC Curve (Naive bayes)", colorize = TRUE)

# Alternative way of drawing the ROC curve and reporting the AUC, using ROSE.
library(ROSE)

## Warning: package 'ROSE' was built under R version 3.4.3
## Loaded ROSE 0.0-3

# Observed labels are passed as the response and the second probability
# column of val_pred.prob as the predicted score.
roc.curve(
  response = default.validation$default.payment.next.month,
  predicted = val_pred.prob[, 2]
)
## Area under the curve (AUC): 0.750

# Recorded result: area under the curve (AUC) is 0.75.

# Confusion matrices on the validation partition for cutoff probabilities of
# 0.6, 0.55 and 0.5, printed in that order. An observation is labelled 1 when
# its second-column probability in val_pred.prob exceeds the cutoff, else 0.
# Rows of each table are the observed labels; columns are the predicted
# labels (prediction.v). After the loop prediction.v holds the labels for the
# last cutoff (0.5).
for (cutoff in c(0.6, 0.55, 0.5)) {
  prediction.v <- ifelse(val_pred.prob[, 2] > cutoff, 1, 0)
  print(table(default.validation$default.payment.next.month, prediction.v))
}

# Recorded output, cutoff 0.6:
##    prediction.v
##        0    1
##   0 5384 1608
##   1  733 1275

# Recorded output, cutoff 0.55:
##    prediction.v
##        0    1
##   0 4996 1996
##   1  655 1353

# Recorded output, cutoff 0.5:
##    prediction.v
##        0    1
##   0 4427 2565
##   1  531 1477

Anda mungkin juga menyukai