# Prediction
# Evaluate the post-pruned tree on the held-out test set.
# NOTE(review): CART_post_pruned, CART_tst, perf_eval() and Perf_Table are
# all defined earlier in the file (outside this chunk) — confirm upstream.
CART_post_prey <- predict(CART_post_pruned, CART_tst, type = "class")
# Confusion matrix: rows = actual PloanYN, columns = predicted class
CART_post_cm <- table(CART_tst$PloanYN, CART_post_prey)
CART_post_cm
# Store the post-pruning metrics in row 1 of the shared results table
Perf_Table[1,] <- perf_eval(CART_post_cm)
Perf_Table
# CART with Pre-Pruning -------------------------------
# For CART: "party" provides ctree(); "matrixStats" is a dependency of party.
# Install only when missing — unconditional install.packages() re-downloads
# on every run and fails without network access.
if (!requireNamespace("matrixStats", quietly = TRUE)) {
  install.packages("matrixStats")
}
if (!requireNamespace("party", quietly = TRUE)) {
  install.packages("party")
}
library(party)
# For AUROC
if (!requireNamespace("ROCR", quietly = TRUE)) {
  install.packages("ROCR")
}
library(ROCR)
# Divide the dataset into training/validation/test datasets
# NOTE(review): assumes Ploan_input/Ploan_target (defined earlier) hold at
# least 2500 rows and are already shuffled — confirm upstream.
trn_idx <- 1:1000
val_idx <- 1001:1500
tst_idx <- 1501:2500
# Each split carries the inputs plus the target as column "PloanYN"
CART_trn <- data.frame(Ploan_input[trn_idx, ], PloanYN = Ploan_target[trn_idx])
CART_val <- data.frame(Ploan_input[val_idx, ], PloanYN = Ploan_target[val_idx])
CART_tst <- data.frame(Ploan_input[tst_idx, ], PloanYN = Ploan_target[tst_idx])
# Construct single tree and evaluation
# Hyperparameter grid for pre-pruning (passed to party::ctree_control):
#   min_criterion: 1 - p-value that must be exceeded to implement a split
#   min_split: minimum number of observations in a node for a split attempt
#   max_depth: maximum tree depth (0 = no depth restriction in ctree_control)
min_criterion <- c(0.9, 0.95, 0.99)
min_split <- c(10, 30, 50, 100)
max_depth <- c(0, 10, 5)
# One row per hyperparameter combination:
# 3 parameter columns + 6 confusion-matrix metrics + AUROC + leaf count = 11
CART_pre_search_result <- matrix(
  0,
  length(min_criterion) * length(min_split) * length(max_depth),
  11
)
colnames(CART_pre_search_result) <- c("min_criterion", "min_split", "max_depth",
"TPR", "Precision", "TNR", "ACC", "BCR", "F1", "AUROC", "N_leaves")
iter_cnt <- 1
# Grid search: fit a tree on the training set for every hyperparameter
# combination and score it on the validation set.
for (i in seq_along(min_criterion)) {
  for (j in seq_along(min_split)) {
    for (k in seq_along(max_depth)) {
      cat("CART Min criterion:", min_criterion[i], ", Min split:", min_split[j],
          ", Max depth:", max_depth[k], "\n")
      tmp_control <- ctree_control(mincriterion = min_criterion[i],
                                   minsplit = min_split[j],
                                   maxdepth = max_depth[k])
      tmp_tree <- ctree(PloanYN ~ ., data = CART_trn, controls = tmp_control)
      tmp_tree_val_prediction <- predict(tmp_tree, newdata = CART_val)
      tmp_tree_val_response <- treeresponse(tmp_tree, newdata = CART_val)
      # treeresponse() returns one class-probability vector per observation.
      # Taking every other unlisted element and subtracting from 1 extracts
      # the positive-class probability — assumes a BINARY target whose first
      # factor level is the negative class (TODO confirm level order).
      tmp_tree_val_prob <- 1 - unlist(tmp_tree_val_response,
                                      use.names = FALSE)[seq(1, nrow(CART_val) * 2, 2)]
      tmp_tree_val_rocr <- prediction(tmp_tree_val_prob, CART_val$PloanYN)
      # Confusion matrix for the validation dataset (rows = actual class)
      tmp_tree_val_cm <- table(CART_val$PloanYN, tmp_tree_val_prediction)
      # Parameters of this combination
      CART_pre_search_result[iter_cnt, 1] <- min_criterion[i]
      CART_pre_search_result[iter_cnt, 2] <- min_split[j]
      CART_pre_search_result[iter_cnt, 3] <- max_depth[k]
      # Performances from the confusion matrix (TPR ... F1)
      CART_pre_search_result[iter_cnt, 4:9] <- perf_eval(tmp_tree_val_cm)
      # AUROC on the validation set
      CART_pre_search_result[iter_cnt, 10] <- unlist(performance(tmp_tree_val_rocr, "auc")@y.values)
      # Number of terminal (leaf) nodes in the fitted tree
      CART_pre_search_result[iter_cnt, 11] <- length(nodes(tmp_tree, unique(where(tmp_tree))))
      iter_cnt <- iter_cnt + 1
    }
  }
}
# Find the best set of parameters
# Sort candidate settings by validation AUROC (column 10), best first.
# Use TRUE, never the reassignable shorthand T.
CART_pre_search_result <- CART_pre_search_result[
  order(CART_pre_search_result[, 10], decreasing = TRUE), ]
CART_pre_search_result
# Hyperparameters of the top-ranked (highest validation AUROC) setting
best_criterion <- CART_pre_search_result[1, 1]
best_split <- CART_pre_search_result[1, 2]
best_depth <- CART_pre_search_result[1, 3]
# Construct the best tree using the hyperparameters selected on validation
tree_control = ctree_control(mincriterion = best_criterion, minsplit = best_split, maxdepth = best_depth)
# Use the training and validation dataset to train the best tree
# NOTE(review): this OVERWRITES CART_trn in place — re-running this line
# appends CART_val again (duplicated rows). Re-run from the data split if
# this section is executed more than once.
CART_trn <- rbind(CART_trn, CART_val)
CART_pre <- ctree(PloanYN ~ ., data = CART_trn, controls = tree_control)
# Class predictions and class-probability responses on the test set
CART_pre_prediction <- predict(CART_pre, newdata = CART_tst)
CART_pre_response <- treeresponse(CART_pre, newdata = CART_tst)
# Performance of the best tree
# Confusion matrix: rows = actual PloanYN, columns = predicted class
CART_pre_cm <- table(CART_tst$PloanYN, CART_pre_prediction)
CART_pre_cm
# Row 2 of the shared results table holds the pre-pruned tree's metrics
Perf_Table[2,] <- perf_eval(CART_pre_cm)
Perf_Table
# Plot the ROC
# Positive-class probability: every other element of the unlisted response,
# subtracted from 1 — assumes a binary target with the negative class as the
# first factor level (TODO confirm level order, as in the validation search).
CART_pre_prob <- 1-unlist(CART_pre_response, use.names=F)[seq(1,nrow(CART_tst)*2,2)]
CART_pre_rocr <- prediction(CART_pre_prob, CART_tst$PloanYN)
# True-positive rate vs false-positive rate pairs for the ROC curve
CART_pre_perf <- performance(CART_pre_rocr, "tpr","fpr")
plot(CART_pre_perf, col=5, lwd = 3)
# Plot the best tree
plot(CART_pre)
plot(CART_pre, type="simple")  # compact node representation
# (removed non-code artifact: stray "댓글남기기" ["leave a comment"] web-page button text)