# - Setting - install libraries
# - Preprocess - Parsing & NLP
# - Analysis - Random Forest, Deep Learning with 3 layers
# 김형준
# Data Analyst
# NOTE(review): rm(list = ls()) wipes the user's entire workspace; prefer
# running the script in a fresh R session and dropping this line.
rm(list=ls())
# Base directory for data and outputs (re-assigned identically again at
# the start of the data-loading section).
save_dir <- "/Users/kimhyungjun/Dropbox/h2o/prac/"
# CRAN mirror used by install.packages() below.
options(repos='http://cran.nexr.com')
#' Attach each package in `x`, installing it from CRAN first if missing.
#'
#' @param x Character vector of package names.
install_lib <- function(x){
  for (pkg in x) {
    # require() returns FALSE (instead of erroring) when the package is
    # not installed, so it doubles as an availability check.
    if (!require(pkg, character.only = TRUE)) {
      install.packages(pkg, dependencies = TRUE)
    }
    # library() errors if the package still cannot be loaded; it is a
    # no-op when require() above already attached it, which removes the
    # original's duplicated else-branch.
    library(pkg, character.only = TRUE)
  }
}
# Packages used by this script: readxl (data import), dplyr/stringr
# (wrangling), tm/lsa (text mining and weighting), KoNLP (Korean noun
# extraction), h2o (modelling).
pkgs <- c("readxl", "dplyr", "stringr", "tm", "lsa", "KoNLP", "h2o")
suppressMessages(install_lib(pkgs))
save_dir <- "/Users/kimhyungjun/Dropbox/h2o/prac/"

# Korean patent exports: files under data/wips whose name contains "kr".
data_list <- list.files(paste0(save_dir, "data/wips"))
data_list_kr <- data_list[grep("kr", data_list)]

# Read every file and prepend a technology-class column taken from the
# first character of the file name. Build the pieces in a list and bind
# once at the end, instead of growing a data.frame with rbind() inside
# the loop (O(n^2) copying).
pieces <- lapply(data_list_kr, function(fname) {
  piece <- read_excel(paste0(save_dir, "data/wips/", fname))
  piece <- cbind(rep(substr(fname, 1, 1), nrow(piece)), piece)
  colnames(piece)[1] <- "기술"
  piece
})
data <- do.call(rbind, pieces)
rm(pieces)
# Keep the columns used downstream: technology class, IPC code, filing
# date, invention title, abstract, and country code.
dt_kr <- data[,c("기술", "Original IPC Main", "출원일","발명의 명칭", "요약", "국가코드")]
dim(dt_kr)
## [1] 126 6

# Abstract text of each patent as a character vector.
dt_text <- dt_kr %>% dplyr::select(요약) %>% .[[1]]

# Tokenise each abstract into nouns (KoNLP) and re-join with spaces so
# tm can treat each document as one string. vapply() pins the return
# type to character(1) per element, unlike the original sapply().
DT <- dt_text %>%
  lapply(extractNoun) %>%
  vapply(function(tokens) paste(tokens, collapse = " "), character(1)) %>%
  as.data.frame
# Term-document matrix over the noun-tokenised abstracts: numbers and
# punctuation removed, terms shorter than 2 characters dropped, and raw
# term counts via SMART "nnn" weighting (no tf/idf scaling yet).
tdm_control <- list(
  removeNumbers = TRUE,
  wordLengths = c(2, Inf),
  removePunctuation = TRUE,
  weighting = function(x) weightSMART(x, spec = "nnn")
)
tdm <- TermDocumentMatrix(Corpus(DataframeSource(DT)), control = tdm_control)
tdm <- as.matrix(tdm)
# Peek at the raw counts for the first five terms x documents.
print(tdm[1:5,1:5])
## Docs
## Terms 1 2 3 4 5
## abs 0 0 0 0 0
## access 0 0 0 0 0
## ad 0 0 0 0 0
## algorithm 0 0 0 0 0
## anti 0 0 0 0 0
# Log-entropy weighting (lsa package): local log term frequency scaled
# by each term's global entropy weight across documents.
tdm <- lw_logtf(tdm) * gw_entropy(tdm)
print(tdm[1:5,1:5])
## Docs
## Terms 1 2 3 4 5
## abs 0 0 0 0 0
## access 0 0 0 0 0
## ad 0 0 0 0 0
## algorithm 0 0 0 0 0
## anti 0 0 0 0 0
# Class label (technology code) for each document.
actual_y <- dt_kr%>%dplyr::select(기술)%>%.[[1]]
# Documents as rows (transpose of the TDM) with the label appended.
# NOTE(review): cbind() of a numeric matrix with a character vector
# coerces every column to character; h2o re-infers types on import, but
# confirm this if the file is ever read back with read.csv().
save_data <- data.frame(cbind(t(tdm),actual_y))
# NOTE(review): write.table() defaults to sep = " ", so despite the
# .csv extension this file is space-separated; h2o.importFile below
# auto-detects the separator.
write.table(save_data, paste(save_dir,'data/patent_kr.csv', sep=""),
row.names=F, col.names=F)
# Start (or connect to) a local H2O cluster using all cores (-1) and up
# to 6 GB of memory. This is the legacy h2o 2.x API, which returns a
# cluster handle that later calls take as their first argument.
h2oServer <- h2o.init(nthreads=-1, max_mem_size = "6g")
## Successfully connected to http://127.0.0.1:54321
##
## R is connected to H2O cluster:
## H2O cluster uptime: 2 minutes 17 seconds
## H2O cluster version: 2.8.4.4
## H2O cluster name: H2O_started_from_R
## H2O cluster total nodes: 1
## H2O cluster total memory: 5.33 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 4
## H2O cluster healthy: TRUE
# Parse the weighted document-term data into the H2O cluster.
data_hex <- h2o.importFile(h2oServer, path = paste(save_dir,"data/patent_kr.csv", sep=""))
##
##   |
##   |                                                                 |   0%
##   |
##   |=================================================================| 100%
# Reproducible 80/20 train/test split via a seeded uniform column.
random <- h2o.runif(data_hex, seed = 654321)
train.hex <- h2o.assign(data_hex[random <= 0.8, ], "train.hex")
test.hex <- h2o.assign(data_hex[random > 0.8, ], "test.hex")

# Class distribution of the label (last column) in the test set; the
# majority-class share is the naive baseline accuracy.
label_t <- test.hex %>%
  as.data.frame %>%
  select(ncol(test.hex)) %>%
  table
label_t[label_t == max(label_t)] / sum(label_t)
## C
## 0.7188
# Three-hidden-layer deep net (legacy h2o 2.x API) on the log-entropy
# weighted document-term matrix; predictors are all columns except the
# last, the last column is the class label.
my.dl <- h2o.deeplearning(x = 1:(ncol(train.hex)-1), y = ncol(train.hex), data=train.hex, validation=test.hex,
variable_importances=T,
activation = "RectifierWithDropout",
input_dropout_ratio = 0.25,
hidden_dropout_ratios = c(0.5,0.5,0.5),
adaptive_rate = T,
balance_classes = T,
train_samples_per_iteration = 1500,
hidden = c(250,250,250),
epochs = 15)
##
##   |
##   |                                                                 |   0%
##   |
##   |=================                                                |  26%
##   |
##   |==================================                               |  52%
##   |
##   |===================================================              |  78%
##   |
##   |=================================================================| 100%
# Model summary: training/validation error, confusion matrix, hit
# ratios, and variable importances.
print(my.dl)
## IP Address: 127.0.0.1
## Port : 54321
## Parsed Data Key: train.hex
##
## Deep Learning Model Key: DeepLearning_8498c03635a06f9bdf4dcf87175e4c13
##
## Training classification error: 0.01309
##
## Validation classification error: 0.2188
##
## Confusion matrix:
## Reported on test.hex
## Predicted
## Actual A B C D E G H Error
## A 2 0 1 1 0 0 0 0.50000
## B 0 1 1 0 0 0 0 0.50000
## C 1 0 21 1 0 0 0 0.08696
## D 0 0 0 0 0 0 0 NaN
## E 0 0 0 1 1 0 0 0.50000
## G 0 0 0 0 0 0 0 NaN
## H 0 0 1 0 0 0 0 1.00000
## Totals 3 1 24 3 1 0 0 0.21875
##
## Hit Ratios for Multi-class Classification:
## k hit_ratios
## 1 1 0.7812
## 2 2 0.8438
## 3 3 0.9062
## 4 4 0.9062
## 5 5 0.9375
## 6 6 1.0000
## 7 7 1.0000
##
## Relative Variable Importance:
## C667 C1043 C1079 C538 C586 C403 C1140 C931 C847 C1315 C1206
## 1 1 0.9852 0.9608 0.9602 0.9584 0.9473 0.94 0.9358 0.931 0.9272 0.9247
## C204 C1207 C382 C556 C330 C1294 C182 C448 C846 C1088
## 1 0.9191 0.9181 0.9179 0.9168 0.9165 0.9145 0.9138 0.9133 0.9125 0.9124
## C1144 C1092 C871 C1231 C848 C25 C805 C1154 C141 C1250
## 1 0.9121 0.9118 0.9109 0.9108 0.9099 0.9092 0.9088 0.9087 0.9078 0.9078
## C809 C1024 C1080 C360 C230 C640 C849 C1153 C996 C134 C941
## 1 0.9076 0.9072 0.9065 0.9053 0.9045 0.9041 0.9026 0.9025 0.901 0.901 0.9
## C425 C1109 C924 C518 C472 C842 C126 C430 C527 C764
## 1 0.8979 0.8977 0.8969 0.8964 0.8961 0.896 0.896 0.8957 0.8946 0.8934
## C1195 C881 C1129 C1269 C1271 C646 C821 C158 C192 C42
## 1 0.8932 0.8928 0.8927 0.8926 0.8926 0.8923 0.8923 0.8921 0.8919 0.8912
## C212 C777 C1287 C93 C810 C148 C668 C507 C936 C611
## 1 0.8908 0.8905 0.8899 0.8898 0.8896 0.8895 0.8894 0.8892 0.8892 0.8891
## C455 C550 C1075 C1170 C1102 C415 C8 C217 C469 C378
## 1 0.8889 0.8887 0.8884 0.8883 0.8881 0.8879 0.8876 0.8875 0.8875 0.8872
## C822 C213 C964 C1236 C1015 C207 C1097 C1073 C1309 C247
## 1 0.8871 0.8865 0.8864 0.8858 0.8855 0.8845 0.8831 0.883 0.883 0.8818
## C473 C63 C1291 C600 C1311 C33 C348 C906 C553 C1201
## 1 0.8817 0.8816 0.8815 0.8814 0.8814 0.8814 0.8811 0.8808 0.8807 0.8806
## C812 C738 C385 C965 C341 C845 C1149 C638 C1125 C15
## 1 0.8804 0.8803 0.8788 0.8787 0.8784 0.8783 0.8781 0.8779 0.8776 0.8774
## C1031 C1055 C255 C32 C467 C337 C658 C1326 C1111 C394
## 1 0.8774 0.8773 0.8769 0.8766 0.8763 0.8761 0.8761 0.8758 0.8756 0.8755
## C1046 C174 C276 C552 C1035 C705 C699 C218 C90 C942
## 1 0.8752 0.875 0.8748 0.8748 0.8745 0.8742 0.8741 0.8738 0.8737 0.8734
## C30 C1032 C334 C882 C610 C806 C317 C1146 C120 C1335
## 1 0.8733 0.8733 0.8733 0.873 0.873 0.8725 0.8722 0.8721 0.8714 0.8713
## C1021 C509 C1082 C727 C51 C850 C776 C644 C779 C1148
## 1 0.8708 0.8708 0.8706 0.8705 0.8704 0.8703 0.8703 0.8702 0.87 0.8699
## C766 C636 C653 C82 C60 C1204 C128 C1101 C1321 C311
## 1 0.8697 0.8695 0.8695 0.8694 0.8693 0.8692 0.869 0.869 0.8688 0.8682
## C898 C598 C3 C1316 C44 C238 C113 C346 C969 C558
## 1 0.8675 0.8675 0.8674 0.8674 0.8674 0.8674 0.8673 0.867 0.8664 0.8663
## C26 C1247 C763 C604 C377 C1027 C45 C735 C1118 C799
## 1 0.8661 0.8661 0.8661 0.8657 0.8656 0.8654 0.8654 0.8652 0.8652 0.865
## C892 C1332 C52 C544 C72 C808 C880 C540 C398 C940
## 1 0.8647 0.8644 0.8643 0.8642 0.8638 0.8638 0.8638 0.8638 0.8638 0.8637
## C23 C956 C1023 C266 C368 C1104 C919 C446 C745 C1186
## 1 0.8634 0.8633 0.8631 0.8628 0.8622 0.8618 0.8615 0.8615 0.8614 0.8612
## C1136 C1267 C369 C55 C1319 C12 C106 C156 C823 C1304
## 1 0.8609 0.8608 0.8607 0.8603 0.8602 0.8599 0.8597 0.8596 0.8595 0.8595
## C166 C562 C184 C589 C1083 C295 C768 C679 C27 C388
## 1 0.8593 0.8593 0.8591 0.859 0.8588 0.8588 0.8587 0.8584 0.8584 0.8581
## C664 C1008 C65 C811 C1197 C86 C1281 C288 C371 C1323
## 1 0.8581 0.858 0.8578 0.8577 0.8577 0.8576 0.8575 0.8573 0.8572 0.857
## C273 C397 C167 C216 C949 C379 C1222 C183 C1167 C1061
## 1 0.857 0.8567 0.8567 0.8566 0.8565 0.8565 0.8562 0.8559 0.8557 0.8556
## C410 C961 C979 C1120 C252 C504 C80 C402 C630 C978
## 1 0.8553 0.8553 0.8552 0.8552 0.8552 0.8552 0.8551 0.855 0.8549 0.8548
## C985 C899 C732 C693 C143 C1078 C339 C396 C1134 C1324
## 1 0.8547 0.8547 0.8545 0.8543 0.854 0.8538 0.8538 0.8537 0.8537 0.8536
## C505 C999 C1198 C313 C144 C541 C908 C66 C1239 C1068
## 1 0.8535 0.8533 0.8533 0.8531 0.8529 0.8529 0.8529 0.8528 0.8528 0.8523
## C723 C1050 C1158 C629 C277 C62 C984 C1090 C147 C475
## 1 0.8521 0.852 0.852 0.8519 0.8515 0.8514 0.8513 0.8513 0.8513 0.8512
## C1258 C1318 C740 C904 C772 C305 C493 C980 C179 C1327
## 1 0.8512 0.8512 0.8508 0.8507 0.8504 0.8503 0.8502 0.8502 0.8501 0.8501
## C1217 C214 C523 C831 C412 C1166 C258 C185 C401 C1178
## 1 0.85 0.85 0.8499 0.8497 0.8496 0.8495 0.8495 0.8494 0.8494 0.8493
## C498 C408 C245 C39 C1245 C950 C450 C1151 C1295 C998
## 1 0.8491 0.849 0.8489 0.8488 0.8487 0.8486 0.8486 0.8486 0.8486 0.8485
## C1241 C601 C885 C1246 C429 C585 C1265 C1163 C731 C268
## 1 0.8485 0.8484 0.8483 0.8483 0.8481 0.848 0.8479 0.8479 0.8478 0.8478
## C494 C1240 C1139 C61 C367 C1002 C393 C652 C1257 C537
## 1 0.8475 0.8471 0.8471 0.8471 0.8469 0.8469 0.8468 0.8467 0.8466 0.8466
## C962 C169 C903 C474 C717 C165 C499 C136 C427 C459
## 1 0.8465 0.8464 0.8464 0.8463 0.8463 0.8462 0.8462 0.8461 0.8461 0.846
## C1014 C905 C780 C1182 C1012 C151 C584 C13 C239 C750 C506
## 1 0.846 0.846 0.8459 0.8459 0.8459 0.8458 0.8453 0.8453 0.8452 0.845 0.845
## C730 C642 C1159 C952 C648 C1038 C342 C824 C967 C389
## 1 0.845 0.8449 0.8449 0.8448 0.8448 0.8444 0.8442 0.844 0.844 0.8439
## C643 C438 C700 C684 C757 C1086 C109 C755 C54 C465
## 1 0.8439 0.8438 0.8438 0.8436 0.8436 0.8435 0.8435 0.8434 0.8433 0.8433
## C233 C470 C1336 C1280 C922 C689 C993 C131 C975 C526
## 1 0.8432 0.843 0.8428 0.8428 0.8428 0.8428 0.8427 0.8427 0.8425 0.8425
## C1081 C1328 C1094 C515 C122 C37 C1334 C1016 C284 C762
## 1 0.8424 0.8424 0.8423 0.8421 0.8421 0.8418 0.8416 0.8415 0.8414 0.8412
## C988 C418 C592 C1284 C1177 C483 C1135 C84 C593 C683
## 1 0.8411 0.8411 0.8411 0.841 0.841 0.8409 0.8409 0.8407 0.8407 0.8406
## C1112 C688 C835 C18 C986 C1215 C481 C759 C671 C1013
## 1 0.8406 0.8402 0.8402 0.8401 0.8401 0.8399 0.8399 0.8398 0.8398 0.8397
## C478 C210 C110 C294 C304 C641 C879 C187 C865 C485
## 1 0.8396 0.8393 0.8392 0.839 0.839 0.839 0.8389 0.8389 0.8388 0.8387
## C1261 C69 C937 C1093 C1242 C665 C463 C1072 C1337 C424
## 1 0.8386 0.8386 0.8385 0.8385 0.8383 0.8381 0.838 0.838 0.8379 0.8377
## C564 C1160 C422 C343 C325 C1306 C853 C67 C291 C161
## 1 0.8377 0.8376 0.8375 0.8373 0.8373 0.8372 0.8371 0.8371 0.837 0.8369
## C227 C797 C108 C351 C307 C870 C1105 C1213 C231 C939
## 1 0.8369 0.8367 0.8367 0.8366 0.8366 0.8364 0.8364 0.836 0.8359 0.8359
## C726 C595 C1065 C1041 C1133 C36 C263 C100 C464 C771
## 1 0.8358 0.8357 0.8356 0.8355 0.8355 0.8355 0.8353 0.8353 0.8352 0.8352
## C322 C1164 C894 C888 C573 C264 C171 C938 C87 C609
## 1 0.835 0.835 0.835 0.8349 0.8348 0.8348 0.8346 0.8345 0.8345 0.8344
## C918 C219 C775 C1113 C982 C292 C1020 C674 C460 C670
## 1 0.8344 0.8344 0.8343 0.8343 0.8343 0.8343 0.8343 0.8343 0.8341 0.834
## C387 C7 C800 C1025 C528 C404 C1243 C262 C1262 C114
## 1 0.8339 0.8338 0.8336 0.8336 0.8336 0.8335 0.8335 0.8335 0.8334 0.8333
## C17 C741 C1077 C70 C1131 C123 C1157 C618 C285 C983
## 1 0.8333 0.8333 0.8332 0.8331 0.833 0.833 0.8329 0.8328 0.8328 0.8327
## C911 C614 C798 C1272 C1273 C355 C710 C501 C290 C1259
## 1 0.8327 0.8326 0.8325 0.8324 0.8324 0.8322 0.8321 0.832 0.8319 0.8319
## C1200 C802 C816 C861 C867 C312 C719 C818 C149 C912
## 1 0.8318 0.8318 0.8317 0.8317 0.8317 0.8315 0.8315 0.8314 0.8313 0.8313
## C1189 C571 C1143 C79 C1199 C1037 C1106 C1260 C1279 C1228
## 1 0.8312 0.8312 0.8311 0.831 0.8309 0.8308 0.8308 0.8308 0.8307 0.8306
## C795 C29 C869 C559 C449 C623 C43 C713 C1168 C574
## 1 0.8305 0.8304 0.8304 0.8303 0.8303 0.8301 0.83 0.8295 0.8293 0.8291
## C357 C1029 C452 C241 C1255 C1132 C1344 C634 C1226 C709
## 1 0.829 0.8289 0.8287 0.8287 0.8284 0.8283 0.8283 0.8282 0.828 0.828
## C1333 C172 C1165 C1301 C49 C421 C489 C168 C458 C436
## 1 0.8279 0.8277 0.8277 0.8277 0.8276 0.8276 0.8276 0.8276 0.8276 0.8276
## C1045 C220 C897 C28 C1141 C433 C326 C1053 C1069 C250
## 1 0.8275 0.8274 0.8273 0.8273 0.8272 0.8272 0.8271 0.8271 0.827 0.8269
## C613 C104 C414 C991 C1270 C278 C1216 C1005 C102 C866
## 1 0.8269 0.8268 0.8268 0.8267 0.8265 0.8264 0.8263 0.8263 0.8262 0.8262
## C129 C681 C466 C1218 C606 C1237 C782 C1249 C830 C627 C1329
## 1 0.8262 0.8261 0.8261 0.826 0.826 0.826 0.826 0.8259 0.8259 0.8256 0.8256
## C628 C1172 C758 C1054 C366 C500 C333 C1058 C907 C199
## 1 0.8255 0.8255 0.8253 0.8253 0.8252 0.8251 0.825 0.8249 0.8248 0.8247
## C75 C1256 C1314 C786 C1063 C567 C819 C73 C794 C568
## 1 0.8247 0.8245 0.8245 0.8244 0.8243 0.8241 0.8241 0.8239 0.8238 0.8235
## C790 C350 C607 C1 C316 C1227 C125 C257 C838 C566
## 1 0.8234 0.8233 0.8232 0.8231 0.8231 0.823 0.8228 0.8228 0.8226 0.8225
## C419 C578 C319 C119 C1187 C1017 C64 C1283 C11 C621
## 1 0.8224 0.8223 0.8222 0.8221 0.8221 0.8219 0.8218 0.8217 0.8216 0.8215
## C946 C619 C890 C88 C21 C633 C703 C293 C889 C932
## 1 0.8215 0.8214 0.8214 0.8214 0.8212 0.821 0.821 0.8208 0.8208 0.8208
## C635 C995 C380 C235 C803 C1085 C1191 C524 C242 C909 C246
## 1 0.8204 0.8204 0.8201 0.8201 0.82 0.82 0.82 0.82 0.82 0.82 0.8199
## C576 C281 C280 C1292 C817 C855 C1219 C921 C1006 C895
## 1 0.8197 0.8194 0.8193 0.8192 0.8192 0.8191 0.819 0.819 0.8189 0.8189
## C391 C1192 C1155 C1310 C1212 C359 C14 C232 C514 C1299
## 1 0.8188 0.8188 0.8188 0.8188 0.8188 0.8188 0.8187 0.8187 0.8187 0.8186
## C599 C1107 C733 C487 C742 C195 C1099 C577 C970 C691
## 1 0.8186 0.8186 0.8186 0.8184 0.8182 0.8182 0.8181 0.8181 0.8179 0.8178
## C228 C533 C1000 C117 C1181 C1345 C807 C1322 C1059 C783
## 1 0.8178 0.8178 0.8176 0.8174 0.8174 0.8173 0.8171 0.817 0.8169 0.8168
## C695 C722 C692 C792 C308 C519 C416 C530 C1042 C685
## 1 0.8168 0.8168 0.8167 0.8166 0.8165 0.8164 0.8164 0.8164 0.8158 0.8158
## C877 C596 C549 C787 C1161 C240 C957 C522 C1036 C140
## 1 0.8158 0.8157 0.8157 0.8157 0.8156 0.8156 0.8156 0.8156 0.8155 0.8155
## C215 C1266 C206 C439 C579 C447 C484 C107 C1047 C1124 C191
## 1 0.8154 0.8153 0.8152 0.8151 0.8151 0.815 0.815 0.815 0.815 0.8149 0.8149
## C557 C121 C1288 C354 C1317 C457 C675 C862 C1156 C917
## 1 0.8148 0.8148 0.8148 0.8148 0.8148 0.8147 0.8147 0.8146 0.8145 0.8142
## C352 C920 C269 C891 C237 C543 C208 C887 C1263 C820
## 1 0.8141 0.814 0.814 0.814 0.8139 0.8138 0.8137 0.8137 0.8136 0.8135
## C488 C229 C181 C686 C1096 C620 C5 C1152 C1275 C1234
## 1 0.8135 0.8135 0.8135 0.8134 0.8133 0.8133 0.8132 0.8132 0.8131 0.8131
## C512 C1308 C702 C303 C1343 C407 C953 C118 C279 C314
## 1 0.8131 0.813 0.813 0.813 0.8129 0.8129 0.8129 0.8129 0.8128 0.8126
## C1341 C1251 C1300 C960 C164 C275 C503 C71 C35 C97
## 1 0.8125 0.8125 0.8122 0.812 0.812 0.812 0.8119 0.8119 0.8118 0.8118
## C542 C244 C555 C548 C893 C152 C1026 C1084 C1119 C1070
## 1 0.8118 0.8118 0.8117 0.8117 0.8117 0.8117 0.8116 0.8115 0.8114 0.8113
## C547 C358 C1122 C517 C1220 C612 C154 C531 C46 C994 C948
## 1 0.8112 0.8111 0.811 0.811 0.811 0.811 0.8108 0.8108 0.8108 0.8108 0.8107
## C1087 C24 C1278 C318 C323 C254 C1011 C2 C1307 C1303
## 1 0.8103 0.8103 0.8103 0.8102 0.8101 0.81 0.8098 0.8098 0.8097 0.8097
## C534 C826 C101 C827 C974 C1117 C135 C370 C405 C221
## 1 0.8093 0.8091 0.8089 0.8088 0.8088 0.8084 0.8083 0.8083 0.8082 0.8082
## C1062 C678 C399 C1297 C1044 C56 C160 C428 C1312 C631
## 1 0.8082 0.808 0.808 0.8079 0.8078 0.8076 0.8076 0.8076 0.8075 0.8074
## C860 C697 C1274 C1019 C1313 C597 C1173 C1009 C968 C565
## 1 0.8074 0.8073 0.8072 0.8072 0.8071 0.8071 0.8068 0.8067 0.8066 0.8066
## C1252 C1123 C1244 C1034 C445 C1108 C753 C300 C933 C746
## 1 0.8065 0.8065 0.8063 0.8063 0.8062 0.8061 0.806 0.806 0.8058 0.8056
## C1169 C420 C825 C127 C1253 C754 C669 C462 C205 C851
## 1 0.8056 0.8056 0.8056 0.8055 0.8054 0.8054 0.8053 0.8053 0.805 0.8049
## C714 C381 C657 C68 C739 C1276 C133 C698 C112 C1214
## 1 0.8048 0.8048 0.8048 0.8047 0.8047 0.8047 0.8046 0.8045 0.8045 0.804
## C972 C451 C490 C1039 C98 C1229 C40 C525 C925 C535
## 1 0.8039 0.8039 0.8033 0.8032 0.8031 0.8028 0.8026 0.8026 0.8025 0.8025
## C20 C884 C587 C180 C900 C716 C336 C1202 C309 C329
## 1 0.8024 0.8023 0.802 0.8019 0.8019 0.8018 0.8017 0.8015 0.8015 0.8014
## C839 C222 C990 C1176 C495 C53 C59 C1060 C236 C442
## 1 0.8013 0.8013 0.8012 0.8012 0.8011 0.8008 0.8007 0.8007 0.8007 0.8006
## C453 C626 C1162 C1203 C1071 C923 C1103 C1116 C327 C1056
## 1 0.8003 0.8002 0.8002 0.8001 0.8001 0.8 0.7997 0.7997 0.7996 0.7995
## C929 C840 C1022 C375 C1064 C529 C1067 C624 C193 C274
## 1 0.7994 0.7994 0.7993 0.7992 0.7992 0.7991 0.799 0.7989 0.7986 0.7985
## C561 C682 C781 C253 C718 C432 C74 C752 C1127 C901
## 1 0.7981 0.7981 0.798 0.7978 0.7978 0.7977 0.7976 0.7976 0.7972 0.797
## C989 C814 C349 C344 C1230 C1040 C928 C788 C1233 C590
## 1 0.7969 0.7968 0.7968 0.7966 0.7965 0.796 0.796 0.7959 0.7958 0.7957
## C188 C935 C139 C1126 C536 C858 C1209 C437 C356 C511
## 1 0.7954 0.7953 0.7953 0.7953 0.7951 0.795 0.7949 0.7948 0.7947 0.7946
## C78 C286 C639 C138 C987 C706 C1076 C270 C883 C793
## 1 0.7946 0.7945 0.7944 0.7944 0.7943 0.7941 0.7939 0.7936 0.7933 0.7932
## C386 C163 C201 C476 C373 C202 C1225 C575 C76 C687
## 1 0.7932 0.7928 0.7927 0.7927 0.7924 0.7922 0.7922 0.792 0.7919 0.7918
## C272 C58 C546 C1089 C1248 C784 C486 C289 C332 C77
## 1 0.7917 0.7917 0.7916 0.7914 0.7913 0.7911 0.7911 0.7909 0.7909 0.7906
## C896 C729 C747 C400 C902 C1174 C977 C813 C690 C966
## 1 0.7906 0.7904 0.7902 0.7901 0.79 0.7898 0.7897 0.7894 0.7893 0.7892
## C1298 C1028 C502 C873 C934 C916 C944 C1004 C1254 C411
## 1 0.7891 0.7889 0.7886 0.7884 0.7883 0.7882 0.7882 0.7881 0.7881 0.7875
## C91 C720 C497 C724 C1331 C1211 C83 C1001 C299 C189
## 1 0.7875 0.7873 0.787 0.787 0.7867 0.7864 0.7863 0.7857 0.7856 0.7854
## C1190 C1115 C38 C177 C306 C19 C1100 C243 C837 C545
## 1 0.7854 0.7852 0.7847 0.7844 0.7837 0.7837 0.7836 0.7836 0.7834 0.7833
## C374 C973 C649 C532 C406 C582 C271 C365 C1007 C57
## 1 0.7832 0.7831 0.7823 0.7822 0.7821 0.782 0.782 0.7819 0.7816 0.7816
## C92 C767 C715 C440 C249 C265 C1121 C930 C749 C48
## 1 0.7813 0.7811 0.7806 0.7804 0.7803 0.7803 0.78 0.78 0.7799 0.7798
## C9 C707 C770 C423 C603 C81 C209 C857 C654 C1180
## 1 0.7797 0.7797 0.7794 0.7792 0.7792 0.7791 0.7789 0.7789 0.7787 0.7787
## C1330 C1066 C1285 C1074 C211 C615 C260 C223 C594 C1208
## 1 0.7785 0.7783 0.7782 0.7781 0.7778 0.7778 0.7778 0.7778 0.7777 0.7775
## C743 C1098 C951 C413 C124 C804 C632 C256 C335 C583
## 1 0.7775 0.7773 0.777 0.7768 0.7759 0.7757 0.7757 0.7756 0.7755 0.7755
## C563 C1030 C1346 C1264 C225 C1224 C971 C945 C1193 C103
## 1 0.7751 0.7748 0.7732 0.7732 0.7729 0.7726 0.7723 0.7718 0.7718 0.7715
## C852 C409 C283 C443 C454 C655 C734 C1142 C1188 C132
## 1 0.7714 0.7713 0.7711 0.771 0.7709 0.7709 0.7702 0.7702 0.7701 0.7689
## C551 C434 C637 C251 C876 C1223 C471 C854 C836 C1268
## 1 0.7681 0.7676 0.767 0.766 0.7647 0.7644 0.7628 0.7626 0.7623 0.7619
## C1302 C539 C372 C302 C910 C1171 C864 C390 C320 C959
## 1 0.7618 0.7617 0.7615 0.7615 0.7615 0.7613 0.7604 0.7603 0.7602 0.7601
## C992 C508 C176 C711 C298 C791 C1290 C146 C226 C417
## 1 0.76 0.7594 0.7587 0.7582 0.7582 0.7581 0.7572 0.7568 0.753 0.7511
## C660 C353 C1282 C301 C130 C331 C234
## 1 0.7484 0.7483 0.7479 0.7476 0.731 0.7287 0.7172
# Slot 5 of the legacy h2o 2.x model list — the validation confusion
# matrix (matches the summary printed above). NOTE(review): slot index
# is version-specific; confirm against the h2o 2.8 docs.
my.dl@model[[5]]
## Predicted
## Actual A B C D E G H Error
## A 2 0 1 1 0 0 0 0.50000
## B 0 1 1 0 0 0 0 0.50000
## C 1 0 21 1 0 0 0 0.08696
## D 0 0 0 0 0 0 0 NaN
## E 0 0 0 1 1 0 0 0.50000
## G 0 0 0 0 0 0 0 NaN
## H 0 0 1 0 0 0 0 1.00000
## Totals 3 1 24 3 1 0 0 0.21875
# Validation accuracy: slot 7 appears to hold the validation
# classification error (0.2188 above), so 1 - error = 0.7812.
# NOTE(review): confirm the slot index against the h2o 2.8 docs.
1 - my.dl@model[[7]] ## ACC
## [1] 0.7812
# Per-class predicted probabilities for the first test documents.
h2o.predict(my.dl, test.hex) %>%
  as.data.frame %>%
  head
## predict A B C D E G
## 1 A 0.6136039 0.111985 0.02641 0.0132594 0.179549 5.333e-03
## 2 C 0.0127641 0.008337 0.96056 0.0006452 0.016413 4.207e-05
## 3 D 0.0375538 0.005900 0.26127 0.4435623 0.041049 3.228e-04
## 4 A 0.5944300 0.115836 0.05802 0.0270273 0.137409 6.703e-02
## 5 B 0.0001642 0.809124 0.16376 0.0021986 0.024717 2.054e-05
## 6 C 0.0019222 0.126002 0.46913 0.3800802 0.008371 5.710e-03
## H
## 1 4.986e-02
## 2 1.238e-03
## 3 2.103e-01
## 4 2.508e-04
## 5 1.628e-05
## 6 8.789e-03
# Map the most important model columns ("C667", ...) back to the actual
# term names by stripping the "C" prefix and indexing save_data's
# column names; show the top 10.
importance_idx <- names(my.dl@model[[9]]) %>%
  str_replace_all("C", "") %>%
  as.numeric
colnames(save_data)[importance_idx[1:10]]
## [1] "안전" "주차" "지도" "상의" "속도" "목적지"
## [7] "추천" "전동" "인프라에" "확정"
# Random forest (legacy h2o 2.x "fast" SpeeDRF) on the same split.
# Passing depth = c(5, 10) makes this a two-model grid search: 5 trees
# at depth 5 and 5 trees at depth 10.
my.rf <- h2o.randomForest(x = 1:(ncol(train.hex)-1), y = ncol(train.hex), data = train.hex, validation = test.hex,
type="fast",
importance=TRUE,
ntree=c(5),
depth=c(5,10))
##
##   |
##   |===                                                              |   5%
##   |
##   |====================================                             |  55%
##   |
##   |=================================================================| 100%
# Grid-search summary: per-model parameters and validation error.
print(my.rf)
## IP Address: 127.0.0.1
## Port : 54321
## Parsed Data Key: train.hex
##
## Grid Search Model Key: GridSearch_902f65e42cb5cf70e09cf0fcf6348c4
##
## Summary
## model_key ntrees max_depth nbins
## 1 SpeeDRF_b4edb0f482b1f6fa1b0c0395fe504058 5 5 1024
## 2 SpeeDRF_afef12a710a12854797399683cc6435e 5 10 1024
## prediction_error run_time
## 1 0.2812 6433
## 2 0.2812 7481
# Confusion matrix of the first grid model on the validation set.
my.rf@model[[1]]@model$confusion
## Predicted
## Actual A B C D E G H Error
## A 0 0 4 0 0 0 0 1.0000
## B 0 0 2 0 0 0 0 1.0000
## C 0 0 23 0 0 0 0 0.0000
## D 0 0 0 0 0 0 0 NaN
## E 0 0 2 0 0 0 0 1.0000
## G 0 0 0 0 0 0 0 NaN
## H 0 0 1 0 0 0 0 1.0000
## Totals 0 0 32 0 0 0 0 0.2812
# Validation accuracy of each grid model (1 - prediction error); both
# depths predict only the majority class, matching the 0.7188 baseline.
print(1 - my.rf@sumtable[[1]]$prediction_error)
## [1] 0.7188
print(1 - my.rf@sumtable[[2]]$prediction_error)
## [1] 0.7188
# Shut down the local H2O cluster (prompts Y/N interactively).
h2o.shutdown(h2oServer)
## Are you sure you want to shutdown the H2O instance running at http://127.0.0.1:54321 (Y/N)?