Text-Based Core Technology Classification using Deep Learning

- Self-Driving Cars -

김형준
Data Analyst

Contents

  1. Setup - install libraries
  2. Preprocessing - parsing & NLP
  3. Analysis - random forest and a 3-layer deep neural network
rm(list = ls())                      # start from a clean workspace
save_dir <- "/Users/kimhyungjun/Dropbox/h2o/prac/"
options(repos = 'http://cran.nexr.com')
install_lib <- function(pkgs) {
  for (pkg in pkgs) {
    # require() returns TRUE invisibly if it was able to load the package
    if (!require(pkg, character.only = TRUE)) {
      # If the package could not be loaded, install it first
      install.packages(pkg, dependencies = TRUE)
    }
    # Load the package (a no-op if require() already attached it)
    library(pkg, character.only = TRUE)
  }
}

suppressMessages(install_lib(c("readxl", 
                               "dplyr", "stringr", 
                               "tm", "lsa",
                               "KoNLP", 
                               "h2o")))
data_list <- list.files(paste(save_dir, "data/wips", sep = ""))
data_list_kr <- data_list[grep("kr", data_list)]   # keep only the Korean patent files

# Read each Korean patent export and stack the rows; the first character of
# the file name encodes the technology (IPC section) label.
data <- data.frame()
for (i in seq_along(data_list_kr))
{
    data_temp <- read_excel(paste(save_dir, "data/wips/", data_list_kr[i], sep = ""))
    data_temp <- cbind(rep(substr(data_list_kr[i], 1, 1), nrow(data_temp)), data_temp)
    colnames(data_temp)[1] <- "기술"   # "technology": the class label
    data <- rbind(data, data_temp)
    rm(data_temp)
}
# Keep the label plus the key metadata columns
# (기술 = technology label, 출원일 = filing date, 발명의 명칭 = invention title,
#  요약 = abstract, 국가코드 = country code)
dt_kr <- data[, c("기술", "Original IPC Main", "출원일", "발명의 명칭", "요약", "국가코드")]
dim(dt_kr)
## [1] 126   6
dt_text <- dt_kr %>% dplyr::select(요약) %>% .[[1]]   # abstracts only
# Extract nouns from each abstract with KoNLP, then rejoin them into
# space-separated strings so tm can treat each abstract as a document.
DT <- sapply(dt_text, extractNoun, USE.NAMES = F) %>%
      sapply(function(x) paste(x, collapse = ' ')) %>%
      as.data.frame
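The second sapply() call collapses each noun vector back into one
space-separated string, the per-document format that tm's DataframeSource
expects. A toy illustration of just that collapse step (made-up noun
vectors, not real extractNoun output):

# Pretend extractNoun already returned these noun vectors:
nouns <- list(c("주행", "제어"), c("센서", "인식"))
sapply(nouns, function(x) paste(x, collapse = ' '))
## [1] "주행 제어" "센서 인식"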
# Term-document matrix over the noun corpus; weightSMART "nnn" keeps raw
# term counts (no local/global/normalization weighting at this stage).
tdm <- TermDocumentMatrix(Corpus(DataframeSource(DT)),
                          control = list(
                              removeNumbers = TRUE,
                              wordLengths = c(2, Inf),   # drop single-character tokens
                              removePunctuation = TRUE,
                              weighting = function(x)
                                  weightSMART(x, spec = "nnn")))
tdm <- as.matrix(tdm)
print(tdm[1:5,1:5])
##            Docs
## Terms       1 2 3 4 5
##   abs       0 0 0 0 0
##   access    0 0 0 0 0
##   ad        0 0 0 0 0
##   algorithm 0 0 0 0 0
##   anti      0 0 0 0 0
tdm <- lw_logtf(tdm) * gw_entropy(tdm)   # log-entropy weighting (lsa package)
print(tdm[1:5,1:5])
##            Docs
## Terms       1 2 3 4 5
##   abs       0 0 0 0 0
##   access    0 0 0 0 0
##   ad        0 0 0 0 0
##   algorithm 0 0 0 0 0
##   anti      0 0 0 0 0
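For reference, the lsa call above applies log-entropy weighting: a local log
weight per cell times a global entropy weight per term. A minimal sketch of
the formulas on a toy count matrix (my reading of the lsa documentation;
verify against the package before relying on it):

# Toy 3-term x 2-document count matrix
m <- matrix(c(2, 0, 1,
              0, 3, 1),
            nrow = 3,
            dimnames = list(c("t1", "t2", "t3"), c("d1", "d2")))

local <- log(m + 1)                     # lw_logtf: log(count + 1)

# gw_entropy: 1 + sum_j p_ij * log(p_ij) / log(n_docs),
# where p_ij is the share of term i's occurrences falling in document j
p <- m / rowSums(m)
global <- 1 + rowSums(ifelse(p > 0, p * log(p), 0)) / log(ncol(m))

local * global                          # should equal lw_logtf(m) * gw_entropy(m)

Terms concentrated in one document get a global weight near 1; terms spread
evenly across documents get a weight near 0 and are effectively discounted.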
actual_y <- dt_kr %>% dplyr::select(기술) %>% .[[1]]   # class labels
# Documents as rows, terms as columns, label last. Writing without a header
# (col.names = F) means h2o will auto-name the columns C1..Cn on import.
save_data <- data.frame(cbind(t(tdm), actual_y))
write.table(save_data, paste(save_dir, 'data/patent_kr.csv', sep = ""),
            row.names = F, col.names = F)
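Because the header is dropped, h2o's column Ck corresponds to the k-th term
row of tdm. A quick sanity check of that alignment (a sketch; k is an
arbitrary index, and data.frame() can mangle some names via check.names):

k <- 10
rownames(tdm)[k]         # the term h2o will later report as column C10
colnames(save_data)[k]   # the same term as stored locally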
h2oServer <- h2o.init(nthreads=-1, max_mem_size = "6g")
## Successfully connected to http://127.0.0.1:54321 
## 
## R is connected to H2O cluster:
##     H2O cluster uptime:         2 minutes 17 seconds 
##     H2O cluster version:        2.8.4.4 
##     H2O cluster name:           H2O_started_from_R 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   5.33 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  4 
##     H2O cluster healthy:        TRUE
data_hex <- h2o.importFile(h2oServer, path = paste(save_dir,"data/patent_kr.csv", sep=""))
random <- h2o.runif(data_hex, seed = 654321)                    # uniform draw per row
train.hex <- h2o.assign(data_hex[random <= .8, ], "train.hex")  # ~80% train
test.hex  <- h2o.assign(data_hex[random > .8, ], "test.hex")    # ~20% test
# Majority-class share of the test labels: the baseline any classifier must beat
label_t <- test.hex %>% as.data.frame %>% select(ncol(test.hex)) %>% table
label_t[label_t == max(label_t)] / sum(label_t)
##      C 
## 0.7188
my.dl <- h2o.deeplearning(x = 1:(ncol(train.hex) - 1), y = ncol(train.hex),
                          data = train.hex, validation = test.hex,
                          variable_importances = T,
                          activation = "RectifierWithDropout",   # ReLU units with dropout
                          input_dropout_ratio = 0.25,            # drop 25% of inputs per row
                          hidden_dropout_ratios = c(0.5, 0.5, 0.5),
                          adaptive_rate = T,                     # ADADELTA learning rate
                          balance_classes = T,                   # oversample minority classes
                          train_samples_per_iteration = 1500,
                          hidden = c(250, 250, 250),             # three hidden layers
                          epochs = 15)
my.dl
## IP Address: 127.0.0.1 
## Port      : 54321 
## Parsed Data Key: train.hex 
## 
## Deep Learning Model Key: DeepLearning_8498c03635a06f9bdf4dcf87175e4c13
## 
## Training classification error: 0.01309
## 
## Validation classification error: 0.2188
## 
## Confusion matrix:
## Reported on test.hex 
##         Predicted
## Actual   A B  C D E G H   Error
##   A      2 0  1 1 0 0 0 0.50000
##   B      0 1  1 0 0 0 0 0.50000
##   C      1 0 21 1 0 0 0 0.08696
##   D      0 0  0 0 0 0 0     NaN
##   E      0 0  0 1 1 0 0 0.50000
##   G      0 0  0 0 0 0 0     NaN
##   H      0 0  1 0 0 0 0 1.00000
##   Totals 3 1 24 3 1 0 0 0.21875
## 
## Hit Ratios for Multi-class Classification:
##   k hit_ratios
## 1 1     0.7812
## 2 2     0.8438
## 3 3     0.9062
## 4 4     0.9062
## 5 5     0.9375
## 6 6     1.0000
## 7 7     1.0000
## 
## Relative Variable Importance (first 11 columns shown; the long tail,
## declining smoothly to 0.7172, is omitted):
##   C667  C1043  C1079   C538   C586   C403 C1140   C931  C847  C1315  C1206
## 1    1 0.9852 0.9608 0.9602 0.9584 0.9473  0.94 0.9358 0.931 0.9272 0.9247
my.dl@model[[5]]   # validation confusion matrix (same table as printed above)
##         Predicted
## Actual   A B  C D E G H   Error
##   A      2 0  1 1 0 0 0 0.50000
##   B      0 1  1 0 0 0 0 0.50000
##   C      1 0 21 1 0 0 0 0.08696
##   D      0 0  0 0 0 0 0     NaN
##   E      0 0  0 1 1 0 0 0.50000
##   G      0 0  0 0 0 0 0     NaN
##   H      0 0  1 0 0 0 0 1.00000
##   Totals 3 1 24 3 1 0 0 0.21875
1 - my.dl@model[[7]]   # accuracy = 1 - validation classification error
## [1] 0.7812
head(h2o.predict(my.dl, test.hex)%>%as.data.frame)
##   predict         A        B       C         D        E         G
## 1       A 0.6136039 0.111985 0.02641 0.0132594 0.179549 5.333e-03
## 2       C 0.0127641 0.008337 0.96056 0.0006452 0.016413 4.207e-05
## 3       D 0.0375538 0.005900 0.26127 0.4435623 0.041049 3.228e-04
## 4       A 0.5944300 0.115836 0.05802 0.0270273 0.137409 6.703e-02
## 5       B 0.0001642 0.809124 0.16376 0.0021986 0.024717 2.054e-05
## 6       C 0.0019222 0.126002 0.46913 0.3800802 0.008371 5.710e-03
##           H
## 1 4.986e-02
## 2 1.238e-03
## 3 2.103e-01
## 4 2.508e-04
## 5 1.628e-05
## 6 8.789e-03
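The columns after predict are per-class probabilities, so the predicted
label should always be the argmax of its row. A quick consistency check
(a sketch against the frames above):

pred <- h2o.predict(my.dl, test.hex) %>% as.data.frame
# TRUE if every predicted label matches the highest-probability class
all(pred$predict == colnames(pred)[-1][max.col(pred[, -1], ties.method = "first")])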
# Map the ten most important h2o columns (C-numbers) back to their terms;
# valid because the CSV column order follows t(tdm)'s term order.
colnames(save_data)[str_replace_all(names(my.dl@model[[9]]), "C", "") %>% as.numeric %>% .[1:10]]
##  [1] "안전"     "주차"     "지도"     "상의"     "속도"     "목적지"  
##  [7] "추천"     "전동"     "인프라에" "확정"
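The top nouns translate roughly to safety (안전), parking (주차), map (지도),
speed (속도), destination (목적지), recommendation (추천), electric drive
(전동), infrastructure (인프라에, with a particle attached), and confirmation
(확정); 상의 is ambiguous out of context. To inspect more than ten terms, the
same index arithmetic extends to a full importance table; a sketch under the
column-alignment assumption above (top_terms is an illustrative name):

vi  <- my.dl@model[[9]]                                 # named relative importances
idx <- str_replace_all(names(vi), "C", "") %>% as.numeric
top_terms <- data.frame(term       = colnames(save_data)[idx],
                        importance = unlist(vi))
head(top_terms, 20)                                     # twenty most informative nouns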
my.rf <- h2o.randomForest(x = 1:(ncol(train.hex) - 1), y = ncol(train.hex),
                          data = train.hex, validation = test.hex,
                          type = "fast",       # SpeeDRF implementation
                          importance = TRUE,
                          ntree = c(5),
                          depth = c(5, 10))    # grid search over two tree depths
print(my.rf)
## IP Address: 127.0.0.1 
## Port      : 54321 
## Parsed Data Key: train.hex 
## 
## Grid Search Model Key: GridSearch_902f65e42cb5cf70e09cf0fcf6348c4 
## 
## Summary
##                                  model_key ntrees max_depth nbins
## 1 SpeeDRF_b4edb0f482b1f6fa1b0c0395fe504058      5         5  1024
## 2 SpeeDRF_afef12a710a12854797399683cc6435e      5        10  1024
##   prediction_error run_time
## 1           0.2812     6433
## 2           0.2812     7481
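Both grid entries tie at 0.2812 validation error, so indexing the first model
below is arbitrary; a sketch for picking the lowest-error entry
programmatically (assuming @sumtable follows the summary's ordering and that
h2o 2.x keeps grid members in the @model list):

err  <- sapply(my.rf@sumtable, function(s) s$prediction_error)
best <- my.rf@model[[which.min(err)]]   # grid member with the lowest error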
my.rf@model[[1]]@model$confusion
##         Predicted
## Actual   A B  C D E G H  Error
##   A      0 0  4 0 0 0 0 1.0000
##   B      0 0  2 0 0 0 0 1.0000
##   C      0 0 23 0 0 0 0 0.0000
##   D      0 0  0 0 0 0 0    NaN
##   E      0 0  2 0 0 0 0 1.0000
##   G      0 0  0 0 0 0 0    NaN
##   H      0 0  1 0 0 0 0 1.0000
##   Totals 0 0 32 0 0 0 0 0.2812
print(1 - my.rf@sumtable[[1]]$prediction_error)
## [1] 0.7188
print(1 - my.rf@sumtable[[2]]$prediction_error)
## [1] 0.7188

Both random forest configurations score 0.7188, which is exactly the majority-class baseline computed earlier, while the deep network reaches 0.7812 validation accuracy.
h2o.shutdown(h2oServer)
## Are you sure you want to shutdown the H2O instance running at http://127.0.0.1:54321 (Y/N)?