Anda di halaman 1dari 13

R Assignment: Classification of Ocean Microbes
Chia
Load data into workspace.
library("tree")
library("ggplot2")
# Read the measurements from seaflow_21min.csv into a data frame.
# `stringsAsFactors = TRUE` keeps text columns such as `pop` as factors on
# R >= 4.0 (where the read.csv default changed); the classifiers below are
# fitted with `pop` as the class label.
seaflow <- read.csv(
  "seaflow_21min.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
Plot pe against chl_small
##     file_id           time          cell_id            d1
##  Min.   :203.0   Min.   : 12.0   Min.   :    0   Min.   : 1328
##  1st Qu.:204.0   1st Qu.:174.0   1st Qu.: 7486   1st Qu.: 7296
##  Median :206.0   Median :362.0   Median :14995   Median :17728
##  Mean   :206.2   Mean   :341.5   Mean   :15008   Mean   :17039
##  3rd Qu.:208.0   3rd Qu.:503.0   3rd Qu.:22401   3rd Qu.:24512
##  Max.   :209.0   Max.   :643.0   Max.   :32081   Max.   :54048
##        d2          fsc_small        fsc_perp        fsc_big
##  Min.   :   32   Min.   :10005   Min.   :    0   Min.   :32384
##  1st Qu.: 9584   1st Qu.:31341   1st Qu.:13496   1st Qu.:32400
##  Median :18512   Median :35483   Median :18069   Median :32400
##  Mean   :17437   Mean   :34919   Mean   :17646   Mean   :32405
##  3rd Qu.:24656   3rd Qu.:39184   3rd Qu.:22243   3rd Qu.:32416
##  Max.   :54688   Max.   :65424   Max.   :63456   Max.   :32464
##        pe          chl_small        chl_big           pop
##  Min.   :    0   Min.   : 3485   Min.   :    0   crypto :  102
##  1st Qu.: 1635   1st Qu.:22525   1st Qu.: 2800   nano   :12698
##  Median : 2421   Median :30512   Median : 7744   pico   :20860
##  Mean   : 5325   Mean   :30164   Mean   : 8328   synecho:18146
##  3rd Qu.: 5854   3rd Qu.:38299   3rd Qu.:12880   ultra  :20537
##  Max.   :58675   Max.   :64832   Max.   :57184

Partitions

library("caret")
## Loading required package: lattice
# Split the rows 50/50 into a training set and a disjoint test set.
# Partitioning on the outcome `pop` (rather than on the row index) makes
# createDataPartition() sample within each class, so the class mix is
# preserved in both halves.
train_index <- createDataPartition(
  seaflow$pop,
  times = 1,
  p = 0.5,
  list = FALSE
)
trainset <- seaflow[train_index, ]
# Negative indexing keeps only the rows that were NOT drawn for training.
# Reusing `train_index` here would make the test set identical to the
# training set and turn every accuracy below into a training accuracy.
testset <- seaflow[-train_index, ]
Decision Tree
library("rpart")
# Model formula: predict `pop` from the six measurement columns.
# The same formula object is reused by the random forest and SVM fits.
fol <- formula(
  pop ~ fsc_small + fsc_perp + fsc_big + pe + chl_big + chl_small
)
# method = "class" asks rpart for a classification tree.
model_dt <- rpart(fol, method = "class", data = trainset)
print(model_dt)
## n= 36172
##
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
##
##  1) root 36172 25756 pico (0.0014 0.17 0.29 0.25 0.28)
##    2) pe< 5001.5 26312 15954 pico (0 0.22 0.39 0 0.39)
##      4) chl_small< 32273.5 11501 2055 pico (0 0.00035 0.82 0 0.18) *
##      5) chl_small>=32273.5 14811 6703 ultra (0 0.39 0.062 0 0.55)
##       10) chl_small>=41054.5 5382 771 nano (0 0.86 0 0 0.14) *
##       11) chl_small< 41054.5 9429 2092 ultra (0 0.13 0.097 0 0.78) *
##    3) pe>=5001.5 9860 794 synecho (0.0052 0.054 0.0059 0.92 0.015)
##      6) chl_small>=37876 668 145 nano (0.076 0.78 0 0.073 0.067) *
##      7) chl_small< 37876 9192 175 synecho (0 0.0013 0.0063 0.98 0.011) *
# Score the test set with the decision tree. The result is used below as a
# matrix with one row per observation and one column per class.
predict_dt <- predict(model_dt, newdata = testset)
# Pair each row name with the class whose column holds the row maximum.
prepop_dt <- cbind(
  rownames(predict_dt),
  colnames(predict_dt)[apply(predict_dt, 1, which.max)]
)
# Accuracy = share of predictions equal to the true `pop`, looked up in
# `seaflow` by row name.
accuracy_dt <- mean(prepop_dt[, 2] == seaflow[prepop_dt[, 1], "pop"])
## Loading required package: grid

Random Forest
library("randomForest")
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
# Fit a random forest on the training set with the shared formula.
model_rf <- randomForest(fol, data = trainset)
# Alternative fit, kept for reference:
# model_rf <- randomForest(fol, data = trainset, importance = TRUE,
#                          keep.forest = TRUE)

# Testing: predict the test rows, then compare with the true `pop`,
# looked up in `seaflow` by the labels carried on the prediction vector.
predict_rf <- predict(model_rf, newdata = testset)
accuracy_rf <- mean(predict_rf == seaflow[labels(predict_rf), "pop"])
print(accuracy_rf)
##[1]0.9202676
# Gini coefficient.
# "The higher the number, the more the gini impurity score decreases by
# branching on this variable, indicating that the variable is more important"
importance(model_rf)

##           MeanDecreaseGini
## fsc_small        2705.5014
## fsc_perp         2098.5750
## fsc_big           206.2317
## pe               8802.4334
## chl_big          4923.9151
## chl_small        8113.0261
SVM
library("e1071")
#SVM
#model<svm(fol,data=trainingdata)
# Fit a support vector machine on the training set with the shared formula.
model_svm <- svm(fol, data = trainset)

# Testing: predict the test rows, then compare with the true `pop`,
# looked up in `seaflow` by the labels carried on the prediction vector.
predict_svm <- predict(model_svm, newdata = testset)
accuracy_svm <- mean(predict_svm == seaflow[labels(predict_svm), "pop"])
print(accuracy_svm)
##[1]0.919687
Confusion Matrices
##true
##predcryptonanopicosynechoultra
##nano515204138870
##pico01949101978
##synecho0934904294
##ultra0115491807286
##true
##predcryptonanopicosynechoultra
##crypto512010
##nano0556801331
##pico001008101383
##synecho03390774
##ultra079536018510
##true
##predcryptonanopicosynechoultra
##crypto491020
##nano1565001369
##pico0010036281367
##synecho155590478
##ultra071235328484

## Confusion Matrix and Statistics
##
##           Reference
## Prediction crypto  nano  pico synecho ultra
##    crypto      49     1     0       2     0
##    nano         1  5650     0       1   369
##    pico         0     0 10036      28  1367
##    synecho      1     5    55    9047     8
##    ultra        0   712   353       2  8484
##
## Overall Statistics
##
##                Accuracy : 0.9197
##                  95% CI : (0.9168, 0.9225)
##     No Information Rate : 0.2887
##     P-Value [Acc > NIR] : < 2.2e-16
##
##                   Kappa : 0.8917
##  Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
##                      Class: crypto Class: nano Class: pico Class: synecho
## Sensitivity               0.960784      0.8872      0.9609         0.9964
## Specificity               0.999917      0.9876      0.9458         0.9975
## Pos Pred Value            0.942308      0.9384      0.8780         0.9924
## Neg Pred Value            0.999945      0.9762      0.9835         0.9988
## Prevalence                0.001410      0.1761      0.2887         0.2510
## Detection Rate            0.001355      0.1562      0.2775         0.2501
## Detection Prevalence      0.001438      0.1665      0.3160         0.2520
## Balanced Accuracy         0.980351      0.9374      0.9534         0.9969
##                      Class: ultra
## Sensitivity                0.8295
## Specificity                0.9589
## Pos Pred Value             0.8883
## Neg Pred Value             0.9345
## Prevalence                 0.2828
## Detection Rate             0.2346
## Detection Prevalence       0.2641
## Balanced Accuracy          0.8942
Discrete Variable?

Anda mungkin juga menyukai