> require("randomForest")
Loading required package: randomForest
randomForest 4.6-7
Type rfNews() to see new features/changes/bug fixes.
> tuneRF(d[,-8],d[,8],doBest=T)
mtry = 2 OOB error = 6.43%
Searching left ...
mtry = 1 OOB error = 9.23%
-0.4352332 0.05
Searching right ...
mtry = 4 OOB error = 6.6%
-0.02590674 0.05
Call:
randomForest(x = x, y = y, mtry = res[which.min(res[, 2]), 1])
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 2
OOB estimate of error rate: 6.4%
Confusion matrix:
No Yes class.error
No 1399 101 0.06733333
Yes 91 1409 0.06066667
> d.rf<-randomForest(cv~.,d,mtry=2)
> print(d.rf)
Call:
randomForest(formula = cv ~ ., data = d, mtry = 2)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 2
OOB estimate of error rate: 6.37%
Confusion matrix:
No Yes class.error
No 1403 97 0.06466667
Yes 94 1406 0.06266667
> importance(d.rf)
MeanDecreaseGini
a1 20.320854
a2 11.490523
a3 2.380128
a4 203.135651
a5 75.415005
a6 783.553501
a7 2.679649
> table(d$cv,predict(d.rf,d[,-8]))
No Yes
No 1409 91
Yes 83 1417
> require("randomForest")
> train_dat<-read.csv("Documents/kaggle/train.csv", header=TRUE)
> str(train_dat)
> train_dat$holiday <- as.factor(train_dat$holiday)
> train_dat$workingday <- as.factor(train_dat$workingday)
> train_dat$weather <- as.ordered(train_dat$weather)
> train_dat$season <- as.ordered(train_dat$season)
train_dat$datetime = as.POSIXct(train_dat$datetime)
> tuneRF(train_dat[,c(-10,-11,-12)],train_dat[,12],doBest=T)
mtry = 3 OOB error = 16201.56
Searching left ...
mtry = 2 OOB error = 17056
-0.05273804 0.05
Searching right ...
mtry = 6 OOB error = 15210.13
0.06119329 0.05
mtry = 9 OOB error = 14763.69
0.02935166 0.05
Call:
randomForest(x = x, y = y, mtry = res[which.min(res[, 2]), 1])
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 9
Mean of squared residuals: 14300.74
% Var explained: 56.41
> train_dat.rf<-randomForest(count~.,train_dat[,c(-10,-11)],mtry=9)
> print(train_dat.rf)
Call:
randomForest(formula = count ~ ., data = train_dat[, c(-10, -11)], mtry = 9)
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 9
Mean of squared residuals: 14306.06
% Var explained: 56.4
# テストデータを読み込む
> train_dat<-read.csv("Documents/kaggle/train.csv", header=TRUE)
> str(train_dat)
> test_dat<-read.csv("Documents/kaggle/test.csv", header=TRUE)
> str(test_dat)
> test_dat$holiday <- as.factor(test_dat$holiday)
> test_dat$workingday <- as.factor(test_dat$workingday)
> test_dat$weather <- as.ordered(test_dat$weather)
> test_dat$season <- as.ordered(test_dat$season)
> tuneRF(train_dat[,c(-10,-11,-12)],train_dat[,12],doBest=T)
# Compare a random forest with a single decision tree on the iris data, then
# inspect the forest: one of its trees, variable importance, partial
# dependence on Petal.Length per class, and the error-vs-trees curve.

# Both packages are needed: randomForest() and rpart() are called below.
library(randomForest)
library(rpart)

# Fix the RNG so the train/test split and the fitted forest are reproducible.
set.seed(1)

# 50/50 train/test split; floor() keeps the sample size an integer for odd n.
n <- nrow(iris)
s <- sample(n, floor(n * 0.5))
iris.train <- iris[s, ]
iris.test <- iris[-s, ]

# random forest
forest <- randomForest(Species ~ ., data = iris.train, ntree = 500)
# "response" yields predicted class labels; the legacy value "class" is
# accepted by predict.randomForest only as an alias for it.
pred.forest <- predict(forest, newdata = iris.test, type = "response")
# Cross-tabulate predictions against column 5 of the test set.
table(pred.forest, iris.test[, 5])

# decision tree
tree <- rpart(Species ~ ., data = iris.train)
pred.rpart <- predict(tree, iris.test, type = "class")
table(pred.rpart, iris.test[, 5])

# importance
getTree(forest, k = 1, labelVar = TRUE)
varImpPlot(forest)

# report
split.screen(c(2, 1))
# Split screen 2 into three columns; these become screens 3, 4 and 5.
split.screen(c(1, 3), screen = 2)
# NOTE(review): the partial-dependence plots are drawn from the full iris
# data, not only iris.train -- confirm this is intended.
screen(3)
partialPlot(forest, iris, Petal.Length, "setosa")
screen(4)
partialPlot(forest, iris, Petal.Length, "versicolor")
screen(5)
partialPlot(forest, iris, Petal.Length, "virginica")
split.screen(c(2, 1), screen = 1)
screen(1)
plot(forest)
# Spell out the argument name and TRUE: `all=T` relied on partial argument
# matching and on `T`, which can be reassigned.
close.screen(all.screens = TRUE)