Powered by GitBook

randomForest

# Load the sample data set through the "libsvm" data source.
df <- read.df(
  "data/mllib/sample_libsvm_data.txt",
  source = "libsvm"
)
# The same data is used for both fitting and scoring (no hold-out split).
training <- df
test <- df

# Train a 10-tree random forest classifier of `label` on `features`.
model <- spark.randomForest(
  training,
  label ~ features,
  type = "classification",
  numTrees = 10
)

# Print the summary of the fitted model.
summary(model)

# Score the test data and display the predictions.
predictions <- predict(model, test)
showDF(predictions)

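結果 (Result)

Note: the table below shows continuous-valued labels and predictions over 10-dimensional feature vectors, which does not match the classification model fitted above; it appears to have been copied from a different (regression) example and should be regenerated from this script.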

+-------------------+--------------------+--------------------+
|              label|            features|          prediction|
+-------------------+--------------------+--------------------+
| -9.490009878824548|(10,[0,1,2,3,4,5,...| -0.5952946889598917|
| 0.2577820163584905|(10,[0,1,2,3,4,5,...|  0.3905592077828489|
| -4.438869807456516|(10,[0,1,2,3,4,5,...|  -1.943054578215147|
|-19.782762789614537|(10,[0,1,2,3,4,5,...|  0.7869441065102672|
| -7.966593841555266|(10,[0,1,2,3,4,5,...|   1.884698176295899|
| -7.896274316726144|(10,[0,1,2,3,4,5,...|  -2.036167091174747|
| -8.464803554195287|(10,[0,1,2,3,4,5,...|   4.687457000545868|
| 2.1214592666251364|(10,[0,1,2,3,4,5,...|   -0.96645936792022|
| 1.0720117616524107|(10,[0,1,2,3,4,5,...| -3.1679334414062224|
|-13.772441561702871|(10,[0,1,2,3,4,5,...|  -3.058839075979845|
| -5.082010756207233|(10,[0,1,2,3,4,5,...| -2.8156937952733876|
|  7.887786536531237|(10,[0,1,2,3,4,5,...|  1.9217484288800157|
| 14.323146365332388|(10,[0,1,2,3,4,5,...|  3.4425306961556728|
|-20.057482615789212|(10,[0,1,2,3,4,5,...| -2.2042653645194323|
|-0.8995693247765151|(10,[0,1,2,3,4,5,...|    3.28016001188447|
| -19.16829262296376|(10,[0,1,2,3,4,5,...|  0.8742789261764511|
|  5.601801561245534|(10,[0,1,2,3,4,5,...|-0.18646704228130914|
|-3.2256352187273354|(10,[0,1,2,3,4,5,...|  -3.708134903960995|
| 1.5299675726687754|(10,[0,1,2,3,4,5,...|  1.9623153340518027|
| -0.250102447941961|(10,[0,1,2,3,4,5,...|  0.7669176091900198|
+-------------------+--------------------+--------------------+
only showing top 20 rows

results matching ""

No results matching ""