randomForest
# Load the LIBSVM-format sample data set.
df <- read.df(path = "data/mllib/sample_libsvm_data.txt", source = "libsvm")

# The same data is used both for fitting and for scoring, so the
# predictions produced below are in-sample.
test <- df
training <- df

# Fit a random forest classifier made of 10 trees.
model <- spark.randomForest(
  data = training,
  formula = label ~ features,
  type = "classification",
  numTrees = 10
)

# Inspect the fitted model.
summary(model)

# Score the test set and display the resulting rows.
predictions <- predict(model, test)
showDF(predictions)
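結果
(注: 以下の表はラベルと予測値が連続値で、特徴量ベクトルも10次元であるため、上記の分類コード (sample_libsvm_data.txt, "classification") の出力ではなく、別データに対する回帰モデルの出力と思われます。)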
+-------------------+--------------------+--------------------+
|              label|            features|          prediction|
+-------------------+--------------------+--------------------+
| -9.490009878824548|(10,[0,1,2,3,4,5,...| -0.5952946889598917|
| 0.2577820163584905|(10,[0,1,2,3,4,5,...|  0.3905592077828489|
| -4.438869807456516|(10,[0,1,2,3,4,5,...|  -1.943054578215147|
|-19.782762789614537|(10,[0,1,2,3,4,5,...|  0.7869441065102672|
| -7.966593841555266|(10,[0,1,2,3,4,5,...|   1.884698176295899|
| -7.896274316726144|(10,[0,1,2,3,4,5,...|  -2.036167091174747|
| -8.464803554195287|(10,[0,1,2,3,4,5,...|   4.687457000545868|
| 2.1214592666251364|(10,[0,1,2,3,4,5,...|   -0.96645936792022|
| 1.0720117616524107|(10,[0,1,2,3,4,5,...| -3.1679334414062224|
|-13.772441561702871|(10,[0,1,2,3,4,5,...|  -3.058839075979845|
| -5.082010756207233|(10,[0,1,2,3,4,5,...| -2.8156937952733876|
|  7.887786536531237|(10,[0,1,2,3,4,5,...|  1.9217484288800157|
| 14.323146365332388|(10,[0,1,2,3,4,5,...|  3.4425306961556728|
|-20.057482615789212|(10,[0,1,2,3,4,5,...| -2.2042653645194323|
|-0.8995693247765151|(10,[0,1,2,3,4,5,...|    3.28016001188447|
| -19.16829262296376|(10,[0,1,2,3,4,5,...|  0.8742789261764511|
|  5.601801561245534|(10,[0,1,2,3,4,5,...|-0.18646704228130914|
|-3.2256352187273354|(10,[0,1,2,3,4,5,...|  -3.708134903960995|
| 1.5299675726687754|(10,[0,1,2,3,4,5,...|  1.9623153340518027|
| -0.250102447941961|(10,[0,1,2,3,4,5,...|  0.7669176091900198|
+-------------------+--------------------+--------------------+
only showing top 20 rows