kmeans

Input

| Param name | Type(s) | Default | Description |
|---|---|---|---|
| featuresCol | Vector | "features" | Feature vector |

Output

| Param name | Type(s) | Default | Description |
|---|---|---|---|
| predictionCol | Int | "prediction" | Predicted cluster center |
# k-means clustering example with spark.kmeans ----

# Turn the local iris data set into a SparkDataFrame. The formula below uses
# underscore column names (Sepal_Length, ...), whereas iris uses dots.
# NOTE(review): suppressWarnings() presumably hides the warning raised when
# createDataFrame() renames those columns -- confirm.
irisDF <- suppressWarnings(createDataFrame(iris))

# Training and test sets are both the full data set in this example, so the
# predictions further down are made on the same rows the model was fitted to.
kmeansDF <- irisDF
kmeansTestDF <- irisDF

# Fit a model with three clusters on the four measurement columns.
kmeansModel <- spark.kmeans(
  kmeansDF,
  ~ Sepal_Length + Sepal_Width + Petal_Length + Petal_Width,
  k = 3
)

# Print the model summary.
summary(kmeansModel)

# Show the fitted result for the training data.
showDF(fitted(kmeansModel))

# Predict on the test data and show the result.
kmeansPredictions <- predict(kmeansModel, kmeansTestDF)
showDF(kmeansPredictions)

summary結果

> summary(kmeansModel)
$k
[1] 3

$coefficients
  Sepal_Length Sepal_Width Petal_Length Petal_Width
1 5.883607     2.740984    4.388525     1.434426   
2 5.006        3.428       1.462        0.246      
3 6.853846     3.076923    5.715385     2.053846   

$size
$size[[1]]
[1] 61

$size[[2]]
[1] 50

$size[[3]]
[1] 39


$cluster
SparkDataFrame[prediction:int]

$is.loaded
[1] FALSE

results matching ""

    No results matching ""