-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathNormalizerSample.java
More file actions
42 lines (33 loc) · 1.24 KB
/
NormalizerSample.java
File metadata and controls
42 lines (33 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
package spark.sample;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
//import org.apache.spark.ml.feature.StandardScaler;
import org.apache.spark.mllib.feature.StandardScaler;
import org.apache.spark.mllib.feature.StandardScalerModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.rdd.RDD;
/**
 * Demonstrates feature standardization with MLlib's {@code StandardScaler}:
 * each column of the dataset is centered to zero mean and scaled to unit
 * variance, then the transformed vectors are printed.
 */
public class NormalizerSample {
    public static void main(String[] args) {
        // Sample.context() supplies the configured JavaSparkContext;
        // try-with-resources guarantees the context is stopped on exit.
        try (JavaSparkContext sc = Sample.context()) {
            Vector vector1 = Vectors.dense(-2.0, 5.0, 1.0);
            Vector vector2 = Vectors.dense(2.0, 0.0, 1.0);
            // Fixed-size list is sufficient here; avoids the double-brace
            // anonymous-subclass anti-pattern of the original.
            List<Vector> vectors = Arrays.asList(vector1, vector2);
            JavaRDD<Vector> dataset = sc.parallelize(vectors);

            // withMean=true, withStd=true: subtract the column mean and
            // divide by the column standard deviation.
            StandardScaler sscaler = new StandardScaler(true, true);
            StandardScalerModel model = sscaler.fit(JavaRDD.toRDD(dataset));

            // transform() works on the Scala RDD; convert back to JavaRDD
            // so we can collect() the results (RDD has no collectPartitions()).
            JavaRDD<Vector> result = model.transform(JavaRDD.toRDD(dataset)).toJavaRDD();

            // Expected output (2 rows, 3 columns):
            //   [-0.7071..., 0.7071..., 0.0]
            //   [ 0.7071..., -0.7071..., 0.0]
            for (Vector v : result.collect()) {
                System.out.println(v);
            }
        }
    }
}