Ultimate-Java-Resources/Machine Learning/SVM.java at master · CodeForHunger/Ultimate-Java-Resources · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/* SVM Classifier */

package smile.classification;

import smile.base.svm.KernelMachine;
import smile.base.svm.LinearKernelMachine;
import smile.base.svm.LASVM;
import smile.util.SparseArray;
import smile.math.kernel.BinarySparseLinearKernel;
import smile.math.kernel.LinearKernel;
import smile.math.kernel.MercerKernel;
import smile.math.kernel.SparseLinearKernel;

/**
 * Support vector machines for classification. The basic support vector machine
 * is a binary linear classifier which chooses the hyperplane that represents
 * the largest separation, or margin, between the two classes. If such a
 * hyperplane exists, it is known as the maximum-margin hyperplane and the
 * linear classifier it defines is known as a maximum margin classifier.
 * <p>
 * If there exists no hyperplane that can perfectly split the positive and
 * negative instances, the soft margin method will choose a hyperplane
 * that splits the instances as cleanly as possible, while still maximizing
 * the distance to the nearest cleanly split instances.
 * <p>
 * The nonlinear SVMs are created by applying the kernel trick to
 * maximum-margin hyperplanes. The resulting algorithm is formally similar,
 * except that every dot product is replaced by a nonlinear kernel function.
 * This allows the algorithm to fit the maximum-margin hyperplane in a
 * transformed feature space. The transformation may be nonlinear and
 * the transformed space may be high dimensional. For example, the feature space
 * corresponding to the Gaussian kernel is a Hilbert space of infinite dimension.
 * Thus though the classifier is a hyperplane in the high-dimensional feature
 * space, it may be nonlinear in the original input space. Maximum margin
 * classifiers are well regularized, so the infinite dimension does not spoil
 * the results.
 * <p>
 * The effectiveness of SVM depends on the selection of kernel, the kernel's
 * parameters, and soft margin parameter C. Given a kernel, the best combination
 * of C and the kernel's parameters is often selected by a grid-search with
 * cross validation.
 * <p>
 * The dominant approach for creating multi-class SVMs is to reduce the
 * single multi-class problem into multiple binary classification problems.
 * Common methods for such reduction are to build binary classifiers which
 * distinguish between (i) one of the labels and the rest (one-versus-all)
 * or (ii) between every pair of classes (one-versus-one). Classification
 * of new instances for one-versus-all case is done by a winner-takes-all
 * strategy, in which the classifier with the highest output function assigns
 * the class. For the one-versus-one approach, classification
 * is done by a max-wins voting strategy, in which every classifier assigns
 * the instance to one of the two classes, then the vote for the assigned
 * class is increased by one vote, and finally the class with most votes
 * determines the instance classification.
 */
public class SVM<T> extends KernelMachine<T> implements Classifier<T> {
    /**
     * Creates a classifier from an existing kernel expansion. All arguments
     * are handed unchanged to the {@link KernelMachine} constructor.
     *
     * @param kernel Kernel function.
     * @param instances The instances in the kernel machine, e.g. support vectors.
     * @param weight The weights of instances.
     * @param b The intercept.
     */
    public SVM(MercerKernel<T> kernel, T[] instances, double[] weight, double b) {
        super(kernel, instances, weight, b);
    }

    /**
     * Classifies a sample by the sign of the decision value {@code f(x)}.
     *
     * @param x the sample to classify.
     * @return +1 when {@code f(x)} is strictly positive, -1 otherwise
     *         (a decision value of exactly zero maps to -1).
     */
    @Override
    public int predict(T x) {
        if (f(x) > 0) {
            return +1;
        }
        return -1;
    }

    /**
     * Trains a two-class linear SVM on dense vectors.
     * The model is fitted by {@link LASVM} with a {@link LinearKernel} and then
     * converted through {@code LinearKernelMachine.of}; the sign of that
     * machine's decision value gives the predicted class.
     *
     * @param x training samples.
     * @param y training labels (presumably +1/-1, matching the predictions;
     *          confirm against LASVM).
     * @param C the soft margin penalty parameter.
     * @param tol the tolerance of convergence test.
     * @return a classifier that answers +1 for a strictly positive decision
     *         value and -1 otherwise.
     */
    public static Classifier<double[]> fit(double[][] x, int[] y, double C, double tol) {
        KernelMachine<double[]> svm = new LASVM<double[]>(new LinearKernel(), C, tol).fit(x, y);

        return new Classifier<double[]>() {
            // Built once, when the classifier object is created.
            LinearKernelMachine model = LinearKernelMachine.of(svm);

            @Override
            public int predict(double[] sample) {
                if (model.f(sample) > 0) {
                    return +1;
                }
                return -1;
            }
        };
    }

    /**
     * Trains a two-class linear SVM on binary sparse data.
     * The model is fitted by {@link LASVM} with a
     * {@link BinarySparseLinearKernel} and then converted through
     * {@code LinearKernelMachine.binary}.
     *
     * @param x training samples.
     * @param y training labels (presumably +1/-1, matching the predictions;
     *          confirm against LASVM).
     * @param p the dimension of input vector.
     * @param C the soft margin penalty parameter.
     * @param tol the tolerance of convergence test.
     * @return a classifier that answers +1 for a strictly positive decision
     *         value and -1 otherwise.
     */
    public static Classifier<int[]> fit(int[][] x, int[] y, int p, double C, double tol) {
        KernelMachine<int[]> svm = new LASVM<int[]>(new BinarySparseLinearKernel(), C, tol).fit(x, y);

        return new Classifier<int[]>() {
            // Built once, when the classifier object is created.
            LinearKernelMachine model = LinearKernelMachine.binary(p, svm);

            @Override
            public int predict(int[] sample) {
                if (model.f(sample) > 0) {
                    return +1;
                }
                return -1;
            }
        };
    }

    /**
     * Trains a two-class linear SVM on {@link SparseArray} samples.
     * The model is fitted by {@link LASVM} with a {@link SparseLinearKernel}
     * and then converted through {@code LinearKernelMachine.sparse}.
     *
     * @param x training samples.
     * @param y training labels (presumably +1/-1, matching the predictions;
     *          confirm against LASVM).
     * @param p the dimension of input vector.
     * @param C the soft margin penalty parameter.
     * @param tol the tolerance of convergence test.
     * @return a classifier that answers +1 for a strictly positive decision
     *         value and -1 otherwise.
     */
    public static Classifier<SparseArray> fit(SparseArray[] x, int[] y, int p, double C, double tol) {
        KernelMachine<SparseArray> svm = new LASVM<SparseArray>(new SparseLinearKernel(), C, tol).fit(x, y);

        return new Classifier<SparseArray>() {
            // Built once, when the classifier object is created.
            LinearKernelMachine model = LinearKernelMachine.sparse(p, svm);

            @Override
            public int predict(SparseArray sample) {
                if (model.f(sample) > 0) {
                    return +1;
                }
                return -1;
            }
        };
    }

    /**
     * Trains a two-class SVM with a caller-supplied kernel.
     * The model is fitted by {@link LASVM} and the result of its
     * {@code toSVM()} call is returned.
     *
     * @param x training samples.
     * @param y training labels (presumably +1/-1, matching the predictions;
     *          confirm against LASVM).
     * @param kernel the kernel function.
     * @param C the soft margin penalty parameter.
     * @param tol the tolerance of convergence test.
     * @param <T> the type of the training samples.
     * @return the fitted SVM.
     */
    public static <T> SVM<T> fit(T[] x, int[] y, MercerKernel<T> kernel, double C, double tol) {
        return new LASVM<T>(kernel, C, tol).fit(x, y).toSVM();
    }
}