HUST ML Labs

Machine Learning: the kNN Algorithm

1. kNN Algorithm Principles

(1)AD (2)A

2. Classification with the kNN Algorithm in sklearn

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

def classification(train_feature, train_label, test_feature):
    '''
    Classify test_feature with KNeighborsClassifier
    :param train_feature: training-set features
    :param train_label: training-set labels
    :param test_feature: test-set features
    :return: predictions for the test set
    '''
    #********* Begin *********#
    # Standardize the features so no single large-scale column dominates the distance.
    scaler = StandardScaler()
    train_feature = scaler.fit_transform(train_feature)

    # k = 5 nearest neighbors
    classifier = KNeighborsClassifier(n_neighbors=5)
    classifier.fit(train_feature, train_label)

    # Apply the same training-set scaling to the test data before predicting.
    return classifier.predict(scaler.transform(test_feature))
    #********* End *********#
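A quick local sanity check, not part of the grader: run classification() on sklearn's bundled wine data. The dataset choice and split below are illustrative assumptions.

# Hypothetical local test for classification(); dataset and split are assumptions.
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

wine = load_wine()
X_train, X_test, y_train, y_test = train_test_split(
    wine.data, wine.target, test_size=0.3, random_state=0)
pred = classification(X_train, y_train, X_test)
print(accuracy_score(y_test, pred))  # typically well above 0.9 with standardization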

3. Regression with the kNN Algorithm in sklearn

from sklearn.neighbors import KNeighborsRegressor

def regression(train_feature, train_label, test_feature):
    '''
    Run regression on test_feature with KNeighborsRegressor
    :param train_feature: training-set features
    :param train_label: training-set labels
    :param test_feature: test-set features
    :return: predictions for the test set
    '''
    #********* Begin *********#
    clf = KNeighborsRegressor()                 # create the k-NN regressor (default k=5)
    clf.fit(train_feature, train_label)         # fit it to the training data
    predict_result = clf.predict(test_feature)  # predict on the test set

    return predict_result
    #********* End *********#
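As a hedged usage sketch, the regressor can be exercised on synthetic data; make_regression is my assumption here, not part of the lab.

# Hypothetical local test for regression(); the synthetic data is an assumption.
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=3, noise=5.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print(regression(X_train, y_train, X_test)[:5])  # first five predicted values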

4. Analyzing the Wine Dataset

import numpy as np

def alcohol_mean(data):
    '''
    Return the mean alcohol content of the wine data
    :param data: the wine dataset object
    :return: mean alcohol content, as a float
    '''
    #********* Begin *********#
    # Alcohol is the first feature column of the wine dataset.
    return data.data[:, 0].mean()
    #********* End **********#
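The wine data object here is presumably sklearn's load_wine() bunch, whose first feature column is alcohol; a minimal check under that assumption:

# Hypothetical check; assumes the data object comes from sklearn's load_wine().
from sklearn.datasets import load_wine
print(alcohol_mean(load_wine()))  # about 13.0 for this dataset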

5. Standardizing the Data

from sklearn.preprocessing import StandardScaler

def scaler(data):
    '''
    Return the standardized wine data
    :param data: the wine dataset object
    :return: standardized wine data, as an ndarray
    '''
    #********* Begin *********#
    return StandardScaler().fit_transform(data.data)
    #********* End **********#
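Standardization should leave every column with mean 0 and standard deviation 1; a quick verification sketch, again assuming load_wine() as the data source:

# Hypothetical verification that each column has mean ~0 and std ~1 after scaling.
import numpy as np
from sklearn.datasets import load_wine

scaled = scaler(load_wine())
print(np.allclose(scaled.mean(axis=0), 0), np.allclose(scaled.std(axis=0), 1))  # True True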

6. Prediction with the kNN Algorithm

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler


def classification(train_feature, train_label, test_feature):
    scaler = StandardScaler()
    train_feature = scaler.fit_transform(train_feature)

    classifier = KNeighborsClassifier(n_neighbors=5)
    classifier.fit(train_feature, train_label)

    return classifier.predict(scaler.transform(test_feature))

The Perceptron

1. Automatic Watermelon Quality Classification

#encoding=utf8
import numpy as np

# Implement the perceptron algorithm
class Perceptron(object):
    def __init__(self, learning_rate=0.01, max_iter=200):
        self.lr = learning_rate
        self.max_iter = max_iter

    def fit(self, data, label):
        '''
        input:  data(ndarray): training features
                label(ndarray): training labels
        output: w(ndarray): learned weights
                b(ndarray): learned bias
        '''
        # Perceptron training; w holds the weights, b the bias
        self.w = np.array([1.] * data.shape[1])
        self.b = np.array([1.])
        #********* Begin *********#
        length = data.shape[0]  # the dataset length is used repeatedly below

        for _ in range(self.max_iter):  # stop once the iteration budget is exhausted
            has_error = False  # also stop early if a full pass makes no mistakes

            for i in range(length):  # iterate over the training samples
                x = data[i]  # current sample
                # x_1*w_1 + x_2*w_2 + ... + x_n*w_n + b; the dot product keeps it concise
                y = x.dot(self.w) + self.b

                res = 1 if y > 0 else -1  # a positive score predicts 1, otherwise -1
                if res == label[i]:  # nothing to do when the prediction is correct
                    continue
                else:  # on a mistake,
                    has_error = True
                    # update w and b by the rule from the exercise;
                    # since res == -label[i] here, this equals w += lr * label[i] * x
                    self.w -= self.lr * res * x
                    self.b -= self.lr * res
            if not has_error:
                break
        #********* End *********#

    def predict(self, data):
        '''
        input:  data(ndarray): test features
        output: predict(ndarray): predicted labels
        '''
        #********* Begin *********#
        predict = []
        for x in data:
            y = self.w.dot(x) + self.b
            res = 1 if y > 0 else -1
            predict.append(res)
        #********* End *********#
        return predict
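A minimal smoke test of the class on a made-up, linearly separable toy set (the data values are assumptions, not lab data):

# Hypothetical smoke test; the four points below are made up.
import numpy as np

X = np.array([[3.0, 3.0], [4.0, 3.0], [1.0, 1.0], [0.5, 1.5]])
y = np.array([1, 1, -1, -1])
p = Perceptron(learning_rate=0.1, max_iter=100)
p.fit(X, y)
print(p.predict(X))  # should reproduce [1, 1, -1, -1] once training converges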

2. Perceptron in Practice with scikit-learn: Accurate Cancer Cell Identification

# encoding=utf8
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import Perceptron  # the old sklearn.linear_model.perceptron path is deprecated
from sklearn.preprocessing import StandardScaler


# Load the training data
train_data = pd.read_csv('./step2/train_data.csv')
# Load the training labels
train_label = pd.read_csv('./step2/train_label.csv')
train_label = train_label['target']
# Load the test data
test_data = pd.read_csv('./step2/test_data.csv')

if os.path.exists('./step2/result.csv'):
    os.remove('./step2/result.csv')

# Standardize the data
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)

clf = Perceptron()
clf.fit(train_data, train_label)
pred = clf.predict(scaler.transform(test_data))

# Map the predictions onto the {0, 1} labels the grader expects
result = np.where(pred > 0.5, 1, 0)

df = pd.DataFrame(result, columns=["result"])
df.to_csv('./step2/result.csv')

Logistic Regression

1. The Core Idea of Logistic Regression

#encoding=utf8
import numpy as np

def sigmoid(t):
    '''
    Compute the sigmoid function
    :param t: any real number
    :return: the corresponding probability value
    :hint: np.exp() is useful here
    '''
    #********** Begin **********#
    return 1 / (1 + np.exp(-t))
    #********** End **********#
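A few sanity values: sigmoid(0) is exactly 0.5, and large positive or negative inputs saturate toward 1 and 0.

# Quick sanity checks for sigmoid().
print(sigmoid(0))    # 0.5
print(sigmoid(10))   # ~0.99995
print(sigmoid(-10))  # ~0.00005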

2. The Logistic Regression Loss Function

(1)A (2)ACD (3)AB (4)D

3. Gradient Descent

# -*- coding: utf-8 -*-

import numpy as np
import warnings
warnings.filterwarnings("ignore")

def gradient_descent(initial_theta, eta=0.05, n_iters=1000, epslion=1e-8):
    '''
    Gradient descent
    :param initial_theta: initial parameter value, a float
    :param eta: learning rate, a float
    :param n_iters: number of training iterations, an int
    :param epslion: convergence tolerance, a float
    :return: the parameter after training
    '''
    # Implementation
    #********** Begin *********#
    theta = initial_theta
    i_iter = 0
    while i_iter < n_iters:
        # gradient of the loss (theta - 3)^2 used in this exercise
        gradient = 2 * (theta - 3)
        last_theta = theta
        theta = theta - eta * gradient
        # stop once the update becomes smaller than the tolerance
        if abs(theta - last_theta) < epslion:
            break
        i_iter += 1
    return theta
    #********** End **********#
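The hard-coded gradient 2*(theta-3) is the derivative of the loss (theta-3)^2, so the routine should converge to the minimizer theta = 3:

# The implied loss is (theta - 3)^2, minimized at theta = 3.
print(gradient_descent(0.0))  # approaches 3.0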

4. Accurate Cancer Cell Identification

# -*- coding: utf-8 -*-

import numpy as np
import warnings
warnings.filterwarnings("ignore")

def sigmoid(x):
    '''
    The sigmoid function
    :param x: raw input
    :return: the corresponding probability
    '''
    return 1 / (1 + np.exp(-x))


def fit(x, y, eta=1e-3, n_iters=10000):
    '''
    Train a logistic regression model
    :param x: training-set features, an ndarray
    :param y: training-set labels, an ndarray
    :param eta: learning rate, a float
    :param n_iters: number of training iterations, an int
    :return: model parameters, an ndarray
    '''
    # Implementation
    #********** Begin *********#
    theta = np.zeros(x.shape[1])
    i_iter = 0
    while i_iter < n_iters:
        # gradient of the cross-entropy loss: X^T (sigmoid(X theta) - y)
        gradient = (sigmoid(x.dot(theta)) - y).dot(x)
        theta = theta - eta * gradient
        i_iter += 1
    return theta
    #********** End **********#
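fit() only returns the parameters; turning them into class labels takes one more step, thresholding the sigmoid output at 0.5. A hedged sketch (the predict helper below is my own addition, not part of the scaffold):

# Hypothetical companion to fit(); the name and the 0.5 threshold are assumptions.
def predict(x, theta):
    # probability of the positive class, thresholded at 0.5
    return (sigmoid(x.dot(theta)) >= 0.5).astype(int)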

5. Handwritten Digit Recognition

from sklearn.linear_model import LogisticRegression

def digit_predict(train_image, train_label, test_image):
    '''
    Train a model and return its predictions
    :param train_image: training samples, an ndarray of shape [-1, 8, 8]
    :param train_label: training labels, an ndarray
    :param test_image: test samples, an ndarray
    :return: predicted labels for test_image
    '''
    #************* Begin ************#
    # Flatten the training images
    flat_train_image = train_image.reshape((-1, 64))
    # Min-max normalize the training set
    train_min = flat_train_image.min()
    train_max = flat_train_image.max()
    flat_train_image = (flat_train_image - train_min) / (train_max - train_min)
    # Flatten the test images
    flat_test_image = test_image.reshape((-1, 64))
    # Min-max normalize the test set
    test_min = flat_test_image.min()
    test_max = flat_test_image.max()
    flat_test_image = (flat_test_image - test_min) / (test_max - test_min)
    # Train, then predict
    clf = LogisticRegression(C=4.0)
    clf.fit(flat_train_image, train_label)
    return clf.predict(flat_test_image)
    #************* End **************#
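A hedged local test, assuming the grader's data matches sklearn's digits dataset:

# Hypothetical local test; the digits dataset and split are assumptions.
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X_tr, X_te, y_tr, y_te = train_test_split(digits.images, digits.target, random_state=0)
print((digit_predict(X_tr, y_tr, X_te) == y_te).mean())  # accuracy, typically above 0.95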

The Naive Bayes Classifier

1. Conditional Probability

(1)A (2)C

2. Bayes' Theorem

(1)D (2)C

3. The Naive Bayes Classification Workflow

import numpy as np


class NaiveBayesClassifier(object):
    def __init__(self):
        '''
        self.label_prob holds the probability of each class in the data.
        For example, {0:0.333, 1:0.667} means class 0 appears with
        probability 0.333 and class 1 with probability 0.667.
        '''
        self.label_prob = {}
        '''
        self.condition_prob holds, for each class, the probability of each
        feature value. For example, with training features
            [[2, 1, 1],
             [1, 2, 2],
             [2, 2, 2],
             [2, 1, 2],
             [1, 2, 3]]
        and labels [1, 0, 1, 0, 1]:
        given label 0, column 0 takes value 1 with probability 0.5 and value 2 with probability 0.5;
        given label 0, column 1 takes value 1 with probability 0.5 and value 2 with probability 0.5;
        given label 0, column 2 takes value 1 with probability 0, value 2 with probability 1, value 3 with probability 0;
        given label 1, column 0 takes value 1 with probability 0.333 and value 2 with probability 0.666;
        given label 1, column 1 takes value 1 with probability 0.333 and value 2 with probability 0.666;
        given label 1, column 2 takes values 1, 2, 3 each with probability 0.333.
        So self.condition_prob looks like:
        {
            0: {
                0: {1: 0.5, 2: 0.5},
                1: {1: 0.5, 2: 0.5},
                2: {1: 0, 2: 1, 3: 0}
            },
            1: {
                0: {1: 0.333, 2: 0.666},
                1: {1: 0.333, 2: 0.666},
                2: {1: 0.333, 2: 0.333, 3: 0.333}
            }
        }
        '''
        self.condition_prob = {}

    def fit(self, feature, label):
        '''
        Train the model, storing the probabilities in self.label_prob and self.condition_prob
        :param feature: ndarray of all training features
        :param label: ndarray of all training labels
        :return: nothing
        '''
        #********* Begin *********#
        # Class priors: count each label, then normalize.
        for l in label:
            self.label_prob[l] = self.label_prob.get(l, 0) + 1

        for k, v in self.label_prob.items():
            self.label_prob[k] = v / len(label)

        # Group the samples by label.
        label2data = {}
        for i, data in enumerate(feature):
            l = label[i]
            old_data = label2data.get(l)
            if not old_data:
                label2data[l] = [data]
            else:
                label2data[l].append(data)

        # For each label, count feature values per column.
        for l, all_data in label2data.items():
            feat_index2feat_count = {}
            for data in all_data:
                for i, d in enumerate(data):
                    feat_index2feat_count[i] = feat_index2feat_count.get(i, {})
                    feat_index2feat_count[i][d] = feat_index2feat_count[i].get(d, 0) + 1

            # Remember how many samples carry this label, for normalization later.
            feat_index2feat_count["__count__"] = len(all_data)

            self.condition_prob[l] = feat_index2feat_count
        #********* End *********#


    def predict(self, feature):
        '''
        Predict labels for the given data
        :param feature: ndarray of all test features
        :return: the predicted labels
        '''
        # ********* Begin *********#
        ret_arr = []

        for fs in feature:
            # For each class, multiply the prior by the per-feature conditionals.
            label2res = {}
            for label, feat_index2feat_count in self.condition_prob.items():
                p = self.label_prob[label]

                for i, f in enumerate(fs):
                    p *= feat_index2feat_count[i].get(f, 0) / \
                        feat_index2feat_count["__count__"]

                label2res[label] = p

            # Pick the class with the largest posterior.
            max_prob = {"label": "foo", "prob": -1}
            for label, prob in label2res.items():
                if prob > max_prob["prob"]:
                    max_prob = {"label": label, "prob": prob}

            ret_arr.append(max_prob["label"])

        return ret_arr
        #********* End *********#
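The toy data from the docstring doubles as a smoke test:

# Smoke test using the toy data from the docstring above.
import numpy as np

feature = np.array([[2, 1, 1], [1, 2, 2], [2, 2, 2], [2, 1, 2], [1, 2, 3]])
label = np.array([1, 0, 1, 0, 1])
clf = NaiveBayesClassifier()
clf.fit(feature, label)
print(clf.predict(np.array([[2, 1, 1]])))  # expected to recover label 1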

4. Laplace Smoothing

import numpy as np

class NaiveBayesClassifier(object):
    def __init__(self):
        '''
        self.label_prob holds the probability of each class in the data,
        e.g. {0:0.333, 1:0.667}, exactly as in the previous step.
        '''
        self.label_prob = {}
        '''
        self.condition_prob holds, for each class, the probability of each
        feature value, with the same structure as in the previous step, e.g.
        {
            0: {
                0: {1: 0.5, 2: 0.5},
                1: {1: 0.5, 2: 0.5},
                2: {1: 0, 2: 1, 3: 0}
            },
            1: {
                0: {1: 0.333, 2: 0.666},
                1: {1: 0.333, 2: 0.666},
                2: {1: 0.333, 2: 0.333, 3: 0.333}
            }
        }
        '''
        self.condition_prob = {}

    def fit(self, feature, label):
        '''
        Train the model, storing the probabilities in self.label_prob and self.condition_prob
        :param feature: ndarray of all training features
        :param label: ndarray of all training labels
        :return: nothing
        '''
        #********* Begin *********#
        # Class priors with Laplace smoothing: (count + 1) / (N + number of classes).
        for l in label:
            self.label_prob[l] = self.label_prob.get(l, 0) + 1

        for k, v in self.label_prob.items():
            self.label_prob[k] = (v + 1) / (len(label) + len(self.label_prob.keys()))

        # Group the samples by label.
        label2data = {}
        for i, data in enumerate(feature):
            l = label[i]
            old_data = label2data.get(l)
            if not old_data:
                label2data[l] = [data]
            else:
                label2data[l].append(data)

        for l, all_data in label2data.items():
            # Initialize a zero count for every feature value seen anywhere in the
            # training set, so unseen (label, value) pairs still get smoothed mass.
            feat_index2feat_count = {}
            for i in range(len(feature[0])):
                feat_index2feat_count[i] = {}
                for f in feature:
                    d = f[i]
                    feat_index2feat_count[i][d] = 0

            # Count feature values among the samples of this label.
            for data in all_data:
                for i, d in enumerate(data):
                    feat_index2feat_count[i][d] = feat_index2feat_count[i].get(d, 0) + 1

            # Conditional probabilities with Laplace smoothing:
            # (count + 1) / (samples with this label + number of distinct values).
            for i in feat_index2feat_count.keys():
                for k in feat_index2feat_count[i].keys():
                    feat_index2feat_count[i][k] = (feat_index2feat_count[i][k] + 1) / \
                        (len(all_data) + len(feat_index2feat_count[i].keys()))

            self.condition_prob[l] = feat_index2feat_count
        #********* End *********#


    def predict(self, feature):
        '''
        Predict labels for the given data
        :param feature: ndarray of all test features
        :return: the predicted labels
        '''
        result = []
        # Predict every test sample
        for i, f in enumerate(feature):
            # posterior (up to a constant) for each candidate class
            prob = np.zeros(len(self.label_prob.keys()))
            ii = 0
            for label, label_prob in self.label_prob.items():
                # prior times the per-feature conditionals
                prob[ii] = label_prob
                for j in range(len(feature[0])):
                    prob[ii] *= self.condition_prob[label][j][f[j]]
                ii += 1
            # take the class with the highest probability
            result.append(list(self.label_prob.keys())[np.argmax(prob)])
        return np.array(result)
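Laplace smoothing replaces each raw frequency count/N with (count + 1) / (N + K), where K is the number of distinct labels (for the prior) or of distinct values of the feature (for the conditionals), so no probability can ever be exactly zero. With the toy labels [1, 0, 1, 0, 1] from the previous step, the smoothed prior works out as:

# Worked example of the smoothed prior for labels [1, 0, 1, 0, 1].
print((3 + 1) / (5 + 2))  # P(label=1) = 4/7, about 0.571
print((2 + 1) / (5 + 2))  # P(label=0) = 3/7, about 0.429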

5. News Topic Classification

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfTransformer


def news_predict(train_sample, train_label, test_sample):
    '''
    Train a model and return its predictions
    :param train_sample: raw news texts of the training set, an ndarray
    :param train_label: topic labels of the training texts, an ndarray
    :param test_sample: raw news texts of the test set, an ndarray
    :return: the predictions, an ndarray
    '''
    #********* Begin *********#
    # Turn raw text into term counts, fitting the vocabulary on the training set only.
    vec = CountVectorizer()
    X_train_count_vectorizer = vec.fit_transform(train_sample)
    X_test_count_vectorizer = vec.transform(test_sample)

    # Reweight the counts with TF-IDF.
    tfidf = TfidfTransformer()
    X_train = tfidf.fit_transform(X_train_count_vectorizer)
    X_test = tfidf.transform(X_test_count_vectorizer)

    # Multinomial naive Bayes with a small smoothing parameter.
    clf = MultinomialNB(alpha=0.03)

    clf.fit(X_train, train_label)
    return clf.predict(X_test)
    #********* End *********#
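A tiny made-up corpus is enough to smoke-test the pipeline (the texts and labels below are assumptions, not lab data):

# Hypothetical smoke test on a made-up corpus; the real lab data is much larger.
import numpy as np

train = np.array(["the team won the game", "stocks fell on monday",
                  "the striker scored a goal", "markets rallied after earnings"])
labels = np.array(["sports", "finance", "sports", "finance"])
test = np.array(["the goalkeeper saved the game"])
print(news_predict(train, labels, test))  # expected: ['sports']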

Support Vector Machines

1. What Is a Support Vector Machine: B

2. Margin and Support Vectors: B

3. The Dual Problem: AC

4. Kernel Functions

#encoding=utf8

import numpy as np

# Implement the kernel function
def kernel(x, sigma=1.0):
    '''
    input:  x(ndarray): samples
    output: x(ndarray): transformed values
    '''
    #********* Begin *********#
    # Apply a Gaussian transform to the difference of each sample's two features,
    # writing the result back into both columns (as the exercise expects).
    m = x.shape[0]
    for i in range(m):
        diff = x[i, 0] - x[i, 1]
        x[i, 0] = np.exp(-diff ** 2 / (2 * sigma ** 2))
        x[i, 1] = np.exp(-diff ** 2 / (2 * sigma ** 2))
    #********* End *********#
    return x
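For reference, the standard Gaussian (RBF) kernel compares two sample vectors rather than the two columns of one sample; a minimal sketch of that textbook form, for comparison with the exercise's variant above:

# Standard RBF kernel between two sample vectors, for comparison.
import numpy as np

def rbf_kernel(x1, x2, sigma=1.0):
    # k(x1, x2) = exp(-||x1 - x2||^2 / (2 * sigma^2))
    return np.exp(-np.sum((x1 - x2) ** 2) / (2 * sigma ** 2))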

5. Soft Margin

#encoding=utf8
import numpy as np

class SVM:
    def __init__(self, max_iter=100, kernel='linear'):
        '''
        input: max_iter(int): maximum number of training iterations
               kernel(str): the kernel; 'linear' for linear, 'poly' for polynomial
        '''
        self.max_iter = max_iter
        self._kernel = kernel

    # Initialize the model
    def init_args(self, features, labels):
        self.m, self.n = features.shape
        self.X = features
        self.Y = labels
        self.b = 0.0
        self.alpha = np.ones(self.m)
        # Cache every E_i in a list
        self.E = [self._E(i) for i in range(self.m)]
        # Penalty parameter for the slack variables
        self.C = 1.0

    #********* Begin *********#
    # KKT conditions
    def _KKT(self, i):
        y_g = self._g(i) * self.Y[i]
        if self.alpha[i] == 0:
            return y_g >= 1
        elif 0 < self.alpha[i] < self.C:
            return y_g == 1
        else:
            return y_g <= 1

    # g(x): the model's raw prediction for the input X[i]
    def _g(self, i):
        r = self.b
        for j in range(self.m):
            r += self.alpha[j] * self.Y[j] * self.kernel(self.X[i], self.X[j])
        return r

    # Kernel function
    def kernel(self, x1, x2):
        if self._kernel == 'linear':
            return sum([x1[k] * x2[k] for k in range(self.n)])
        elif self._kernel == 'poly':
            return (sum([x1[k] * x2[k] for k in range(self.n)]) + 1) ** 2
        return 0

    # E(x): the difference between the prediction g(x) and the label y
    def _E(self, i):
        return self._g(i) - self.Y[i]

    # Choose the pair of alphas to optimize
    def _init_alpha(self):
        # The outer loop first scans the samples with 0 < alpha < C
        # and checks whether they satisfy the KKT conditions,
        index_list = [i for i in range(self.m) if 0 < self.alpha[i] < self.C]
        # then falls back to the rest of the training set
        non_satisfy_list = [i for i in range(self.m) if i not in index_list]
        index_list.extend(non_satisfy_list)

        for i in index_list:
            if self._KKT(i):
                continue

            E1 = self.E[i]
            # If E1 is non-negative, pick the smallest E; if negative, the largest
            if E1 >= 0:
                j = min(range(self.m), key=lambda x: self.E[x])
            else:
                j = max(range(self.m), key=lambda x: self.E[x])
            return i, j
        # Every sample satisfies the KKT conditions
        return None

    # Clip alpha to the feasible interval [L, H]
    def _compare(self, _alpha, L, H):
        if _alpha > H:
            return H
        elif _alpha < L:
            return L
        else:
            return _alpha

    # Training
    def fit(self, features, labels):
        '''
        input: features(ndarray): features
               labels(ndarray): labels
        '''
        self.init_args(features, labels)

        for t in range(self.max_iter):
            # pick the working pair; stop early once the KKT conditions all hold
            pair = self._init_alpha()
            if pair is None:
                break
            i1, i2 = pair

            # Bounds on alpha2
            if self.Y[i1] == self.Y[i2]:
                L = max(0, self.alpha[i1] + self.alpha[i2] - self.C)
                H = min(self.C, self.alpha[i1] + self.alpha[i2])
            else:
                L = max(0, self.alpha[i2] - self.alpha[i1])
                H = min(self.C, self.C + self.alpha[i2] - self.alpha[i1])

            E1 = self.E[i1]
            E2 = self.E[i2]
            # eta = K11 + K22 - 2*K12
            eta = self.kernel(self.X[i1], self.X[i1]) + self.kernel(self.X[i2], self.X[i2]) \
                - 2 * self.kernel(self.X[i1], self.X[i2])
            if eta <= 0:
                continue

            alpha2_new_unc = self.alpha[i2] + self.Y[i2] * (E2 - E1) / eta
            alpha2_new = self._compare(alpha2_new_unc, L, H)

            alpha1_new = self.alpha[i1] + self.Y[i1] * self.Y[i2] * (self.alpha[i2] - alpha2_new)

            b1_new = -E1 - self.Y[i1] * self.kernel(self.X[i1], self.X[i1]) * (alpha1_new - self.alpha[i1]) \
                - self.Y[i2] * self.kernel(self.X[i2], self.X[i1]) * (alpha2_new - self.alpha[i2]) + self.b
            b2_new = -E2 - self.Y[i1] * self.kernel(self.X[i1], self.X[i2]) * (alpha1_new - self.alpha[i1]) \
                - self.Y[i2] * self.kernel(self.X[i2], self.X[i2]) * (alpha2_new - self.alpha[i2]) + self.b

            if 0 < alpha1_new < self.C:
                b_new = b1_new
            elif 0 < alpha2_new < self.C:
                b_new = b2_new
            else:
                # take the midpoint
                b_new = (b1_new + b2_new) / 2

            # Update the parameters
            self.alpha[i1] = alpha1_new
            self.alpha[i2] = alpha2_new
            self.b = b_new

            self.E[i1] = self._E(i1)
            self.E[i2] = self._E(i2)
        return 'train done!'
    #********* End *********#

    def predict(self, data):
        r = self.b
        for i in range(self.m):
            r += self.alpha[i] * self.Y[i] * self.kernel(data, self.X[i])
        return 1 if r > 0 else -1

    def score(self, X_test, y_test):
        right_count = 0
        for i in range(len(X_test)):
            result = self.predict(X_test[i])
            if result == y_test[i]:
                right_count += 1
        return right_count / len(X_test)

    def _weight(self):
        yx = self.Y.reshape(-1, 1) * self.X
        self.w = np.dot(yx.T, self.alpha)
        return self.w
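A hedged smoke test of the hand-rolled SMO trainer on made-up, separable points (the data values are assumptions):

# Hypothetical smoke test; the four toy points are made up.
import numpy as np

X = np.array([[3.0, 3.0], [4.0, 3.0], [1.0, 1.0], [0.5, 1.0]])
y = np.array([1, 1, -1, -1])
svm = SVM(max_iter=100)
svm.fit(X, y)
print([svm.predict(x) for x in X])  # compare against y; should match once converged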

6. Support Vector Machines in sklearn

#encoding=utf8
from sklearn.svm import SVC

def svm_classifier(train_data, train_label, test_data):
    '''
    input:  train_data(ndarray): training samples
            train_label(ndarray): training labels
            test_data(ndarray): test samples
    output: predict(ndarray): predictions
    '''
    #********* Begin *********#
    # SVC defaults to an RBF kernel with C=1.0
    svc = SVC()
    svc.fit(train_data, train_label)
    predict = svc.predict(test_data)
    #********* End *********#
    return predict
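A hedged local test on the iris data (the dataset and split are assumptions), which also previews the final project below:

# Hypothetical local test; the iris dataset and split are assumptions.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
X_tr, X_te, y_tr, y_te = train_test_split(iris.data, iris.target, random_state=0)
print((svm_classifier(X_tr, y_tr, X_te) == y_te).mean())  # accuracy, usually around 0.97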

Final Project: the Iris Dataset


