kmeans算法用Python怎么实现
答案:1 悬赏:60 手机版
解决时间 2021-04-05 12:20
- 提问者网友:箛茗
- 2021-04-04 15:51
kmeans算法用Python怎么实现
最佳答案
- 五星知识达人网友:未来江山和你
- 2021-04-04 16:23
第一种: 引用scikit-learn包
from sklearn.cluster import KMeans

k = 10  # number of clusters for KMeans
model = KMeans(n_clusters=k)  # the class is spelled KMeans (capital K and M)
# Placeholder samples, one row per sample -- replace with your own data.
# KMeans needs at least as many samples as clusters (n_samples >= k).
X = [
    [1, 2], [1, 3], [2, 1],
    [8, 8], [8, 9], [9, 8],
    [1, 8], [2, 9], [1, 9],
    [8, 1], [9, 2], [9, 1],
]
model.fit(X)
# The model is now trained; its fitted attributes (note the trailing
# underscore used by scikit-learn for learned attributes) can be inspected.
model.cluster_centers_
model.labels_
第二种: 自己写代码实现
import numpy as np
import random

# Toy data set: nine 3-D points, one sample per row.
data = [[1, 1, 1], [1, 1, 3], [1, 2, 1], [5, 1, 1], [5, 1, 2], [5, 2, 1], [5, 5, 5], [5, 5, 4], [5, 4, 4]]
data = np.array(data)
k = 4  # number of clusters
n_iteration = 500  # maximum number of iterations

# Draw the k initial centroids uniformly at random inside the axis-aligned
# bounding box of the data (the per-feature [min, max] range), so that no
# centroid starts outside the region the samples occupy.
feature_min = data.min(axis=0)
feature_max = data.max(axis=0)
center = feature_min + (feature_max - feature_min) * np.random.rand(k, data.shape[1])
# Buffer for the centroids recomputed in each iteration. Plain float
# ndarrays are used rather than np.matrix, which NumPy no longer recommends.
center_after = np.zeros_like(center)
def calc_distance(x, y, distance='eucidean'):
    """Return the distance between two points.

    Args:
        x: First point (any array-like of numbers).
        y: Second point, broadcast-compatible with ``x``.
        distance: Name of the metric. Only the Euclidean metric is
            implemented; both the historical default spelling
            ``'eucidean'`` and the correct ``'euclidean'`` are accepted.

    Returns:
        The Euclidean distance as a NumPy float scalar.

    Raises:
        ValueError: If ``distance`` names an unsupported metric (previously
            the function silently returned ``None`` in that case).
    """
    # Convert to float so integer (in particular unsigned) inputs cannot
    # wrap around when subtracted.
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    if distance in ('eucidean', 'euclidean'):
        return np.sqrt(np.sum((y - x) ** 2))
    raise ValueError('unsupported distance metric: %r' % (distance,))
# Lloyd's iteration: alternate between assigning every point to its nearest
# centroid and moving every centroid to the mean of its assigned points.
# On exit, `label` holds the cluster index of each row of `data` and
# `center_after` holds the most recently computed centroids.
n = 0
while True:
    n += 1
    print('第%s次迭代' % n)
    # Distance from every point to every centroid (rows: points, columns:
    # centroids); each point joins the cluster of its nearest centroid.
    centroids = np.asarray(center)  # plain 2-D view, also if `center` is an np.matrix
    distances = np.array([[calc_distance(point, centroid) for centroid in centroids] for point in data])
    label = np.argmin(distances, axis=1)
    print(label)
    # Recompute the centroids.
    for i in range(k):
        members = data[label == i]
        if len(members):
            center_after[i] = members.mean(axis=0)
        else:
            # An empty cluster has no mean (np.mean would yield NaN and
            # poison every later distance); keep its previous centroid.
            center_after[i] = center[i]
    if np.sum(np.abs(center_after - center)) < 0.01:
        print('相邻两次迭代改变甚小, 迭代结束')
        break
    if n >= n_iteration:  # run at most n_iteration iterations
        print('迭代次数已达上限, 迭代结束')
        break
    # Copy instead of aliasing: with `center = center_after` both names
    # refer to the same array, so the convergence test above would always
    # see a zero difference on the following iteration.
    center = center_after.copy()
from sklearn.cluster import KMeans

k = 10  # number of clusters for KMeans
model = KMeans(n_clusters=k)  # the class is spelled KMeans (capital K and M)
# Placeholder samples, one row per sample -- replace with your own data.
# KMeans needs at least as many samples as clusters (n_samples >= k).
X = [
    [1, 2], [1, 3], [2, 1],
    [8, 8], [8, 9], [9, 8],
    [1, 8], [2, 9], [1, 9],
    [8, 1], [9, 2], [9, 1],
]
model.fit(X)
# The model is now trained; its fitted attributes (note the trailing
# underscore used by scikit-learn for learned attributes) can be inspected.
model.cluster_centers_
model.labels_
第二种: 自己写代码实现
import numpy as np
import random

# Toy data set: nine 3-D points, one sample per row.
data = [[1, 1, 1], [1, 1, 3], [1, 2, 1], [5, 1, 1], [5, 1, 2], [5, 2, 1], [5, 5, 5], [5, 5, 4], [5, 4, 4]]
data = np.array(data)
k = 4  # number of clusters
n_iteration = 500  # maximum number of iterations

# Draw the k initial centroids uniformly at random inside the axis-aligned
# bounding box of the data (the per-feature [min, max] range), so that no
# centroid starts outside the region the samples occupy.
feature_min = data.min(axis=0)
feature_max = data.max(axis=0)
center = feature_min + (feature_max - feature_min) * np.random.rand(k, data.shape[1])
# Buffer for the centroids recomputed in each iteration. Plain float
# ndarrays are used rather than np.matrix, which NumPy no longer recommends.
center_after = np.zeros_like(center)
def calc_distance(x, y, distance='eucidean'):
    """Return the distance between two points.

    Args:
        x: First point (any array-like of numbers).
        y: Second point, broadcast-compatible with ``x``.
        distance: Name of the metric. Only the Euclidean metric is
            implemented; both the historical default spelling
            ``'eucidean'`` and the correct ``'euclidean'`` are accepted.

    Returns:
        The Euclidean distance as a NumPy float scalar.

    Raises:
        ValueError: If ``distance`` names an unsupported metric (previously
            the function silently returned ``None`` in that case).
    """
    # Convert to float so integer (in particular unsigned) inputs cannot
    # wrap around when subtracted.
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    if distance in ('eucidean', 'euclidean'):
        return np.sqrt(np.sum((y - x) ** 2))
    raise ValueError('unsupported distance metric: %r' % (distance,))
# Lloyd's iteration: alternate between assigning every point to its nearest
# centroid and moving every centroid to the mean of its assigned points.
# On exit, `label` holds the cluster index of each row of `data` and
# `center_after` holds the most recently computed centroids.
n = 0
while True:
    n += 1
    print('第%s次迭代' % n)
    # Distance from every point to every centroid (rows: points, columns:
    # centroids); each point joins the cluster of its nearest centroid.
    centroids = np.asarray(center)  # plain 2-D view, also if `center` is an np.matrix
    distances = np.array([[calc_distance(point, centroid) for centroid in centroids] for point in data])
    label = np.argmin(distances, axis=1)
    print(label)
    # Recompute the centroids.
    for i in range(k):
        members = data[label == i]
        if len(members):
            center_after[i] = members.mean(axis=0)
        else:
            # An empty cluster has no mean (np.mean would yield NaN and
            # poison every later distance); keep its previous centroid.
            center_after[i] = center[i]
    if np.sum(np.abs(center_after - center)) < 0.01:
        print('相邻两次迭代改变甚小, 迭代结束')
        break
    if n >= n_iteration:  # run at most n_iteration iterations
        print('迭代次数已达上限, 迭代结束')
        break
    # Copy instead of aliasing: with `center = center_after` both names
    # refer to the same array, so the convergence test above would always
    # see a zero difference on the following iteration.
    center = center_after.copy()
我要举报
如以上问答信息为低俗、色情、不良、暴力、侵权、涉及违法等信息,可以点下面链接进行举报!
大家都在看
推荐资讯