如题,有大佬写过这个算法吗? sklearn 中的 kmeans 算法用的是欧氏距离,且不支持修改。
1
SleipniR 2019-05-06 14:31:05 +08:00 1
|
3
SleipniR 2019-05-06 15:46:17 +08:00 1
好像需要梯子:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, normalize


def create_cluster(sparse_data, nclust=10):
    """Cluster the rows of ``sparse_data`` by cosine similarity.

    The earlier version monkeypatched scikit-learn's private
    ``k_means_.euclidean_distances`` with ``cosine_similarity``. That had
    three problems: it substituted a *similarity* (larger = closer) where
    a *distance* (smaller = closer) is expected, it modified library state
    globally without restoring it, and it relied on a private module and
    an ``n_jobs`` argument that newer scikit-learn releases no longer
    provide.

    This version instead L2-normalises every row. For unit-length vectors
    ``||a - b||^2 == 2 - 2 * cos(a, b)``, so ordinary Euclidean k-means on
    the normalised rows groups points by cosine similarity without any
    patching. Centroids are not re-normalised between iterations, so this
    is the usual approximation of spherical k-means rather than an exact
    implementation.

    Args:
        sparse_data: 2-D feature matrix (dense array or scipy sparse
            matrix), one sample per row.
        nclust: Number of clusters to form.

    Returns:
        Array of cluster labels, one per input row.
    """
    # with_mean=False scales each feature without centring, which keeps
    # sparse input sparse.
    scaled = StandardScaler(with_mean=False).fit_transform(sparse_data)

    # Unit-length rows make Euclidean distance a monotone function of
    # cosine similarity.
    unit_rows = normalize(scaled, norm="l2")

    # Fixed seed keeps the clustering reproducible across runs.
    kmeans = KMeans(n_clusters=nclust, random_state=3425)
    kmeans.fit(unit_rows)
    return kmeans.labels_