import numpy as np
def three_n(data_set):
m = data_set.shape[0] # m 樣本個數(shù)(n維)
dist_mat = np.diag(np.ones(m) * np.inf) # 初始化 距離矩陣
for r in range(m):
for c in range(r + 1, m):
dist_mat[r, c] = np.linalg.norm(data_set[r] - data_set[c])
dist_mat = dist_mat + dist_mat.T # 距離矩陣 計算完成
adjacency = np.zeros((m, m)) # 初始化 有向圖的鄰接矩陣
nonzero_len_init = 0
r = 0
while 1:
row_min_index = np.argmin(dist_mat, axis=1) # 每一行的最小距離的索引,也就是每個數(shù)據(jù)的第k最近鄰居
for i in range(m): # 行和為每一個頂點的出度,代表著距離每一個頂點最近的(1的個數(shù))個
adjacency[i, row_min_index[i]] = 1 # 列和為每一個頂點的入度,代表著鄰居的個數(shù),也是該頂點出現(xiàn)在其他頂點的鄰居中的次數(shù),即“密度”
dist_mat[i, row_min_index[i]] = np.inf # 更新距離矩陣
density = np.sum(adjacency, axis=0, dtype=np.int32) # 計算每個頂點的“密度”
nonzero_len = len(np.nonzero(density)[0]) # 統(tǒng)計“密度”不為0的頂點個數(shù)
r += 1
if nonzero_len == m or nonzero_len == nonzero_len_init:
nn = {}
p = 0
for i in range(m):
nn[i + 1] = np.nonzero(adjacency.T)[1].tolist()[p:p + density[i]]
nn[i + 1] = [x + 1 for x in nn[i + 1]]
p = p + density[i]
return density.tolist(), nn, r
nonzero_len_init = nonzero_len
s = np.mat('1,2,3;2,3,5;4,6,7;1,2,1;4,4,3;2,6,9;1,2,5')
d = three_n(s)
print(d)

運行結(jié)果
函數(shù)返回結(jié)果:
第一個元素:每一個數(shù)據(jù)點的鄰居數(shù)(被其他數(shù)據(jù)點的鄰域覆蓋的次數(shù),即“密度”)
第二個元素:每一個數(shù)據(jù)點的鄰域集(每一列)
第三個元素:supk