k近鄰（KNN）算法

KNN的三要素

k值的選擇、距離度量以及分類決策規則是k近鄰算法的三個基本要素。給定一個訓練數據集，對新的輸入實例，在訓練數據集中找到與該實例最鄰近的 k 個實例，這 k 個實例的多數屬於某個類，就把該輸入實例分為這個類。

1、具體完整的代碼清單（python3版本）

from __future__ import print_function
import numpy as np
import operator
def file2matrix(filename):
    """
    Desc:
        Load the training data from a tab-separated text file.
    parameters:
        filename: path of the data file. Every non-blank line is split on
                  tabs; its first three fields are read as numeric features
                  and its last field as an integer class label.
    return:
        returnMat: float matrix with one row per non-blank line and 3 columns
        classLabelVector: list of integer labels, in the same row order
    raises:
        ValueError: if a feature field is not numeric or a label is not an
                    integer
    """
    # Read the file once inside a context manager so the handle is always
    # closed. Blank lines are dropped so a stray empty line (for example at
    # the end of the file) does not break parsing.
    with open(filename) as fr:
        dataLines = [line.strip() for line in fr if line.strip()]

    # One row per data line, three feature columns, initially all zeros.
    returnMat = np.zeros((len(dataLines), 3))
    classLabelVector = []
    for index, line in enumerate(dataLines):
        listFromLine = line.split('\t')
        # The first three fields are the feature values of this sample.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        # The last field is the class label.
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector


def autoNorm(dataSet):
    """
    Desc:
        Min-max normalise every feature column so that features with large
        magnitudes do not dominate the distance computation.
    parameter:
        dataSet: 2-D NumPy array, one row per sample and one column per
                 feature
    return:
        normDataSet: the rescaled data, same shape as dataSet
        ranges: per-column (max - min)
        minVals: per-column minimum

    Formula:
        Y = (X - Xmin) / (Xmax - Xmin)
        where Xmin and Xmax are the smallest and largest value of each
        column, so every non-constant column is mapped onto [0, 1].
        A constant column (Xmax == Xmin) is mapped to 0 instead of NaN.
    """
    # Column-wise minimum, maximum and spread.
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has a range of 0; dividing by it would give 0/0 = NaN.
    # Divide by 1 there instead, which leaves the already-zero numerator at 0.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

def classify0(inX, dataSet, labels, k):
    """
    Desc:
        Classify one sample by majority vote among its k nearest training
        samples, using Euclidean distance.
    parameters:
        inX: feature vector of the sample to classify
        dataSet: 2-D array of training samples, one row per sample
        labels: sequence of class labels, labels[i] belongs to dataSet[i]
        k: number of nearest neighbours that take part in the vote
    return:
        the label with the most votes; on a tie, the tied label that was
        met first while walking the neighbours from nearest to farthest
    """
    rowCount = dataSet.shape[0]
    # Euclidean distance from inX to every training row.
    offsets = np.tile(inX, (rowCount, 1)) - dataSet
    dist = (offsets ** 2).sum(axis=1) ** 0.5

    # Row indices ordered from the closest sample to the farthest one.
    nearestFirst = dist.argsort()

    # Tally the labels of the k closest rows.
    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Stable descending sort by vote count keeps first-seen order on ties.
    ranking = sorted(votes, key=votes.get, reverse=True)
    return ranking[0]

def datingClassTest(filename='/Users/mac/Python_projects/ML_data/機器學(xué)習(xí)/2.KNN/datingTestSet2.txt',
                    hoRatio=0.1, k=3):
    """
    Desc:
        Hold-out evaluation of the kNN classifier on the dating-site data.
        The first hoRatio share of the rows is used as the test set and the
        remaining rows as the training set.
    parameters:
        filename: path of the tab-separated data file (defaults to the
                  original hard-coded location)
        hoRatio: share of the rows held out for testing
                 (training share = 1 - hoRatio)
        k: number of neighbours passed to classify0
    return:
        the number of misclassified test samples, as a float
    """
    # Load the data set from file.
    datingDataMat, datingLabels = file2matrix(filename)
    # Normalise the features; the returned ranges and minimums are not needed here.
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # m is the number of rows, i.e. the first dimension of the matrix.
    m = normMat.shape[0]
    # Rows [0, numTestVecs) are test samples, rows [numTestVecs, m) are training samples.
    numTestVecs = int(m * hoRatio)
    print('numTestVecs=', numTestVecs)
    trainingMat = normMat[numTestVecs:m, :]
    trainingLabels = datingLabels[numTestVecs:m]
    errorCount = 0.0
    for i in range(numTestVecs):
        # Classify each held-out row against the training rows only.
        classifierResult = classify0(normMat[i, :], trainingMat, trainingLabels, k)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    # With an empty test set there is nothing to get wrong; report 0 instead
    # of dividing by zero.
    errorRate = errorCount / float(numTestVecs) if numTestVecs else 0.0
    print("the total error rate is: %f" % errorRate)
    print(errorCount)
    return errorCount
    
# Run the dating-site hold-out evaluation only when this file is executed
# directly as a script, not when it is imported as a module.
if __name__ == '__main__':
    datingClassTest()

http://www.itdecent.cn/p/9519f1984c70 (tile())

https://blog.csdn.net/yuan1125/article/details/69388446 (argsort())

https://www.cnblogs.com/lianwl/archive/2013/09/11/3314964.html (np.tile(inX, (dataSetSize,1))-dataSet )

https://blog.csdn.net/qq_38669138/article/details/79094478 (operator.itemgetter)