""" 模型的网格搜索与检查验证 程序来源:https://www.cnblogs.com/ll409546297/p/11231299.html 20221113 byp 修正joblib问题 """ from sklearn.datasets import load_iris from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.preprocessing import StandardScaler import joblib # from sklearn.externals import joblib 20221113修改byp # K-近邻算法 def k_near_test(): # 1、原始数据 li = load_iris() # print(li.data) # print(li.DESCR) # 2、处理数据 data = li.data target = li.target x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.25) # 3、特征工程 std = StandardScaler() x_train = std.fit_transform(x_train, y_train) x_test = std.transform(x_test) # 4、算法 knn = KNeighborsClassifier(n_neighbors=2) knn.fit(x_train, y_train) # 预估 y_predict = knn.predict(x_test) print("预估值:", y_predict) # 5、评估 source = knn.score(x_test, y_test) print("准确率:", source) """ 交叉验证与网格搜索: 交叉验证: 1、将一个训练集分成对等的n份(cv值) 2、将第一个作为验证集,其他作为训练集,得出准确率 3、将第二个作为验证集,其他作为训练集,知道第n个为验证集,得出准确率 4、把得出的n个准确率,求平均值,得出模型平均准确率 网格搜索: 1、用于参数的调整(比如,k近邻算法中的n_neighbors值) 2、通过不同参数传入进行验证(超参数),得出最优的参数值(最优n_neighbors值) """ # 4、算法 knn_gc = KNeighborsClassifier() # 构造值进行搜索 param = {"n_neighbors": [2, 3, 5]} # 网格搜索 gc = GridSearchCV(knn_gc, param_grid=param,cv=4) gc.fit(x_train, y_train) # 5、评估 print("测试集的准确率:", gc.score(x_test, y_test)) print("交叉验证当中最好的结果:", gc.best_score_) print("选择最好的模型:", gc.best_estimator_) print("每个超参数每次交叉验证结果:", gc.cv_results_) if __name__ == '__main__': k_near_test()