""" 利用XBGoost预测乳腺癌的例子 20221113 byp """ from xgboost import XGBClassifier from sklearn.datasets import load_breast_cancer from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score # 导入训练数据 data = load_breast_cancer() # split data into X and y X, y = data['data'], data['target'] X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=0.8, random_state=0 ) model = XGBClassifier(num_class=3, learning_rate=0.2, n_estimators=1000, # 树的个数--1000棵树建立xgboost max_depth=1, # 树的深度 min_child_weight=4, # 叶子节点最小权重 gamma=0., # 惩罚项中叶子结点个数前的参数 subsample=0.8, # 随机选择80%样本建立决策树 colsample_btree=0.8, # 随机选择80%特征建立决策树 objective='multi:softmax', # 指定损失函数 scale_pos_weight=1, # 解决样本个数不平衡的问题 random_state=27, # 随机数 reg_alpha=0.01, reg_lambda=3, verbosity=0 # 最新的xgboost(1.6.2)已经移除了silent参数,换成verbosity ) # model = XGBClassifier() model.fit(X_train, y_train) # 保存模型 # model.save_model('Pima_Indians_Diabetes_Model.model') # make predictions for test data y_pred = model.predict(X) prediction = [round(value) for value in y_pred] # evaluate predictions accuracy = accuracy_score(y, prediction) # print(prediction, end='\n') # for i in prediction: # print(i) print("Accuracy:%.2f%%" % (accuracy * 100.0))