返回信息流

怎么输出 LR(逻辑回归)各输入项的权值,以及如何得到最佳权值对应的特征码?
部分代码如下:
```python
# plot
import matplotlib.pyplot as plt
def fill_with_mean(feature, df, inttype=False):
    """Fill missing values of one column with its rounded mean.

    The mean is taken over the non-missing entries of ``df[feature]`` and
    rounded to zero decimals with ``np.round`` before it is used as the
    fill value.

    Args:
        feature: Label of the column in ``df`` to fill.
        df: DataFrame holding the column; it is not modified here.
        inttype: When true, cast the filled column to ``int``.

    Returns:
        The filled column as a new Series.
    """
    mu = np.round(df[feature].dropna().mean(), 0)
    filled = df[feature].fillna(mu)
    return filled.astype(int) if inttype else filled
def grid_search_model(X, Y, model, parameters, cv):
    """Run a cross-validated grid search and print the best result.

    Args:
        X: Training features passed to ``GridSearchCV.fit``.
        Y: Training labels passed to ``GridSearchCV.fit``.
        model: Estimator to tune.
        parameters: Grid handed to ``GridSearchCV`` as ``param_grid``.
        cv: Cross-validation setting forwarded to ``GridSearchCV``.

    Returns:
        A ``(best_params, search)`` tuple: the best parameter dict and the
        fitted ``GridSearchCV`` object (created with ``refit=True``).
    """
    search = GridSearchCV(estimator=model, param_grid=parameters, cv=cv, refit=True)
    search.fit(X, Y)
    print("Best Score:", search.best_score_, " / Best parameters:", search.best_params_)
    return search.best_params_, search
def validation_roc_model(model, parameters, Xtest, Ytest):
    """Compute the class-probability scores of ``model`` on ``Xtest``.

    NOTE(review): only this single statement is present in the posted
    snippet. The scores are computed but neither used nor returned, and
    ``parameters`` / ``Ytest`` are unused. The rest of the function was
    presumably omitted from the post -- confirm against the full file.
    """
    scores = model.predict_proba(Xtest)
df = pd.read_table('input.txt')
# Impute missing WBC (white blood cell count) values with the column mean.
df['WBC'] = fill_with_mean('WBC', df)

# train
features = df.drop("label", axis=1)
feature_names = list(features.columns)  # kept so weights can be named below
Y_data = df["label"].values  # data: Labels
X_data = features.values  # data: Features
train_X, test_X, train_y, test_y = train_test_split(
    X_data, Y_data, test_size=0.2, random_state=0)
cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)

# for plotting
label = []
models = []

# logistic regression
param_range = np.linspace(1e-4, 1e4, 100)
param_grid = {'C': param_range}
C, model = grid_search_model(train_X, train_y, LogisticRegression(), param_grid, cv)
models.append(model)

lr = LogisticRegressionCV(Cs=10, penalty='l1', cv=5, solver='liblinear',
                          scoring='roc_auc',
                          refit=True)
lr.fit(train_X, train_y)
coef = lr.coef_.ravel()

# Report every input feature with its fitted weight, largest magnitude
# first. A weight of zero means the feature contributes nothing to the
# L1-penalised model.
# NOTE(review): pairing names with the flattened coef_ assumes a binary
# label (a single coefficient row) -- confirm for this dataset.
ranked_weights = sorted(
    zip(feature_names, coef), key=lambda pair: abs(pair[1]), reverse=True)
for feature_name, weight in ranked_weights:
    print(feature_name, weight)

colors = ['b', 'r', 'c', 'm', 'g', 'y', 'k', 'darkorange']
names = ['LR', 'SVM', 'KNN', 'NB', 'linSVM', 'DTree', 'RF', 'adaBoost']
for i, model in enumerate(models):
    pred_y = model.predict_proba(test_X)[:, 1]
    fpr, tpr, threshold = roc_curve(test_y, pred_y, pos_label=1)
    auc = roc_auc_score(test_y, pred_y)
    label.append(names[i] + '=' + str(round(auc, 3)))
    # Pass colour and line style as keywords: a format string such as
    # 'darkorange-' is not valid, only single-letter colour codes are.
    plt.plot(fpr, tpr, color=colors[i], linestyle='-')
plt.legend(label, loc=4)
plt.show()
```
这是一条镜像帖。来源:北邮人论坛 / ml-dm / #27506,同步于 2017/12/26
ML_DM机器人发帖
LR新手求指教
changzhu
2017/12/26 · 镜像同步 · 0 回复
订阅后,新回复会通过你的通知中心匿名送达。
0 条回复
暂无回复 · 你可以订阅本帖等待新回复。