Skip to content

Commit ff6c25e

Browse files
committed
ML
1 parent 75d04f0 commit ff6c25e

30 files changed

Lines changed: 3408 additions & 0 deletions

.DS_Store

12 KB
Binary file not shown.

LDA.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Tue Mar 27 11:04:41 2018
5+
6+
@author: tinghai
7+
"""
8+
9+
import numpy as np
import lda
import lda.datasets  # was misspelled "lda.datesets", which fails at import time

# Number of headlines to print.
N_TITLES = 380

# Titles returned by the lda package's Reuters dataset loader.
titles = lda.datasets.load_reuters_titles()

# Slice instead of indexing a fixed range so a shorter title collection
# cannot raise IndexError.
for title in titles[:N_TITLES]:
    print(title)
15+
16+
17+

LightGBM.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Mon Mar 12 22:41:38 2018
5+
6+
@author: tinghai
7+
"""
8+
9+
#LightGBM
10+
11+
#%% 代码形式
12+
import json
13+
import lightgbm as lgb
14+
import pandas as pd
15+
from sklearn.metrics import roc_auc_score
16+
17+
18+
# Training and test sets wrapped as LightGBM datasets.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in the
# visible part of this file - they must be provided before this point.
lgb_train = lgb.Dataset(data=X_train, label=y_train)
# The evaluation set is built with the training Dataset as its reference.
lgb_eval = lgb.Dataset(data=X_test, label=y_test, reference=lgb_train)
21+
22+
23+
# Model building and prediction, variant 1
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    # A list (rather than a set) keeps the metric order stable between runs.
    'metric': ['l2', 'auc'],
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0,
}

# Early stopping is supplied as a callback: the `early_stopping_rounds`
# keyword of lgb.train() was removed in LightGBM 4.0, whereas the callback
# form is accepted by both older and newer releases.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=20,
    valid_sets=[lgb_eval],
    callbacks=[lgb.early_stopping(stopping_rounds=5)],
)

# Save the trained booster to a text file.
# NOTE(review): the 'lightgbm/' directory must already exist.
gbm.save_model('lightgbm/model.txt')

# Predict on the test features using the best iteration recorded on the booster.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
46+
47+
48+
# Model building and prediction, variant 2
# NOTE(review): train_data, dtest and predictors are not defined in the
# visible part of this file - they must be provided before this point.
param = {
    'max_depth': 6,
    'num_leaves': 64,
    'learning_rate': 0.03,
    'scale_pos_weight': 1,
    'num_threads': 40,
    'objective': 'binary',
    # Request AUC explicitly. Without it, cv() reports only the default
    # metric of the binary objective and the AUC lookup below raises KeyError.
    'metric': 'auc',
    'bagging_fraction': 0.7,
    'bagging_freq': 1,
    'min_sum_hessian_in_leaf': 100,
}

# 3-fold cross-validation; early stopping is passed as a callback because the
# `early_stopping_rounds` keyword was removed in LightGBM 4.0.
bst = lgb.cv(
    param,
    train_data,
    num_boost_round=1000,
    nfold=3,
    callbacks=[lgb.early_stopping(stopping_rounds=30)],
)

# The result key is 'auc-mean' before LightGBM 4.0 and 'valid auc-mean' from
# 4.0 on, so match on the suffix instead of hard-coding either spelling.
auc_mean_key = next(key for key in bst if key.endswith('auc-mean'))

# Retrain on the full training data for the number of rounds kept by cv().
estimators = lgb.train(param, train_data, num_boost_round=len(bst[auc_mean_key]))

ypred = estimators.predict(dtest[predictors])
66+
67+
68+
# Result evaluation: ROC AUC of y_pred against the test labels.
print('The roc of prediction is:', roc_auc_score(y_true=y_test, y_score=y_pred))
70+
71+
72+
# Dump the trained model to a JSON-serialisable object, then save it to a file.
model_json = gbm.dump_model()

with open('lightgbm/model.json', 'w+') as f:
    # Serialise first, then write the whole document in a single call.
    f.write(json.dumps(model_json, indent=4))
77+
78+
79+
# Feature importance: print the booster's feature names and their importances.
for label, value in (
    ('Feature names:', gbm.feature_name()),
    ('Feature importances:', list(gbm.feature_importance())),
):
    print(label, value)
82+
83+
84+
85+
86+
#%% 配置文件形式
87+
88+
#train.conf内容如下:
89+
90+
# 配置目标是用于训练
91+
task = train
92+
93+
# 训练方式
94+
boosting_type = gbdt
95+
96+
#目标 二分类
97+
objective = binary
98+
99+
# 损失函数
100+
metric = binary_logloss,auc
101+
102+
# frequency for metric output
103+
metric_freq = 1
104+
105+
# true if metric output is needed for the training data, alias: training_metric, train_metric
106+
is_training_metric = true
107+
108+
# 特征最大分割
109+
max_bin = 255
110+
111+
#训练数据地址
112+
data = /Users/shuubiasahi/Documents/githup/LightGBM/examples/binary_classification/binary.train
113+
114+
#测试数据
115+
#valid_data = binary.test
116+
117+
# 树的棵数
118+
num_trees = 100
119+
120+
# 学习率
121+
learning_rate = 0.1
122+
123+
# number of leaves for one tree, alias: num_leaf
124+
num_leaves = 63
125+
126+
tree_learner = serial
127+
128+
# 最大线程个数
129+
# num_threads = 8
130+
131+
# feature sub-sample, will randomly select 80% of the features to train on at each iteration
132+
# alias: sub_feature
133+
feature_fraction = 0.8
134+
135+
# Support bagging (data sub-sample), will perform bagging every 5 iterations
136+
bagging_freq = 5
137+
138+
# Bagging fraction, will randomly select 80% of the data on bagging
139+
# alias: sub_row
140+
bagging_fraction = 0.8
141+
142+
# minimal number data for one leaf, use this to deal with over-fit
143+
# alias : min_data_per_leaf, min_data
144+
min_data_in_leaf = 50
145+
146+
# minimal sum of hessians for one leaf, use this to deal with over-fit
147+
min_sum_hessian_in_leaf = 5.0
148+
149+
# save memory and faster speed for sparse feature, alias: is_sparse
150+
is_enable_sparse = true
151+
152+
# when the data is bigger than memory, set this to true; otherwise setting it to false gives faster speed
153+
# alias: two_round_loading, two_round
154+
use_two_round_loading = false
155+
156+
# true if need to save data to binary file and application will auto load data from binary file next time
157+
# alias: is_save_binary, save_binary
158+
is_save_binary_file = false
159+
160+
# 模型输出文件
161+
output_model = /Users/shuubiasahi/Documents/githup/LightGBM/examples/binary_classification/LightGBM_model.txt
162+
machine_list_file = /Users/shuubiasahi/Documents/githup/LightGBM/examples/binary_classification/
163+
164+
# end
165+
166+
167+
168+
#模型训练
169+
./lightgbm config=train.conf
170+
171+
172+
173+
174+
175+
176+
177+
178+
179+

0 commit comments

Comments
 (0)