import numpy as np
import xgboost as xgb
# Build one DMatrix per split from the agaricus text files; the paths are
# relative to the current working directory.
dtrain, dtest = map(
    xgb.DMatrix,
    ('./demo/data/agaricus.txt.train', './demo/data/agaricus.txt.test'),
)
# (DMatrix, label) pairs handed to xgb.train below; the labels are the
# prefixes that appear in the per-round metric output.
watchlist = [
    (dtest, 'eval'),
    (dtrain, 'train'),
]
print('Start running example to start from a initial prediction')
Start running example to start from a initial prediction
# Booster settings: depth-2 trees, eta (learning rate) of 1 and the binary
# logistic objective. 'verbosity': 0 replaces the deprecated 'silent': 1
# flag, which current XGBoost releases no longer recognise.
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic'}
# Train a single boosting round, reporting metrics on the watchlist. The
# round count and evals are passed by keyword because newer XGBoost
# releases treat the arguments after num_boost_round as keyword-only.
bst = xgb.train(param, dtrain, num_boost_round=1, evals=watchlist)
[0] eval-error:0.042831 train-error:0.046522
# output_margin=True makes predict() return the raw, untransformed margin
# score of the model (for binary:logistic, the value before the sigmoid is
# applied) instead of a probability. A base margin must be given in this
# raw form, so the margins are what get stored on each DMatrix.
ptrain = bst.predict(dtrain, output_margin=True)
dtrain.set_base_margin(ptrain)

ptest = bst.predict(dtest, output_margin=True)
dtest.set_base_margin(ptest)

print('This is result of running from initial prediction')
This is result of running from initial prediction
# Train one more round from scratch. dtrain and dtest now carry the first
# model's margins as their base margin, so this round boosts on top of
# those predictions. evals is passed by keyword because newer XGBoost
# releases treat the arguments after num_boost_round as keyword-only.
bst = xgb.train(param, dtrain, num_boost_round=1, evals=watchlist)
[0] eval-error:0.021726 train-error:0.022263
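从结果上来看，把第一轮模型输出的 margin 设为 base margin 后再训练一轮，错误率确实下降了：eval-error 从 0.042831 降到 0.021726，train-error 从 0.046522 降到 0.022263。这是因为第二轮训练是在第一轮预测的基础上继续提升，而不是从头开始。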