ナード戦隊データマン

データサイエンスを用いて悪と戦うぞ

グリーディ法によるxgboostハイパーパラメータのサーチ

GridSearchCVを用いて、あらゆるパラメータの組合せをサーチできれば、大局的な最適パラメータを探すことが可能ですが、組合せの総数が多すぎて、計算時間がかかりすぎてしまう場合があります。その場合、局所最適な方法でサーチするともう少し早く実行できます。

1回目: max_depth, min_child_weight

# First-pass search grid: coarse ranges for tree depth and
# minimum child weight.
param1 = {
    'max_depth': list(range(3, 10, 2)),        # [3, 5, 7, 9]
    'min_child_weight': list(range(1, 6, 2)),  # [1, 3, 5]
}

# Coarse grid search over max_depth / min_child_weight.
# FIX: the original passed iid=False (parameter removed in scikit-learn
# 0.24) and read grid_scores_ (attribute removed in 0.20); modern
# scikit-learn exposes the per-candidate results via cv_results_ instead.
grid1 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1,
        n_estimators=100,
        max_depth=5,
        min_child_weight=1,
        gamma=0,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=42,
    ),
    param_grid=param1,
    scoring='roc_auc',
    n_jobs=4,
    cv=5,
)
grid1.fit(X, y)
# cv_results_ is a dict of arrays (mean_test_score, std_test_score, params, ...)
grid1.cv_results_, grid1.best_params_, grid1.best_score_
([mean: 0.99404, std: 0.00425, params: {'max_depth': 3, 'min_child_weight': 1},
  mean: 0.99254, std: 0.00561, params: {'max_depth': 3, 'min_child_weight': 3},
  mean: 0.99095, std: 0.00657, params: {'max_depth': 3, 'min_child_weight': 5},
  mean: 0.99326, std: 0.00472, params: {'max_depth': 5, 'min_child_weight': 1},
  mean: 0.99290, std: 0.00522, params: {'max_depth': 5, 'min_child_weight': 3},
  mean: 0.99082, std: 0.00667, params: {'max_depth': 5, 'min_child_weight': 5},
  mean: 0.99325, std: 0.00451, params: {'max_depth': 7, 'min_child_weight': 1},
  mean: 0.99290, std: 0.00522, params: {'max_depth': 7, 'min_child_weight': 3},
  mean: 0.99082, std: 0.00667, params: {'max_depth': 7, 'min_child_weight': 5},
  mean: 0.99325, std: 0.00451, params: {'max_depth': 9, 'min_child_weight': 1},
  mean: 0.99290, std: 0.00522, params: {'max_depth': 9, 'min_child_weight': 3},
  mean: 0.99082, std: 0.00667, params: {'max_depth': 9, 'min_child_weight': 5}],
 {'max_depth': 3, 'min_child_weight': 1},
 0.9940374806981422)

2回目: max_depth

min_child_weightは1とわかったため、max_depthの範囲をもっと細かくサーチします。

# Second pass: refine max_depth around the optimum found above.
param2 = {
    'max_depth': list(range(1, 5)),  # [1, 2, 3, 4]
}

# Fine-grained search over max_depth (min_child_weight fixed at 1).
# FIX: dropped iid=False (removed in scikit-learn 0.24) and replaced the
# removed grid_scores_ attribute with cv_results_.
grid2 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1,
        n_estimators=100,
        max_depth=5,
        min_child_weight=1,
        gamma=0,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=42,
    ),
    param_grid=param2,
    scoring='roc_auc',
    n_jobs=4,
    cv=5,
)
grid2.fit(X, y)
grid2.cv_results_, grid2.best_params_, grid2.best_score_
([mean: 0.99112, std: 0.00688, params: {'max_depth': 1},
  mean: 0.99383, std: 0.00391, params: {'max_depth': 2},
  mean: 0.99404, std: 0.00425, params: {'max_depth': 3},
  mean: 0.99351, std: 0.00424, params: {'max_depth': 4}],
 {'max_depth': 3},
 0.9940374806981422)

3回目: gamma

# Third pass: coarse sweep of the gamma regularization term.
param3 = {
    'gamma': [0.0, 0.1, 0.2, 0.3, 0.4],
}

# Grid search over gamma with the tuned max_depth=3.
# FIX: dropped iid=False (removed in scikit-learn 0.24) and replaced the
# removed grid_scores_ attribute with cv_results_.
grid3 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1,
        n_estimators=100,
        max_depth=3,
        min_child_weight=1,
        gamma=0,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=42,
    ),
    param_grid=param3,
    scoring='roc_auc',
    n_jobs=4,
    cv=5,
)
grid3.fit(X, y)
grid3.cv_results_, grid3.best_params_, grid3.best_score_
([mean: 0.99404, std: 0.00425, params: {'gamma': 0.0},
  mean: 0.99437, std: 0.00396, params: {'gamma': 0.1},
  mean: 0.99411, std: 0.00417, params: {'gamma': 0.2},
  mean: 0.99392, std: 0.00460, params: {'gamma': 0.3},
  mean: 0.99352, std: 0.00458, params: {'gamma': 0.4}],
 {'gamma': 0.1},
 0.9943728261039103)

4回目: gamma

gammaについて0.1周辺の値を細かくサーチします。

# Fourth pass: fine sweep of gamma around 0.1.
param4 = dict(gamma=[0.08, 0.09, 0.1, 0.11, 0.12])

# Fine grid search over gamma near the previous optimum (0.1).
# FIX: dropped iid=False (removed in scikit-learn 0.24) and replaced the
# removed grid_scores_ attribute with cv_results_.
grid4 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1,
        n_estimators=100,
        max_depth=3,
        min_child_weight=1,
        gamma=0,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=42,
    ),
    param_grid=param4,
    scoring='roc_auc',
    n_jobs=4,
    cv=5,
)
grid4.fit(X, y)
grid4.cv_results_, grid4.best_params_, grid4.best_score_
([mean: 0.99404, std: 0.00420, params: {'gamma': 0.08},
  mean: 0.99417, std: 0.00421, params: {'gamma': 0.09},
  mean: 0.99437, std: 0.00396, params: {'gamma': 0.1},
  mean: 0.99450, std: 0.00403, params: {'gamma': 0.11},
  mean: 0.99424, std: 0.00409, params: {'gamma': 0.12}],
 {'gamma': 0.11},
 0.994504494668268)

5回目: subsample, colsample_bytree

# Fifth pass: joint sweep of the row/column sampling ratios.
param5 = {
    'subsample': [0.6, 0.7, 0.8, 0.9],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9],
}

# Grid search over subsample / colsample_bytree with tuned gamma=0.11.
# FIX: dropped iid=False (removed in scikit-learn 0.24) and replaced the
# removed grid_scores_ attribute with cv_results_.
grid5 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1,
        n_estimators=100,
        max_depth=3,
        min_child_weight=1,
        gamma=0.11,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=42,
    ),
    param_grid=param5,
    scoring='roc_auc',
    n_jobs=4,
    cv=5,
)
grid5.fit(X, y)
grid5.cv_results_, grid5.best_params_, grid5.best_score_
([mean: 0.99319, std: 0.00455, params: {'colsample_bytree': 0.6, 'subsample': 0.6},
  mean: 0.99319, std: 0.00446, params: {'colsample_bytree': 0.6, 'subsample': 0.7},
  mean: 0.99331, std: 0.00454, params: {'colsample_bytree': 0.6, 'subsample': 0.8},
  mean: 0.99300, std: 0.00526, params: {'colsample_bytree': 0.6, 'subsample': 0.9},
  mean: 0.99325, std: 0.00452, params: {'colsample_bytree': 0.7, 'subsample': 0.6},
  mean: 0.99325, std: 0.00425, params: {'colsample_bytree': 0.7, 'subsample': 0.7},
  mean: 0.99319, std: 0.00471, params: {'colsample_bytree': 0.7, 'subsample': 0.8},
  mean: 0.99292, std: 0.00464, params: {'colsample_bytree': 0.7, 'subsample': 0.9},
  mean: 0.99318, std: 0.00435, params: {'colsample_bytree': 0.8, 'subsample': 0.6},
  mean: 0.99370, std: 0.00416, params: {'colsample_bytree': 0.8, 'subsample': 0.7},
  mean: 0.99450, std: 0.00403, params: {'colsample_bytree': 0.8, 'subsample': 0.8},
  mean: 0.99338, std: 0.00436, params: {'colsample_bytree': 0.8, 'subsample': 0.9},
  mean: 0.99291, std: 0.00453, params: {'colsample_bytree': 0.9, 'subsample': 0.6},
  mean: 0.99391, std: 0.00415, params: {'colsample_bytree': 0.9, 'subsample': 0.7},
  mean: 0.99397, std: 0.00379, params: {'colsample_bytree': 0.9, 'subsample': 0.8},
  mean: 0.99298, std: 0.00446, params: {'colsample_bytree': 0.9, 'subsample': 0.9}],
 {'colsample_bytree': 0.8, 'subsample': 0.8},
 0.994504494668268)

6回目: reg_alpha

# Sixth pass: coarse log-scale sweep of the L1 regularization weight.
param6 = dict(reg_alpha=[0.001, 0.01, 0.1, 1, 10])

# Grid search over reg_alpha (L1 regularization).
# FIX: dropped iid=False (removed in scikit-learn 0.24) and replaced the
# removed grid_scores_ attribute with cv_results_.
grid6 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1,
        n_estimators=100,
        max_depth=3,
        min_child_weight=1,
        gamma=0.11,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=42,
    ),
    param_grid=param6,
    scoring='roc_auc',
    n_jobs=4,
    cv=5,
)
grid6.fit(X, y)
grid6.cv_results_, grid6.best_params_, grid6.best_score_
([mean: 0.99450, std: 0.00403, params: {'reg_alpha': 0.001},
  mean: 0.99404, std: 0.00422, params: {'reg_alpha': 0.01},
  mean: 0.99411, std: 0.00417, params: {'reg_alpha': 0.1},
  mean: 0.99293, std: 0.00499, params: {'reg_alpha': 1},
  mean: 0.99016, std: 0.00704, params: {'reg_alpha': 10}],
 {'reg_alpha': 0.001},
 0.994504494668268)

7回目: reg_alpha

# Seventh pass: fine sweep of reg_alpha around 0.001.
param7 = dict(reg_alpha=[0.0008, 0.001, 0.003])

# Fine grid search over reg_alpha near the previous optimum (0.001).
# FIX: dropped iid=False (removed in scikit-learn 0.24) and replaced the
# removed grid_scores_ attribute with cv_results_.
grid7 = GridSearchCV(
    estimator=XGBClassifier(
        learning_rate=0.1,
        n_estimators=100,
        max_depth=3,
        min_child_weight=1,
        gamma=0.11,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=42,
    ),
    param_grid=param7,
    scoring='roc_auc',
    n_jobs=4,
    cv=5,
)
grid7.fit(X, y)
grid7.cv_results_, grid7.best_params_, grid7.best_score_
([mean: 0.99450, std: 0.00403, params: {'reg_alpha': 0.0008},
  mean: 0.99450, std: 0.00403, params: {'reg_alpha': 0.001},
  mean: 0.99437, std: 0.00409, params: {'reg_alpha': 0.003}],
 {'reg_alpha': 0.0008},
 0.994504494668268)

最終的な訓練: learning_rate, n_estimators

# Final training run: all tuned hyperparameters, with a lower learning
# rate (0.01) and more boosting rounds (1000) for the production model.
final_params = dict(
    learning_rate=0.01,
    n_estimators=1000,
    reg_alpha=0.0008,
    max_depth=3,
    min_child_weight=1,
    gamma=0.11,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    nthread=4,
    scale_pos_weight=1,
    seed=42,
)
clf = XGBClassifier(**final_params).fit(X, y)

まとめ

  1. 少数のパラメータを選び、それについてGridSearchCVを回す。
  2. より細かくサーチできそうな場合は、直前のサーチで求まった値の周辺の値を選び、サーチする。
  3. 全て求まったらlearning_rateを減らし、n_estimatorsを増やして最終的な訓練をする。