Struct gbdt::gradient_boost::GBDT
pub struct GBDT { /* private fields */ }
The gradient boosting decision tree.
Implementations
impl GBDT
pub fn new(conf: &Config) -> GBDT
Return a new GBDT with a manually set config.
Example
use gbdt::config::Config;
use gbdt::gradient_boost::GBDT;
// set config for algorithm
let mut cfg = Config::new();
cfg.set_feature_size(3);
cfg.set_max_depth(2);
cfg.set_min_leaf_size(1);
cfg.set_loss("SquaredError");
cfg.set_iterations(2);
// initialize GBDT algorithm
let mut gbdt = GBDT::new(&cfg);
pub fn fit(&mut self, train_data: &mut DataVec)
Fit the training data.
First, initialize and configure the decision trees. Then train the model for the number of iterations set in the config.
Example
use gbdt::config::Config;
use gbdt::gradient_boost::GBDT;
use gbdt::decision_tree::{Data, DataVec, PredVec, ValueType};
// set config for algorithm
let mut cfg = Config::new();
cfg.set_feature_size(3);
cfg.set_max_depth(2);
cfg.set_min_leaf_size(1);
cfg.set_loss("SquaredError");
cfg.set_iterations(2);
// initialize GBDT algorithm
let mut gbdt = GBDT::new(&cfg);
// setup training data
// Each sample: feature vector, weight, label, initial guess (None).
let data1 = Data::new_training_data(vec![1.0, 2.0, 3.0], 1.0, 1.0, None);
let data2 = Data::new_training_data(vec![1.1, 2.1, 3.1], 1.0, 1.0, None);
let data3 = Data::new_training_data(vec![2.0, 2.0, 1.0], 1.0, 2.0, None);
let data4 = Data::new_training_data(vec![2.0, 2.3, 1.2], 1.0, 0.0, None);
let mut training_data: DataVec = Vec::new();
training_data.push(data1.clone());
training_data.push(data2.clone());
training_data.push(data3.clone());
training_data.push(data4.clone());
// train the decision trees.
gbdt.fit(&mut training_data);
pub fn predict(&self, test_data: &DataVec) -> PredVec
Predict the given data.
Note that for the log likelihood loss type, the predicted values are normalized between 0 and 1, representing the probability of label 1.
Example
use gbdt::config::Config;
use gbdt::gradient_boost::GBDT;
use gbdt::decision_tree::{Data, DataVec, PredVec, ValueType};
// set config for algorithm
let mut cfg = Config::new();
cfg.set_feature_size(3);
cfg.set_max_depth(2);
cfg.set_min_leaf_size(1);
cfg.set_loss("SquaredError");
cfg.set_iterations(2);
// initialize GBDT algorithm
let mut gbdt = GBDT::new(&cfg);
// setup training data
// Each sample: feature vector, weight, label, initial guess (None).
let data1 = Data::new_training_data(vec![1.0, 2.0, 3.0], 1.0, 1.0, None);
let data2 = Data::new_training_data(vec![1.1, 2.1, 3.1], 1.0, 1.0, None);
let data3 = Data::new_training_data(vec![2.0, 2.0, 1.0], 1.0, 2.0, None);
let data4 = Data::new_training_data(vec![2.0, 2.3, 1.2], 1.0, 0.0, None);
let mut training_data: DataVec = Vec::new();
training_data.push(data1.clone());
training_data.push(data2.clone());
training_data.push(data3.clone());
training_data.push(data4.clone());
// train the decision trees.
gbdt.fit(&mut training_data);
// setup the test data
let mut test_data: DataVec = Vec::new();
test_data.push(data1.clone());
test_data.push(data2.clone());
test_data.push(data3.clone());
test_data.push(data4.clone());
println!("{:?}", gbdt.predict(&test_data));
Panics
If the training process is not completed, i.e., the number of trees trained is less than the number of iterations set in self.conf, this function will panic.
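For the log likelihood loss, the returned values can be turned into class labels by thresholding. A minimal sketch, assuming a model trained with the "LogLikelyhood" loss (the 0.5 cutoff and the 1/0 labels are illustrative choices, not mandated by the crate):
let probs = gbdt.predict(&test_data);
// Illustrative: treat each value as the probability of label 1 and threshold at 0.5.
let labels: Vec<u8> = probs.iter().map(|&p| if p > 0.5 { 1 } else { 0 }).collect();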
pub fn predict_multiclass(
    &self,
    test_data: &DataVec,
    class_num: usize
) -> (Vec<usize>, Vec<Vec<ValueType>>)
Predict multi-class data and return the probabilities for each class. The loss type should be "multi:softmax" or "multi:softprob".
test_data: the test set
class_num: the number of classes
output: the predicted class labels and the predicted probabilities for each class
Example
use gbdt::gradient_boost::GBDT;
use gbdt::input::{load, InputFormat};
use gbdt::decision_tree::DataVec;
let gbdt =
GBDT::from_xgoost_dump("xgb-data/xgb_multi_softmax/gbdt.model", "multi:softmax").unwrap();
let test_file = "xgb-data/xgb_multi_softmax/dermatology.data.test";
let mut fmt = InputFormat::csv_format();
fmt.set_label_index(34);
let test_data: DataVec = load(test_file, fmt).unwrap();
let (labels, probs) = gbdt.predict_multiclass(&test_data, 6);
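As a brief follow-up, the returned labels can be checked against the ground truth. A sketch, assuming Data's public label field holds the true class index:
// Count how many predicted labels match the true labels.
let correct = labels
    .iter()
    .zip(test_data.iter())
    .filter(|(pred, data)| **pred == data.label as usize)
    .count();
println!("accuracy: {}", correct as f64 / test_data.len() as f64);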
pub fn print_trees(&self)
Print the trees for debugging.
Example
use gbdt::config::Config;
use gbdt::gradient_boost::GBDT;
use gbdt::decision_tree::{Data, DataVec, PredVec, ValueType};
// set config for algorithm
let mut cfg = Config::new();
cfg.set_feature_size(3);
cfg.set_max_depth(2);
cfg.set_min_leaf_size(1);
cfg.set_loss("SquaredError");
cfg.set_iterations(2);
// initialize GBDT algorithm
let mut gbdt = GBDT::new(&cfg);
// setup training data
// Each sample: feature vector, weight, label, initial guess (None).
let data1 = Data::new_training_data(vec![1.0, 2.0, 3.0], 1.0, 1.0, None);
let data2 = Data::new_training_data(vec![1.1, 2.1, 3.1], 1.0, 1.0, None);
let data3 = Data::new_training_data(vec![2.0, 2.0, 1.0], 1.0, 2.0, None);
let data4 = Data::new_training_data(vec![2.0, 2.3, 1.2], 1.0, 0.0, None);
let mut dv: DataVec = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());
// train the decision trees.
gbdt.fit(&mut dv);
// print the tree.
gbdt.print_trees();
pub fn save_model(&self, filename: &str) -> Result<(), Box<dyn Error>>
Save the model to a file using serde.
Example
use gbdt::config::Config;
use gbdt::gradient_boost::GBDT;
use gbdt::decision_tree::{Data, DataVec, PredVec, ValueType};
// set config for algorithm
let mut cfg = Config::new();
cfg.set_feature_size(3);
cfg.set_max_depth(2);
cfg.set_min_leaf_size(1);
cfg.set_loss("SquaredError");
cfg.set_iterations(2);
// initialize GBDT algorithm
let mut gbdt = GBDT::new(&cfg);
// setup training data
// Each sample: feature vector, weight, label, initial guess (None).
let data1 = Data::new_training_data(vec![1.0, 2.0, 3.0], 1.0, 1.0, None);
let data2 = Data::new_training_data(vec![1.1, 2.1, 3.1], 1.0, 1.0, None);
let data3 = Data::new_training_data(vec![2.0, 2.0, 1.0], 1.0, 2.0, None);
let data4 = Data::new_training_data(vec![2.0, 2.3, 1.2], 1.0, 0.0, None);
let mut dv: DataVec = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());
// train the decision trees.
gbdt.fit(&mut dv);
// Save model.
// gbdt.save_model("gbdt.model");
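A saved model can be restored later. A short sketch, assuming the crate's companion GBDT::load_model constructor:
// Load the model back (commented out, like the save above, to keep the
// example from touching the filesystem).
// let gbdt2 = GBDT::load_model("gbdt.model").unwrap();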
pub fn from_xgoost_dump(
    model_file: &str,
    objective: &str
) -> Result<Self, Box<dyn Error>>
Load a model from an xgboost model dump. The xgboost model should first be converted with "convert_xgboost.py".
Example
use gbdt::gradient_boost::GBDT;
let gbdt =
GBDT::from_xgoost_dump("xgb-data/xgb_binary_logistic/gbdt.model", "binary:logistic").unwrap();
Errors
Returns an error if the model file cannot be parsed.
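Once loaded, the model predicts like any other GBDT. A small sketch of feeding it CSV test data (the file path and label index are illustrative assumptions):
use gbdt::input::{load, InputFormat};
let mut fmt = InputFormat::csv_format();
fmt.set_label_index(0);
// "test.csv" is a hypothetical file whose features match the trained model.
let test_data = load("test.csv", fmt).unwrap();
let predictions = gbdt.predict(&test_data);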