pub struct DecisionTree { /* private fields */ }

The decision tree.

Implementations

new: Returns a new decision tree with default values (feature_size = 1, max_depth = 2, min_leaf_size = 1, loss = Loss::SquaredError, feature_sample_ratio = 1.0).

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();

set_feature_size: Sets the number of features. Training data and test data should have the same feature size.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_feature_size(3);

set_max_depth: Sets the maximum depth of the decision tree. The root node is considered to be in layer 0.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_max_depth(2);

set_min_leaf_size: Sets the minimum number of samples required at a leaf node during training.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_min_leaf_size(1);

set_loss: Sets the loss function type.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_loss(Loss::SquaredError);

set_feature_sample_ratio: Sets the portion of features considered when splitting. When splitting a node, a random subset of the features (of size feature_size * feature_sample_ratio) is selected to calculate impurity.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree};
let mut tree = DecisionTree::new();
tree.set_feature_sample_ratio(0.9);

fit_n: Trains the decision tree on the given subset (by index) of train_data.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0], // feature vector
    1.0,                 // weight
    2.0,                 // label
    None                 // initial guess
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1, 2];
tree.fit_n(&dv, &subset, &mut cache);

fit: Trains the decision tree on all samples in train_data.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    1.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    2.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);

predict_n: Runs inference on a subset of the test_data and returns a vector of predicted values. If i is in the subset, output[i] is the prediction; if i is not in the subset, output[i] is 0.0.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);


// set up the test data
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// inference the test data with the decision tree
let subset = [0, 1, 2];
println!("{:?}", tree.predict_n(&dv, &subset));


// output:
// [2.0, 0.75, 0.75, 0.0]
// data2 and data3 fall into the same leaf, so both get the leaf mean (1.0 + 0.5) / 2 = 0.75;
// index 3 is not in the subset, so output[3] is left at 0.0
Panics

If this function is called before the decision tree is trained, it will panic.

If the test data has a smaller feature size than the tree's feature size, it will panic.

predict: Runs inference on all samples in the test_data and returns a vector of the predicted values.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);
let data4 = Data::new_training_data(
    vec![2.0, 2.3, 1.2],
    1.0,
    3.0,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
tree.fit(&dv, &mut cache);


// set up the test data
let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());
dv.push(data4.clone());


// inference the test data with the decision tree
println!("{:?}", tree.predict(&dv));


// output:
// [2.0, 0.75, 0.75, 3.0]
// data2 and data3 fall into the same leaf, so both get the leaf mean (1.0 + 0.5) / 2 = 0.75
Panics

If this function is called before the decision tree is trained, it will panic.

If the test data has a smaller feature size than the tree's feature size, it will panic.

print: Prints the decision tree. For debugging.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1];
tree.fit_n(&dv, &subset, &mut cache);


tree.print();

// output:

//  ----DTNode { feature_index: 0, feature_value: 1.05, pred: 1.5, is_leaf: false }
//      ----DTNode { feature_index: 0, feature_value: 0.0, pred: 2.0, is_leaf: true }
//      ----DTNode { feature_index: 0, feature_value: 0.0, pred: 1.0, is_leaf: true }

get_from_xgboost: Builds a decision tree from an xgboost model. xgboost can dump a trained model in JSON format, and serde_json is used to parse the JSON string.

Example
use serde_json::{Result, Value};
use gbdt::decision_tree::DecisionTree;
let data = r#"
      { "nodeid": 0, "depth": 0, "split": 0, "split_condition": 750, "yes": 1, "no": 2, "missing": 2, "children": [
         { "nodeid": 1, "leaf": 25.7333336 },
         { "nodeid": 2, "leaf": 15.791667 }]}"#;
let node: Value = serde_json::from_str(data).unwrap();
let dt = DecisionTree::get_from_xgboost(&node);
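
In the dump format above, an internal node carries the split feature ("split"), the threshold ("split_condition"), and the ids of the children taken when the condition holds, when it fails, and when the feature is missing ("yes", "no", "missing"); a leaf node carries only its "leaf" value. A small follow-on sketch (plain serde_json only; it assumes nothing about the gbdt API beyond the example above) that inspects the parsed node:

assert_eq!(node["split"], 0);                        // split on feature 0
assert_eq!(node["split_condition"], 750);            // split threshold
assert_eq!(node["children"][0]["leaf"], 25.7333336); // the "yes" child is a leaf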

len: Returns the number of nodes in the current decision tree. For debugging.

Example
use gbdt::config::Loss;
use gbdt::decision_tree::{Data, DecisionTree, TrainingCache};
// set up training data
let data1 = Data::new_training_data(
    vec![1.0, 2.0, 3.0],
    1.0,
    2.0,
    None
);
let data2 = Data::new_training_data(
    vec![1.1, 2.1, 3.1],
    1.0,
    1.0,
    None
);
let data3 = Data::new_training_data(
    vec![2.0, 2.0, 1.0],
    1.0,
    0.5,
    None
);

let mut dv = Vec::new();
dv.push(data1.clone());
dv.push(data2.clone());
dv.push(data3.clone());


// train a decision tree
let mut tree = DecisionTree::new();
tree.set_feature_size(3);
tree.set_max_depth(2);
tree.set_min_leaf_size(1);
tree.set_loss(Loss::SquaredError);
let mut cache = TrainingCache::get_cache(3, &dv, 2);
let subset = [0, 1];
tree.fit_n(&dv, &subset, &mut cache);

assert_eq!(tree.len(), 3);

is_empty: Returns true if the current decision tree is empty (contains no nodes).
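
A minimal sketch, assuming a freshly constructed tree contains no nodes until it is trained:

use gbdt::decision_tree::DecisionTree;
let tree = DecisionTree::new();
// Assumption: nodes are only created once fit or fit_n is called.
assert!(tree.is_empty());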

Trait Implementations

Debug: Formats the value using the given formatter.
Default: Returns the "default value" for a type.
Deserialize: Deserializes this value from the given Serde deserializer.
Serialize: Serializes this value into the given Serde serializer.
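
Since DecisionTree implements Default, Serialize, and Deserialize, an untrained tree can be obtained via Default::default() and a trained tree can be persisted with any Serde format. A minimal sketch using serde_json (chosen here only because the crate already uses it for get_from_xgboost; the serialized layout is an internal detail):

use gbdt::decision_tree::DecisionTree;
let tree: DecisionTree = Default::default();
// Round-trip the tree through a JSON string.
let encoded = serde_json::to_string(&tree).unwrap();
let decoded: DecisionTree = serde_json::from_str(&encoded).unwrap();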
