public class Strategy
extends Object
implements scala.Serializable
param: algo Learning goal. Supported:
org.apache.spark.mllib.tree.configuration.Algo.Classification,
org.apache.spark.mllib.tree.configuration.Algo.Regression
param: impurity Criterion used for information gain calculation.
Supported for Classification: Gini, Entropy.
Supported for Regression: Variance.
param: maxDepth Maximum depth of the tree (e.g. depth 0 means 1 leaf node, depth 1 means
1 internal node + 2 leaf nodes).
param: numClasses Number of classes for classification.
(Ignored for regression.)
Default value is 2 (binary classification).
param: maxBins Maximum number of bins used for discretizing continuous features and
for choosing how to split on features at each node.
More bins give higher granularity.
param: quantileCalculationStrategy Algorithm for calculating quantiles. Supported:
org.apache.spark.mllib.tree.configuration.QuantileStrategy.Sort
param: categoricalFeaturesInfo A map storing information about the categorical variables and the
number of discrete values they take. An entry (n -> k)
indicates that feature n is categorical with k categories
indexed from 0: {0, 1, ..., k-1}.
param: minInstancesPerNode Minimum number of instances each child must have after split.
Default value is 1. If a split causes the left or right child
to have fewer than minInstancesPerNode instances,
this split will not be considered as a valid split.
param: minInfoGain Minimum information gain a split must get. Default value is 0.0.
If a split has less information gain than minInfoGain,
this split will not be considered as a valid split.
param: maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. Default value is
256 MB. If too small, then 1 node will be split per iteration, and
its aggregates may exceed this size.
param: subsamplingRate Fraction of the training data used for learning a decision tree.
param: useNodeIdCache If this is true, instead of passing trees to executors, the algorithm will
maintain a separate RDD of node Id cache for each row.
param: checkpointInterval How often to checkpoint when the node Id cache gets updated.
E.g. 10 means that the cache will get checkpointed every 10 updates. If
the checkpoint directory is not set in SparkContext, this setting is ignored.
Constructor and Description |
---|
Strategy(scala.Enumeration.Value algo,
Impurity impurity,
int maxDepth,
int numClasses,
int maxBins,
scala.Enumeration.Value quantileCalculationStrategy,
scala.collection.immutable.Map<Object,Object> categoricalFeaturesInfo,
int minInstancesPerNode,
double minInfoGain,
int maxMemoryInMB,
double subsamplingRate,
boolean useNodeIdCache,
int checkpointInterval) |
Strategy(scala.Enumeration.Value algo,
Impurity impurity,
int maxDepth,
int numClasses,
int maxBins,
java.util.Map<Integer,Integer> categoricalFeaturesInfo)
Java-friendly constructor for
Strategy |
Modifier and Type | Method and Description |
---|---|
scala.Enumeration.Value |
algo() |
scala.collection.immutable.Map<Object,Object> |
categoricalFeaturesInfo() |
int |
checkpointInterval() |
Strategy |
copy()
Returns a shallow copy of this instance.
|
static Strategy |
defaultStrategy(scala.Enumeration.Value algo)
Construct a default set of parameters for
DecisionTree |
static Strategy |
defaultStrategy(String algo)
Construct a default set of parameters for
DecisionTree |
scala.Enumeration.Value |
getAlgo() |
scala.collection.immutable.Map<Object,Object> |
getCategoricalFeaturesInfo() |
int |
getCheckpointInterval() |
Impurity |
getImpurity() |
int |
getMaxBins() |
int |
getMaxDepth() |
int |
getMaxMemoryInMB() |
double |
getMinInfoGain() |
int |
getMinInstancesPerNode() |
int |
getNumClasses() |
scala.Enumeration.Value |
getQuantileCalculationStrategy() |
double |
getSubsamplingRate() |
boolean |
getUseNodeIdCache() |
Impurity |
impurity() |
boolean |
isMulticlassClassification() |
boolean |
isMulticlassWithCategoricalFeatures() |
int |
maxBins() |
int |
maxDepth() |
int |
maxMemoryInMB() |
double |
minInfoGain() |
int |
minInstancesPerNode() |
int |
numClasses() |
scala.Enumeration.Value |
quantileCalculationStrategy() |
void |
setAlgo(String algo)
Sets Algorithm using a String.
|
void |
setCategoricalFeaturesInfo(java.util.Map<Integer,Integer> categoricalFeaturesInfo)
Sets categoricalFeaturesInfo using a Java Map.
|
void |
setCheckpointInterval(int x$1) |
void |
setImpurity(Impurity x$1) |
void |
setMaxBins(int x$1) |
void |
setMaxDepth(int x$1) |
void |
setMaxMemoryInMB(int x$1) |
void |
setMinInfoGain(double x$1) |
void |
setMinInstancesPerNode(int x$1) |
void |
setNumClasses(int x$1) |
void |
setQuantileCalculationStrategy(scala.Enumeration.Value x$1) |
void |
setSubsamplingRate(double x$1) |
void |
setUseNodeIdCache(boolean x$1) |
double |
subsamplingRate() |
boolean |
useNodeIdCache() |
public Strategy(scala.Enumeration.Value algo, Impurity impurity, int maxDepth, int numClasses, int maxBins, scala.Enumeration.Value quantileCalculationStrategy, scala.collection.immutable.Map<Object,Object> categoricalFeaturesInfo, int minInstancesPerNode, double minInfoGain, int maxMemoryInMB, double subsamplingRate, boolean useNodeIdCache, int checkpointInterval)
public Strategy(scala.Enumeration.Value algo, Impurity impurity, int maxDepth, int numClasses, int maxBins, java.util.Map<Integer,Integer> categoricalFeaturesInfo)
Java-friendly constructor for Strategy
algo - (undocumented)
impurity - (undocumented)
maxDepth - (undocumented)
numClasses - (undocumented)
maxBins - (undocumented)
categoricalFeaturesInfo - (undocumented)
public static Strategy defaultStrategy(String algo)
Construct a default set of parameters for DecisionTree
algo - "Classification" or "Regression"
public static Strategy defaultStrategy(scala.Enumeration.Value algo)
Construct a default set of parameters for DecisionTree
algo - Algo.Classification or Algo.Regression
public scala.Enumeration.Value algo()
public Impurity impurity()
public void setImpurity(Impurity x$1)
public int maxDepth()
public void setMaxDepth(int x$1)
public int numClasses()
public void setNumClasses(int x$1)
public int maxBins()
public void setMaxBins(int x$1)
public scala.Enumeration.Value quantileCalculationStrategy()
public void setQuantileCalculationStrategy(scala.Enumeration.Value x$1)
public scala.collection.immutable.Map<Object,Object> categoricalFeaturesInfo()
public int minInstancesPerNode()
public void setMinInstancesPerNode(int x$1)
public double minInfoGain()
public void setMinInfoGain(double x$1)
public int maxMemoryInMB()
public void setMaxMemoryInMB(int x$1)
public double subsamplingRate()
public void setSubsamplingRate(double x$1)
public boolean useNodeIdCache()
public void setUseNodeIdCache(boolean x$1)
public int checkpointInterval()
public void setCheckpointInterval(int x$1)
public boolean isMulticlassClassification()
public boolean isMulticlassWithCategoricalFeatures()
public void setAlgo(String algo)
algo - (undocumented)
public void setCategoricalFeaturesInfo(java.util.Map<Integer,Integer> categoricalFeaturesInfo)
categoricalFeaturesInfo - (undocumented)
public Strategy copy()
public scala.Enumeration.Value getAlgo()
public Impurity getImpurity()
public int getMaxDepth()
public int getNumClasses()
public int getMaxBins()
public scala.Enumeration.Value getQuantileCalculationStrategy()
public scala.collection.immutable.Map<Object,Object> getCategoricalFeaturesInfo()
public int getMinInstancesPerNode()
public double getMinInfoGain()
public int getMaxMemoryInMB()
public double getSubsamplingRate()
public boolean getUseNodeIdCache()
public int getCheckpointInterval()