workflow

Type Members

class AutoCacheRule extends Rule with Logging
class AutoCachingOptimizer extends Optimizer

Optimizes a Pipeline DAG, with auto-caching
trait Chainable[A, B] extends AnyRef

This trait provides methods to chain an object with Estimators, LabelEstimators, and other Chainables to construct Pipelines.
abstract class Estimator[A, B] extends EstimatorOperator

An Estimator has a fitRDD method which takes an input and emits a Transformer.
class FittedPipeline[A, B] extends Chainable[A, B] with Serializable

This is the result of fitting a Pipeline.
case class Identity[T]()(implicit evidence$1: ClassTag[T]) extends Transformer[T, T] with Product with Serializable

This transformer performs a no-op on its input.
abstract class LabelEstimator[A, B, L] extends EstimatorOperator

A LabelEstimator has a fitRDDs method which takes input data and input labels, and emits a Transformer.
class NodeOptimizationRule extends Rule

Node-level optimization, such as selecting a Linear Solver
sealed trait Optimizable extends AnyRef
abstract class OptimizableEstimator[A, B] extends Estimator[A, B] with Optimizable

Represents a node-level optimizable Estimator and its optimization rules
abstract class OptimizableLabelEstimator[A, B, L] extends LabelEstimator[A, B, L] with Optimizable

Represents a node-level optimizable LabelEstimator and its optimization rules
abstract class OptimizableTransformer[A, B] extends Transformer[A, B] with Optimizable

Represents a node-level optimizable Transformer and its optimization rules
abstract class Optimizer extends RuleExecutor
class Pipeline[A, B] extends Chainable[A, B]

A Pipeline takes data as input (single item or an RDD), and outputs some transformation of that data.
class PipelineDataset[T] extends PipelineResult[RDD[T]]

This class is a lazy wrapper around the output of a pipeline that was passed an RDD as input.
class PipelineDatum[T] extends PipelineResult[T]

This class is a lazy wrapper around the output of a pipeline that was passed a single datum as input.
class PipelineEnv extends AnyRef

PipelineEnv is an environment shared by multiple Pipelines, containing variables such as the Prefix state table and the current Pipeline Optimizer.
abstract class PipelineResult[T] extends AnyRef

A PipelineResult is a lazy wrapper around the result of applying a Pipeline to data.
case class Profile(ns: Long, rddMem: Long, driverMem: Long) extends Product with Serializable
abstract class Rule extends AnyRef

Represents a DAG transformation rule: A transformation from one DAG to a differently-executed but logically equivalent DAG.
abstract class RuleExecutor extends Logging
case class SampleProfile(scale: Long, profile: Profile) extends Product with Serializable
abstract class Transformer[A, B] extends TransformerOperator with Chainable[A, B]

Transformers are operators that may be applied both to single input items and to RDDs of input items.
case class TransformerChain[A, B, C](first: Transformer[A, B], second: Transformer[B, C])(implicit evidence$1: ClassTag[C]) extends Transformer[A, C] with Product with Serializable

A chain of two Transformers in a row (as a Transformer)
case class TransformerEstimatorChain[A, B, C](first: Transformer[A, B], second: Estimator[B, C])(implicit evidence$2: ClassTag[C]) extends Estimator[A, C] with Product with Serializable

A chain of a Transformer followed by an Estimator (as an Estimator)
case class TransformerLabelEstimatorChain[A, B, C, L](first: Transformer[A, B], second: LabelEstimator[B, C, L])(implicit evidence$3: ClassTag[C]) extends LabelEstimator[A, C, L] with Product with Serializable

A chain of a Transformer followed by a LabelEstimator (as a LabelEstimator)
trait WeightedNode extends AnyRef

A mix-in that attaches a weight to a node; the weight represents how often the node must iterate over its input.
trait WeightedOperator extends AnyRef

A mix-in that attaches a weight to an operator; the weight represents how often the operator must iterate over its input.

Value Members

object AutoCacheRule
object DefaultOptimizer extends Optimizer

The default Pipeline optimizer used when executing pipelines.
object EquivalentNodeMergeRule extends Rule

A rule to merge equivalent nodes in the DAG.
object ExtractSaveablePrefixes extends Rule

Extract the prefixes of all Nodes whose state we want to save for reuse by other Pipeline apply and fit calls.
object Pipeline
object PipelineDataset
object PipelineDatum
object PipelineEnv
object SavedStateLoadRule extends Rule

A rule to load any saved state from the PipelineEnv.state prefix state table for nodes whose results we want to consider either loading or saving.
object Transformer extends Serializable
object UnusedBranchRemovalRule extends Rule

A rule to remove all nodes and sources in a graph that don't lead to any sink and are therefore effectively unused.
object WorkflowUtils

package workflow

Type Members

class AutoCacheRule extends Rule with Logging

class AutoCachingOptimizer extends Optimizer

trait Chainable[A, B] extends AnyRef

abstract class Estimator[A, B] extends EstimatorOperator

class FittedPipeline[A, B] extends Chainable[A, B] with Serializable

case class Identity[T]()(implicit evidence$1: ClassTag[T]) extends Transformer[T, T] with Product with Serializable

abstract class LabelEstimator[A, B, L] extends EstimatorOperator

class NodeOptimizationRule extends Rule

sealed trait Optimizable extends AnyRef

abstract class OptimizableEstimator[A, B] extends Estimator[A, B] with Optimizable

abstract class OptimizableLabelEstimator[A, B, L] extends LabelEstimator[A, B, L] with Optimizable

abstract class OptimizableTransformer[A, B] extends Transformer[A, B] with Optimizable

abstract class Optimizer extends RuleExecutor

class Pipeline[A, B] extends Chainable[A, B]

class PipelineDataset[T] extends PipelineResult[RDD[T]]

class PipelineDatum[T] extends PipelineResult[T]

class PipelineEnv extends AnyRef

abstract class PipelineResult[T] extends AnyRef

case class Profile(ns: Long, rddMem: Long, driverMem: Long) extends Product with Serializable

abstract class Rule extends AnyRef

abstract class RuleExecutor extends Logging

case class SampleProfile(scale: Long, profile: Profile) extends Product with Serializable

abstract class Transformer[A, B] extends TransformerOperator with Chainable[A, B]

case class TransformerChain[A, B, C](first: Transformer[A, B], second: Transformer[B, C])(implicit evidence$1: ClassTag[C]) extends Transformer[A, C] with Product with Serializable

case class TransformerEstimatorChain[A, B, C](first: Transformer[A, B], second: Estimator[B, C])(implicit evidence$2: ClassTag[C]) extends Estimator[A, C] with Product with Serializable

case class TransformerLabelEstimatorChain[A, B, C, L](first: Transformer[A, B], second: LabelEstimator[B, C, L])(implicit evidence$3: ClassTag[C]) extends LabelEstimator[A, C, L] with Product with Serializable

trait WeightedNode extends AnyRef

trait WeightedOperator extends AnyRef

Value Members

object AutoCacheRule

object DefaultOptimizer extends Optimizer

object EquivalentNodeMergeRule extends Rule

object ExtractSaveablePrefixes extends Rule

object Pipeline

object PipelineDataset

object PipelineDatum

object PipelineEnv

object SavedStateLoadRule extends Rule

object Transformer extends Serializable

object UnusedBranchRemovalRule extends Rule

object WorkflowUtils

Ungrouped