Source code for sktutor.pipeline

# -*- coding: utf-8 -*-
from sklearn.pipeline import (FeatureUnion as SKFeatureUnion,
                              _fit_transform_one, _name_estimators,
                              _transform_one)
from joblib import Parallel, delayed
import pandas as pd
import numpy as np


class FeatureUnion(SKFeatureUnion):
    """Apply a list of transformers in parallel and concatenate their outputs.

    The per-transformer results are joined column-wise with
    ``pandas.concat(..., axis=1)``.

    :param transformers: list of (string, transformer) tuples
    :param n_jobs: Number of jobs to run in parallel (default 1).
    """

    def fit_transform(self, X, y=None, **fit_params):
        """Fit every transformer on X, transform X, and concatenate results.

        The fitted transformers returned by the workers are stored back on
        this instance through ``_update_transformer_list``.

        :param X: Input data to be transformed.
        :type X: iterable or array-like, depending on transformers
        :param y: Optional targets handed to each transformer's fit step.
        :param fit_params: Extra keyword arguments forwarded unchanged to
            ``_fit_transform_one`` for every transformer.
        :rtype: DataFrame with concatenated results of transformers. When
            no transformer yields a result (all transformers are None), an
            empty NumPy array of shape ``(X.shape[0], 0)`` is returned
            instead.
        """
        self._validate_transformers()

        runner = Parallel(n_jobs=self.n_jobs)
        fit_one = delayed(_fit_transform_one)
        fitted_pairs = runner(
            fit_one(transformer=trans, weight=weight, X=X, y=y, **fit_params)
            for _name, trans, weight in self._iter()
        )

        if not fitted_pairs:
            # All transformers are None
            return np.zeros((X.shape[0], 0))

        # Each worker hands back an (output, fitted_transformer) pair.
        frames, fitted_transformers = zip(*fitted_pairs)
        self._update_transformer_list(fitted_transformers)
        return pd.concat(frames, axis=1)

    def transform(self, X):
        """Transform X with every transformer and concatenate the results.

        :param X: Input data to be transformed.
        :type X: iterable or array-like, depending on transformers
        :rtype: DataFrame with concatenated results of transformers. When
            no transformer yields a result (all transformers are None), an
            empty NumPy array of shape ``(X.shape[0], 0)`` is returned
            instead.
        """
        runner = Parallel(n_jobs=self.n_jobs)
        transform_one = delayed(_transform_one)
        frames = runner(
            transform_one(transformer=trans, weight=weight, X=X, y=None)
            for _name, trans, weight in self._iter()
        )

        if not frames:
            # All transformers are None
            return np.zeros((X.shape[0], 0))

        return pd.concat(frames, axis=1)
def make_union(*transformers, **kwargs):
    """Build a FeatureUnion from the given transformers.

    Shorthand for the FeatureUnion constructor: the transformers cannot be
    named by the caller and are instead given names automatically based on
    their types. Weighting is not supported either.

    :param transformers: list of estimators
    :param n_jobs: Number of jobs to run in parallel (default 1).
    :rtype: FeatureUnion
    :raises TypeError: if any keyword argument other than ``n_jobs`` is
        supplied; the message names the first such argument.
    """
    n_jobs = kwargs.pop('n_jobs', 1)

    if not kwargs:
        return FeatureUnion(_name_estimators(transformers), n_jobs=n_jobs)

    # `transformer_weights` is deliberately not supported for now, since its
    # type spec in make_union may still change.
    unexpected = next(iter(kwargs))
    raise TypeError('Unknown keyword arguments: "{}"'.format(unexpected))