Source code for axopy.pipeline.common

"""Common processing tasks implemented as Blocks."""

import warnings
import numpy as np
from scipy import signal

from axopy.pipeline import Pipeline, Block


[docs]class Passthrough(Pipeline):
    """Convenience block for passing input along to output.

    A passthrough pipeline block is useful when you want to process some data
    then provide both the processed output as well as the original input to
    another block downstream::

            -----------------------> x
           |
        x ---> [ subpipeline ] ----> y
    """

    def __init__(self, blocks, expand_output=True, name=None):
        super(Passthrough, self).__init__(blocks, name=name)
        self.expand_output = expand_output

[docs]    def process(self, data):
        out = super(Passthrough, self).process(data)
        if self.expand_output:
            ldata = [data]
            ldata.extend(out)
            return ldata
        else:
            return data, out


[docs]class Callable(Block):
    """A `Block` that does not require persistent attributes.

    Some `Block` implementations don't require attributes to update on
    successive calls to the `process` method, but instead are essentially a
    function that can be called repeatedly. This class is for conveniently
    creating such a block.

    If the function you want to use takes additional arguments, such as a
    keyword argument that

    Note: if you use an anonymous function as the `func` argument, (e.g.
    ``lambda x: 2*x``), it is recommended to explicitly give the block a
    meaningful name.

    Parameters
    ----------
    func : callable(x)
        Function that gets called when the block's `process` method is called.
        Should take a single input and return output which is compatible with
        whatever is connected to the block.
    func_args : list, optional
        List (or tuple) of additional arguments to pass to `func` when calling
        it for processing. If None (default), no arguments are used.
    func_kwargs : dict
        Keyword argument name/value pairs to pass to `func` when calling it for
        processing. If None (default), no keyword arguments are used.
    name : str, optional, default=None
        Name of the block. By default, the name of the `processor` function is
        used.
    hooks : list, optional, default=None
        List of callables (callbacks) to run when after the block's `process`
        method is called.
    """

    def __init__(self, func, func_args=None, func_kwargs=None, name=None,
                 hooks=None):
        if name is None:
            name = func.__name__
        super(Callable, self).__init__(name=name, hooks=hooks)
        self.func = func
        self.func_args = func_args if func_args is not None else []
        self.func_kwargs = func_kwargs if func_kwargs is not None else {}

[docs]    def process(self, data):
        return self.func(data, *self.func_args, **self.func_kwargs)


[docs]class Windower(Block):
    """Windows incoming data to a specific length.

    Takes new input data and combines with past data to maintain a sliding
    window with optional overlap. The window length is specified directly, so
    the overlap depends on the length of the input.

    The input length may change on each iteration, but the ``Windower`` must be
    cleared before the number of channels can change.

    Parameters
    ----------
    length : int
        Total number of samples to output on each iteration. This must be at
        least as large as the number of samples input to the windower on each
        iteration.

    See Also
    --------
    axopy.pipeline.common.Ensure2D: Ensure input to the windower is 2D.

    Examples
    --------
    Basic use of a windower:

    >>> import axopy.pipeline as pipeline
    >>> import numpy as np
    >>> win = pipeline.Windower(4)
    >>> win.process(np.array([[1, 2], [3, 4]]))
    array([[ 0.,  0.,  1.,  2.],
           [ 0.,  0.,  3.,  4.]])
    >>> win.process(np.array([[7, 8], [5, 6]]))
    array([[ 1.,  2.,  7.,  8.],
           [ 3.,  4.,  5.,  6.]])
    >>> win.clear()
    >>> win.process(np.array([[1, 2], [3, 4]]))
    array([[ 0.,  0.,  1.,  2.],
           [ 0.,  0.,  3.,  4.]])

    If your data is 1-dimensional (shape ``(n_samples,)``), use an
    :class:`Ensure2D` block in front of the :class:`Windower`:

    >>> win = pipeline.Windower(4)
    >>> p = pipeline.Pipeline([pipeline.Ensure2D(), win])
    >>> p.process(np.array([1, 2]))
    array([[ 0.,  0.,  1.,  2.]])
    """

    def __init__(self, length):
        super(Windower, self).__init__()
        self.length = length

        self.clear()

[docs]    def clear(self):
        """Clear the buffer containing previous input data.
        """
        self._out = None

[docs]    def process(self, data):
        """Add new data to the end of the window.

        Parameters
        ----------
        data : array, shape (n_channels, n_samples)
            Input data. ``n_samples`` must be less than or equal to the
            windower ``length``.

        Returns
        -------
        out : array, shape (n_channels, length)
            Output window with the input data at the end.
        """
        if data.ndim != 2:
            raise ValueError("data must be 2-dimensional.")

        n = data.shape[1]

        if n > self.length:
            raise ValueError("data must be shorter than window length.")

        if self._out is None:
            self._preallocate(data.shape[0])

        if data.shape[0] != self._out.shape[0]:
            raise ValueError("Number of channels cannot change without "
                             "calling clear first.")

        if n == self.length:
            self._out = data
        else:
            self._out[:, :self.length-n] = self._out[:, -(self.length-n):]
            self._out[:, -n:] = data

        return self._out.copy()

    def _preallocate(self, n_channels):
        self._out = np.zeros((n_channels, self.length))


[docs]class Centerer(Block):
    """Centers data by subtracting out its mean.

    .. math:: \\tilde{x} = x - \\sum_{i=0}^{N-1} x[i]
    """

[docs]    def process(self, data):
        """Center each row of the input.

        Parameters
        ----------
        data : array, shape (n_channels, n_samples)
            Input data.

        Returns
        -------
        out : array, shape (n_channels, n_samples)
            Input data that's been centered.
        """
        return data - np.mean(data)


[docs]class Filter(Block):
    """Filters incoming data with a time domain filter.

    This filter implementation takes filter coefficients that are designed
    by the user -- it merely applies the filter to the input, remembering the
    final inputs/outputs from the previous update and using them as initial
    conditions for the current update.

    Parameters
    ----------
    b : ndarray
        Numerator polynomial coefficients of the filter.
    a : ndarray, optional
        Denominator polynomial coefficients of the filter. Default is 1,
        meaning the filter is FIR.
    overlap : int, optional
        Number of samples overlapping in consecutive inputs. Needed for
        correct filter initial conditions in each filtering operation.
        Default is 0, meaning the final inputs/outputs of the previous update
        are used.

    See Also
    --------
    axopy.pipeline.common.Ensure2D: Ensure input to the filter is 2D.

    Examples
    --------
    Design a filter using scipy and use the coefficients:

    >>> import axopy.pipeline as pipeline
    >>> import numpy as np
    >>> from scipy.signal import butter
    >>> b, a = butter(4, 100/1000/2)
    >>> f = pipeline.Filter(b, a)
    >>> f.process(np.random.randn(1, 5)) # doctest: +ELLIPSIS
    array([...

    Use a filter in combination with a :class:`Windower`, making sure to
    account for overlapping data in consecutive filtering operations. Here,
    we'll use a window of length 5 and pass in 3 samples at a time, so there
    will be an overlap of 2 samples. The overlapping samples in each output
    will agree:

    >>> w = pipeline.Windower(5)
    >>> f = pipeline.Filter(b, a, overlap=2)
    >>> p = pipeline.Pipeline([w, f])
    >>> out1 = p.process(np.random.randn(1, 3))
    >>> out2 = p.process(np.random.randn(1, 3))
    >>> out1[:, -2:] == out2[:, :2]
    array([[ True,  True]], dtype=bool)

    """

    def __init__(self, b, a=1, overlap=0):
        super(Filter, self).__init__()
        self.b = b
        self.a = np.atleast_1d(a)
        self.overlap = overlap

        self.clear()

[docs]    def clear(self):
        """Clears the filter initial conditions.

        Clearing the initial conditions is important when starting a new
        recording if ``overlap`` is nonzero.
        """
        self._x_prev = None
        self._y_prev = None

[docs]    def process(self, data):
        """Applies the filter to the input.

        Parameters
        ----------
        data : ndarray, shape (n_channels, n_samples)
            Input signals.
        """
        if data.ndim != 2:
            raise ValueError("data must be 2-dimensional.")

        if self._x_prev is None:
            # first pass has no initial conditions
            out = signal.lfilter(self.b, self.a, data, axis=-1)
        else:
            # subsequent passes get ICs from previous input/output
            num_ch = data.shape[0]
            K = max(len(self.a)-1, len(self.b)-1)
            self._zi = np.zeros((num_ch, K))

            # unfortunately we have to get zi channel by channel
            for c in range(data.shape[0]):
                self._zi[c, :] = signal.lfiltic(
                    self.b,
                    self.a,
                    self._y_prev[c, -(self.overlap+1)::-1],
                    self._x_prev[c, -(self.overlap+1)::-1])

            out, zf = signal.lfilter(self.b, self.a, data, axis=-1,
                                     zi=self._zi)

        self._x_prev = data
        self._y_prev = out

        return out


[docs]class FeatureExtractor(Block):
    """Computes multiple features from the input, concatenating the results.

    Each feature should be able to take in the same data and output a 1D array,
    so overall output of the FeatureExtractor can be a single 1D array.

    This block isn't strictly necessary, since you could just apply multiple
    feature blocks in parallel and the result of each will be passed to the
    next block. However, the block following feature computation typically
    expects the input to be a single array (or row) per data sample.

    Parameters
    ----------
    features : list
        List of (name, feature) tuples (i.e. implementing a ``compute``
        method).

    Attributes
    ----------
    named_features : dict
        Dictionary of features accessed by name.
    feature_indices : dict
        Dictionary of (start, stop) tuples indicating the bounds of each
        feature, accessed by name. Will be empty until after data is first
        passed through.
    """

    def __init__(self, features, hooks=None):
        super(FeatureExtractor, self).__init__(hooks=hooks)
        self.features = features

        self.feature_indices = {}
        self._output = None

    @property
    def named_features(self):
        return dict(self.features)

[docs]    def clear(self):
        """Clears the output array.

        This should be called if the input is going to change form in some
        way (i.e. the shape of the input array changes).
        """
        self.feature_indices = {}
        self._output = None

[docs]    def process(self, data):
        """Run data through the list of features and concatenates the results.

        The first pass (after a ``clear`` call) will be a little slow since the
        extractor needs to allocate the output array.

        Parameters
        ----------
        data : array, shape (n_channels, n_samples)
            Input data. Must be appropriate for all features.

        Returns
        -------
        out : array, shape (n_features,)
        """
        allocating = (self._output is None)
        ind = 0
        for i, (name, feature) in enumerate(self.features):
            if allocating:
                x = feature.compute(data)
                self.feature_indices[name] = (ind, ind+x.size)
                ind += x.size

                if self._output is None:
                    self._output = x
                else:
                    self._output = np.hstack([self._output, x])
            else:
                self._output[self.feature_indices[name][0]:
                             self.feature_indices[name][1]] = \
                    feature.compute(data)

        return self._output


[docs]class Estimator(Block):
    """A pipeline block wrapper around scikit-learn's idea of an estimator.

    An estimator is an object that can be trained with some data (``fit``) and,
    once trained, can output predictions from novel inputs. A common use-case
    for this block is to utilize a scikit-learn pipeline in the context of a
    axopy pipeline.

    Parameters
    ----------
    estimator : object
        An object implementing the scikit-learn Estimator interface (i.e.
        implementing ``fit`` and ``predict`` methods).
    return_proba : boolean, optional (default: False)
        If True, use the estimator's ``predict_proba`` method instead of
        ``predict`` to return probability estimates.
    return_log_proba : boolean, optional (default: False)
        If True, use the estimator's ``predict_log_proba`` method instead of
        ``predict`` to return probability estimates.
        """

    def __init__(self, estimator, return_proba=False, return_log_proba=False):
        super(Estimator, self).__init__()
        self.estimator = estimator
        self.return_proba = return_proba
        self.return_log_proba = return_log_proba
        self._check_estimator()

[docs]    def process(self, data):
        """Calls the estimator's ``predict`` or ``predict_proba`` method and
        returns the result."""
        if self.return_proba:
            return self.estimator.predict_proba(data)
        elif self.return_log_proba:
            return self.estimator.predict_log_proba(data)
        else:
            return self.estimator.predict(data)

    def _check_estimator(self):
        """Check estimator attributes when either ``return_proba`` or
        ``return_log_proba`` are set to ``True``.

        If both arguments are True use ``predict_proba`` and issue a warning.
        """
        if not hasattr(self.estimator, 'predict_proba') and self.return_proba:
            raise ValueError("Estimator {} does not implement a "
                             "predict_proba method".format(self.estimator))
        if not hasattr(self.estimator, 'predict_log_proba') and \
                self.return_log_proba:
            raise ValueError("Estimator {} does not implement a "
                             "predict_log_proba method".format(self.estimator))

        if self.return_proba and self.return_log_proba:
            warnings.warn("Both predict_proba and predict_log_proba were set "
                          "to True for estimator {}. The process method will "
                          "default to predict_proba.".format(self.estimator))
            self.return_log_proba = False


[docs]class Transformer(Block):
    """A pipeline block wrapper around scikit-learn's idea of a transformer.

    A transformer is trained with some data (``fit``) and, once trained, can
    output projections of the input data to some other space. A common example
    is projecting data in high-dimensional space to a lower-dimensional space
    using principal components analysis.

    Parameters
    ----------
    transformer : object
        An object implementing the scikit-learn Transformer interface (i.e.
        implementing ``fit``, ``transform`` and ``inverse_transform`` methods).
    inverse : boolean, optional (default: False)
        If True, call ``inverse_transform`` instead of ``transform``.
    """

    def __init__(self, transformer, inverse=False, hooks=None):
        super(Transformer, self).__init__(hooks=None)
        self.transformer = transformer
        self.inverse = inverse

[docs]    def process(self, data):
        """Calls the transformer's ``transform`` or ``inverse_transform``
        method and returns the result.
        """
        if self.inverse:
            return self.transformer.inverse_transform(data)
        else:
            return self.transformer.transform(data)


[docs]class Ensure2D(Block):
    """Transforms an array to ensure it has 2 dimensions.

    Input with shape ``(n,)`` can be made to have shape ``(n, 1)`` or
    ``(1, n)``.

    Parameters
    ----------
    orientation : {'row', 'col'}, optional
        Orientation of the output. If 'row', the output will have shape
        ``(1, n)``, meaning the output is a row vector. This is the default
        behavior, useful when the data is something like samples of a 1-channel
        signal.  If 'col', the output will have shape ``(n, 1)``, meaning the
        output is a column vector.

    Examples
    --------
    Output row data:

    >>> import numpy as np
    >>> import axopy.pipeline as pipeline
    >>> block = pipeline.Ensure2D()
    >>> block.process(np.array([1, 2, 3]))
    array([[1, 2, 3]])

    Output column data:

    >>> block = pipeline.Ensure2D(orientation='col')
    >>> block.process(np.array([1, 2, 3]))
    array([[1],
           [2],
           [3]])
    """

    def __init__(self, orientation='row'):
        super(Ensure2D, self).__init__()
        self.orientation = orientation

        if orientation not in ['row', 'col']:
            raise ValueError("orientation must be either 'row' or 'col'")

[docs]    def process(self, data):
        """Make sure data is 2-dimensional.

        If the input already has two dimensions, it is unaffected.

        Parameters
        ----------
        data : array, shape (n,)
            Input data.

        Returns
        -------
        out : array, shape (1, n) or (n, 1)
            Output data, with shape specified by ``orientation``.
        """
        data = np.atleast_2d(data)

        if self.orientation == 'row':
            return data
        else:
            return data.T