Source code for rfbp.Patterns

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function
from __future__ import division

import numpy as np
from sklearn.utils import check_X_y
from sklearn.utils import check_array
from ReplicatedFocusingBeliefPropagation.lib.Patterns import _Patterns
from ReplicatedFocusingBeliefPropagation.rfbp.misc import _check_string
from ReplicatedFocusingBeliefPropagation.rfbp.misc import get_int_size

__all__ = ['Pattern']

__author__  = ['Nico Curti', "Daniele Dall'Olio"]
__email__   = ['nico.curti2@unibo.it', 'daniele.dallolio@studio.unibo.it']

class Pattern(object):
    '''
    Pattern object for C++ compatibility.

    The Pattern object is just a simple wrap of a data (matrix) + labels
    (vector). This object type provides compatibility with the rFBP
    functions in C++ and a series of checks for the input validity.

    Parameters
    ----------
      X : None or 2D array-like
        Input matrix of variables as (Nsample, Nfeatures)

      y : None or 1D array-like
        Input labels.

    Notes
    -----
    The wrapped object is built only when both ``X`` and ``y`` are given;
    if either one is None an empty Pattern is created (shape ``(0, 0)``)
    that can later be filled with :meth:`random` or :meth:`load`.

    Example
    -------
    >>> import numpy as np
    >>> from ReplicatedFocusingBeliefPropagation import Pattern
    >>>
    >>> n_sample, n_feature = (20, 101) # n_feature must be odd
    >>> data = np.random.choice(a=(-1, 1), p=(.5, .5), size=(n_sample, n_feature))
    >>> labels = np.random.choice(a=(-1, 1), p=(.5, .5), size=(n_sample, ))
    >>>
    >>> pt = Pattern(X=data, y=labels)
    >>> # dimensions
    >>> assert pt.shape == (n_sample, n_feature)
    >>> # data
    >>> np.testing.assert_allclose(pt.data, data)
    >>> # labels
    >>> np.testing.assert_allclose(pt.labels, labels)
    '''

    def __init__(self, X=None, y=None):

        if X is not None and y is not None:

            # check array
            X, y = check_X_y(X, y)
            # NOTE(review): here X.shape is unpacked as (N, M) = (samples,
            # features), while `random` documents (M, N) = (samples, probes).
            # Confirm the keyword convention expected by _Patterns.
            N, M = X.shape

            # The wrapped object receives the matrix as a flat, contiguous
            # float64 buffer and the labels as a contiguous integer buffer.
            X = check_array(X)
            X = X.ravel()
            X = np.ascontiguousarray(X)
            y = np.ascontiguousarray(y)

            X = X.astype('float64')
            y = y.astype(get_int_size())

            self._pattern = _Patterns(X=X, y=y, M=M, N=N)
            self._check_binary()

        else:
            self._pattern = None

    def random(self, shape):
        '''
        Generate Random pattern.

        The pattern is generated using a Bernoulli distribution and thus it
        creates a data (matrix) + labels (vector) of binary values. The values
        are converted into the range (-1, 1) for the compatibility with the
        rFBP algorithm.

        Parameters
        ----------
          shape : tuple
            a 2-D tuple with (M, N) where M is the number of samples and N
            the number of probes

        Returns
        -------
          self : Pattern
            The object itself, now wrapping the generated pattern.

        Raises
        ------
          ValueError
            If ``shape`` cannot be read as two integers or if one of the
            dimensions is not strictly positive.

        Example
        -------
        >>> from ReplicatedFocusingBeliefPropagation import Pattern
        >>>
        >>> n_sample = 10
        >>> n_feature = 20
        >>> data = Pattern().random(shape=(n_sample, n_feature))
        >>> assert data.shape == (n_sample, n_feature)
        >>> data
          Pattern[shapes=(10, 20)]
        '''

        try:
            M, N = map(int, shape)
        # TypeError covers non-iterable shapes (None, a bare int) and
        # non-numeric items; ValueError covers a wrong number of items.
        except (TypeError, ValueError):
            raise ValueError('Incorrect dimensions. Shapes must be a 2-D tuple with (M, N)')

        if M <= 0 or N <= 0:
            raise ValueError('Incorrect dimensions. M and N must be greater than 0. Given ({0:d}, {1:d})'.format(M, N))

        self._pattern = _Patterns(M=M, N=N)
        # We do not need to check the variables since they are correctly
        # generated into the C++ code!

        return self

    def load(self, filename, binary=False, delimiter='\t'):
        '''
        Load pattern from file.

        This is the main utility of the Pattern object. You can use this
        function to load data from csv-like files OR from a binary file.

        Parameters
        ----------
          filename : str
            Filename/Path to the Pattern file

          binary : bool
            True if the filename is in binary fmt; False for ASCII fmt

          delimiter : str
            Separator of input file (valid if binary is False)

        Returns
        -------
          self : Pattern
            The object itself, now wrapping the loaded pattern.

        Raises
        ------
          ValueError
            If ``filename`` is not a string or if the loaded values are not
            all +1 or -1.

        Example
        -------
        >>> from ReplicatedFocusingBeliefPropagation import Pattern
        >>>
        >>> data = Pattern().load(filename='path/to/datafile.csv', delimiter=',', binary=False)
        >>> data
          Pattern[shapes=(10, 20)]
        '''

        if not isinstance(filename, str):
            raise ValueError('Invalid filename found. Filename must be a string. Given : {0}'.format(filename))

        filename = _check_string(filename, exist=True)
        delimiter = _check_string(delimiter, exist=False)

        self._pattern = _Patterns(filename=filename, binary=binary, delimiter=delimiter)
        self._check_binary()

        return self

    @property
    def shape(self):
        '''
        Return the shape of the data matrix

        Returns
        -------
          shape: tuple
            The tuple related to the data dimensions (n_sample, n_features);
            (0, 0) when no pattern is stored.
        '''
        try:
            return (self._pattern.Nrow, self._pattern.Ncol)
        except AttributeError:
            # no pattern stored
            return (0, 0)

    @property
    def labels(self):
        '''
        Return the label array

        Returns
        -------
          labels: array-like
            The labels vector as (n_sample, ) casted to integers, or None
            when no pattern is stored.
        '''
        try:
            return np.asarray(self._pattern.labels, dtype=int)
        except AttributeError:
            # no pattern stored
            return None

    @property
    def data(self):
        '''
        Return the data matrix

        Returns
        -------
          data: array-like
            The data matrix as (n_sample, n_features) casted to integers, or
            None when no pattern is stored.
        '''
        try:
            return np.asarray(self._pattern.data, dtype=int)
        except AttributeError:
            # no pattern stored
            return None

    @property
    def pattern(self):
        '''
        Return the pattern Cython object

        Returns
        -------
          pattern: Cython object
            The cython object wrapped by the Pattern class

        Notes
        -----
        .. warning::
          We discourage the use of this property if you do not know exactly
          what you are doing!
        '''
        return self._pattern

    def _check_binary(self):
        '''
        Check if the input data and labels satisfy the binary requirements

        Both the data matrix AND the label vector must contain only +1 / -1
        values. On failure the stored pattern is dropped before raising.

        Raises
        ------
          ValueError
            If any data value or any label differs from +1 and -1.
        '''
        data = self.data
        labels = self.labels

        data_ok = ((-1 == data) | (1 == data)).all()
        labels_ok = ((-1 == labels) | (1 == labels)).all()

        # Both checks are required: a valid data matrix must not hide
        # invalid labels (and vice versa).
        if not (data_ok and labels_ok):
            self._pattern = None  # remove the loaded object
            raise ValueError('Invalid input parameters! Input variables must be +1 or -1')

    def __repr__(self):
        '''
        Object representation
        '''
        class_name = self.__class__.__qualname__

        if self._pattern is not None:
            return '{0}[shapes=({1:d}, {2:d})]'.format(class_name, self._pattern.Nrow, self._pattern.Ncol)

        return '{0}[shapes=(0, 0)]'.format(class_name)