# Source code for rfbp.ReplicatedFocusingBeliefPropagation

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function
from __future__ import division

import pickle
import numpy as np
# import warnings               # for Python non-bayes test version
# from scipy.special import erf # for Python non-bayes test version

from .FocusingProtocol import Focusing_Protocol
from .Patterns import Pattern

from ReplicatedFocusingBeliefPropagation.rfbp.misc import _check_string
from ReplicatedFocusingBeliefPropagation.rfbp.misc import redirect_stdout
from ReplicatedFocusingBeliefPropagation.rfbp.MagP64 import MagP64
from ReplicatedFocusingBeliefPropagation.rfbp.MagT64 import MagT64

from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from ReplicatedFocusingBeliefPropagation.lib.rFBP import Mag
from ReplicatedFocusingBeliefPropagation.lib.rFBP import _rfbp
from ReplicatedFocusingBeliefPropagation.lib.rFBP import _nonbayes_test
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin

import multiprocessing

# Number of CPUs reported by the host; used as the default value of the
# `nth` (number of threads) parameter of the classifier.
NTH = multiprocessing.cpu_count()

# Names exported by `from <module> import *`
__all__ = ['ReplicatedFocusingBeliefPropagation']

# Authorship metadata (authors and e-mail addresses are listed in the same order)
__author__  = ["Nico Curti", "Daniele Dall'Olio"]
__email__   = ['nico.curti2@unibo.it', 'daniele.dallolio@studio.unibo.it']



class ReplicatedFocusingBeliefPropagation (BaseEstimator, ClassifierMixin):

  '''
  ReplicatedFocusingBeliefPropagation classifier

  Parameters
  ----------
    mag : Enum Mag (default = MagP64)
      Switch magnetization type

    hidden : int (default = 3)
      Number of hidden layers

    max_iter : int (default = 1000)
      Number of iterations

    seed : int (default = 135)
      Random seed

    damping : float (default = 0.5)
      Damping parameter

    accuracy : pair of string (default : ('accurate', 'exact'))
      Accuracy of the messages computation at the hidden units level. Possible values are ('exact', 'accurate', 'approx', 'none')

    randfact : float (default = 0.1)
      Seed random generator of Cavity Messages

    epsil : float (default = 0.1)
      Threshold for convergence

    protocol : string (default = 'pseudo_reinforcement')
      Updating protocol. Possible values are ["scoping", "pseudo_reinforcement", "free_scoping", "standard_reinforcement"]

    size : int (default = 101)
      Number of updates

    nth : int (default = max_num_of_cores)
      Number of thread to use in the computation

    verbose : bool (default = False)
      Enable or disable stdout on shell

  Attributes
  ----------
    weights_ : 2-D array of int
      Weight matrix, available only after `fit` or `load_weights`.

  Example
  -------
  >>> import numpy as np
  >>> from ReplicatedFocusingBeliefPropagation import ReplicatedFocusingBeliefPropagation as rFBP
  >>>
  >>> N, M = (20, 101) # M must be odd
  >>> data = np.random.choice([-1, 1], p=[.5, .5], size=(N, M))
  >>> label = np.random.choice([-1, 1], p=[.5, .5], size=(N, ))
  >>>
  >>> rfbp = rFBP()
  >>> rfbp.fit(data, label)
    ReplicatedFocusingBeliefPropagation(mag=<class 'ReplicatedFocusingBeliefPropagation.rfbp.MagP64.MagP64'>, hidden=3, max_iter=1000, seed=135, damping=0.5, accuracy=('accurate', 'exact'), randfact=0.1, epsil=0.1, protocol=pseudo_reinforcement, size=101, nth=1, verbose=False)
  >>> predicted_labels = rfbp.predict(data)

  Notes
  -----
  .. note::
    The input data must be composed by binary variables codified as `[-1, 1]`, since the model works only with spin-like variables.

  References
  ----------
  - C. Baldassi, C. Borgs, J. T. Chayes, A. Ingrosso, C. Lucibello, L. Saglietti, and R. Zecchina. "Unreasonable effectiveness of learning neural networks: From accessible states and robust ensembles to basic algorithmic schemes", Proceedings of the National Academy of Sciences, 113(48):E7655-E7662, 2016.
  - C. Baldassi, A. Braunstein, N. Brunel, R. Zecchina. "Efficient supervised learning in networks with binary synapses", Proceedings of the National Academy of Sciences, 104(26):11079-11084, 2007.
  - C. Baldassi, F. Gerace, C. Lucibello, L. Saglietti, R. Zecchina. "Learning may need only a few bits of synaptic precision", Physical Review E, 93, 2016
  - D. Dall'Olio, N. Curti, G. Castellani, A. Bazzani, D. Remondini. "Classification of Genome Wide Association data by Belief Propagation Neural network", CCS Italy, 2019.
  '''

  _ALLOWED_ACCURACY = ('exact', 'accurate', 'approx', 'none')
  _ALLOWED_PROTOCOL = ('scoping', 'pseudo_reinforcement', 'free_scoping', 'standard_reinforcement')

  def __init__ (self, mag=MagP64,
                      hidden=3,
                      max_iter=1000,
                      seed=135,
                      damping=0.5,
                      accuracy=('accurate', 'exact'),
                      randfact=1e-1,
                      epsil=1e-1,
                      protocol='pseudo_reinforcement',
                      size=101,
                      nth=NTH,
                      verbose=False):
    '''
    Validate the configuration and store every argument as an attribute
    with the same name.

    Raises
    ------
      TypeError
        If `mag` is neither MagP64 nor MagT64, or if more than two
        accuracies are given.

      ValueError
        If an accuracy or the protocol is not among the allowed values.
    '''

    if mag is not MagP64 and mag is not MagT64:
      raise TypeError('Magnetization must be an instance of Mag Enum')

    if len(accuracy) > 2:
      raise TypeError('Too many accuracies given. Max number is two')

    if not all(a in self._ALLOWED_ACCURACY for a in accuracy):
      # The given values are joined rather than indexed as accuracy[0] and
      # accuracy[1], so a single wrong value still yields this ValueError
      # instead of an IndexError.
      given = ', '.join(str(a) for a in accuracy)
      raise ValueError('Wrong accuracy. Possible values are only {}. Given: {}'.format(','.join(self._ALLOWED_ACCURACY),
                                                                                       given))
    if protocol not in self._ALLOWED_PROTOCOL:
      raise ValueError('Incorrect Protocol found. Possible values are only {}'.format(','.join(self._ALLOWED_PROTOCOL)))

    self.mag = mag
    self.hidden = hidden
    self.max_iter = max_iter
    self.seed = seed
    self.damping = damping
    self.accuracy = accuracy
    self.randfact = randfact
    self.epsil = epsil
    self.protocol = protocol
    self.size = size
    self.nth = nth
    self.verbose = verbose
    #self.weights_ = None


  def predict (self, X):
    '''
    Predict the new labels computed by ReplicatedFocusingBeliefPropagation model

    Parameters
    ----------
      X : array of shape [n_samples, n_features]
          The input samples. A Pattern object is used as it is.

    Returns
    -------
      y : array of shape [n_samples]
          The predicted target values.
    '''

    check_is_fitted(self, 'weights_')

    if isinstance(X, Pattern):
      testset = X

    else:
      X = check_array(X)
      # Pattern is built from data and labels: placeholder labels (all zeros) are given
      testset = Pattern(X, [0] * np.shape(X)[0])

    row_size, column_size = self.weights_.shape

    return np.asarray(_nonbayes_test(self.weights_.ravel(), row_size, column_size, testset.pattern, self.hidden))

    # Pure-Python version of the test, kept only as a reference:
    #
    # nrow, ncol = np.shape(testset)
    # predicted_labels = np.empty(nrow, dtype=int)
    #
    # with warnings.catch_warnings(): # catch runtime warning due to s2 == 0
    #   warnings.simplefilter("ignore")
    #
    #   for i, Xi in enumerate(testset):
    #     s = sum( [erf( sum([w * x for w, x in zip(Wj, Xi)])  / np.sqrt(2. * sum([ (1 - w * w) * x * x for w, x in zip(Wj, Xi)])))
    #             for Wj in self.weights] )
    #     predicted_labels[i] = np.sign(s)
    # return predicted_labels


  def fit (self, X, y=None):
    '''
    Fit the ReplicatedFocusingBeliefPropagation model meta-transformer

    Parameters
    ----------
      X : array-like of shape (n_samples, n_features)
          The training input samples. A Pattern object is used as it is
          (and `y` is ignored in that case).

      y : array-like, shape (n_samples,)
          The target values (integers that correspond to classes in
          classification, real numbers in regression).

    Returns
    -------
      self : ReplicatedFocusingBeliefPropagation object
    '''

    pattern = X if isinstance(X, Pattern) else Pattern(X, y)

    # A tuple (not a one-shot generator) is built, so that the values can be
    # read more than once by the callee.
    accuracy = tuple(_check_string(a, exist=False) for a in self.accuracy)

    protocol = Focusing_Protocol(protocol=self.protocol, size=self.size)

    mag = Mag.MagP64 if self.mag is MagP64 else Mag.MagT64

    with redirect_stdout(self.verbose):
      weights = _rfbp(mag=mag,
                      pattern=pattern.pattern,
                      protocol=protocol.fprotocol,
                      hidden=self.hidden,
                      max_iter=self.max_iter,
                      max_steps=protocol.num_of_replicas,
                      randfact=self.randfact,
                      damping=self.damping,
                      epsil=self.epsil,
                      accuracy=accuracy,
                      seed=self.seed,
                      nth=self.nth
                      )

    self.weights_ = np.asarray(weights, dtype=int)

    return self


  def load_weights (self, weightfile, delimiter='\t', binary=False):
    '''
    Load weights from file

    The weights are stored as a 2-D integer array (the same layout set by
    `fit`) and `hidden` is updated to the number of rows of that array.

    Parameters
    ----------
      weightfile : string
        Filename of weights

      delimiter : char
        Separator for ascii loading

      binary : bool
        Switch between binary and ascii loading style

    Returns
    -------
      self : ReplicatedFocusingBeliefPropagation object

    Example
    -------
    >>> from ReplicatedFocusingBeliefPropagation import ReplicatedFocusingBeliefPropagation as rFBP
    >>>
    >>> clf = rFBP()
    >>> clf.load_weights('path/to/weights_filename.csv', delimiter=',', binary=False)
      ReplicatedFocusingBeliefPropagation(mag=<class 'ReplicatedFocusingBeliefPropagation.rfbp.MagP64.MagP64'>, hidden=3, max_iter=1000, seed=135, damping=0.5, accuracy=('accurate', 'exact'), randfact=0.1, epsil=0.1, protocol=pseudo_reinforcement, size=101, nth=1, verbose=False)
    '''

    if binary:
      # NOTE: pickle can run arbitrary code while loading: open only trusted files
      with open(weightfile, 'rb') as fp:
        weights = pickle.load(fp)

    else:
      # ndmin=2 keeps a file with a single row as a (1, n) matrix
      weights = np.loadtxt(weightfile, delimiter=delimiter, ndmin=2)

    # predict unpacks `weights_.shape` in two values and fit stores integers:
    # the loaded weights are brought to the same form
    self.weights_ = np.atleast_2d(np.asarray(weights, dtype=int))

    self.hidden = len(self.weights_)

    return self


  def save_weights (self, weightfile, delimiter='\t', binary=False):
    '''
    Save weights to file

    Parameters
    ----------
      weightfile : string
        Filename to dump the weights

      delimiter : char
        Separator for ascii dump

      binary : bool
        Switch between binary and ascii dumping style

    Example
    -------
    >>> import numpy as np
    >>> from ReplicatedFocusingBeliefPropagation import ReplicatedFocusingBeliefPropagation as rFBP
    >>>
    >>> N, M = (20, 101) # M must be odd
    >>> data = np.random.choice([-1, 1], p=[.5, .5], size=(N, M))
    >>> label = np.random.choice([-1, 1], p=[.5, .5], size=(N, ))
    >>>
    >>> rfbp = rFBP()
    >>> rfbp.fit(data, label)
    >>> rfbp.save_weights('path/to/weights_filename.csv', delimiter=',', binary=False)
    '''

    check_is_fitted(self, 'weights_')

    if binary:
      with open(weightfile, 'wb') as fp:
        pickle.dump(self.weights_, fp)

    else:
      np.savetxt(weightfile, self.weights_, delimiter=delimiter)

  def __repr__ (self):
    '''
    Object representation

    The parameters are listed in the same order of the `__init__` signature.
    '''
    class_name = self.__class__.__qualname__

    # co_varnames starts with the arguments, followed by the local variables:
    # only the arguments (without 'self') are kept, in declaration order
    code = self.__init__.__code__
    params = code.co_varnames[1:code.co_argcount]
    args = ', '.join('{}={}'.format(k, str(getattr(self, k))) for k in params)

    return '{0}({1})'.format(class_name, args)