Source code for rfbp.ReplicatedFocusingBeliefPropagation

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function
from __future__ import division

import pickle
import numpy as np
# import warnings               # for Python non-bayes test version
# from scipy.special import erf # for Python non-bayes test version

from .FocusingProtocol import Focusing_Protocol
from .Patterns import Pattern

from ReplicatedFocusingBeliefPropagation.rfbp.misc import _check_string
from ReplicatedFocusingBeliefPropagation.rfbp.misc import redirect_stdout
from ReplicatedFocusingBeliefPropagation.rfbp.MagP64 import MagP64
from ReplicatedFocusingBeliefPropagation.rfbp.MagT64 import MagT64

from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from ReplicatedFocusingBeliefPropagation.lib.rFBP import Mag
from ReplicatedFocusingBeliefPropagation.lib.rFBP import _rfbp
from ReplicatedFocusingBeliefPropagation.lib.rFBP import _nonbayes_test
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin

import multiprocessing

# Default number of threads: the CPU count reported by multiprocessing.
# Used as the default value of the `nth` constructor argument below.
NTH = multiprocessing.cpu_count()

# Names exported by `from <module> import *`
__all__ = ['ReplicatedFocusingBeliefPropagation']

# Module authorship metadata
__author__  = ["Nico Curti", "Daniele Dall'Olio"]
__email__   = ['nico.curti2@unibo.it', 'daniele.dallolio@studio.unibo.it']



class ReplicatedFocusingBeliefPropagation (BaseEstimator, ClassifierMixin):
    '''
    ReplicatedFocusingBeliefPropagation classifier

    Parameters
    ----------
      mag : Enum Mag (default = MagP64)
        Switch magnetization type

      hidden : int (default = 3)
        Number of hidden layers

      max_iter : int (default = 1000)
        Number of iterations

      seed : int (default = 135)
        Random seed

      damping : float (default = 0.5)
        Damping parameter

      accuracy : pair of string (default : ('accurate', 'exact'))
        Accuracy of the messages computation at the hidden units level.
        Possible values are ('exact', 'accurate', 'approx', 'none')

      randfact : float (default = 0.1)
        Seed random generator of Cavity Messages

      epsil : float (default = 0.1)
        Threshold for convergence

      protocol : string (default = 'pseudo_reinforcement')
        Updating protocol. Possible values are
        ["scoping", "pseudo_reinforcement", "free_scoping", "standard_reinforcement"]

      size : int (default = 101)
        Number of updates

      nth : int (default = max_num_of_cores)
        Number of thread to use in the computation

      verbose : bool (default = False)
        Enable or disable stdout on shell

    Raises
    ------
      TypeError
        If `mag` is neither MagP64 nor MagT64, or if more than two
        accuracies are given.

      ValueError
        If an accuracy or the protocol is not among the allowed values.

    Example
    -------
    >>> import numpy as np
    >>> from ReplicatedFocusingBeliefPropagation import ReplicatedFocusingBeliefPropagation as rFBP
    >>>
    >>> N, M = (20, 101) # M must be odd
    >>> data = np.random.choice([-1, 1], p=[.5, .5], size=(N, M))
    >>> label = np.random.choice([-1, 1], p=[.5, .5], size=(N, ))
    >>>
    >>> rfbp = rFBP()
    >>> rfbp.fit(data, label)
      ReplicatedFocusingBeliefPropagation(randfact=0.1, damping=0.5, accuracy=('accurate', 'exact'), nth=1, epsil=0.1, seed=135, size=101, hidden=3, verbose=False, protocol=pseudo_reinforcement, mag=<class 'ReplicatedFocusingBeliefPropagation.rfbp.MagP64.MagP64'>, max_iter=1000)
    >>> predicted_labels = rfbp.predict(data)

    Notes
    -----
    .. note::
      The input data must be composed by binary variables codified as `[-1, 1]`,
      since the model works only with spin-like variables.

    References
    ----------
    - C. Baldassi, C. Borgs, J. T. Chayes, A. Ingrosso, C. Lucibello, L. Saglietti, and R. Zecchina. "Unreasonable effectiveness of learning neural networks: From accessible states and robust ensembles to basic algorithmic schemes", Proceedings of the National Academy of Sciences, 113(48):E7655-E7662, 2016.
    - C. Baldassi, A. Braunstein, N. Brunel, R. Zecchina. "Efficient supervised learning in networks with binary synapses", Proceedings of the National Academy of Sciences, 104(26):11079-11084, 2007.
    - C. Baldassi, F. Gerace, C. Lucibello, L. Saglietti, R. Zecchina. "Learning may need only a few bits of synaptic precision", Physical Review E, 93, 2016
    - D. Dall'Olio, N. Curti, G. Castellani, A. Bazzani, D. Remondini. "Classification of Genome Wide Association data by Belief Propagation Neural network", CCS Italy, 2019.
    '''

    _ALLOWED_ACCURACY = ('exact', 'accurate', 'approx', 'none')
    _ALLOWED_PROTOCOL = ('scoping', 'pseudo_reinforcement', 'free_scoping', 'standard_reinforcement')

    def __init__ (self, mag=MagP64, hidden=3, max_iter=1000, seed=135, damping=0.5,
                  accuracy=('accurate', 'exact'), randfact=1e-1, epsil=1e-1,
                  protocol='pseudo_reinforcement', size=101, nth=NTH, verbose=False):

        if mag is not MagP64 and mag is not MagT64:
            raise TypeError('Magnetization must be an instance of Mag Enum')

        if len(accuracy) > 2:
            raise TypeError('Too many accuracies given. Max number is two')

        if not all(a in self._ALLOWED_ACCURACY for a in accuracy):
            # Join the given values instead of indexing [0] and [1]: a single
            # wrong accuracy must raise this ValueError, not an IndexError.
            raise ValueError('Wrong accuracy. Possible values are only {}. Given: {}'.format(
                ','.join(self._ALLOWED_ACCURACY),
                ', '.join(str(a) for a in accuracy)))

        if protocol not in self._ALLOWED_PROTOCOL:
            raise ValueError('Incorrect Protocol found. Possible values are only {}'.format(
                ','.join(self._ALLOWED_PROTOCOL)))

        self.mag = mag
        self.hidden = hidden
        self.max_iter = max_iter
        self.seed = seed
        self.damping = damping
        self.accuracy = accuracy
        self.randfact = randfact
        self.epsil = epsil
        self.protocol = protocol
        self.size = size
        self.nth = nth
        self.verbose = verbose
        #self.weights_ = None

    def predict (self, X):
        '''
        Predict the new labels computed by ReplicatedFocusingBeliefPropagation model

        Parameters
        ----------
          X : array of shape [n_samples, n_features]
            The input samples. A `Pattern` object is used as it is.

        Returns
        -------
          y : array of shape [n_samples]
            The predicted target values.
        '''
        check_is_fitted(self, 'weights_')

        if isinstance(X, Pattern):
            testset = X
        else:
            X = check_array(X)
            # Pattern needs a label per sample: use dummy zeros for prediction
            testset = Pattern(X, [0] * np.shape(X)[0])

        row_size, column_size = self.weights_.shape
        return np.asarray(_nonbayes_test(self.weights_.ravel(), row_size, column_size,
                                         testset.pattern, self.hidden))

        # Python non-bayes test version, kept for reference:
        #
        # nrow, ncol = np.shape(testset)
        # predicted_labels = np.empty(nrow, dtype=int)
        #
        # with warnings.catch_warnings(): # catch runtime warning due to s2 == 0
        #   warnings.simplefilter("ignore")
        #
        #   for i, Xi in enumerate(testset):
        #     s = sum( [erf( sum([w * x for w, x in zip(Wj, Xi)]) / np.sqrt(2. * sum([ (1 - w * w) * x * x for w, x in zip(Wj, Xi)])))
        #                for Wj in self.weights] )
        #     predicted_labels[i] = np.sign(s)
        # return predicted_labels

    def fit (self, X, y=None):
        '''
        Fit the ReplicatedFocusingBeliefPropagation model meta-transformer

        Parameters
        ----------
          X : array-like of shape (n_samples, n_features)
            The training input samples. A `Pattern` object is used as it is
            and `y` is then ignored.

          y : array-like, shape (n_samples,)
            The target values (integers that correspond to classes in
            classification, real numbers in regression).

        Returns
        -------
          self : ReplicatedFocusingBeliefPropagation object
        '''
        pattern = X if isinstance(X, Pattern) else Pattern(X, y)
        acc = (_check_string(level, exist=False) for level in self.accuracy)
        protocol = Focusing_Protocol(protocol=self.protocol, size=self.size)
        # Map the Python magnetization class onto the enum of the wrapped library
        mag = Mag.MagP64 if self.mag is MagP64 else Mag.MagT64

        with redirect_stdout(self.verbose):
            self.weights_ = _rfbp(mag=mag,
                                  pattern=pattern.pattern,
                                  protocol=protocol.fprotocol,
                                  hidden=self.hidden,
                                  max_iter=self.max_iter,
                                  max_steps=protocol.num_of_replicas,
                                  randfact=self.randfact,
                                  damping=self.damping,
                                  epsil=self.epsil,
                                  accuracy=acc,
                                  seed=self.seed,
                                  nth=self.nth
                                  )

        self.weights_ = np.asarray(self.weights_, dtype=int)
        return self

    def load_weights (self, weightfile, delimiter='\t', binary=False):
        '''
        Load weights from file

        The number of rows of the loaded matrix overwrites `hidden`.

        Parameters
        ----------
          weightfile : string
            Filename of weights

          delimiter : char
            Separator for ascii loading

          binary : bool
            Switch between binary and ascii loading style

        Returns
        -------
          self : ReplicatedFocusingBeliefPropagation object

        Example
        -------
        >>> from ReplicatedFocusingBeliefPropagation import ReplicatedFocusingBeliefPropagation as rFBP
        >>>
        >>> clf = rFBP()
        >>> clf.load_weights('path/to/weights_filename.csv', delimiter=',', binary=False)
          ReplicatedFocusingBeliefPropagation(randfact=0.1, damping=0.5, accuracy=('accurate', 'exact'), nth=1, epsil=0.1, seed=135, size=101, hidden=3, verbose=False, protocol=pseudo_reinforcement, mag=<class 'ReplicatedFocusingBeliefPropagation.rfbp.MagP64.MagP64'>, max_iter=1000)
        '''
        if binary:
            # SECURITY: pickle can execute arbitrary code while loading.
            # Only load weight files coming from a trusted source.
            with open(weightfile, 'rb') as fp:
                self.weights_ = pickle.load(fp)
        else:
            # ndmin=2 keeps a single-row file as a (1, n) matrix, so that the
            # row count below and the shape unpacking in predict stay valid.
            self.weights_ = np.loadtxt(weightfile, delimiter=delimiter, ndmin=2)

        self.hidden = len(self.weights_)
        return self

    def save_weights (self, weightfile, delimiter='\t', binary=False):
        '''
        Save weights to file

        Parameters
        ----------
          weightfile : string
            Filename to dump the weights

          delimiter : char
            Separator for ascii dump

          binary : bool
            Switch between binary and ascii dumping style

        Example
        -------
        >>> import numpy as np
        >>> from ReplicatedFocusingBeliefPropagation import ReplicatedFocusingBeliefPropagation as rFBP
        >>>
        >>> N, M = (20, 101) # M must be odd
        >>> data = np.random.choice([-1, 1], p=[.5, .5], size=(N, M))
        >>> label = np.random.choice([-1, 1], p=[.5, .5], size=(N, ))
        >>>
        >>> rfbp = rFBP()
        >>> rfbp.fit(data, label)
        >>> rfbp.save_weights('path/to/weights_filename.csv', delimiter=',', binary=False)
        '''
        check_is_fitted(self, 'weights_')

        if binary:
            with open(weightfile, 'wb') as fp:
                pickle.dump(self.weights_, fp)
        else:
            np.savetxt(weightfile, self.weights_, delimiter=delimiter)

    def __repr__ (self):
        '''
        Object representation

        Returns
        -------
          string : class name followed by the constructor parameters, listed
            in signature order with their current values.
        '''
        cls = self.__class__
        # __qualname__ does not exist on Python 2
        class_name = getattr(cls, '__qualname__', cls.__name__)

        code = self.__init__.__code__
        # co_varnames stores the arguments first and the local variables after:
        # keep only the arguments, in their declaration order.
        params = [name for name in code.co_varnames[:code.co_argcount] if name != 'self']
        args = ', '.join('{}={}'.format(name, str(getattr(self, name))) for name in params)
        return '{0}({1})'.format(class_name, args)