11"""
2- This file implements the Mean & Covariance RBM discussed in
2+ This file implements the Mean & Covariance RBM discussed in
33
44 Ranzato, M. and Hinton, G. E. (2010)
55 Modeling pixel means and covariances using factored third-order Boltzmann machines.
3030Version in paper
3131----------------
3232
33- Full Energy of the Mean and Covariance RBM, with
33+ Full Energy of the Mean and Covariance RBM, with
3434:math:`h_k = h_k^{(c)}`,
3535:math:`g_j = h_j^{(m)}`,
3636:math:`b_k = b_k^{(c)}`,
3737:math:`c_j = b_j^{(m)}`,
3838:math:`U_{if} = C_{if}`,
3939
4040 E (v, h, g) =
41- - 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i (U_{if} v_i) / (|U_{.f}|*|v|) )^2
41+ - 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i (U_{if} v_i) / (|U_{.f}|*|v|) )^2
4242 - \sum_k b_k h_k
4343 + 0.5 \sum_i v_i^2
4444 - \sum_j \sum_i W_{ij} g_j v_i
5555The train_mcRBM file implements learning in a similar but technically different Energy function:
5656
5757 E (v, h, g) =
58- - 0.5 \sum_f \sum_k P_{fk} h_k (\sum_i U_{if} v_i / sqrt(\sum_i v_i^2/I + 0.5))^2
58+ - 0.5 \sum_f \sum_k P_{fk} h_k (\sum_i U_{if} v_i / sqrt(\sum_i v_i^2/I + 0.5))^2
5959 - \sum_k b_k h_k
6060 + 0.5 \sum_i v_i^2
6161 - \sum_j \sum_i W_{ij} g_j v_i
8484omitted for clarity, and replaced analytically with a negative identity matrix.
8585
8686 E (v, h, g) =
87- + 0.5 \sum_k h_k (\sum_i U_{ik} v_i / sqrt(\sum_i v_i^2/I + 0.5))^2
87+ + 0.5 \sum_k h_k (\sum_i U_{ik} v_i / sqrt(\sum_i v_i^2/I + 0.5))^2
8888 - \sum_k b_k h_k
8989 + 0.5 \sum_i v_i^2
9090 - \sum_j \sum_i W_{ij} g_j v_i
9191 - \sum_j c_j g_j
9292
9393 E (v, h, g) =
94- - 0.5 \sum_f \sum_k P_{fk} h_k (\sum_i U_{if} v_i / sqrt(\sum_i v_i^2/I + 0.5))^2
94+ - 0.5 \sum_f \sum_k P_{fk} h_k (\sum_i U_{if} v_i / sqrt(\sum_i v_i^2/I + 0.5))^2
9595 - \sum_k b_k h_k
9696 + 0.5 \sum_i v_i^2
9797 - \sum_j \sum_i W_{ij} g_j v_i
9898 - \sum_j c_j g_j
9999
100-
100+
101101
102102Conventions in this file
103103========================
107107
108108
109109Global functions like `free_energy` work on an mcRBM as parametrized in a particular way.
110- Suppose we have
111- - I input dimensions,
112- - F squared filters,
110+ Suppose we have
111+ - I input dimensions,
112+ - F squared filters,
113113 - J mean variables, and
114114 - K covariance variables.
115115
131131# NOT THE ENERGY FUNCTION IN THE CODE!!!
132132#
133133# Free energy is the marginal energy of visible units
134- # Recall:
134+ # Recall:
135135# Q(x) = exp(-E(x))/Z ==> -log(Q(x)) - log(Z) = E(x)
136136#
137137#
154154# - \sum_k b_k h_k
155155# + 0.5 \sum_i v_i^2
156156# - \sum_j \sum_i W_{ij} g_j v_i
157- # - \sum_j c_j g_j
157+ # - \sum_j c_j g_j
158158# - \sum_i a_i v_i ))
159159#
160160# Get rid of double negs in exp
165165# ) * \sum_{g} exp(
166166# + \sum_j \sum_i W_{ij} g_j v_i
167167# + \sum_j c_j g_j))
168- # - \sum_i a_i v_i
168+ # - \sum_i a_i v_i
169169#
170170# Break up log
171171# = -\log( \sum_{h} exp(
176176# + \sum_j \sum_i W_{ij} g_j v_i
177177# + \sum_j c_j g_j )))
178178# + 0.5 \sum_i v_i^2
179- # - \sum_i a_i v_i
179+ # - \sum_i a_i v_i
180180#
181181# Use the fact that the hidden units are binary to turn log(sum(exp(sum(...)))) into sum(log(...))
182182# = -\log(\sum_{h} exp(
185185# ))
186186# - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j ))
187187# + 0.5 \sum_i v_i^2
188- # - \sum_i a_i v_i
188+ # - \sum_i a_i v_i
189189#
190190# = - \sum_{k} \log(1 + exp(b_k + 0.5 \sum_f P_{fk}( \sum_i U_{if} v_i )^2 / (|U_{*f}|*|v|)))
191191# - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j ))
192192# + 0.5 \sum_i v_i^2
193- # - \sum_i a_i v_i
193+ # - \sum_i a_i v_i
194194#
195195# For negative-one-diagonal P this gives:
196196#
197197# = - \sum_{k} \log(1 + exp(b_k - 0.5 ( \sum_i U_{ik} v_i )^2 / (|U_{*k}|*|v|)))
198198# - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j ))
199199# + 0.5 \sum_i v_i^2
200- # - \sum_i a_i v_i
200+ # - \sum_i a_i v_i
201201
202202import sys , os , logging
203203import numpy as np
@@ -272,7 +272,7 @@ def contrastive_grad(free_energy_fn, pos_v, neg_v, wrt, other_cost=0):
272272 :param neg_v: negative-phase sample of visible units
273273 :param wrt: TensorType variables with respect to which we want gradients (similar to the
274274 'wrt' argument to tensor.grad)
275- :param other_cost: TensorType scalar
275+ :param other_cost: TensorType scalar
276276
277277 :returns: TensorType variables for the gradient on each of the 'wrt' arguments
278278
@@ -358,7 +358,7 @@ def expected_h_g_given_v(self, v):
358358
359359 `h` is the conditional on the covariance units.
360360 `g` is the conditional on the mean units.
361-
361+
362362 """
363363 h = TT .nnet .sigmoid (self .hidden_cov_units_preactivation_given_v (v ))
364364 g = TT .nnet .sigmoid (self .c + dot (v ,self .W ))
@@ -369,7 +369,7 @@ def n_visible_units(self):
369369
370370 For an RBM made from shared variables, this will return an integer,
371371 for a purely symbolic RBM this will return a theano expression.
372-
372+
373373 """
374374 try :
375375 return self .W .get_value (borrow = True ).shape [0 ]
@@ -381,7 +381,7 @@ def n_hidden_cov_units(self):
381381
382382 For an RBM made from shared variables, this will return an integer,
383383 for a purely symbolic RBM this will return a theano expression.
384-
384+
385385 """
386386 try :
387387 return self .U .get_value (borrow = True ).shape [1 ]
@@ -393,7 +393,7 @@ def n_hidden_mean_units(self):
393393
394394 For an RBM made from shared variables, this will return an integer,
395395 for a purely symbolic RBM this will return a theano expression.
396-
396+
397397 """
398398 try :
399399 return self .W .get_value (borrow = True ).shape [1 ]
@@ -449,7 +449,7 @@ def params(self):
449449
450450 WRITEME : a *prescriptive* definition of this method suitable for mention in the API
451451 doc.
452-
452+
453453 """
454454 return list (self ._params )
455455
@@ -467,7 +467,7 @@ def alloc(cls, n_I, n_K, n_J, rng = 8923402190,
467467 :param n_K: number of covariance hidden units
468468 :param n_J: number of mean filters (linear)
469469 :param rng: seed or numpy RandomState object to initialize parameters
470-
470+
471471 :note:
472472 Constants for initial ranges and values taken from train_mcRBM.py.
473473 """
@@ -503,7 +503,7 @@ def topological_connectivity(out_shape=(12,12), window_shape=(3,3), window_strid
503503 in_c = out_c * window_stride [1 ] + win_c
504504 rval [in_r % A , in_c % B , out_r % C , out_c % D ] += 1
505505
506- # This normalization algorithm is a guess, based on inspection of the matrix loaded from
506+ # This normalization algorithm is a guess, based on inspection of the matrix loaded from
507507 # CVPR2010paper_material/topo2D_3x3_stride2_576filt.mat
508508 rval = rval .reshape ((A * B , C * D ))
509509 rval = (rval .T / rval .sum (axis = 1 )).T
@@ -542,7 +542,7 @@ def n_hidden_cov_units(self):
542542
543543 For an RBM made from shared variables, this will return an integer,
544544 for a purely symbolic RBM this will return a theano expression.
545-
545+
546546 """
547547 try :
548548 return self .P .get_value (borrow = True ).shape [1 ]
@@ -558,7 +558,7 @@ def alloc(cls, n_I, n_K, n_J, *args, **kwargs):
558558 :param n_K: number of covariance hidden units
559559 :param n_J: number of mean filters (linear)
560560 :param rng: seed or numpy RandomState object to initialize parameters
561-
561+
562562 :note:
563563 Constants for initial ranges and values taken from train_mcRBM.py.
564564 """
@@ -596,7 +596,7 @@ def alloc_with_P(cls, Pval, n_I, n_J, rng = 8923402190,
596596 return rval
597597
598598class mcRBMTrainer (object ):
599- """Light-weight class encapsulating math for mcRBM training
599+ """Light-weight class encapsulating math for mcRBM training
600600
601601 Attributes:
602602 - rbm - an mcRBM instance
@@ -697,7 +697,7 @@ def normalize_U(self, new_U):
697697 """
698698 :param new_U: a proposed new value for rbm.U
699699
700- :returns: a pair of TensorType variables:
700+ :returns: a pair of TensorType variables:
701701 a corrected new value for U, and a new value for self.normVF
702702
703703 This is a weird normalization procedure, but the sample code for the paper has it, and
@@ -713,7 +713,7 @@ def contrastive_grads(self, neg_v = None):
713713 neg_v = self .sampler .positions
714714 return contrastive_grad (
715715 free_energy_fn = self .rbm .free_energy_given_v ,
716- pos_v = self .visible_batch ,
716+ pos_v = self .visible_batch ,
717717 neg_v = neg_v ,
718718 wrt = self .rbm .params (),
719719 other_cost = (l1 (self .rbm .U )+ l1 (self .rbm .W )) * self .effective_l1_penalty )
@@ -747,7 +747,7 @@ def cd_updates(self):
747747 # go through that mechanism.
748748
749749 lr = TT .clip (
750- self .learn_rate * TT .cast (self .lr_anneal_start / (self .iter + 1 ), floatX ),
750+ self .learn_rate * TT .cast (self .lr_anneal_start / (self .iter + 1 ), floatX ),
751751 0.0 , #min
752752 self .learn_rate ) #max
753753
0 commit comments