From 58dd5112032819bce5c2fb7fa84c3e1c1dc38fc3 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 29 Aug 2016 01:03:12 +0800 Subject: [PATCH 001/119] added bp ep stump --- chapter08/fgSp.m | 7 +++++++ chapter08/hmmBp.m | 6 ++++++ chapter08/mrfSp.m | 5 +++++ chapter10/gpEp.m | 7 +++++++ chapter10/hmmEp.m | 7 +++++++ chapter10/mixGaussEp.m | 7 +++++++ 6 files changed, 39 insertions(+) create mode 100644 chapter08/fgSp.m create mode 100644 chapter08/hmmBp.m create mode 100644 chapter08/mrfSp.m create mode 100644 chapter10/gpEp.m create mode 100644 chapter10/hmmEp.m create mode 100644 chapter10/mixGaussEp.m diff --git a/chapter08/fgSp.m b/chapter08/fgSp.m new file mode 100644 index 0000000..a41b324 --- /dev/null +++ b/chapter08/fgSp.m @@ -0,0 +1,7 @@ +function model = fgSp(A) +% sum product belief propagation on factor graph +% support parallel schedule and serial schdule +% A: affinity matrix (sparse) of a MRF graph + + + diff --git a/chapter08/hmmBp.m b/chapter08/hmmBp.m new file mode 100644 index 0000000..0c86c69 --- /dev/null +++ b/chapter08/hmmBp.m @@ -0,0 +1,6 @@ +function [ output_args ] = hmmBp( input_args ) +% sum product belief propagation for HMM model +% support parallel schedule and serial schdule +% A: affinity matrix (sparse) of a MRF graph + + diff --git a/chapter08/mrfSp.m b/chapter08/mrfSp.m new file mode 100644 index 0000000..1e4671d --- /dev/null +++ b/chapter08/mrfSp.m @@ -0,0 +1,5 @@ +function model = mrfSp(A) +% sum product belief propagation on Markov random field (undirected graphical model) +% support parallel schedule and serial schdule +% A: affinity matrix (sparse) of a MRF graph + diff --git a/chapter10/gpEp.m b/chapter10/gpEp.m new file mode 100644 index 0000000..0ee4a59 --- /dev/null +++ b/chapter10/gpEp.m @@ -0,0 +1,7 @@ +function [ output_args ] = gpEp( input_args ) +%GPEP Summary of this function goes here +% Detailed explanation goes here + + +end + diff --git a/chapter10/hmmEp.m b/chapter10/hmmEp.m new file mode 100644 index 
0000000..00d0807 --- /dev/null +++ b/chapter10/hmmEp.m @@ -0,0 +1,7 @@ +function [ output_args ] = hmmEp( input_args ) +%HMMEP Summary of this function goes here +% Detailed explanation goes here + + +end + diff --git a/chapter10/mixGaussEp.m b/chapter10/mixGaussEp.m new file mode 100644 index 0000000..6d56405 --- /dev/null +++ b/chapter10/mixGaussEp.m @@ -0,0 +1,7 @@ +function [ output_args ] = mixGaussEp( input_args ) +%MIXGAUSSEP Summary of this function goes here +% Detailed explanation goes here + + +end + From 7067c9450d80b78cbabb44e2e9db33d3d5f6d751 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 9 Feb 2017 21:20:26 +0800 Subject: [PATCH 002/119] minor tweak --- common/logsumexp.m | 2 +- common/normalize.m | 2 +- common/standardize.m | 5 ----- common/unitize.m | 2 +- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/common/logsumexp.m b/common/logsumexp.m index 9838e4f..1098342 100644 --- a/common/logsumexp.m +++ b/common/logsumexp.m @@ -2,7 +2,7 @@ % Compute log(sum(exp(X),dim)) while avoiding numerical underflow. % By default dim = 1 (columns). % Written by Mo Chen (sth4nth@gmail.com). -if nargin == 1, +if nargin == 1 % Determine which dimension sum will use dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end diff --git a/common/normalize.m b/common/normalize.m index be81e2c..c7ae7a1 100644 --- a/common/normalize.m +++ b/common/normalize.m @@ -2,7 +2,7 @@ % Normalize the vectors to be summing to one % By default dim = 1 (columns). % Written by Michael Chen (sth4nth@gmail.com). -if nargin == 1, +if nargin == 1 % Determine which dimension sum will use dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end diff --git a/common/standardize.m b/common/standardize.m index 233fadd..14321ab 100644 --- a/common/standardize.m +++ b/common/standardize.m @@ -2,11 +2,6 @@ % Unitize the vectors to be unit length % By default dim = 1 (columns). % Written by Mo Chen (sth4nth@gmail.com). 
-if nargin == 1, - % Determine which dimension sum will use - dim = find(size(X)~=1,1); - if isempty(dim), dim = 1; end -end X = bsxfun(@minux,X,mean(X,2)); s = sqrt(mean(sum(X.^2,1))); Y = X/s; \ No newline at end of file diff --git a/common/unitize.m b/common/unitize.m index feb12bb..22297be 100644 --- a/common/unitize.m +++ b/common/unitize.m @@ -2,7 +2,7 @@ % Unitize the vectors to be unit length % By default dim = 1 (columns). % Written by Mo Chen (sth4nth@gmail.com). -if nargin == 1, +if nargin == 1 % Determine which dimension sum will use dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end From 03bdc614a3438b17985ae508965496e4a0ffe819 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 14 Feb 2017 16:59:36 +0800 Subject: [PATCH 003/119] tweak logsumexp a little. nothing serious --- chapter04/softmax.m | 2 +- common/logsumexp.m | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/chapter04/softmax.m b/chapter04/softmax.m index e7ab72e..429e5d5 100644 --- a/chapter04/softmax.m +++ b/chapter04/softmax.m @@ -2,7 +2,7 @@ % Softmax function % By default dim = 1 (columns). % Written by Mo Chen (sth4nth@gmail.com). -if nargin == 1, +if nargin == 1 % Determine which dimension sum will use dim = find(size(x)~=1,1); if isempty(dim), dim = 1; end diff --git a/common/logsumexp.m b/common/logsumexp.m index 1098342..67b36bc 100644 --- a/common/logsumexp.m +++ b/common/logsumexp.m @@ -3,15 +3,10 @@ % By default dim = 1 (columns). % Written by Mo Chen (sth4nth@gmail.com). 
if nargin == 1 - % Determine which dimension sum will use dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end end - -% subtract the largest in each dim -y = max(X,[],dim); -s = y+log(sum(exp(bsxfun(@minus,X,y)),dim)); % TODO: use log1p -i = isinf(y); -if any(i(:)) - s(i) = y(i); -end \ No newline at end of file +a = max(X,[],dim); +s = a+log(sum(exp(X-a),dim)); % TODO: use log1p +i = isinf(a); +s(i) = a(i); \ No newline at end of file From 03857d11afa607f643c7e6f516164c1e4ca8d658 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 14 Feb 2017 18:33:09 +0800 Subject: [PATCH 004/119] tweak softmax --- chapter04/softmax.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chapter04/softmax.m b/chapter04/softmax.m index 429e5d5..3f82baf 100644 --- a/chapter04/softmax.m +++ b/chapter04/softmax.m @@ -1,10 +1,10 @@ -function s = softmax(x, dim) +function [Y,s] = softmax(X, dim) % Softmax function % By default dim = 1 (columns). % Written by Mo Chen (sth4nth@gmail.com). 
if nargin == 1 - % Determine which dimension sum will use - dim = find(size(x)~=1,1); + dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end end -s = exp(bsxfun(@minus,x,logsumexp(x,dim))); +s = logsumexp(X,dim); +Y = exp(X-s); From da5b8c690264db83eb3c5c6469dbc2a9b4fb5a0b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 27 Feb 2017 00:18:10 +0800 Subject: [PATCH 005/119] remove empty stump functions --- chapter08/fgSp.m | 7 ------- chapter08/hmmBp.m | 6 ------ chapter08/mrfSp.m | 5 ----- chapter10/gpEp.m | 7 ------- chapter10/hmmEp.m | 7 ------- chapter10/mixGaussEp.m | 7 ------- 6 files changed, 39 deletions(-) delete mode 100644 chapter08/fgSp.m delete mode 100644 chapter08/hmmBp.m delete mode 100644 chapter08/mrfSp.m delete mode 100644 chapter10/gpEp.m delete mode 100644 chapter10/hmmEp.m delete mode 100644 chapter10/mixGaussEp.m diff --git a/chapter08/fgSp.m b/chapter08/fgSp.m deleted file mode 100644 index a41b324..0000000 --- a/chapter08/fgSp.m +++ /dev/null @@ -1,7 +0,0 @@ -function model = fgSp(A) -% sum product belief propagation on factor graph -% support parallel schedule and serial schdule -% A: affinity matrix (sparse) of a MRF graph - - - diff --git a/chapter08/hmmBp.m b/chapter08/hmmBp.m deleted file mode 100644 index 0c86c69..0000000 --- a/chapter08/hmmBp.m +++ /dev/null @@ -1,6 +0,0 @@ -function [ output_args ] = hmmBp( input_args ) -% sum product belief propagation for HMM model -% support parallel schedule and serial schdule -% A: affinity matrix (sparse) of a MRF graph - - diff --git a/chapter08/mrfSp.m b/chapter08/mrfSp.m deleted file mode 100644 index 1e4671d..0000000 --- a/chapter08/mrfSp.m +++ /dev/null @@ -1,5 +0,0 @@ -function model = mrfSp(A) -% sum product belief propagation on Markov random field (undirected graphical model) -% support parallel schedule and serial schdule -% A: affinity matrix (sparse) of a MRF graph - diff --git a/chapter10/gpEp.m b/chapter10/gpEp.m deleted file mode 100644 index 0ee4a59..0000000 --- 
a/chapter10/gpEp.m +++ /dev/null @@ -1,7 +0,0 @@ -function [ output_args ] = gpEp( input_args ) -%GPEP Summary of this function goes here -% Detailed explanation goes here - - -end - diff --git a/chapter10/hmmEp.m b/chapter10/hmmEp.m deleted file mode 100644 index 00d0807..0000000 --- a/chapter10/hmmEp.m +++ /dev/null @@ -1,7 +0,0 @@ -function [ output_args ] = hmmEp( input_args ) -%HMMEP Summary of this function goes here -% Detailed explanation goes here - - -end - diff --git a/chapter10/mixGaussEp.m b/chapter10/mixGaussEp.m deleted file mode 100644 index 6d56405..0000000 --- a/chapter10/mixGaussEp.m +++ /dev/null @@ -1,7 +0,0 @@ -function [ output_args ] = mixGaussEp( input_args ) -%MIXGAUSSEP Summary of this function goes here -% Detailed explanation goes here - - -end - From a08bbf1f67f868eeee30d704291853eb28499a53 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 27 Feb 2017 00:25:23 +0800 Subject: [PATCH 006/119] Update TODO.txt --- TODO.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/TODO.txt b/TODO.txt index 09388db..4b7292b 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,4 +1,3 @@ TODO: -ch10: EP ch13: LDS numerical stability (numerical stable (square root) version of Kalman filter and smoother) ch05: MLP bias and gradient unit From a765f9e6c405f47638306655b926c035dffefe9a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 9 Mar 2017 23:39:23 +0800 Subject: [PATCH 007/119] refactorized HMM code --- chapter13/HMM/hmmEm.m | 37 +++++++++++++++++++++------------ chapter13/HMM/hmmFilter.m | 31 ++++++++++++++++++--------- chapter13/HMM/hmmFilter_.m | 19 ----------------- chapter13/HMM/hmmRecSmoother_.m | 23 -------------------- chapter13/HMM/hmmSmoother.m | 37 ++++++++++++++++++++++----------- chapter13/HMM/hmmSmoother_.m | 29 -------------------------- chapter13/HMM/hmmViterbi.m | 36 +++++++++++++++++++++----------- chapter13/HMM/hmmViterbi_.m | 25 ---------------------- demo/ch13/hmm_demo.m | 17 +++++++-------- 9 files changed, 102 insertions(+), 152 
deletions(-) delete mode 100644 chapter13/HMM/hmmFilter_.m delete mode 100644 chapter13/HMM/hmmRecSmoother_.m delete mode 100644 chapter13/HMM/hmmSmoother_.m delete mode 100644 chapter13/HMM/hmmViterbi_.m diff --git a/chapter13/HMM/hmmEm.m b/chapter13/HMM/hmmEm.m index c14a897..ef5829e 100644 --- a/chapter13/HMM/hmmEm.m +++ b/chapter13/HMM/hmmEm.m @@ -8,37 +8,48 @@ % llh: loglikelihood % Written by Mo Chen (sth4nth@gmail.com). n = size(x,2); -d = max(x); -X = sparse(x,1:n,1,d,n); - +X = sparse(x,1:n,1); +d = size(X,1); if isstruct(init) % init with a model A = init.A; E = init.E; s = init.s; elseif numel(init) == 1 % random init with latent k k = init; + s = normalize(rand(k,1),1); A = normalize(rand(k,k),2); E = normalize(rand(k,d),2); - s = normalize(rand(k,1),1); end -M = E*X; - tol = 1e-4; maxIter = 100; llh = -inf(1,maxIter); for iter = 2:maxIter + M = E*X; % E-step - [gamma,alpha,beta,c] = hmmSmoother_(M,A,s); - llh(iter) = sum(log(c(c>0))); + [gamma,alpha,beta,c] = hmmSmoother(M,A,s); + llh(iter) = mean(log(c)); if llh(iter)-llh(iter-1) < tol*abs(llh(iter-1)); break; end % check likelihood for convergence % M-step - A = normalize(A.*(alpha(:,1:n-1)*bsxfun(@times,beta(:,2:n).*M(:,2:n),1./c(2:end))'),2); % 13.19 s = gamma(:,1); % 13.18 - M = bsxfun(@times,gamma*X',1./sum(gamma,2))*X; + A = normalize(A.*(alpha(:,1:n-1)*(beta(:,2:n).*M(:,2:n)./c(2:n))'),2); % 13.19 13.43 13.65 + E = bsxfun(@times,gamma*X',1./sum(gamma,2)); % 13.23 end -llh = llh(2:iter); +model.s = s; model.A = A; model.E = E; -model.s = s; - +llh = llh(2:iter); +function [gamma, alpha, beta, c] = hmmSmoother(M, A, s) +[K,T] = size(M); +At = A'; +c = zeros(1,T); +alpha = zeros(K,T); +[alpha(:,1),c(1)] = normalize(s.*M(:,1),1); +for t = 2:T + [alpha(:,t),c(t)] = normalize((At*alpha(:,t-1)).*M(:,t),1); % 13.59 +end +beta = ones(K,T); +for t = T-1:-1:1 + beta(:,t) = A*(beta(:,t+1).*M(:,t+1))/c(t+1); % 13.62 +end +gamma = alpha.*beta; % 13.64 diff --git a/chapter13/HMM/hmmFilter.m 
b/chapter13/HMM/hmmFilter.m index 1be2c63..c6fd1da 100644 --- a/chapter13/HMM/hmmFilter.m +++ b/chapter13/HMM/hmmFilter.m @@ -1,20 +1,31 @@ -function [alpha, energy] = hmmFilter(x, model) -% HMM forward filtering algorithm. This is a wrapper function which transform input and call underlying algorithm -% Unlike the method described in the book of PRML, the alpha returned is the normalized version: alpha(t)=p(z_t|x_{1:t}) -% Computing unnormalized version alpha(t)=p(z_t,x_{1:t}) is numerical unstable, which grows exponential fast to infinity. +function [alpha, llh] = hmmFilter0(model, x) +% HMM forward filtering algorithm. +% The alpha returned by this function is the normalized version (posterior): alpha(t)=p(z_t|x_{1:t}) +% Unnormalized version (joint distribution): alpha(t)=p(z_t,x_{1:t}) is numerical unstable. % Input: % x: 1 x n integer vector which is the sequence of observations -% model: model structure +% model: model structure which contains +% model.s: k x 1 start probability vector +% model.A: k x k transition matrix +% model.E: k x d emission matrix % Output: % alpha: k x n matrix of posterior alpha(t)=p(z_t|x_{1:t}) -% enery: loglikelihood +% llh: loglikelihood % Written by Mo Chen (sth4nth@gmail.com). +s = model.s; A = model.A; E = model.E; -s = model.s; n = size(x,2); -d = max(x); -X = sparse(x,1:n,1,d,n); +X = sparse(x,1:n,1); M = E*X; -[alpha, energy] = hmmFilter_(M, A, s); \ No newline at end of file + +[K,T] = size(M); +At = A'; +llh = zeros(1,T); +alpha = zeros(K,T); +[alpha(:,1),llh(1)] = normalize(s.*M(:,1),1); +for t = 2:T + [alpha(:,t),llh(t)] = normalize((At*alpha(:,t-1)).*M(:,t),1); % 13.59 +end +llh = sum(log(llh(llh>0))); \ No newline at end of file diff --git a/chapter13/HMM/hmmFilter_.m b/chapter13/HMM/hmmFilter_.m deleted file mode 100644 index ffd8fcc..0000000 --- a/chapter13/HMM/hmmFilter_.m +++ /dev/null @@ -1,19 +0,0 @@ -function [alpha, energy] = hmmFilter_(M, A, s) -% Implmentation function of HMM forward filtering algorithm. 
-% Input: -% M: k x n emmision data matrix M=E*X -% A: k x k transition matrix -% s: k x 1 starting probability (prior) -% Output: -% alpha: k x n matrix of posterior alpha(t)=p(z_t|x_{1:t}) -% enery: loglikelihood -% Written by Mo Chen (sth4nth@gmail.com). -[K,T] = size(M); -At = A'; -energy = zeros(1,T); -alpha = zeros(K,T); -[alpha(:,1),energy(1)] = normalize(s.*M(:,1),1); -for t = 2:T - [alpha(:,t),energy(t)] = normalize((At*alpha(:,t-1)).*M(:,t),1); % 13.59 -end -energy = sum(log(energy(energy>0))); \ No newline at end of file diff --git a/chapter13/HMM/hmmRecSmoother_.m b/chapter13/HMM/hmmRecSmoother_.m deleted file mode 100644 index 8c4139b..0000000 --- a/chapter13/HMM/hmmRecSmoother_.m +++ /dev/null @@ -1,23 +0,0 @@ -function [ gamma, c ] = hmmRecSmoother_( M, A, s ) -% Forward-backward (recursive gamma no alpha-beta) alogrithm for HMM to compute posterior p(z_i|x) -% Input: -% x: 1xn observation -% s: kx1 starting probability of p(z_1|s) -% A: kxk transition probability -% E: kxd emission probability -% Output: -% gamma: 1xn posterier p(z_i|x) -% llh: loglikelihood or evidence lnp(x) -% Written by Mo Chen sth4nth@gmail.com -[K,T] = size(M); -At = A'; -c = zeros(1,T); % normalization constant -gamma = zeros(K,T); -[gamma(:,1),c(1)] = normalize(s.*M(:,1),1); -for t = 2:T - [gamma(:,t),c(t)] = normalize((At*gamma(:,t-1)).*M(:,t),1); % 13.59 -end -for t = T-1:-1:1 - gamma(:,t) = normalize(bsxfun(@times,A,gamma(:,t)),1)*gamma(:,t+1); -end - diff --git a/chapter13/HMM/hmmSmoother.m b/chapter13/HMM/hmmSmoother.m index aa904ed..01bbdac 100644 --- a/chapter13/HMM/hmmSmoother.m +++ b/chapter13/HMM/hmmSmoother.m @@ -1,24 +1,37 @@ -function [gamma, alpha, beta, c] = hmmSmoother(x, model) -% HMM smoothing alogrithm (normalized forward-backward or normalized alpha-beta algorithm). This is a wrapper function which transform input and call underlying algorithm -% Unlike the method described in the book of PRML, the alpha and beta -% returned is the normalized. 
-% Computing unnormalized version alpha and beta is numerical unstable, which grows exponential fast to infinity. +function [gamma, alpha, beta, c] = hmmSmoother0(model, x) +% HMM smoothing alogrithm (normalized forward-backward or normalized alpha-beta algorithm). +% The alpha and beta returned by this function are the normalized version. % Input: % x: 1 x n integer vector which is the sequence of observations -% model: model structure +% model: model structure which contains +% model.s: k x 1 start probability vector +% model.A: k x k transition matrix +% model.E: k x d emission matrix % Output: % gamma: k x n matrix of posterior gamma(t)=p(z_t,x_{1:T}) % alpha: k x n matrix of posterior alpha(t)=p(z_t|x_{1:T}) % beta: k x n matrix of posterior beta(t)=gamma(t)/alpha(t) -% c: loglikelihood +% c: 1 x n normalization constant vector % Written by Mo Chen (sth4nth@gmail.com). +s = model.s; A = model.A; E = model.E; -s = model.s; n = size(x,2); -d = max(x); -X = sparse(x,1:n,1,d,n); +X = sparse(x,1:n,1); M = E*X; -[gamma, alpha, beta, c] = hmmSmoother_(M, A, s); -% [gamma,c] = hmmRecSmoother_(M, A, s); \ No newline at end of file + +[K,T] = size(M); +At = A'; +c = zeros(1,T); % normalization constant +alpha = zeros(K,T); +[alpha(:,1),c(1)] = normalize(s.*M(:,1),1); +for t = 2:T + [alpha(:,t),c(t)] = normalize((At*alpha(:,t-1)).*M(:,t),1); % 13.59 +end +beta = ones(K,T); +for t = T-1:-1:1 + beta(:,t) = A*(beta(:,t+1).*M(:,t+1))/c(t+1); % 13.62 +end +gamma = alpha.*beta; % 13.64 + diff --git a/chapter13/HMM/hmmSmoother_.m b/chapter13/HMM/hmmSmoother_.m deleted file mode 100644 index f6d2a71..0000000 --- a/chapter13/HMM/hmmSmoother_.m +++ /dev/null @@ -1,29 +0,0 @@ -function [gamma, alpha, beta, c] = hmmSmoother_(M, A, s) -% Implmentation function HMM smoothing alogrithm. -% Unlike the method described in the book of PRML, the alpha and beta -% returned is the normalized. 
-% Computing unnormalized version alpha and beta is numerical unstable, which grows exponential fast to infinity. -% Input: -% M: k x n emmision data matrix M=E*X -% A: k x k transition matrix -% s: k x 1 start prior probability -% Output: -% gamma: k x n matrix of posterior gamma(t)=p(z_t,x_{1:T}) -% alpha: k x n matrix of posterior alpha(t)=p(z_t|x_{1:T}) -% beta: k x n matrix of posterior beta(t)=gamma(t)/alpha(t) -% c: loglikelihood -% Written by Mo Chen (sth4nth@gmail.com). -[K,T] = size(M); -At = A'; -c = zeros(1,T); % normalization constant -alpha = zeros(K,T); -[alpha(:,1),c(1)] = normalize(s.*M(:,1),1); -for t = 2:T - [alpha(:,t),c(t)] = normalize((At*alpha(:,t-1)).*M(:,t),1); % 13.59 -end -beta = ones(K,T); -for t = T-1:-1:1 - beta(:,t) = A*(beta(:,t+1).*M(:,t+1))/c(t+1); % 13.62 -end -gamma = alpha.*beta; % 13.64 - diff --git a/chapter13/HMM/hmmViterbi.m b/chapter13/HMM/hmmViterbi.m index 1f76a01..44ae94b 100644 --- a/chapter13/HMM/hmmViterbi.m +++ b/chapter13/HMM/hmmViterbi.m @@ -1,19 +1,31 @@ -function [z, llh] = hmmViterbi(x, model) -% Viterbi algorithm calculated in log scale to improve numerical stability. -% This is a wrapper function which transform input and call underlying algorithm +function [z, llh] = hmmViterbi(model, x) +% Viterbi algorithm (calculated in log scale to improve numerical stability). % Input: % x: 1 x n integer vector which is the sequence of observations -% model: model structure +% model: model structure which contains +% model.s: k x 1 start probability vector +% model.A: k x k transition matrix +% model.E: k x d emission matrix % Output: % z: 1 x n latent state % llh: loglikelihood % Written by Mo Chen (sth4nth@gmail.com). 
-A = model.A; -E = model.E; -s = model.s; - n = size(x,2); -d = max(x); -X = sparse(x,1:n,1,d,n); -M = E*X; -[z,llh] = hmmViterbi_(M, A, s); +X = sparse(x,1:n,1); +s = log(model.s); +A = log(model.A); +M = log(model.E*X); + +k = numel(s); +Z = zeros(k,n); +Z(:,1) = 1:k; +v = s(:)+M(:,1); +for t = 2:n + [v,idx] = max(bsxfun(@plus,A,v),[],1); % 13.68 + v = v(:)+M(:,t); + Z = Z(idx,:); + Z(:,t) = 1:k; +end +[llh,idx] = max(v); +z = Z(idx,:); + diff --git a/chapter13/HMM/hmmViterbi_.m b/chapter13/HMM/hmmViterbi_.m deleted file mode 100644 index 07480be..0000000 --- a/chapter13/HMM/hmmViterbi_.m +++ /dev/null @@ -1,25 +0,0 @@ -function [z, llh] = hmmViterbi_(M, A, s) -% Implmentation function of Viterbi algorithm. -% Input: -% M: k x n emmision data matrix M=E*X -% A: k x k transition matrix -% s: k x 1 starting probability (prior) -% Output: -% z: 1 x n latent state -% llh: loglikelihood -% Written by Mo Chen (sth4nth@gmail.com). -[k,n] = size(M); -Z = zeros(k,n); -A = log(A); -M = log(M); -Z(:,1) = 1:k; -v = log(s(:))+M(:,1); -for t = 2:n - [v,idx] = max(bsxfun(@plus,A,v),[],1); % 13.68 - v = v(:)+M(:,t); - Z = Z(idx,:); - Z(:,t) = 1:k; -end -[llh,idx] = max(v); -z = Z(idx,:); - diff --git a/demo/ch13/hmm_demo.m b/demo/ch13/hmm_demo.m index 58156b8..66e994f 100644 --- a/demo/ch13/hmm_demo.m +++ b/demo/ch13/hmm_demo.m @@ -3,13 +3,12 @@ d = 3; k = 2; n = 10000; -[x, model] = hmmRnd(d, k, n); -%% -[z,p] = hmmViterbi(x,model); -%% -[alpha,llh] = hmmFilter(x,model); -%% -[gamma,alpha,beta,c] = hmmSmoother(x,model); -%% -[model, llh] = hmmEm(x,k); +%% Viterbi algorithm +[z, llh] = hmmViterbi(model, x); +%% HMM filter (forward algorithm) +[alpha, llh] = hmmFilter(model, x); +%% HMM smoother (forward backward) +[gamma,alpha,beta,c] = hmmSmoother(model, x); +%% Baum-Welch algorithm +[model, llh] = hmmEm(x,init); plot(llh) From 0d88ef5e06ba5dd492917e9cff6e7374c8c6eb88 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 9 Mar 2017 23:54:39 +0800 Subject: [PATCH 008/119] minor fix 
for function names of HMM --- chapter13/HMM/hmmFilter.m | 2 +- chapter13/HMM/hmmSmoother.m | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter13/HMM/hmmFilter.m b/chapter13/HMM/hmmFilter.m index c6fd1da..dad93a2 100644 --- a/chapter13/HMM/hmmFilter.m +++ b/chapter13/HMM/hmmFilter.m @@ -1,4 +1,4 @@ -function [alpha, llh] = hmmFilter0(model, x) +function [alpha, llh] = hmmFilter(model, x) % HMM forward filtering algorithm. % The alpha returned by this function is the normalized version (posterior): alpha(t)=p(z_t|x_{1:t}) % Unnormalized version (joint distribution): alpha(t)=p(z_t,x_{1:t}) is numerical unstable. diff --git a/chapter13/HMM/hmmSmoother.m b/chapter13/HMM/hmmSmoother.m index 01bbdac..fb97ec5 100644 --- a/chapter13/HMM/hmmSmoother.m +++ b/chapter13/HMM/hmmSmoother.m @@ -1,4 +1,4 @@ -function [gamma, alpha, beta, c] = hmmSmoother0(model, x) +function [gamma, alpha, beta, c] = hmmSmoother(model, x) % HMM smoothing alogrithm (normalized forward-backward or normalized alpha-beta algorithm). % The alpha and beta returned by this function are the normalized version. 
% Input: From ccedd275ad070e3b89662bc1a8dc35657e4f6083 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 00:20:53 +0800 Subject: [PATCH 009/119] update LDS --- chapter13/LDS/TODO.txt | 2 -- chapter13/LDS/kalmanFilter.m | 8 +++++--- chapter13/LDS/kalmanSmoother.m | 6 ++++-- chapter13/LDS/ldsEm.m | 21 ++++++++++++++++++--- demo/ch13/lds_demo.m | 10 +++++----- 5 files changed, 32 insertions(+), 15 deletions(-) delete mode 100644 chapter13/LDS/TODO.txt diff --git a/chapter13/LDS/TODO.txt b/chapter13/LDS/TODO.txt deleted file mode 100644 index 19e6c48..0000000 --- a/chapter13/LDS/TODO.txt +++ /dev/null @@ -1,2 +0,0 @@ -(1) test against matlab implementation of kalman filter -(2) simplify ldsEm with less parameters (G=diag(g), S=I) diff --git a/chapter13/LDS/kalmanFilter.m b/chapter13/LDS/kalmanFilter.m index 83cf979..0005ee6 100644 --- a/chapter13/LDS/kalmanFilter.m +++ b/chapter13/LDS/kalmanFilter.m @@ -1,5 +1,7 @@ -function [mu, V, llh] = kalmanFilter(X, model) -% Kalman filter +function [mu, V, llh] = kalmanFilter(model, X) +% Kalman filter (forward algorithm for linear dynamic system) +% NOTE: This is the exact implementation of the Kalman filter algorithm in PRML. +% However, this algorithm is not practical. It is numerical unstable. % Input: % X: d x n data matrix % model: model structure @@ -23,7 +25,7 @@ I = eye(k); PC = P*C'; -R = (C*PC+S); +R = C*PC+S; K = PC/R; % 13.97 mu(:,1) = mu0+K*(X(:,1)-C*mu0); % 13.94 V(:,:,1) = (I-K*C)*P; % 13.95 diff --git a/chapter13/LDS/kalmanSmoother.m b/chapter13/LDS/kalmanSmoother.m index c0aa02a..8254230 100644 --- a/chapter13/LDS/kalmanSmoother.m +++ b/chapter13/LDS/kalmanSmoother.m @@ -1,5 +1,7 @@ -function [nu, U, Ezz, Ezy, llh] = kalmanSmoother(X, model) +function [nu, U, Ezz, Ezy, llh] = kalmanSmoother(model, X) % Kalman smoother (forward-backward algorithm for linear dynamic system) +% NOTE: This is the exact implementation of the Kalman smoother algorithm in PRML. +% However, this algorithm is not practical. 
It is numerical unstable. % Input: % X: d x n data matrix % model: model structure @@ -28,7 +30,7 @@ % forward PC = P0*C'; -R = (C*PC+S); +R = C*PC+S; K = PC/R; mu(:,1) = mu0+K*(X(:,1)-C*mu0); V(:,:,1) = (I-K*C)*P0; diff --git a/chapter13/LDS/ldsEm.m b/chapter13/LDS/ldsEm.m index e9d548b..7f283e4 100644 --- a/chapter13/LDS/ldsEm.m +++ b/chapter13/LDS/ldsEm.m @@ -1,5 +1,8 @@ -function [model, llh] = ldsEm(X, model) +function [model, llh] = ldsEm(X, init) % EM algorithm for parameter estimation of linear dynamic system. +% NOTE: This is the exact implementation of the EM algorithm in PRML. +% However, this algorithm is not practical. It is numerical unstable and +% there is too much redundant degree of freedom. % Input: % X: d x n data matrix % model: prior model structure @@ -7,12 +10,24 @@ % model: trained model structure % llh: loglikelihood % Written by Mo Chen (sth4nth@gmail.com). -tol = 1e-4; +d = size(X,1); +if isstruct(init) % init with a model + model = init; +elseif numel(init) == 1 % random init with latent k + k = init; + model.A = randn(k,k); + model.G = iwishrnd(eye(k),k); + model.C = randn(d,k); + model.S = iwishrnd(eye(d),d); + model.mu0 = randn(k,1); + model.P0 = iwishrnd(eye(k),k); +end +tol = 1e-2; maxIter = 100; llh = -inf(1,maxIter); for iter = 2:maxIter % E-step - [nu, U, Ezz, Ezy, llh(iter)] = kalmanSmoother(X, model); + [nu, U, Ezz, Ezy, llh(iter)] = kalmanSmoother(model,X); if llh(iter)-llh(iter-1) < tol*abs(llh(iter-1)); break; end % check likelihood for convergence % M-step model = maximization(X, nu, U, Ezz, Ezy); diff --git a/demo/ch13/lds_demo.m b/demo/ch13/lds_demo.m index 4181123..8c0b30e 100644 --- a/demo/ch13/lds_demo.m +++ b/demo/ch13/lds_demo.m @@ -6,9 +6,9 @@ n = 100; [X,Z,model] = ldsRnd(d,k,n); -[mu, V, llh] = kalmanFilter(X, model); - -[nu, U, Ezz, Ezy, llh] = kalmanSmoother(X, model); -[model, llh] = ldsEm(X, model); -plot(llh); +[mu, V, llh] = kalmanFilter(model, X); +[nu, U, Ezz, Ezy, llh] = kalmanSmoother(model, X); +% 
[model, llh] = ldsEm(X,k); +% plot(llh); +% From 88f8e5a29248a646cdddbbd69a0aa1a9ad200295 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 00:24:18 +0800 Subject: [PATCH 010/119] minor fix --- chapter09/mixGaussPred.m | 2 +- demo/ch09/mixGaussEm_demo.m | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter09/mixGaussPred.m b/chapter09/mixGaussPred.m index ebebaa0..f614081 100644 --- a/chapter09/mixGaussPred.m +++ b/chapter09/mixGaussPred.m @@ -1,4 +1,4 @@ -function [label, R] = mixGaussPred(X, model) +function [label, R] = mixGaussPred(model, X) % Predict label and responsibility for Gaussian mixture model. % Input: % X: d x n data matrix diff --git a/demo/ch09/mixGaussEm_demo.m b/demo/ch09/mixGaussEm_demo.m index 95a14bb..a6ab295 100644 --- a/demo/ch09/mixGaussEm_demo.m +++ b/demo/ch09/mixGaussEm_demo.m @@ -16,6 +16,6 @@ figure; plotClass(X1,z1); % predict -z2 = mixGaussPred(X2,model); +z2 = mixGaussPred(model,X2); figure; plotClass(X2,z2); \ No newline at end of file From 170e45b05df2a7df180ccbcdb60b6ba507154ab6 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 00:26:06 +0800 Subject: [PATCH 011/119] update todo --- TODO.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/TODO.txt b/TODO.txt index 4b7292b..de2a55b 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,3 +1 @@ -TODO: -ch13: LDS numerical stability (numerical stable (square root) version of Kalman filter and smoother) -ch05: MLP bias and gradient unit +ch05: MLP bias and gradient unit (2nd order) From 018fe824d96943a50c4a6b3adc0e4b7564675192 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 00:34:13 +0800 Subject: [PATCH 012/119] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d87c6e7..a8c7b35 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ Introduction This package is a Matlab implementation of the algorithms described in the classical machine learning textbook: 
Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.microsoft.com/en-us/um/people/cmbishop/prml/)). +Note: this package requires Matlab R2016b or later, since it utilizes a new syntax of Matlab. + Description ------- The design goal of the code are as follows: From 362845e062f61749775fc80821bf0166482a540c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 02:03:36 +0800 Subject: [PATCH 013/119] add sample function of mixture model from prior --- chapter11/GaussWishart.m | 15 +++++++++++++++ chapter11/mixDpGb.m | 4 ++-- chapter11/mixGaussSample.m | 18 ++++++++++++++++++ demo/ch11/mixGaussGb_demo.m | 13 +++++++++---- 4 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 chapter11/mixGaussSample.m diff --git a/chapter11/GaussWishart.m b/chapter11/GaussWishart.m index b802718..a867735 100644 --- a/chapter11/GaussWishart.m +++ b/chapter11/GaussWishart.m @@ -20,6 +20,10 @@ function obj = clone(obj) end + function d = dim(obj) + d = numel(obj.m_); + end + function obj = addData(obj, X) kappa0 = obj.kappa_; m0 = obj.m_; @@ -89,5 +93,16 @@ c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(U))))/2; y = c+o; end + + function [mu, Sigma] = sample(obj) +% Sample a Gaussian distribution from GaussianWishart prior + kappa = obj.kappa_; + m = obj.m_; + nu = obj.nu_; + U = obj.U_; + + Sigma = iwishrnd(U'*U,nu); + mu = gaussRnd(m,Sigma/kappa); + end end end diff --git a/chapter11/mixDpGb.m b/chapter11/mixDpGb.m index 4396c7b..e0e3ba2 100644 --- a/chapter11/mixDpGb.m +++ b/chapter11/mixDpGb.m @@ -14,7 +14,7 @@ n = size(X,2); [label,Theta,w] = mixDpGbOl(X,alpha,theta); nk = n*w; -maxIter = 200; +maxIter = 50; llh = zeros(1,maxIter); for iter = 1:maxIter for i = randperm(n) @@ -34,7 +34,7 @@ llh(iter) = llh(iter)+sum(p-log(n)); k = discreteRnd(exp(p-logsumexp(p))); if k == numel(Theta)+1 % add extra cluster - Theta{k} = theta.clone.addSample(x); + Theta{k} = theta.clone().addSample(x); nk = [nk,1]; else Theta{k} = 
Theta{k}.addSample(x); diff --git a/chapter11/mixGaussSample.m b/chapter11/mixGaussSample.m new file mode 100644 index 0000000..44d9aa5 --- /dev/null +++ b/chapter11/mixGaussSample.m @@ -0,0 +1,18 @@ +function [X, z] = mixGaussSample(Theta, w, n ) +% Genarate samples form a Gaussian mixture model with GaussianWishart prior. +% Input: +% Theta: cell of GaussianWishart priors of components +% w: weight of components +% n: number of data +% Output: +% X: d x n data matrix +% z: 1 x n response variable +% Written by Mo Chen (sth4nth@gmail.com). +z = discreteRnd(w,n); +d = Theta{1}.dim(); +X = zeros(d,n); +for i = 1:numel(w) + idx = z==i; + [mu,Sigma] = Theta{i}.sample(); % invpd(wishrnd(W0,v0)); + X(:,idx) = gaussRnd(mu,Sigma,sum(idx)); +end diff --git a/demo/ch11/mixGaussGb_demo.m b/demo/ch11/mixGaussGb_demo.m index 326c71c..9f8154f 100644 --- a/demo/ch11/mixGaussGb_demo.m +++ b/demo/ch11/mixGaussGb_demo.m @@ -3,9 +3,14 @@ d = 2; k = 3; n = 500; -[X,label] = mixGaussRnd(d,k,n); -plotClass(X,label); +[X,z] = mixGaussRnd(d,k,n); +plotClass(X,z); -[y,model] = mixGaussGb(X); +[z,Theta,w,llh] = mixGaussGb(X); figure -plotClass(X,y); \ No newline at end of file +plotClass(X,z); + +[X,z] = mixGaussSample(Theta,w,n); +figure +plotClass(X,z); + From 5074a1122cf75804ce7178d2d7654a976a34653d Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 02:06:51 +0800 Subject: [PATCH 014/119] remove todo.txt --- TODO.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 TODO.txt diff --git a/TODO.txt b/TODO.txt deleted file mode 100644 index de2a55b..0000000 --- a/TODO.txt +++ /dev/null @@ -1 +0,0 @@ -ch05: MLP bias and gradient unit (2nd order) From 9e70e291a080b4465c32fadf4232209b409efb6e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 02:11:42 +0800 Subject: [PATCH 015/119] minor tweak logistic regression --- chapter04/logitBinPred.m | 2 +- chapter04/logitMnPred.m | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/chapter04/logitBinPred.m 
b/chapter04/logitBinPred.m index 01ee10b..14cbed9 100644 --- a/chapter04/logitBinPred.m +++ b/chapter04/logitBinPred.m @@ -9,6 +9,6 @@ % Written by Mo Chen (sth4nth@gmail.com). X = [X;ones(1,size(X,2))]; w = model.w; -p = exp(-log1pexp(-w'*X)); +p = sigmoid(w'*X); y = round(p); diff --git a/chapter04/logitMnPred.m b/chapter04/logitMnPred.m index 60010e0..f30db00 100644 --- a/chapter04/logitMnPred.m +++ b/chapter04/logitMnPred.m @@ -9,6 +9,5 @@ % Written by Mo Chen (sth4nth@gmail.com). W = model.W; X = [X; ones(1,size(X,2))]; -A = W'*X; -P = exp(bsxfun(@minus,A,logsumexp(A,1))); +P = softmax(W'*X); [~, y] = max(P,[],1); \ No newline at end of file From 4db85d6585df4072b04431b0772541049fb6b835 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 15:51:05 +0800 Subject: [PATCH 016/119] fix hmm demo --- chapter13/HMM/hmmEm.m | 2 +- demo/ch13/hmm_demo.m | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/chapter13/HMM/hmmEm.m b/chapter13/HMM/hmmEm.m index ef5829e..455106b 100644 --- a/chapter13/HMM/hmmEm.m +++ b/chapter13/HMM/hmmEm.m @@ -32,7 +32,7 @@ % M-step s = gamma(:,1); % 13.18 A = normalize(A.*(alpha(:,1:n-1)*(beta(:,2:n).*M(:,2:n)./c(2:n))'),2); % 13.19 13.43 13.65 - E = bsxfun(@times,gamma*X',1./sum(gamma,2)); % 13.23 + E = (gamma*X')./sum(gamma,2); % 13.23 end model.s = s; model.A = A; diff --git a/demo/ch13/hmm_demo.m b/demo/ch13/hmm_demo.m index 66e994f..59e68a5 100644 --- a/demo/ch13/hmm_demo.m +++ b/demo/ch13/hmm_demo.m @@ -3,6 +3,7 @@ d = 3; k = 2; n = 10000; +[x,model] = hmmRnd(d,k,n); %% Viterbi algorithm [z, llh] = hmmViterbi(model, x); %% HMM filter (forward algorithm) @@ -10,5 +11,5 @@ %% HMM smoother (forward backward) [gamma,alpha,beta,c] = hmmSmoother(model, x); %% Baum-Welch algorithm -[model, llh] = hmmEm(x,init); +[model, llh] = hmmEm(x,2); plot(llh) From 63b08a17d9c0a663c2eee106bfc69e33c4aedc20 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 15:51:50 +0800 Subject: [PATCH 017/119] fix hmm demo --- 
demo/ch13/hmm_demo.m | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/demo/ch13/hmm_demo.m b/demo/ch13/hmm_demo.m index 59e68a5..025f009 100644 --- a/demo/ch13/hmm_demo.m +++ b/demo/ch13/hmm_demo.m @@ -1,8 +1,5 @@ % demos for HMM in ch13 - -d = 3; -k = 2; -n = 10000; +d = 3; k = 2; n = 10000; [x,model] = hmmRnd(d,k,n); %% Viterbi algorithm [z, llh] = hmmViterbi(model, x); @@ -11,5 +8,5 @@ %% HMM smoother (forward backward) [gamma,alpha,beta,c] = hmmSmoother(model, x); %% Baum-Welch algorithm -[model, llh] = hmmEm(x,2); +[model, llh] = hmmEm(x,k); plot(llh) From df0f61ae34b78daf07282c36a1c74ecbb4d32140 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 10 Mar 2017 16:29:28 +0800 Subject: [PATCH 018/119] working on mlp --- chapter05/mlp.m | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/chapter05/mlp.m b/chapter05/mlp.m index 65c6467..294443f 100644 --- a/chapter05/mlp.m +++ b/chapter05/mlp.m @@ -1,5 +1,5 @@ -function [model, mse] = mlp(X, Y, h) -% Multilayer perceptron +function [model, mse] = mlp(X, Y, h, eta) +% Train a multilayer perceptron neural network % Input: % X: d x n data matrix % Y: p x n response matrix @@ -8,6 +8,9 @@ % model: model structure % mse: mean square error % Written by Mo Chen (sth4nth@gmail.com). 
+if nargin < 4 + eta = 1/size(X,2); +end h = [size(X,1);h(:);size(Y,1)]; L = numel(h); W = cell(L-1); @@ -16,8 +19,7 @@ end Z = cell(L); Z{1} = X; -eta = 1/size(X,2); -maxiter = 2000; +maxiter = 200; mse = zeros(1,maxiter); for iter = 1:maxiter % forward @@ -26,7 +28,7 @@ end % backward E = Y-Z{L}; - mse(iter) = mean(dot(E(:),E(:))); + mse(iter) = mean(E.*E); for l = L-1:-1:1 df = Z{l+1}.*(1-Z{l+1}); dG = df.*E; From 3979a238b0be614545e057a775849cba83d8ce10 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 01:17:42 +0800 Subject: [PATCH 019/119] minor fix --- chapter05/mlp.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter05/mlp.m b/chapter05/mlp.m index 294443f..e19105c 100644 --- a/chapter05/mlp.m +++ b/chapter05/mlp.m @@ -28,7 +28,7 @@ end % backward E = Y-Z{L}; - mse(iter) = mean(E.*E); + mse(iter) = mean(dot(E,E),1); for l = L-1:-1:1 df = Z{l+1}.*(1-Z{l+1}); dG = df.*E; From 3ca6be8655e249581e8f9774ec7f3a6dfc6e1f10 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 15:03:03 +0800 Subject: [PATCH 020/119] tweak kmeans --- chapter06/knKmeans.m | 16 +++++++--------- chapter09/kmeans.m | 20 +++++++++----------- chapter09/kmeansPred.m | 8 ++++---- demo/ch06/knLin_demo.m | 4 ++-- demo/ch09/kmeans_demo.m | 2 +- 5 files changed, 23 insertions(+), 27 deletions(-) diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 62d4c5c..2796aba 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -1,5 +1,5 @@ -function [label, energy, model] = knKmeans(X, init, kn) -% Perform kernel k-means clustering. +function [label, model, energy] = knKmeans(X, init, kn) +% Perform kernel kmeans clustering. 
% Input: % K: n x n kernel matrix % init: either number of clusters (k) or initial label (1xn) @@ -21,15 +21,13 @@ kn = @knGauss; end K = kn(X,X); -last = 0; +last = zeros(1,n); while any(label ~= last) - [u,~,label(:)] = unique(label); % remove empty clusters - k = numel(u); - E = sparse(label,1:n,1,k,n,n); - E = spdiags(1./sum(E,2),0,k,k)*E; + [~,~,last(:)] = unique(label); % remove empty clusters + E = sparse(last,1:n,1); + E = E./sum(E,2); T = E*K; - last = label; - [val, label] = max(bsxfun(@minus,T,diag(T*E')/2),[],1); + [val, label] = max(T-diag(T*E')/2,[],1); end energy = trace(K)-2*sum(val); if nargout == 3 diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m index f5175be..29a2e6b 100644 --- a/chapter09/kmeans.m +++ b/chapter09/kmeans.m @@ -1,5 +1,5 @@ -function [label, energy, model] = kmeans(X, init) -% Perform k-means clustering. +function [label, m, energy] = kmeans(X, init) +% Perform kmeans clustering. % Input: % X: d x n data matrix % init: k number of clusters or label (1 x n vector) @@ -9,20 +9,18 @@ % model: trained model structure % Written by Mo Chen (sth4nth@gmail.com). 
n = size(X,2); +idx = 1:n; +last = zeros(1,n); if numel(init)==1 k = init; label = ceil(k*rand(1,n)); elseif numel(init)==n label = init; end -last = 0; while any(label ~= last) - [u,~,label(:)] = unique(label); % remove empty clusters - k = numel(u); - E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix - m = X*(E*spdiags(1./sum(E,1)',0,k,k)); % compute centers - last = label; - [val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign labels + [~,~,last(:)] = unique(label); % remove empty clusters + E = sparse(idx,last,1); % transform label into indicator matrix + m = X*(E./sum(E,1)); % compute centers + [val,label] = min(dot(m,m,1)'/2-m'*X,[],1); % assign labels end -energy = dot(X(:),X(:))-2*sum(val); -model.means = m; \ No newline at end of file +energy = dot(X(:),X(:),1)+2*sum(val); \ No newline at end of file diff --git a/chapter09/kmeansPred.m b/chapter09/kmeansPred.m index fc71464..83dc633 100644 --- a/chapter09/kmeansPred.m +++ b/chapter09/kmeansPred.m @@ -1,11 +1,11 @@ -function [label, energy] = kmeansPred(model, Xt) +function [label, energy] = kmeansPred(m, X) % Prediction for kmeans clusterng % Input: -% model: trained model structure -% Xt: d x n testing data +% model: dx k cluster center matrix +% X: d x n testing data % Output: % label: 1 x n cluster label % energy: optimization target value % Written by Mo Chen (sth4nth@gmail.com). 
-[val,label] = min(sqdist(model.means, Xt)); +[val,label] = min(dot(X,X,1)+dot(m,m,1)'-2*m'*X,[],1); % assign labels energy = sum(val); \ No newline at end of file diff --git a/demo/ch06/knLin_demo.m b/demo/ch06/knLin_demo.m index 1073a74..9ae12b3 100644 --- a/demo/ch06/knLin_demo.m +++ b/demo/ch06/knLin_demo.m @@ -25,8 +25,8 @@ n = 500; [X,y] = kmeansRnd(d,k,n); init = ceil(k*rand(1,n)); -[y_kn,en_kn,model_kn] = knKmeans(X,init,@knLin); -[y_lin,en_lin,model_lin] = kmeans(X,init); +[y_kn,model_kn,en_kn] = knKmeans(X,init,@knLin); +[y_lin,model_lin,en_lin] = kmeans(X,init); idx = 1:2:n; Xt = X(:,idx); diff --git a/demo/ch09/kmeans_demo.m b/demo/ch09/kmeans_demo.m index 3083b94..4c22e94 100644 --- a/demo/ch09/kmeans_demo.m +++ b/demo/ch09/kmeans_demo.m @@ -3,7 +3,7 @@ k = 3; n = 5000; [X,label] = kmeansRnd(d,k,n); -y = kmeans(X,k); +y = litekmeans(X,k); plotClass(X,label); figure; plotClass(X,y); From e0593a1f6ba897e5dc38abb41ded6b62c407545c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 15:09:24 +0800 Subject: [PATCH 021/119] tweak kmedoids --- chapter09/kmedoids.m | 15 +++++++-------- demo/ch09/kmedoids_demo.m | 4 +++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/chapter09/kmedoids.m b/chapter09/kmedoids.m index e633a1f..74d269c 100644 --- a/chapter09/kmedoids.m +++ b/chapter09/kmedoids.m @@ -1,12 +1,12 @@ -function [label, energy, index] = kmedoids(X, init) +function [label, index, energy] = kmedoids0(X, init) % Perform k-medoids clustering. % Input: % X: d x n data matrix % init: k number of clusters or label (1 x n vector) % Output: % label: 1 x n cluster label -% energy: optimization target value % index: index of medoids +% energy: optimization target value % Written by Mo Chen (sth4nth@gmail.com). 
[d,n] = size(X); if numel(init)==1 @@ -15,15 +15,14 @@ elseif numel(init)==n label = init; end -X = bsxfun(@minus,X,mean(X,2)); % reduce chance of numerical problems +X = X-mean(X,2); % reduce chance of numerical problems v = dot(X,X,1); -D = bsxfun(@plus,v,v')-2*(X'*X); % Euclidean distance matrix +D = v+v'-2*(X'*X); % Euclidean distance matrix D(sub2ind([d,d],1:d,1:d)) = 0; % reduce chance of numerical problems -last = 0; +last = zeros(1,n); while any(label ~= last) - [u,~,label(:)] = unique(label); % remove empty clusters - [~, index] = min(D*sparse(1:n,label,1,n,numel(u),n),[],1); % find k medoids - last = label; + [~,~,last(:)] = unique(label); % remove empty clusters + [~, index] = min(D*sparse(1:n,last,1),[],1); % find k medoids [val, label] = min(D(index,:),[],1); % assign labels end energy = sum(val); diff --git a/demo/ch09/kmedoids_demo.m b/demo/ch09/kmedoids_demo.m index 90c764f..1f36b16 100644 --- a/demo/ch09/kmedoids_demo.m +++ b/demo/ch09/kmedoids_demo.m @@ -3,7 +3,9 @@ k = 3; n = 5000; [X,label] = kmeansRnd(d,k,n); -y = kmedoids(X,k); +init = ceil(k*rand(1,n)); +[y, idx, v] = kmedoids(X,init); plotClass(X,label); figure; plotClass(X,y); + From 634f7b66f95e55a3747c9eef0b3aef7ebd96fda8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 15:09:54 +0800 Subject: [PATCH 022/119] tweak kmedoids --- chapter09/kmedoids.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter09/kmedoids.m b/chapter09/kmedoids.m index 74d269c..7499905 100644 --- a/chapter09/kmedoids.m +++ b/chapter09/kmedoids.m @@ -1,4 +1,4 @@ -function [label, index, energy] = kmedoids0(X, init) +function [label, index, energy] = kmedoids(X, init) % Perform k-medoids clustering. 
% Input: % X: d x n data matrix From d854233615874f010766fd983cdb5570b1e84162 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 15:17:54 +0800 Subject: [PATCH 023/119] tweak knkmeans --- chapter06/knKmeans.m | 2 +- demo/ch06/knLin_demo.m | 78 +++++++++++++++++++++--------------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 2796aba..80b6d5a 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -27,7 +27,7 @@ E = sparse(last,1:n,1); E = E./sum(E,2); T = E*K; - [val, label] = max(T-diag(T*E')/2,[],1); + [val, label] = max(T-dot(T,E,2)/2,[],1); end energy = trace(K)-2*sum(val); if nargout == 3 diff --git a/demo/ch06/knLin_demo.m b/demo/ch06/knLin_demo.m index 9ae12b3..9259164 100644 --- a/demo/ch06/knLin_demo.m +++ b/demo/ch06/knLin_demo.m @@ -1,23 +1,23 @@ -%% Kernel regression with linear kernel is EQUIVALENT to linear regression -clear; close all; -n = 100; -x = linspace(0,2*pi,n); % test data -t = sin(x)+rand(1,n)/2; - -lambda = 1e-4; -model_kn = knReg(x,t,lambda,@knLin); -model_lin = linReg(x,t,lambda); - -idx = 1:2:n; -xt = x(:,idx); -tt = t(idx); - -[y_kn, sigma_kn,p_kn] = knRegPred(model_kn,xt,tt); -[y_lin, sigma_lin,p_lin] = linRegPred(model_lin,xt,tt); - -maxdiff(y_kn,y_lin) -maxdiff(sigma_kn,sigma_lin) -maxdiff(p_kn,p_lin) +% %% Kernel regression with linear kernel is EQUIVALENT to linear regression +% clear; close all; +% n = 100; +% x = linspace(0,2*pi,n); % test data +% t = sin(x)+rand(1,n)/2; +% +% lambda = 1e-4; +% model_kn = knReg(x,t,lambda,@knLin); +% model_lin = linReg(x,t,lambda); +% +% idx = 1:2:n; +% xt = x(:,idx); +% tt = t(idx); +% +% [y_kn, sigma_kn,p_kn] = knRegPred(model_kn,xt,tt); +% [y_lin, sigma_lin,p_lin] = linRegPred(model_lin,xt,tt); +% +% maxdiff(y_kn,y_lin) +% maxdiff(sigma_kn,sigma_lin) +% maxdiff(p_kn,p_lin) %% Kernel kmeans with linear kernel is EQUIVALENT to kmeans clear; close all; d = 2; @@ -40,22 +40,22 @@ maxdiff(t_kn,t_lin) 
maxdiff(ent_kn,ent_lin) %% Kernel PCA with linear kernel is EQUIVALENT TO PCA -clear; close all; -d = 10; -q = 2; -n = 500; -X = randn(d,n); - - -model_kn = knPca(X,q,@knLin); -idx = 1:2:n; -Xt = X(:,idx); - -Y_kn = knPcaPred(model_kn,Xt); - -[U,L,mu,mse] = pca(X,q); -Y_lin = U'*bsxfun(@minus,Xt,mu); % projection - - -R = Y_lin/Y_kn; % the results are equivalent up to a rotation. -maxdiff(R*R', eye(q)) +% clear; close all; +% d = 10; +% q = 2; +% n = 500; +% X = randn(d,n); +% +% +% model_kn = knPca(X,q,@knLin); +% idx = 1:2:n; +% Xt = X(:,idx); +% +% Y_kn = knPcaPred(model_kn,Xt); +% +% [U,L,mu,mse] = pca(X,q); +% Y_lin = U'*bsxfun(@minus,Xt,mu); % projection +% +% +% R = Y_lin/Y_kn; % the results are equivalent up to a rotation. +% maxdiff(R*R', eye(q)) From d051acda7364ad981093d917494c0b5152028b6e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 17:23:12 +0800 Subject: [PATCH 024/119] tweak sqdist --- common/sqdist.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/sqdist.m b/common/sqdist.m index 31668b5..e836d50 100644 --- a/common/sqdist.m +++ b/common/sqdist.m @@ -5,4 +5,4 @@ % Output: % D: n1 x n2 square Euclidean distance matrix % Written by Mo Chen (sth4nth@gmail.com). -D = bsxfun(@plus,dot(X2,X2,1),dot(X1,X1,1)')-2*(X1'*X2); +D = dot(X1,X1,1)'+dot(X2,X2,1)-2*(X1'*X2); From 57ec8a3352238a2fc35fadd22cab6a77922a5f3f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 17:46:49 +0800 Subject: [PATCH 025/119] tweak nb --- chapter08/nbBern.m | 9 ++++----- chapter08/nbBernPred.m | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/chapter08/nbBern.m b/chapter08/nbBern.m index 50a1726..51e9dee 100644 --- a/chapter08/nbBern.m +++ b/chapter08/nbBern.m @@ -6,12 +6,11 @@ % Output: % model: trained model structure % Written by Mo Chen (sth4nth@gmail.com). 
-k = max(t); n = size(X,2); -E = sparse(t,1:n,1,k,n,n); -nk = full(sum(E,2)); -w = nk/n; -mu = full(sparse(X)*E'*spdiags(1./nk,0,k,k)); +E = sparse(1:n,t,1); +nk = sum(E,1); +w = full(nk/n); +mu = X*(E./nk); model.mu = mu; % d x k means model.w = w; \ No newline at end of file diff --git a/chapter08/nbBernPred.m b/chapter08/nbBernPred.m index b7c5890..2f308df 100644 --- a/chapter08/nbBernPred.m +++ b/chapter08/nbBernPred.m @@ -10,6 +10,6 @@ w = model.w; X = sparse(X); R = log(mu)'*X+log(1-mu)'*(1-X); -R = bsxfun(@plus,R,log(w)); +R = bsxfun(@plus,R,log(w(:))); [~,y] = max(R,[],1); From 467b74c5a8f5c819136e4368354deb9e2fa246ef Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 17:47:45 +0800 Subject: [PATCH 026/119] tweak nb --- chapter08/nbBern.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter08/nbBern.m b/chapter08/nbBern.m index 51e9dee..f260b34 100644 --- a/chapter08/nbBern.m +++ b/chapter08/nbBern.m @@ -9,7 +9,7 @@ n = size(X,2); E = sparse(1:n,t,1); nk = sum(E,1); -w = full(nk/n); +w = full(nk)/n; mu = X*(E./nk); model.mu = mu; % d x k means From 0c32f995d30d68a3f19f29b426b7234f5cab0af8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 18:05:00 +0800 Subject: [PATCH 027/119] tweak mixBernEM --- chapter09/mixBernEm.m | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/chapter09/mixBernEm.m b/chapter09/mixBernEm.m index 65612e4..4c1f36d 100644 --- a/chapter09/mixBernEm.m +++ b/chapter09/mixBernEm.m @@ -13,7 +13,7 @@ X = sparse(X); n = size(X,2); label = ceil(k*rand(1,n)); % random initialization -R = sparse(label,1:n,1,k,n,n); +R = full(sparse(1:n,label,1)); tol = 1e-8; maxiter = 500; llh = -inf(1,maxiter); @@ -22,23 +22,20 @@ [R, llh(iter)] = expectation(X,model); if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter)); break; end; end -[~,label(:)] = max(R,[],1); +[~,label(:)] = max(R,[],2); llh = llh(2:iter); function [R, llh] = expectation(X, model) mu = model.mu; w = model.w; 
-n = size(X,2); -R = full(log(mu)'*X+log(1-mu)'*(1-X)); -R = bsxfun(@plus,R,log(w)); -T = logsumexp(R,1); -llh = sum(T)/n; % loglikelihood -R = exp(bsxfun(@minus,R,T)); +R = X'*log(mu)+(1-X)'*log(1-mu)+log(w); +T = logsumexp(R,2); +llh = mean(T); % loglikelihood +R = exp(R-T); function model = maximization(X, R) -n = size(R,2); -nk = full(sum(R,2)); -w = nk/n; -mu = bsxfun(@times,full(X*R'),1./nk'); +nk = sum(R,1); +w = nk/sum(nk); +mu = (X*R)./nk; model.mu = mu; model.w = w; \ No newline at end of file From 9465a1f4998ac993529c1fbf264516f82239afab Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 18:07:57 +0800 Subject: [PATCH 028/119] tweak nbBernPred --- chapter08/nbBernPred.m | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/chapter08/nbBernPred.m b/chapter08/nbBernPred.m index 2f308df..525a890 100644 --- a/chapter08/nbBernPred.m +++ b/chapter08/nbBernPred.m @@ -8,8 +8,5 @@ % Written by Mo Chen (sth4nth@gmail.com). mu = model.mu; w = model.w; -X = sparse(X); -R = log(mu)'*X+log(1-mu)'*(1-X); -R = bsxfun(@plus,R,log(w(:))); -[~,y] = max(R,[],1); +[~,y] = max(log(mu)'*X+log(1-mu)'*(1-X)+log(w(:)),[],1); From be1bbc32f311956b9295e3c05d51b1e84345027e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 18:19:36 +0800 Subject: [PATCH 029/119] tweak logGauss --- chapter02/logGauss.m | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/chapter02/logGauss.m b/chapter02/logGauss.m index edefd89..912e1c2 100644 --- a/chapter02/logGauss.m +++ b/chapter02/logGauss.m @@ -7,21 +7,13 @@ % Output: % y: 1 x n probability density in logrithm scale y=log p(x) % Written by Mo Chen (sth4nth@gmail.com). 
-[d,k] = size(mu); -if all(size(sigma)==d) && k==1 % one mu and one dxd sigma - X = bsxfun(@minus,X,mu); - [R,p]= chol(sigma); - if p ~= 0 - error('ERROR: sigma is not PD.'); - end - Q = R'\X; - q = dot(Q,Q,1); % quadratic term (M distance) - c = d*log(2*pi)+2*sum(log(diag(R))); % normalization constant - y = -0.5*(c+q); -elseif size(sigma,1)==1 && size(sigma,2)==size(mu,2) % k mu and (k or one) scalar sigma - X2 = repmat(dot(X,X,1)',1,k); - D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1)); - q = bsxfun(@times,D,1./sigma); % M distance - c = d*(log(2*pi)+2*log(sigma)); % normalization constant - y = -0.5*bsxfun(@plus,q,c); -end \ No newline at end of file +d = size(X,1); +X = X-mu; +[U,p]= chol(sigma); +if p ~= 0 + error('ERROR: sigma is not PD.'); +end +Q = U'\X; +q = dot(Q,Q,1); % quadratic term (M distance) +c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant +y = -(c+q)/2; From 7c97784fe48b055841ccc0d493e2337885ae97bb Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 18:20:53 +0800 Subject: [PATCH 030/119] tweak logKde --- chapter02/logKde.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter02/logKde.m b/chapter02/logKde.m index 1a869bd..98c6079 100644 --- a/chapter02/logKde.m +++ b/chapter02/logKde.m @@ -6,5 +6,5 @@ % Output: % z: probability density in logrithm scale z=log p(x|y) % Written by Mo Chen (sth4nth@gmail.com). 
-D = bsxfun(@plus,full(dot(X,X,1)),full(dot(Y,Y,1))')-full(2*(Y'*X)); +D = dot(X,X,1)+dot(Y,Y,1)'-2*(Y'*X); z = logsumexp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2),1); From 60f5c5ec2afbec8ff6be0bce5f9f5b53c0f1098a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 22:12:48 +0800 Subject: [PATCH 031/119] tweak gson --- chapter02/logMvGamma.m | 6 +++--- common/gson.m | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chapter02/logMvGamma.m b/chapter02/logMvGamma.m index fd27c5e..d1ee2b2 100644 --- a/chapter02/logMvGamma.m +++ b/chapter02/logMvGamma.m @@ -9,8 +9,8 @@ % Output: % y: m x n logarithm multivariate Gamma % Written by Michael Chen (sth4nth@gmail.com). -s = size(x); -x = reshape(x,1,prod(s)); +sz = size(x); +x = reshape(x,1,prod(sz)); x = bsxfun(@plus,repmat(x,d,1),(1-(1:d)')/2); y = d*(d-1)/4*log(pi)+sum(gammaln(x),1); -y = reshape(y,s); \ No newline at end of file +y = reshape(y,sz); \ No newline at end of file diff --git a/common/gson.m b/common/gson.m index 1fcdda4..66e4c46 100644 --- a/common/gson.m +++ b/common/gson.m @@ -4,10 +4,10 @@ [d,n] = size(X); m = min(d,n); R = zeros(m,n); -Q = zeros(d,m); +Q = zeros(d,0); for i = 1:m - R(1:i-1,i) = Q(:,1:i-1)'*X(:,i); - v = X(:,i)-Q(:,1:i-1)*R(1:i-1,i); + R(1:i-1,i) = Q'*X(:,i); + v = X(:,i)-Q*R(1:i-1,i); R(i,i) = norm(v); Q(:,i) = v/R(i,i); end From 3294bebfb0e6bd63480539805cef1d59d37315fa Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 22:26:49 +0800 Subject: [PATCH 032/119] remove gsog and mgsog --- common/gsog.m | 14 -------------- common/mgsog.m | 18 ------------------ 2 files changed, 32 deletions(-) delete mode 100644 common/gsog.m delete mode 100644 common/mgsog.m diff --git a/common/gsog.m b/common/gsog.m deleted file mode 100644 index 3a2b5b5..0000000 --- a/common/gsog.m +++ /dev/null @@ -1,14 +0,0 @@ -function [Q, R] = gsog(X) -% Gram-Schmidt orthogonalization -% Written by Mo Chen (sth4nth@gmail.com). 
-[d,n] = size(X); -m = min(d,n); -R = eye(m,n); -Q = zeros(d,m); -D = zeros(1,m); -for i = 1:m - R(1:i-1,i) = bsxfun(@times,Q(:,1:i-1),1./D(1:i-1))'*X(:,i); - Q(:,i) = X(:,i)-Q(:,1:i-1)*R(1:i-1,i); - D(i) = dot(Q(:,i),Q(:,i)); -end -R(:,m+1:n) = bsxfun(@times,Q,1./D)'*X(:,m+1:n); \ No newline at end of file diff --git a/common/mgsog.m b/common/mgsog.m deleted file mode 100644 index 003ce87..0000000 --- a/common/mgsog.m +++ /dev/null @@ -1,18 +0,0 @@ -function [Q, R] = mgsog(X) -% Modified Gram-Schmidt orthogonalization -% Written by Mo Chen (sth4nth@gmail.com). -[d,n] = size(X); -m = min(d,n); -R = eye(m,n); -Q = zeros(d,m); -D = zeros(1,m); -for i = 1:m - v = X(:,i); - for j = 1:i-1 - R(j,i) = Q(:,j)'*v/D(j); - v = v-R(j,i)*Q(:,j); - end - Q(:,i) = v; - D(i) = dot(Q(:,i),Q(:,i)); -end -R(:,m+1:n) = bsxfun(@times,Q,1./D)'*X(:,m+1:n); \ No newline at end of file From d389cfadf7e0ab8dfda82fdbb9d4e0c767bb1c9f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 22:48:37 +0800 Subject: [PATCH 033/119] tweak logMvGamma --- chapter02/logMvGamma.m | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/chapter02/logMvGamma.m b/chapter02/logMvGamma.m index d1ee2b2..d33ed47 100644 --- a/chapter02/logMvGamma.m +++ b/chapter02/logMvGamma.m @@ -1,4 +1,4 @@ -function y = logMvGamma(x,d) +function y = logMvGamma(x, d) % Compute logarithm multivariate Gamma function % which is used in the probability density function of the Wishart and inverse Wishart distributions. % Gamma_d(x) = pi^(d(d-1)/4) \prod_(j=1)^d Gamma(x+(1-j)/2) @@ -9,8 +9,5 @@ % Output: % y: m x n logarithm multivariate Gamma % Written by Michael Chen (sth4nth@gmail.com). 
-sz = size(x); -x = reshape(x,1,prod(sz)); -x = bsxfun(@plus,repmat(x,d,1),(1-(1:d)')/2); -y = d*(d-1)/4*log(pi)+sum(gammaln(x),1); -y = reshape(y,sz); \ No newline at end of file +y = d*(d-1)/4*log(pi)+sum(gammaln(x(:)+(1-(1:d))/2),2); +y = reshape(y,size(x)); \ No newline at end of file From a88a79dc2aad1f4bbdb6b35529d27a52adc419e7 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 22:54:24 +0800 Subject: [PATCH 034/119] fix rvmBinPred --- chapter07/rvmBinPred.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter07/rvmBinPred.m b/chapter07/rvmBinPred.m index 2b00a0f..1bb97f5 100644 --- a/chapter07/rvmBinPred.m +++ b/chapter07/rvmBinPred.m @@ -11,5 +11,5 @@ X = [X;ones(1,size(X,2))]; X = X(index,:); w = model.w; -p = exp(-log1pexp(w'*X)); +p = sigmoid(w'*X); y = round(p); From b208345359ba16972361cceb46afb4101699b458 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 23:10:53 +0800 Subject: [PATCH 035/119] roll back logGauss.m --- chapter02/logGauss.m | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/chapter02/logGauss.m b/chapter02/logGauss.m index 912e1c2..edefd89 100644 --- a/chapter02/logGauss.m +++ b/chapter02/logGauss.m @@ -7,13 +7,21 @@ % Output: % y: 1 x n probability density in logrithm scale y=log p(x) % Written by Mo Chen (sth4nth@gmail.com). 
-d = size(X,1); -X = X-mu; -[U,p]= chol(sigma); -if p ~= 0 - error('ERROR: sigma is not PD.'); -end -Q = U'\X; -q = dot(Q,Q,1); % quadratic term (M distance) -c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant -y = -(c+q)/2; +[d,k] = size(mu); +if all(size(sigma)==d) && k==1 % one mu and one dxd sigma + X = bsxfun(@minus,X,mu); + [R,p]= chol(sigma); + if p ~= 0 + error('ERROR: sigma is not PD.'); + end + Q = R'\X; + q = dot(Q,Q,1); % quadratic term (M distance) + c = d*log(2*pi)+2*sum(log(diag(R))); % normalization constant + y = -0.5*(c+q); +elseif size(sigma,1)==1 && size(sigma,2)==size(mu,2) % k mu and (k or one) scalar sigma + X2 = repmat(dot(X,X,1)',1,k); + D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1)); + q = bsxfun(@times,D,1./sigma); % M distance + c = d*(log(2*pi)+2*log(sigma)); % normalization constant + y = -0.5*bsxfun(@plus,q,c); +end \ No newline at end of file From 38c3f9b6a35b3293a70487f7183c7a87f38b64b2 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 11 Mar 2017 23:12:18 +0800 Subject: [PATCH 036/119] tweak knKmeansPred --- chapter06/knKmeansPred.m | 4 +-- demo/ch06/knLin_demo.m | 78 ++++++++++++++++++++-------------------- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/chapter06/knKmeansPred.m b/chapter06/knKmeansPred.m index b4ae474..30dc653 100644 --- a/chapter06/knKmeansPred.m +++ b/chapter06/knKmeansPred.m @@ -14,7 +14,7 @@ n = size(X,2); k = max(t); E = sparse(t,1:n,1,k,n,n); -E = bsxfun(@times,E,1./sum(E,2)); -Z = bsxfun(@minus,E*kn(X,Xt),diag(E*kn(X,X)*E')/2); +E = E./sum(E,2); +Z = E*kn(X,Xt)-dot(E*kn(X,X),E,2)/2; [val, label] = max(Z,[],1); energy = sum(kn(Xt))-2*sum(val); diff --git a/demo/ch06/knLin_demo.m b/demo/ch06/knLin_demo.m index 9259164..9ae12b3 100644 --- a/demo/ch06/knLin_demo.m +++ b/demo/ch06/knLin_demo.m @@ -1,23 +1,23 @@ -% %% Kernel regression with linear kernel is EQUIVALENT to linear regression -% clear; close all; -% n = 100; -% x = linspace(0,2*pi,n); % test data -% t = sin(x)+rand(1,n)/2; -% 
-% lambda = 1e-4; -% model_kn = knReg(x,t,lambda,@knLin); -% model_lin = linReg(x,t,lambda); -% -% idx = 1:2:n; -% xt = x(:,idx); -% tt = t(idx); -% -% [y_kn, sigma_kn,p_kn] = knRegPred(model_kn,xt,tt); -% [y_lin, sigma_lin,p_lin] = linRegPred(model_lin,xt,tt); -% -% maxdiff(y_kn,y_lin) -% maxdiff(sigma_kn,sigma_lin) -% maxdiff(p_kn,p_lin) +%% Kernel regression with linear kernel is EQUIVALENT to linear regression +clear; close all; +n = 100; +x = linspace(0,2*pi,n); % test data +t = sin(x)+rand(1,n)/2; + +lambda = 1e-4; +model_kn = knReg(x,t,lambda,@knLin); +model_lin = linReg(x,t,lambda); + +idx = 1:2:n; +xt = x(:,idx); +tt = t(idx); + +[y_kn, sigma_kn,p_kn] = knRegPred(model_kn,xt,tt); +[y_lin, sigma_lin,p_lin] = linRegPred(model_lin,xt,tt); + +maxdiff(y_kn,y_lin) +maxdiff(sigma_kn,sigma_lin) +maxdiff(p_kn,p_lin) %% Kernel kmeans with linear kernel is EQUIVALENT to kmeans clear; close all; d = 2; @@ -40,22 +40,22 @@ maxdiff(t_kn,t_lin) maxdiff(ent_kn,ent_lin) %% Kernel PCA with linear kernel is EQUIVALENT TO PCA -% clear; close all; -% d = 10; -% q = 2; -% n = 500; -% X = randn(d,n); -% -% -% model_kn = knPca(X,q,@knLin); -% idx = 1:2:n; -% Xt = X(:,idx); -% -% Y_kn = knPcaPred(model_kn,Xt); -% -% [U,L,mu,mse] = pca(X,q); -% Y_lin = U'*bsxfun(@minus,Xt,mu); % projection -% -% -% R = Y_lin/Y_kn; % the results are equivalent up to a rotation. -% maxdiff(R*R', eye(q)) +clear; close all; +d = 10; +q = 2; +n = 500; +X = randn(d,n); + + +model_kn = knPca(X,q,@knLin); +idx = 1:2:n; +Xt = X(:,idx); + +Y_kn = knPcaPred(model_kn,Xt); + +[U,L,mu,mse] = pca(X,q); +Y_lin = U'*bsxfun(@minus,Xt,mu); % projection + + +R = Y_lin/Y_kn; % the results are equivalent up to a rotation. 
+maxdiff(R*R', eye(q)) From 23dc01d6a34dbe3f3d2d25599183744a6f328b5f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 12 Mar 2017 00:04:56 +0800 Subject: [PATCH 037/119] fix linRegPred knRegPred --- chapter02/logGauss.m | 28 ++++++++++------------------ chapter03/linRegPred.m | 3 +-- chapter06/knRegPred.m | 2 +- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/chapter02/logGauss.m b/chapter02/logGauss.m index edefd89..912e1c2 100644 --- a/chapter02/logGauss.m +++ b/chapter02/logGauss.m @@ -7,21 +7,13 @@ % Output: % y: 1 x n probability density in logrithm scale y=log p(x) % Written by Mo Chen (sth4nth@gmail.com). -[d,k] = size(mu); -if all(size(sigma)==d) && k==1 % one mu and one dxd sigma - X = bsxfun(@minus,X,mu); - [R,p]= chol(sigma); - if p ~= 0 - error('ERROR: sigma is not PD.'); - end - Q = R'\X; - q = dot(Q,Q,1); % quadratic term (M distance) - c = d*log(2*pi)+2*sum(log(diag(R))); % normalization constant - y = -0.5*(c+q); -elseif size(sigma,1)==1 && size(sigma,2)==size(mu,2) % k mu and (k or one) scalar sigma - X2 = repmat(dot(X,X,1)',1,k); - D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1)); - q = bsxfun(@times,D,1./sigma); % M distance - c = d*(log(2*pi)+2*log(sigma)); % normalization constant - y = -0.5*bsxfun(@plus,q,c); -end \ No newline at end of file +d = size(X,1); +X = X-mu; +[U,p]= chol(sigma); +if p ~= 0 + error('ERROR: sigma is not PD.'); +end +Q = U'\X; +q = dot(Q,Q,1); % quadratic term (M distance) +c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant +y = -(c+q)/2; diff --git a/chapter03/linRegPred.m b/chapter03/linRegPred.m index 0926ff0..9ddf650 100644 --- a/chapter03/linRegPred.m +++ b/chapter03/linRegPred.m @@ -26,7 +26,6 @@ end if nargin == 3 && nargout == 3 - p = exp(logGauss(t,y,sigma)); -% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); + p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); end diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m index 8e89ad9..6910657 100755 --- 
a/chapter06/knRegPred.m +++ b/chapter06/knRegPred.m @@ -25,5 +25,5 @@ end if nargin == 3 && nargout == 3 - p = exp(logGauss(t,y,sigma)); + p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); end \ No newline at end of file From 81c2796be32f88405e9bf1f8c9b6913d450d25c3 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 12 Mar 2017 00:10:11 +0800 Subject: [PATCH 038/119] fix rvmRegPred --- chapter07/rvmRegPred.m | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/chapter07/rvmRegPred.m b/chapter07/rvmRegPred.m index 24ab3e8..8c80bd0 100644 --- a/chapter07/rvmRegPred.m +++ b/chapter07/rvmRegPred.m @@ -25,6 +25,5 @@ end if nargin == 3 && nargout == 3 - p = exp(logGauss(t,y,sigma)); -% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); + p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); end From 3f04958673dc596597a5eaa3627c0590ead86986 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 12 Mar 2017 00:45:40 +0800 Subject: [PATCH 039/119] Update README.md --- README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a8c7b35..a272327 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,11 @@ Description ------- The design goal of the code are as follows: -1. Clean: Code is very succinct. There are little nasty guarding code that distracts readers' attention. As a result, the core of the algorithms can be easily spot. -2. Efficient: Many tricks for making Matlab scripts efficient were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementation. Usually, functions in this package are orders faster than Matlab builtin functions which provide the same functionality (eg. kmeans). If anyone found any Matlab implementation that is faster than mine, I am happy to further optimize. +1. Succinct: Code is extremely terse. Minimizing the number of line of code is one of the primal target. As a result, the core of the algorithms can be easily spot. 
+2. Efficient: Many tricks for making Matlab scripts fast were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementation. Usually, functions in this package are orders faster than Matlab builtin functions which provide the same functionality (eg. kmeans). If anyone found any Matlab implementation that is faster than mine, I am happy to further optimize. 3. Robust: Many numerical stability techniques are applied, such as probability computation in log scale to avoid numerical underflow and overflow, square root form update of symmetric matrix, etc. -4. Easy to learn: The code is heavily commented. Reference formulas in PRML book are indicated for corresponding code lines. -5. Practical: The package is designed not only to be easily read, but also to be easily used to facilitate ML research. Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). - +4. Easy to learn: The code is heavily commented. Reference formulas in PRML book are indicated for corresponding code lines. Symbols are in sync with the book. +5. Practical: The package is designed not only to be easily read, but also to be easily used to facilitate ML research. Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). Installation ------- @@ -22,7 +21,11 @@ Installation 2. Run Matlab and navigate to package location as working directory, then run the init.m script. -3. Run some demos in the demo directory. Enjoy! +3. Run some demos in the your_location/demo directory. Enjoy! + +FeedBack +------- +If you found any bugs or have any suggestion, please do fire issues. I am graceful for any feedback and do my best to improve this package. 
License ------- From 444f001c9fea6307513b22974dd3e9b7c0944104 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 12 Mar 2017 00:50:57 +0800 Subject: [PATCH 040/119] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a272327..5f53e50 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Introduction This package is a Matlab implementation of the algorithms described in the classical machine learning textbook: Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.microsoft.com/en-us/um/people/cmbishop/prml/)). -Note: this package requires Matlab R2016b or later, since it utilizes a new syntax of Matlab. +Note: this package requires Matlab R2016b or later, since it utilizes a new syntax of Matlab called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting in Python). Description ------- @@ -25,7 +25,7 @@ Installation FeedBack ------- -If you found any bugs or have any suggestion, please do fire issues. I am graceful for any feedback and do my best to improve this package. +If you found any bug or have any suggestion, please do fire issues. I am graceful for any feedback and will do my best to improve this package. 
License ------- From e31258d89e81a875acc85cb228ab76ee72bba690 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 13 Mar 2017 01:40:02 +0800 Subject: [PATCH 041/119] tweak kmeans to the extreme --- chapter09/kmeans.m | 9 ++++----- chapter09/kmeansPred.m | 4 ++-- chapter09/litekmeans.m | 17 +++++++++++++++++ common/normalize.m | 2 +- 4 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 chapter09/litekmeans.m diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m index 29a2e6b..6e9caa1 100644 --- a/chapter09/kmeans.m +++ b/chapter09/kmeans.m @@ -1,12 +1,12 @@ -function [label, m, energy] = kmeans(X, init) +function [label, mu, energy] = kmeans(X, init) % Perform kmeans clustering. % Input: % X: d x n data matrix % init: k number of clusters or label (1 x n vector) % Output: % label: 1 x n cluster label +% mu: d x k center of clusters % energy: optimization target value -% model: trained model structure % Written by Mo Chen (sth4nth@gmail.com). n = size(X,2); idx = 1:n; @@ -19,8 +19,7 @@ end while any(label ~= last) [~,~,last(:)] = unique(label); % remove empty clusters - E = sparse(idx,last,1); % transform label into indicator matrix - m = X*(E./sum(E,1)); % compute centers - [val,label] = min(dot(m,m,1)'/2-m'*X,[],1); % assign labels + mu = X*normalize(sparse(idx,last,1),1); % compute centers + [val,label] = min(dot(mu,mu,1)'/2-mu'*X,[],1); % assign labels end energy = dot(X(:),X(:),1)+2*sum(val); \ No newline at end of file diff --git a/chapter09/kmeansPred.m b/chapter09/kmeansPred.m index 83dc633..7ed1278 100644 --- a/chapter09/kmeansPred.m +++ b/chapter09/kmeansPred.m @@ -1,4 +1,4 @@ -function [label, energy] = kmeansPred(m, X) +function [label, energy] = kmeansPred(mu, X) % Prediction for kmeans clusterng % Input: % model: dx k cluster center matrix @@ -7,5 +7,5 @@ % label: 1 x n cluster label % energy: optimization target value % Written by Mo Chen (sth4nth@gmail.com). 
-[val,label] = min(dot(X,X,1)+dot(m,m,1)'-2*m'*X,[],1); % assign labels +[val,label] = min(dot(X,X,1)+dot(mu,mu,1)'-2*mu'*X,[],1); % assign labels energy = sum(val); \ No newline at end of file diff --git a/chapter09/litekmeans.m b/chapter09/litekmeans.m new file mode 100644 index 0000000..9bf7b37 --- /dev/null +++ b/chapter09/litekmeans.m @@ -0,0 +1,17 @@ +function [label, mu] = litekmeans(X, k) +n = size(X,2); +last = zeros(1,n); +label = ceil(k*rand(1,n)); +while any(label ~= last) + [~,~,last(:)] = unique(label); % remove empty clusters + mu = X*normalize(sparse(1:n,last,1),1); % compute cluster centers + [~,label] = min(dot(mu,mu,1)'/2-mu'*X,[],1); % assign sample labels +end +% Perform kmeans clustering. +% Input: +% X: d x n data matrix +% k: number of clusters +% Output: +% label: 1 x n cluster label +% mu: d x k center of clusters +% Written by Mo Chen (sth4nth@gmail.com). \ No newline at end of file diff --git a/common/normalize.m b/common/normalize.m index c7ae7a1..9bca004 100644 --- a/common/normalize.m +++ b/common/normalize.m @@ -8,4 +8,4 @@ if isempty(dim), dim = 1; end end s = sum(X,dim); -Y = bsxfun(@times,X,1./s); \ No newline at end of file +Y = X./s; \ No newline at end of file From 3a9055dfbdb5e3ee78c4caea94da0deeacfcf041 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 13 Mar 2017 01:57:54 +0800 Subject: [PATCH 042/119] tweak kmeans to the extreme --- chapter09/litekmeans.m | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/chapter09/litekmeans.m b/chapter09/litekmeans.m index 9bf7b37..03afa99 100644 --- a/chapter09/litekmeans.m +++ b/chapter09/litekmeans.m @@ -1,17 +1,16 @@ -function [label, mu] = litekmeans(X, k) -n = size(X,2); -last = zeros(1,n); -label = ceil(k*rand(1,n)); +function [label, mu] = litekmeans(X, label) +idx = 1:size(X,2); +last = idx; while any(label ~= last) [~,~,last(:)] = unique(label); % remove empty clusters - mu = X*normalize(sparse(1:n,last,1),1); % compute cluster centers + mu = 
X*normalize(sparse(idx,last,1),1); % compute cluster centers [~,label] = min(dot(mu,mu,1)'/2-mu'*X,[],1); % assign sample labels end % Perform kmeans clustering. % Input: % X: d x n data matrix -% k: number of clusters +% label: initial sample labels % Output: -% label: 1 x n cluster label +% label: 1 x n sample label % mu: d x k center of clusters % Written by Mo Chen (sth4nth@gmail.com). \ No newline at end of file From ef94fc35ca6d18e35685a9d5e9b227e25da9414f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 13 Mar 2017 18:55:57 +0800 Subject: [PATCH 043/119] doc update --- chapter06/knKmeans.m | 4 ++-- chapter09/kmeans.m | 2 +- chapter09/kmedoids.m | 2 +- chapter09/litekmeans.m | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 80b6d5a..49c6c15 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -4,9 +4,9 @@ % K: n x n kernel matrix % init: either number of clusters (k) or initial label (1xn) % Output: -% label: 1 x n clustering result label -% energy: optimization target value +% label: 1 x n sample labels % model: trained model structure +% energy: optimization target value % Reference: Kernel Methods for Pattern Analysis % by John Shawe-Taylor, Nello Cristianini % Written by Mo Chen (sth4nth@gmail.com). diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m index 6e9caa1..eb154b3 100644 --- a/chapter09/kmeans.m +++ b/chapter09/kmeans.m @@ -4,7 +4,7 @@ % X: d x n data matrix % init: k number of clusters or label (1 x n vector) % Output: -% label: 1 x n cluster label +% label: 1 x n sample labels % mu: d x k center of clusters % energy: optimization target value % Written by Mo Chen (sth4nth@gmail.com). 
diff --git a/chapter09/kmedoids.m b/chapter09/kmedoids.m index 7499905..ff94a60 100644 --- a/chapter09/kmedoids.m +++ b/chapter09/kmedoids.m @@ -4,7 +4,7 @@ % X: d x n data matrix % init: k number of clusters or label (1 x n vector) % Output: -% label: 1 x n cluster label +% label: 1 x n sample labels % index: index of medoids % energy: optimization target value % Written by Mo Chen (sth4nth@gmail.com). diff --git a/chapter09/litekmeans.m b/chapter09/litekmeans.m index 03afa99..8d68434 100644 --- a/chapter09/litekmeans.m +++ b/chapter09/litekmeans.m @@ -11,6 +11,6 @@ % X: d x n data matrix % label: initial sample labels % Output: -% label: 1 x n sample label +% label: 1 x n sample labels % mu: d x k center of clusters % Written by Mo Chen (sth4nth@gmail.com). \ No newline at end of file From 3cc9d1396568679979794949c1970010ef08cd6a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 13 Mar 2017 18:56:29 +0800 Subject: [PATCH 044/119] kmeanspp tbd --- chapter09/kmeanspp.m | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 chapter09/kmeanspp.m diff --git a/chapter09/kmeanspp.m b/chapter09/kmeanspp.m new file mode 100644 index 0000000..c590175 --- /dev/null +++ b/chapter09/kmeanspp.m @@ -0,0 +1,45 @@ +function [label, mu, energy] = kmeanspp(X, k) +% Perform kmeans clustering. +% Input: +% X: d x n data matrix +% k: number of clusters +% Output: +% label: 1 x n sample labels +% mu: d x k center of clusters +% energy: optimization target value +% Written by Mo Chen (sth4nth@gmail.com). 
+[label, mu, energy] = kmeans(X, kseeds(X,k)); + +% TBD: label and energy +function [label, mu, energy] = kseeds(X, k) +% kmeans++ seeding +[d,n] = size(X); +v = inf(1,n); +mu = zeros(d,k); +mu(:,1) = X(:,ceil(n*rand)); +label = zeros(1,n); +for i = 2:k + X0 = X-mu(:,i-1); + [v,label] = min(v,dot(X0,X0,1)); + mu(:,i) = X(:,randp(v)); +end +energy = sum(v); + +% Done +function idx = randp(p) +% sample one of k by probability +p = cumsum(p); +p = p/p(end); +idx = find(rand Date: Mon, 13 Mar 2017 22:47:08 +0800 Subject: [PATCH 045/119] reorgnize. I'm done with kmeans --- chapter09/kmeans.m | 32 +++++++++++++++++------------ chapter09/kmeanspp.m | 45 ----------------------------------------- chapter09/kseeds.m | 17 ++++++++++++++++ chapter09/litekmeans.m | 16 --------------- common/randp.m | 3 +++ demo/ch09/kmeans_demo.m | 22 +++++++++++++++++++- 6 files changed, 60 insertions(+), 75 deletions(-) delete mode 100644 chapter09/kmeanspp.m create mode 100644 chapter09/kseeds.m delete mode 100644 chapter09/litekmeans.m create mode 100644 common/randp.m diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m index eb154b3..74fd82c 100644 --- a/chapter09/kmeans.m +++ b/chapter09/kmeans.m @@ -1,25 +1,31 @@ -function [label, mu, energy] = kmeans(X, init) +function [label, mu, energy] = kmeans(X, m) % Perform kmeans clustering. % Input: % X: d x n data matrix -% init: k number of clusters or label (1 x n vector) +% m: initialization parameter % Output: % label: 1 x n sample labels % mu: d x k center of clusters % energy: optimization target value % Written by Mo Chen (sth4nth@gmail.com). 
-n = size(X,2); +label = init(X, m); +n = numel(label); idx = 1:n; last = zeros(1,n); -if numel(init)==1 - k = init; - label = ceil(k*rand(1,n)); -elseif numel(init)==n - label = init; -end while any(label ~= last) - [~,~,last(:)] = unique(label); % remove empty clusters - mu = X*normalize(sparse(idx,last,1),1); % compute centers - [val,label] = min(dot(mu,mu,1)'/2-mu'*X,[],1); % assign labels + [~,~,last(:)] = unique(label); % remove empty clusters + mu = X*normalize(sparse(idx,last,1),1); % compute cluster centers + [val,label] = min(dot(mu,mu,1)'/2-mu'*X,[],1); % assign sample labels end -energy = dot(X(:),X(:),1)+2*sum(val); \ No newline at end of file +energy = dot(X(:),X(:),1)+2*sum(val); + +function label = init(X, m) +[d,n] = size(X); +if numel(m) == 1 % random initialization + mu = X(:,randperm(n,m)); + [~,label] = min(dot(mu,mu,1)'/2-mu'*X,[],1); +elseif all(size(m) == [1,n]) % init with labels + label = m; +elseif size(m,1) == d % init with seeds (centers) + [~,label] = min(dot(m,m,1)'/2-m'*X,[],1); +end \ No newline at end of file diff --git a/chapter09/kmeanspp.m b/chapter09/kmeanspp.m deleted file mode 100644 index c590175..0000000 --- a/chapter09/kmeanspp.m +++ /dev/null @@ -1,45 +0,0 @@ -function [label, mu, energy] = kmeanspp(X, k) -% Perform kmeans clustering. -% Input: -% X: d x n data matrix -% k: number of clusters -% Output: -% label: 1 x n sample labels -% mu: d x k center of clusters -% energy: optimization target value -% Written by Mo Chen (sth4nth@gmail.com). 
-[label, mu, energy] = kmeans(X, kseeds(X,k)); - -% TBD: label and energy -function [label, mu, energy] = kseeds(X, k) -% kmeans++ seeding -[d,n] = size(X); -v = inf(1,n); -mu = zeros(d,k); -mu(:,1) = X(:,ceil(n*rand)); -label = zeros(1,n); -for i = 2:k - X0 = X-mu(:,i-1); - [v,label] = min(v,dot(X0,X0,1)); - mu(:,i) = X(:,randp(v)); -end -energy = sum(v); - -% Done -function idx = randp(p) -% sample one of k by probability -p = cumsum(p); -p = p/p(end); -idx = find(rand Date: Mon, 13 Mar 2017 23:22:54 +0800 Subject: [PATCH 046/119] fix doc --- chapter09/kseeds.m | 2 -- 1 file changed, 2 deletions(-) diff --git a/chapter09/kseeds.m b/chapter09/kseeds.m index 1f108bc..ad37a4f 100644 --- a/chapter09/kseeds.m +++ b/chapter09/kseeds.m @@ -4,9 +4,7 @@ % X: d x n data matrix % k: number of seeds % Output: -% label: 1 x n sample labels % mu: d x k seeds -% energy: kmeans target value % Written by Mo Chen (sth4nth@gmail.com). n = size(X,2); D = inf(1,n); From 2a9d1365caf615c4973e2fe839ba6a6899faf75a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 25 Mar 2017 03:14:53 +0800 Subject: [PATCH 047/119] add back the naive method of model evidence --- chapter10/mixGaussEvidence.m | 68 ++++++++++++++++++++++++++++++++++++ chapter10/mixGaussVb.m | 2 +- demo/ch10/mixGaussVb_demo.m | 8 +++++ 3 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 chapter10/mixGaussEvidence.m diff --git a/chapter10/mixGaussEvidence.m b/chapter10/mixGaussEvidence.m new file mode 100644 index 0000000..4a53599 --- /dev/null +++ b/chapter10/mixGaussEvidence.m @@ -0,0 +1,68 @@ +function L = mixGaussEvidence(X, model, prior) +% Variational lower bound of the model evidence (log of marginal) +% This the method by the book. It is equivalent to the bound inside mixGaussVb. +% Reference: Pattern Recognition and Machine Learning by Christopher M. Bishop (P.474) +% Written by Mo Chen (sth4nth@gmail.com). 
+alpha0 = prior.alpha; +kappa0 = prior.kappa; +m0 = prior.m; +v0 = prior.v; +M0 = prior.M; + +alpha = model.alpha; % Dirichlet +kappa = model.kappa; % Gaussian +m = model.m; % Gasusian +v = model.v; % Whishart +% M = model.M; % Whishart: inv(W) = V'*V +U = model.U; +R = model.R; +logR = model.logR; + +[d,k] = size(m); +nk = sum(R,1); % 10.51 + +Elogpi = psi(0,alpha)-psi(0,sum(alpha)); +Epz = dot(nk,Elogpi); +Eqz = dot(R(:),logR(:)); +logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); +Eppi = logCalpha0+(alpha0-1)*sum(Elogpi); +logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); +Eqpi = dot(alpha-1,Elogpi)+logCalpha; + +U0 = chol(M0); +sqrtR = sqrt(R); +xbar = bsxfun(@times,X*R,1./nk); % 10.52 + +logW = zeros(1,k); +trSW = zeros(1,k); +trM0W = zeros(1,k); +xbarmWxbarm = zeros(1,k); +mm0Wmm0 = zeros(1,k); +for i = 1:k + Ui = U(:,:,i); + logW(i) = -2*sum(log(diag(Ui))); + + Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); + V = chol(Xs*Xs'/nk(i)); + Q = V/Ui; + trSW(i) = dot(Q(:),Q(:)); % equivalent to tr(SW)=trace(S/M) + Q = U0/Ui; + trM0W(i) = dot(Q(:),Q(:)); + + q = Ui'\(xbar(:,i)-m(:,i)); + xbarmWxbarm(i) = dot(q,q); + q = Ui'\(m(:,i)-m0); + mm0Wmm0(i) = dot(q,q); +end +ElogLambda = sum(psi(0,bsxfun(@minus,v+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 +Epmu = sum(d*log(kappa0/(2*pi))+ElogLambda-d*kappa0./kappa-kappa0*(v.*mm0Wmm0))/2; +logB0 = v0*sum(log(diag(U0)))-0.5*v0*d*log(2)-logMvGamma(0.5*v0,d); +EpLambda = k*logB0+0.5*(v0-d-1)*sum(ElogLambda)-0.5*dot(v,trM0W); + +Eqmu = 0.5*sum(ElogLambda+d*log(kappa/(2*pi)))-0.5*d*k; +logB = -v.*(logW+d*log(2))/2-logMvGamma(0.5*v,d); +EqLambda = 0.5*sum((v-d-1).*ElogLambda-v*d)+sum(logB); + +EpX = 0.5*dot(nk,ElogLambda-d./kappa-v.*trSW-v.*xbarmWxbarm-d*log(2*pi)); + +L = Epz-Eqz+Eppi-Eqpi+Epmu-Eqmu+EpLambda-EqLambda+EpX; \ No newline at end of file diff --git a/chapter10/mixGaussVb.m b/chapter10/mixGaussVb.m index b784c8f..1daf32a 100644 --- a/chapter10/mixGaussVb.m +++ b/chapter10/mixGaussVb.m @@ -27,7 +27,7 @@ for 
iter = 2:maxiter model = expect(X,model); model = maximize(X,model,prior); - L(iter) = bound(X,model,prior)/n; + L(iter) = bound(X,model,prior); if abs(L(iter)-L(iter-1)) < tol*abs(L(iter)); break; end end L = L(2:iter); diff --git a/demo/ch10/mixGaussVb_demo.m b/demo/ch10/mixGaussVb_demo.m index a91b6f2..ee336d5 100755 --- a/demo/ch10/mixGaussVb_demo.m +++ b/demo/ch10/mixGaussVb_demo.m @@ -15,6 +15,14 @@ plotClass(X1,y1); figure; plot(L) +% Model Evidence +prior.alpha = 1; +prior.kappa = 1; +prior.m = mean(X1,2); +prior.v = d+1; +prior.M = eye(d); % M = inv(W) +L0 = mixGaussEvidence(X1, model, prior); +L0-L(end) % Predict testing data [y2, R] = mixGaussVbPred(model,X2); figure; From 8ffe5dc5d8575f670e0f27df0d7be04bd452f90a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 26 Mar 2017 03:49:44 +0800 Subject: [PATCH 048/119] fix comment --- chapter10/mixGaussEvidence.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter10/mixGaussEvidence.m b/chapter10/mixGaussEvidence.m index 4a53599..046dd56 100644 --- a/chapter10/mixGaussEvidence.m +++ b/chapter10/mixGaussEvidence.m @@ -1,6 +1,6 @@ function L = mixGaussEvidence(X, model, prior) -% Variational lower bound of the model evidence (log of marginal) -% This the method by the book. It is equivalent to the bound inside mixGaussVb. +% Variational lower bound of the model evidence (log of marginal likelihood) +% This function implements the method in the book PRML. It is equivalent to the bound inside mixGaussVb function. % Reference: Pattern Recognition and Machine Learning by Christopher M. Bishop (P.474) % Written by Mo Chen (sth4nth@gmail.com). 
alpha0 = prior.alpha; From 330fe228a0d9e1706d7f6dab3782a178b8d2d20f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 27 Mar 2017 02:31:04 +0800 Subject: [PATCH 049/119] add lognormexp --- common/lognormexp.m | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 common/lognormexp.m diff --git a/common/lognormexp.m b/common/lognormexp.m new file mode 100644 index 0000000..10d9f95 --- /dev/null +++ b/common/lognormexp.m @@ -0,0 +1,10 @@ +function [Y,s] = lognormexp(X, dim) +% Compute log(normalize(exp(x),dim)) while avoiding numerical underflow. +% By default dim = 1 (columns). +% Written by Mo Chen (sth4nth@gmail.com). +if nargin == 1 + dim = find(size(X)~=1,1); + if isempty(dim), dim = 1; end +end +s = logsumexp(X,dim); +Y = exp(X-s); From 88a9ee42f94d02bcc339a0aa79e752da0ebdba38 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 29 Mar 2017 05:36:19 +0800 Subject: [PATCH 050/119] add common functions --- common/slice.m | 20 ++++++++++++++++++++ common/sub.m | 16 ++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 common/slice.m create mode 100644 common/sub.m diff --git a/common/slice.m b/common/slice.m new file mode 100644 index 0000000..bb1e57e --- /dev/null +++ b/common/slice.m @@ -0,0 +1,20 @@ +function B = slice(A, dim, index) +% slice(A,2,index) = A(:,index,:) +sz = size(A); +sz(dim) = numel(index); +IDX = cell(1,ndims(A)); +for i = 1:ndims(A) + if i == dim + idx = index; + else + idx = 1:sz(i); + end + shape = ones(1,ndims(A)); + shape(i) = sz(i); + idx = reshape(idx,shape); + shape = sz; + shape(i) = 1; + idx = repmat(idx,shape); + IDX{i} = idx(:); +end +B = reshape(A(sub2ind(size(A),IDX{:})),sz); \ No newline at end of file diff --git a/common/sub.m b/common/sub.m new file mode 100644 index 0000000..6a800f1 --- /dev/null +++ b/common/sub.m @@ -0,0 +1,16 @@ +function B = sub(A, varargin) +% submat(A,i,j,k) = A(i;j;k) +assert(ndims(A)==numel(varargin)); +sz = cellfun(@numel,varargin); +IDX = cell(1,ndims(A)); +for i = 
1:ndims(A) + idx = varargin{i}; + shape = ones(1,ndims(A)); + shape(i) = sz(i); + idx = reshape(idx,shape); + shape = sz; + shape(i) = 1; + idx = repmat(idx,shape); + IDX{i} = idx(:); +end +B = reshape(A(sub2ind(size(A),IDX{:})),sz); \ No newline at end of file From fa8d013f9086aaf1e327dbfd669c579aafc85707 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 29 Mar 2017 05:38:01 +0800 Subject: [PATCH 051/119] fix doc --- common/slice.m | 1 + common/sub.m | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/common/slice.m b/common/slice.m index bb1e57e..56be588 100644 --- a/common/slice.m +++ b/common/slice.m @@ -1,5 +1,6 @@ function B = slice(A, dim, index) % slice(A,2,index) = A(:,index,:) +% Written by Mo Chen (sth4nth@gmail.com). sz = size(A); sz(dim) = numel(index); IDX = cell(1,ndims(A)); diff --git a/common/sub.m b/common/sub.m index 6a800f1..8d7de28 100644 --- a/common/sub.m +++ b/common/sub.m @@ -1,5 +1,6 @@ function B = sub(A, varargin) -% submat(A,i,j,k) = A(i;j;k) +% sub(A,i,j,k) = A(i;j;k) +% Written by Mo Chen (sth4nth@gmail.com). 
assert(ndims(A)==numel(varargin)); sz = cellfun(@numel,varargin); IDX = cell(1,ndims(A)); From 6de925744d2a36421794a0b24a50a3bc052d0e07 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 29 Mar 2017 22:36:32 -0700 Subject: [PATCH 052/119] add lattice --- common/lattice.m | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 common/lattice.m diff --git a/common/lattice.m b/common/lattice.m new file mode 100644 index 0000000..0f2f515 --- /dev/null +++ b/common/lattice.m @@ -0,0 +1,17 @@ +function A = lattice( sz ) +% Create an undirected graph corresponding to sz lattice +% Example: +% plot(graph(lattice([2,2,3]))) +% Input: +% sz: 1 x d size of lattice +% Output: +% A: prod(sz) x prod(sz) adjacent matrix of an undirected graph +% Written by Mo Chen (sth4nth@gmail.com) +d = numel(sz); +step = cumprod(sz); +n = step(end); +M = reshape(1:n,sz); +S = arrayfun(@(i) reshape(slice(M,i,1:sz(i)-1),1,[]), 1:d,'UniformOutput',false); +T = arrayfun(@(i) reshape(slice(M,i,2:sz(i)),1,[]), 1:d,'UniformOutput',false); +A = sparse([S{:}],[T{:}],1,n,n); +A = A+A'; \ No newline at end of file From d1b3fe21e87cbcadff6f265ee109060528a08a09 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 2 Apr 2017 04:05:46 +0800 Subject: [PATCH 053/119] fix lognormexp --- common/lognormexp.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lognormexp.m b/common/lognormexp.m index 10d9f95..8db9c78 100644 --- a/common/lognormexp.m +++ b/common/lognormexp.m @@ -7,4 +7,4 @@ if isempty(dim), dim = 1; end end s = logsumexp(X,dim); -Y = exp(X-s); +Y = X-s; From 3aedcb4d0d9a4eae460ab002a3d4a18ddff29f94 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 28 May 2017 12:53:51 +0800 Subject: [PATCH 054/119] add MRF mean field --- chapter08/betheEnergy.m | 11 ++++++ chapter08/demo.m | 76 ++++++++++++++++++++++++++++++++++++++++ chapter08/gibbsEnergy.m | 9 +++++ chapter08/im2mrf.m | 20 +++++++++++ chapter08/letterX.mat | Bin 0 -> 273 bytes chapter08/meanField.m | 38 
++++++++++++++++++++ 6 files changed, 154 insertions(+) create mode 100644 chapter08/betheEnergy.m create mode 100644 chapter08/demo.m create mode 100644 chapter08/gibbsEnergy.m create mode 100644 chapter08/im2mrf.m create mode 100644 chapter08/letterX.mat create mode 100644 chapter08/meanField.m diff --git a/chapter08/betheEnergy.m b/chapter08/betheEnergy.m new file mode 100644 index 0000000..d663e8b --- /dev/null +++ b/chapter08/betheEnergy.m @@ -0,0 +1,11 @@ +function lnZ = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel) +% Compute Bethe free energy +% TBD: deal with log(0) for entropy +edgePot = reshape(edgePot,[],size(edgePot,3)); +edgeBel = reshape(edgeBel,[],size(edgeBel,3)); +Ex = dot(nodeBel,nodePot,1); +Exy = dot(edgeBel,edgePot,1); +Hx = -dot(nodeBel,log(nodeBel),1); +Hxy = -dot(edgeBel,log(edgeBel),1); +d = full(sum(logical(A),1)); +lnZ = -sum(Ex)-sum(Exy)-sum((d-1).*Hx)+sum(Hxy); diff --git a/chapter08/demo.m b/chapter08/demo.m new file mode 100644 index 0000000..8005d9a --- /dev/null +++ b/chapter08/demo.m @@ -0,0 +1,76 @@ +clear; close all; +% load letterA.mat; +% X = A; +load letterX.mat +%% Original image +epoch = 50; +J = 1; % ising parameter +sigma = 1; % noise level + +img = double(X); +img = sign(img-mean(img(:))); + +figure; +subplot(2,3,1); +imagesc(img); +title('Original image'); +axis image; +colormap gray; +%% Noisy image +y = img + sigma*randn(size(img)); % noisy signal +subplot(2,3,2); +imagesc(y); +title('Noisy image'); +axis image; +colormap gray; +%% Mean Field +[A, nodePot, edgePot] = im2mrf(y, sigma, J); +[nodeBel, edgeBel, lnZ] = meanField(A, nodePot, edgePot, epoch); +lnZ0 = gibbsEnergy(nodePot, edgePot, nodeBel, edgeBel); +lnZ1 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); +maxdiff(lnZ0, lnZ(end)) +maxdiff(lnZ0, lnZ1) + +subplot(2,3,3); +imagesc(reshape(nodeBel(1,:),size(img))); +title('MF'); +axis image; +colormap gray; +%% Belief Propagation +% [nodeBel,edgeBel] = belProp(A, nodePot, edgePot, epoch); +% +% 
[nodeBel0,edgeBel0] = belProp0(A, nodePot, edgePot, epoch); +% maxdiff(nodeBel,nodeBel0) +% maxdiff(edgeBel,edgeBel0) +% +% subplot(2,3,4); +% imagesc(reshape(nodeBel(1,:),size(img))); +% title('BP'); +% axis image; +% colormap gray; +% %% Expectation Propagation +% [nodeBel,edgeBel] = expProp(A, nodePot, edgePot, epoch); +% +% lnZ0 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); +% +% [nodeBel0,edgeBel0] = expProp0(A, nodePot, edgePot, epoch); +% maxdiff(nodeBel,nodeBel0) +% maxdiff(edgeBel,edgeBel0) +% +% subplot(2,3,5); +% imagesc(reshape(nodeBel(1,:),size(img))); +% title('EP'); +% axis image; +% colormap gray; +% %% EP-BP +% [nodeBel,edgeBel] = expBelProp(A, nodePot, edgePot, epoch); +% +% [nodeBel0,edgeBel0] = expBelProp0(A, nodePot, edgePot, epoch); +% maxdiff(nodeBel,nodeBel0) +% maxdiff(edgeBel,edgeBel0) +% +% subplot(2,3,6); +% imagesc(reshape(nodeBel(1,:),size(img))); +% title('EBP'); +% axis image; +% colormap gray; diff --git a/chapter08/gibbsEnergy.m b/chapter08/gibbsEnergy.m new file mode 100644 index 0000000..b4c0aec --- /dev/null +++ b/chapter08/gibbsEnergy.m @@ -0,0 +1,9 @@ +function lnZ = gibbsEnergy(nodePot, edgePot, nodeBel, edgeBel) +% Compute Gibbs free energy +% TBD: deal with log(0) for entropy +edgePot = reshape(edgePot,[],size(edgePot,3)); +edgeBel = reshape(edgeBel,[],size(edgeBel,3)); +Ex = dot(nodeBel,nodePot,1); +Exy = dot(edgeBel,edgePot,1); +Hx = dot(nodeBel,log(nodeBel),1); +lnZ = -(sum(Ex)+sum(Exy)+sum(Hx)); \ No newline at end of file diff --git a/chapter08/im2mrf.m b/chapter08/im2mrf.m new file mode 100644 index 0000000..b960381 --- /dev/null +++ b/chapter08/im2mrf.m @@ -0,0 +1,20 @@ +function [A, nodePot, edgePot] = im2mrf(im, sigma, J) +% Convert a image to Ising MRF with distribution p(x)=exp(-sum(nodePot)-sum(edgePot)-lnZ) +% Input: +% im: row x col image +% sigma: variance of Gaussian node potential +% J: parameter of Ising edge +% Output: +% nodePot: 2 x n node potential +% edgePot: 2 x 2 x m edge potential + +A = 
lattice(size(im)); +[s,t,e] = find(tril(A)); +nEdge = numel(e); +e(:) = 1:nEdge; +A = sparse([s;t],[t;s],[e;e]); + +z = [1;-1]; +y = reshape(im,1,[]); +nodePot = (y-z).^2/(2*sigma^2); +edgePot = repmat(-J*(z*z'),[1, 1, nEdge]); \ No newline at end of file diff --git a/chapter08/letterX.mat b/chapter08/letterX.mat new file mode 100644 index 0000000000000000000000000000000000000000..eab4464282f232af265fc013705b0b4b1a2cf802 GIT binary patch literal 273 zcmeZu4DoSvQZUssQ1EpO(M`+DN!3vZ$Vn_o%P-2cQV4Jk_w>_Ia4t$sEJ;mK$j`G< z2q{ff@J}vLFfvduGO{u;w=yvTvJEU1NCpgyp1%AH3=Ew>Trp?wJ{O_p~^mMuI18&nL5BIIUCQ`=moM+7x^OiTs5C0xKeX%%jo{h?WX~7ls&oj3K zDORp?kl=Mj=TEAaC2 e%=zp$zpO88pL=cnpVN8wb_pGR#HMhVg%to3Ltq8~ literal 0 HcmV?d00001 diff --git a/chapter08/meanField.m b/chapter08/meanField.m new file mode 100644 index 0000000..2176f53 --- /dev/null +++ b/chapter08/meanField.m @@ -0,0 +1,38 @@ +function [nodeBel, edgeBel, lnZ] = meanField(A, nodePot, edgePot, epoch) +% Mean field for MRF +% Assuming egdePot is symmetric +% Input: +% A: n x n adjacent matrix of undirected graph, where value is edge index +% nodePot: k x n node potential +% edgePot: k x k x m edge potential +% Output: +% nodeBel: k x n node belief +% edgeBel: k x k x m edge belief +% L: variational lower bound +% Written by Mo Chen (sth4nth@gmail.com) +tol = 0; +if nargin < 4 + epoch = 10; + tol = 1e-4; +end +lnZ = -inf(1,epoch+1); +[nodeBel,L] = softmax(-nodePot,1); % init nodeBel +for iter = 1:epoch + for i = 1:numel(L) + [~,j,e] = find(A(i,:)); % neighbors + np = nodePot(:,i); + [lnp ,lnz] = lognormexp(-np-reshape(edgePot(:,:,e),2,[])*reshape(nodeBel(:,j),[],1)); + p = exp(lnp); + L(i) = -dot(p,lnp+np)+lnz; % + nodeBel(:,i) = p; + end + lnZ(iter+1) = sum(L)/2; + if abs(lnZ(iter+1)-lnZ(iter))/abs(lnZ(iter)) < tol; break; end +end +lnZ = lnZ(2:iter); + +[s,t,e] = find(tril(A)); +edgeBel = zeros(size(edgePot)); +for l = 1:numel(e) + edgeBel(:,:,e(l)) = nodeBel(:,s(l))*nodeBel(:,t(l))'; +end \ No newline at end of file From 
8ddf99a44058642de4284838cd75fef488fca1c3 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 28 May 2017 13:21:10 +0800 Subject: [PATCH 055/119] add discrete MRF BP and EP --- chapter08/belProp.m | 63 +++++++++++++++++++++++++++++++++++++ chapter08/belProp0.m | 63 +++++++++++++++++++++++++++++++++++++ chapter08/expProp.m | 59 ++++++++++++++++++++++++++++++++++ chapter08/expProp0.m | 60 +++++++++++++++++++++++++++++++++++ chapter08/imageMeanField.m | 18 +++++++++++ chapter08/isingMeanField.m | 18 +++++++++++ chapter08/isingMeanField0.m | 18 +++++++++++ 7 files changed, 299 insertions(+) create mode 100644 chapter08/belProp.m create mode 100644 chapter08/belProp0.m create mode 100644 chapter08/expProp.m create mode 100644 chapter08/expProp0.m create mode 100644 chapter08/imageMeanField.m create mode 100644 chapter08/isingMeanField.m create mode 100644 chapter08/isingMeanField0.m diff --git a/chapter08/belProp.m b/chapter08/belProp.m new file mode 100644 index 0000000..c9a73da --- /dev/null +++ b/chapter08/belProp.m @@ -0,0 +1,63 @@ +function [nodeBel, edgeBel] = belProp(A, nodePot, edgePot, epoch) +% Belief propagation for MRF +% Assuming egdePot is symmetric +% Input: +% A: n x n adjacent matrix of undirected graph, where value is edge index +% nodePot: k x n node potential +% edgePot: k x k x m edge potential +% Output: +% nodeBel: k x n node belief +% edgeBel: k x k x m edge belief +% L: variational lower bound (Bethe energy) +% Written by Mo Chen (sth4nth@gmail.com) +nodePot = exp(-nodePot); +edgePot = exp(-edgePot); + +tol = 0; +if nargin < 4 + epoch = 10; + tol = 1e-4; +end +[k,n] = size(nodePot); +m = size(edgePot,3); + +[s,t,e] = find(tril(A)); +A = sparse([s;t],[t;s],[e;e+m]); % digraph adjacent matrix, where value is message index +mu = ones(k,2*m)/k; % message +for iter = 1:epoch + mu0 = mu; + for i = 1:n + in = nonzeros(A(:,i)); % incoming message index + nb = nodePot(:,i).*prod(mu(:,in),2); % product of incoming message + for l = in' + ep = 
edgePot(:,:,ud(l,m)); + mu(:,rd(l,m)) = normalize(ep*(nb./mu(:,l))); + end + end + if max(abs(mu(:)-mu0(:))) < tol; break; end +end + +nodeBel = zeros(k,n); +for i = 1:n + nodeBel(:,i) = nodePot(:,i).*prod(mu(:,nonzeros(A(:,i))),2); +end +nodeBel = normalize(nodeBel,1); + +edgeBel = zeros(k,k,m); +for l = 1:m + eij = e(l); + eji = eij+m; + ep = edgePot(:,:,eij); + nbt = nodeBel(:,t(l))./mu(:,eij); + nbs = nodeBel(:,s(l))./mu(:,eji); + eb = (nbt*nbs').*ep; + edgeBel(:,:,eij) = eb./sum(eb(:)); +end + +function i = rd(i, m) +% reverse direction edge index +i = mod(i+m-1,2*m)+1; + +function i = ud(i, m) +% undirected edge index +i = mod(i-1,m)+1; \ No newline at end of file diff --git a/chapter08/belProp0.m b/chapter08/belProp0.m new file mode 100644 index 0000000..e59ef62 --- /dev/null +++ b/chapter08/belProp0.m @@ -0,0 +1,63 @@ +function [nodeBel, edgeBel] = belProp0(A, nodePot, edgePot, epoch) +% Belief propagation for MRF, calculation in log scale +% Assuming egdePot is symmetric +% Input: +% A: n x n adjacent matrix of undirected graph, where value is edge index +% nodePot: k x n node potential +% edgePot: k x k x m edge potential +% Output: +% nodeBel: k x n node belief +% edgeBel: k x k x m edge belief +% L: variational lower bound (Bethe energy) +% Written by Mo Chen (sth4nth@gmail.com) +tol = 0; +if nargin < 4 + epoch = 10; + tol = 1e-4; +end +[k,n] = size(nodePot); +m = size(edgePot,3); + +[s,t,e] = find(tril(A)); +A = sparse([s;t],[t;s],[e;e+m]); % digraph adjacent matrix, where value is message index +mu = zeros(k,2*m)-log(k); % message +for iter = 1:epoch + mu0 = mu; + for i = 1:n + in = nonzeros(A(:,i)); % incoming message index + nb = -nodePot(:,i)+sum(mu(:,in),2); % product of incoming message + for l = in' + ep = edgePot(:,:,ud(l,m)); + mut = logsumexp(-ep+(nb-mu(:,l)),1); + mu(:,rd(l,m)) = mut-logsumexp(mut); + end + end + if max(abs(mu(:)-mu0(:))) < tol; break; end +end + +nodeBel = zeros(k,n); +for i = 1:n + nb = 
-nodePot(:,i)+sum(mu(:,nonzeros(A(:,i))),2); + nodeBel(:,i) = nb-logsumexp(nb); +end + +edgeBel = zeros(k,k,m); +for l = 1:m + eij = e(l); + eji = eij+m; + ep = edgePot(:,:,eij); + nbt = nodeBel(:,t(l))-mu(:,eij); + nbs = nodeBel(:,s(l))-mu(:,eji); + eb = (nbt+nbs')-ep; + edgeBel(:,:,eij) = eb-logsumexp(eb(:)); +end +nodeBel = exp(nodeBel); +edgeBel = exp(edgeBel); + +function i = rd(i, m) +% reverse direction edge index +i = mod(i+m-1,2*m)+1; + +function i = ud(i, m) +% undirected edge index +i = mod(i-1,m)+1; \ No newline at end of file diff --git a/chapter08/expProp.m b/chapter08/expProp.m new file mode 100644 index 0000000..a8f42b3 --- /dev/null +++ b/chapter08/expProp.m @@ -0,0 +1,59 @@ +function [nodeBel, edgeBel] = expProp(A, nodePot, edgePot, epoch) +% Expectation propagation for MRF +% Assuming egdePot is symmetric +% Another implementation with precompute nodeBel and update during iterations +% Input: +% A: n x n adjacent matrix of undirected graph, where value is edge index +% nodePot: k x n node potential +% edgePot: k x k x m edge potential +% Output: +% nodeBel: k x n node belief +% edgeBel: k x k x m edge belief +% L: variational lower bound (Bethe energy) +% Written by Mo Chen (sth4nth@gmail.com) + +% working in exp domain +nodePot = exp(-nodePot); +edgePot = exp(-edgePot); + +tol = 0; +if nargin < 4 + epoch = 10; + tol = 1e-4; +end +k = size(nodePot,1); +m = size(edgePot,3); + +[s,t,e] = find(tril(A)); +mu = ones(k,2*m)/k; % message +nodeBel = normalize(nodePot,1); +for iter = 1:epoch + mu0 = mu; + for l = 1:m + i = s(l); + j = t(l); + eij = e(l); + eji = eij+m; + ep = edgePot(:,:,eij); + + nodeBel(:,j) = nodeBel(:,j)./mu(:,eij); + mu(:,eij) = normalize(ep*(nodeBel(:,i)./mu(:,eji))); + nodeBel(:,j) = normalize(nodeBel(:,j).*mu(:,eij)); + + nodeBel(:,i) = nodeBel(:,i)./mu(:,eji); + mu(:,eji) = normalize(ep*(nodeBel(:,j)./mu(:,eij))); + nodeBel(:,i) = normalize(nodeBel(:,i).*mu(:,eji)); + end + if max(abs(mu(:)-mu0(:))) < tol; break; end +end + 
+edgeBel = zeros(k,k,m); +for l = 1:m + eij = e(l); + eji = eij+m; + ep = edgePot(:,:,eij); + nbt = nodeBel(:,t(l))./mu(:,eij); + nbs = nodeBel(:,s(l))./mu(:,eji); + eb = (nbt*nbs').*ep; + edgeBel(:,:,eij) = eb./sum(eb(:)); +end diff --git a/chapter08/expProp0.m b/chapter08/expProp0.m new file mode 100644 index 0000000..d6f2eb1 --- /dev/null +++ b/chapter08/expProp0.m @@ -0,0 +1,60 @@ +function [nodeBel, edgeBel] = expProp0(A, nodePot, edgePot, epoch) +% Expectation propagation for MRF, calculation in log scale +% Assuming egdePot is symmetric +% Another implementation with precompute nodeBel and update during iterations +% Input: +% A: n x n adjacent matrix of undirected graph, where value is edge index +% nodePot: k x n node potential +% edgePot: k x k x m edge potential +% Output: +% nodeBel: k x n node belief +% edgeBel: k x k x m edge belief +% L: variational lower bound (Bethe energy) +% Written by Mo Chen (sth4nth@gmail.com) +tol = 0; +if nargin < 4 + epoch = 10; + tol = 1e-4; +end +k = size(nodePot,1); +m = size(edgePot,3); + +[s,t,e] = find(tril(A)); +mu = zeros(k,2*m)-log(k); +nodeBel = -nodePot-logsumexp(-nodePot,1); +for iter = 1:epoch + mu0 = mu; + for l = 1:m + i = s(l); + j = t(l); + eij = e(l); + eji = eij+m; + ep = edgePot(:,:,eij); + + nodeBel(:,j) = nodeBel(:,j)-mu(:,eij); + mut = logsumexp(-ep+(nodeBel(:,i)-mu(:,eji)),1); + mu(:,eij) = mut-logsumexp(mut); + nb = nodeBel(:,j)+mu(:,eij); + nodeBel(:,j) = nb-logsumexp(nb); + + nodeBel(:,i) = nodeBel(:,i)-mu(:,eji); + mut = logsumexp(-ep+(nodeBel(:,j)-mu(:,eij)),1); + mu(:,eji) = mut-logsumexp(mut); + nb = nodeBel(:,i)+mu(:,eji); + nodeBel(:,i) = nb-logsumexp(nb); + end + if max(abs(mu(:)-mu0(:))) < tol; break; end +end + +edgeBel = zeros(k,k,m); +for l = 1:m + eij = e(l); + eji = eij+m; + ep = edgePot(:,:,eij); + nbt = nodeBel(:,t(l))-mu(:,eij); + nbs = nodeBel(:,s(l))-mu(:,eji); + eb = (nbt+nbs')-ep; + edgeBel(:,:,eij) = eb-logsumexp(eb(:)); +end +nodeBel = exp(nodeBel); +edgeBel = exp(edgeBel); \ 
No newline at end of file diff --git a/chapter08/imageMeanField.m b/chapter08/imageMeanField.m new file mode 100644 index 0000000..a747f75 --- /dev/null +++ b/chapter08/imageMeanField.m @@ -0,0 +1,18 @@ +function nodeBel = imageMeanField(M, N, nodePot, edgePot, epoch) +if nargin < 5 + epoch = 10; +end +stride = [-1,1,-M,M]; +nodeBel = softmax(-nodePot,1); +for t = 1:epoch + for j = 1:N + for i = 1:M + pos = i + M*(j-1); + ne = pos + stride; + ne([i,i,j,j] == [1,M,1,N]) = []; + nodeBel(:,pos) = softmax(-edgePot*sum(nodeBel(:,ne),2)-nodePot(:,pos)); + end + end +end + + diff --git a/chapter08/isingMeanField.m b/chapter08/isingMeanField.m new file mode 100644 index 0000000..81a9887 --- /dev/null +++ b/chapter08/isingMeanField.m @@ -0,0 +1,18 @@ +function mu = isingMeanField(J, h, epoch) +if nargin < 3 + epoch = 10; +end +[M,N] = size(h); +mu = tanh(h); +stride = [-1,1,-M,M]; +for t = 1:epoch + for j = 1:N + for i = 1:M + pos = i + M*(j-1); + ne = pos + stride; + ne([i,i,j,j] == [1,M,1,N]) = []; + mu(i,j) = tanh(J*sum(mu(ne)) + h(i,j)); + end + end +end + diff --git a/chapter08/isingMeanField0.m b/chapter08/isingMeanField0.m new file mode 100644 index 0000000..f68cba0 --- /dev/null +++ b/chapter08/isingMeanField0.m @@ -0,0 +1,18 @@ +function mu = isingMeanField0(J, h, epoch) +% use padding trick +if nargin < 3 + epoch = 10; +end +mu = zeros(size(h)+2); % padding +[m,n] = size(mu); +mu(2:m-1,2:n-1) = tanh(h); % init +stride = [-1,1,-m,m]; +for t = 1:epoch + for j = 2:n-1 + for i = 2:m-1 + ne = i + m*(j-1) + stride; + mu(i,j) = tanh(J*sum(mu(ne))+h(i-1,j-1)); + end + end +end +mu = mu(2:m-1,2:n-1); \ No newline at end of file From d7cdb1af170f9f85c0bfd1b7eae3f5e1ed144b60 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 28 May 2017 14:16:39 +0800 Subject: [PATCH 056/119] refine MRF mean field --- chapter08/demo.m | 13 ++++++------- chapter08/im2mrf.m | 2 +- chapter08/meanField.m | 38 -------------------------------------- chapter08/mrfMeanField.m | 28 
++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 46 deletions(-) delete mode 100644 chapter08/meanField.m create mode 100644 chapter08/mrfMeanField.m diff --git a/chapter08/demo.m b/chapter08/demo.m index 8005d9a..04a10fc 100644 --- a/chapter08/demo.m +++ b/chapter08/demo.m @@ -24,16 +24,15 @@ axis image; colormap gray; %% Mean Field -[A, nodePot, edgePot] = im2mrf(y, sigma, J); -[nodeBel, edgeBel, lnZ] = meanField(A, nodePot, edgePot, epoch); -lnZ0 = gibbsEnergy(nodePot, edgePot, nodeBel, edgeBel); -lnZ1 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); -maxdiff(lnZ0, lnZ(end)) -maxdiff(lnZ0, lnZ1) +[A, nodePot, edgePot] = im2mrf(y, J, sigma); +[nodeBel, edgeBel] = mrfMeanField(A, nodePot, edgePot, epoch); +lnZ = gibbsEnergy(nodePot, edgePot, nodeBel, edgeBel); +lnZ0 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); +maxdiff(lnZ, lnZ0) subplot(2,3,3); imagesc(reshape(nodeBel(1,:),size(img))); -title('MF'); +title('Mean Field'); axis image; colormap gray; %% Belief Propagation diff --git a/chapter08/im2mrf.m b/chapter08/im2mrf.m index b960381..3d9e173 100644 --- a/chapter08/im2mrf.m +++ b/chapter08/im2mrf.m @@ -1,4 +1,4 @@ -function [A, nodePot, edgePot] = im2mrf(im, sigma, J) +function [A, nodePot, edgePot] = im2mrf(im, J, sigma) % Convert a image to Ising MRF with distribution p(x)=exp(-sum(nodePot)-sum(edgePot)-lnZ) % Input: % im: row x col image diff --git a/chapter08/meanField.m b/chapter08/meanField.m deleted file mode 100644 index 2176f53..0000000 --- a/chapter08/meanField.m +++ /dev/null @@ -1,38 +0,0 @@ -function [nodeBel, edgeBel, lnZ] = meanField(A, nodePot, edgePot, epoch) -% Mean field for MRF -% Assuming egdePot is symmetric -% Input: -% A: n x n adjacent matrix of undirected graph, where value is edge index -% nodePot: k x n node potential -% edgePot: k x k x m edge potential -% Output: -% nodeBel: k x n node belief -% edgeBel: k x k x m edge belief -% L: variational lower bound -% Written by Mo Chen (sth4nth@gmail.com) -tol = 
0; -if nargin < 4 - epoch = 10; - tol = 1e-4; -end -lnZ = -inf(1,epoch+1); -[nodeBel,L] = softmax(-nodePot,1); % init nodeBel -for iter = 1:epoch - for i = 1:numel(L) - [~,j,e] = find(A(i,:)); % neighbors - np = nodePot(:,i); - [lnp ,lnz] = lognormexp(-np-reshape(edgePot(:,:,e),2,[])*reshape(nodeBel(:,j),[],1)); - p = exp(lnp); - L(i) = -dot(p,lnp+np)+lnz; % - nodeBel(:,i) = p; - end - lnZ(iter+1) = sum(L)/2; - if abs(lnZ(iter+1)-lnZ(iter))/abs(lnZ(iter)) < tol; break; end -end -lnZ = lnZ(2:iter); - -[s,t,e] = find(tril(A)); -edgeBel = zeros(size(edgePot)); -for l = 1:numel(e) - edgeBel(:,:,e(l)) = nodeBel(:,s(l))*nodeBel(:,t(l))'; -end \ No newline at end of file diff --git a/chapter08/mrfMeanField.m b/chapter08/mrfMeanField.m new file mode 100644 index 0000000..091c964 --- /dev/null +++ b/chapter08/mrfMeanField.m @@ -0,0 +1,28 @@ +function [nodeBel, edgeBel, lnZ] = mrfMeanField(A, nodePot, edgePot, epoch) +% Mean field for MRF (Assuming that egdePot is symmetric) +% p(x)=exp(-E(x))/Z, E(x)=\sum(edgePot)+sum(nodePot) +% Input: +% A: n x n adjacent matrix of undirected graph, where value is edge index +% nodePot: k x n node potential +% edgePot: k x k x m edge potential +% Output: +% nodeBel: k x n node belief q(x_i) +% edgeBel: k x k x m edge belief q(x_i,x_j) +% Written by Mo Chen (sth4nth@gmail.com) +if nargin < 4 + epoch = 50; +end +lnZ = -inf(1,epoch+1); +[nodeBel,L] = softmax(-nodePot,1); % init nodeBel +for iter = 1:epoch + for i = 1:numel(L) + [~,j,e] = find(A(i,:)); % neighbors + nodeBel(:,i) = softmax(-nodePot(:,i)-reshape(edgePot(:,:,e),2,[])*reshape(nodeBel(:,j),[],1)); + end +end + +[s,t,e] = find(tril(A)); +edgeBel = zeros(size(edgePot)); +for l = 1:numel(e) + edgeBel(:,:,e(l)) = nodeBel(:,s(l))*nodeBel(:,t(l))'; +end \ No newline at end of file From a24ec5a3353dd073b52ad74414a4f7b030bf9b8e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 28 May 2017 14:51:02 +0800 Subject: [PATCH 057/119] refine MRF --- chapter08/belProp0.m | 63 
--------------------------- chapter08/demo.m | 57 ++++++++---------------- chapter08/expProp0.m | 60 ------------------------- chapter08/{belProp.m => mrfBelProp.m} | 17 ++++---- chapter08/{expProp.m => mrfExpProp.m} | 18 +++----- chapter08/mrfMeanField.m | 7 ++- 6 files changed, 39 insertions(+), 183 deletions(-) delete mode 100644 chapter08/belProp0.m delete mode 100644 chapter08/expProp0.m rename chapter08/{belProp.m => mrfBelProp.m} (88%) rename chapter08/{expProp.m => mrfExpProp.m} (80%) diff --git a/chapter08/belProp0.m b/chapter08/belProp0.m deleted file mode 100644 index e59ef62..0000000 --- a/chapter08/belProp0.m +++ /dev/null @@ -1,63 +0,0 @@ -function [nodeBel, edgeBel] = belProp0(A, nodePot, edgePot, epoch) -% Belief propagation for MRF, calculation in log scale -% Assuming egdePot is symmetric -% Input: -% A: n x n adjacent matrix of undirected graph, where value is edge index -% nodePot: k x n node potential -% edgePot: k x k x m edge potential -% Output: -% nodeBel: k x n node belief -% edgeBel: k x k x m edge belief -% L: variational lower bound (Bethe energy) -% Written by Mo Chen (sth4nth@gmail.com) -tol = 0; -if nargin < 4 - epoch = 10; - tol = 1e-4; -end -[k,n] = size(nodePot); -m = size(edgePot,3); - -[s,t,e] = find(tril(A)); -A = sparse([s;t],[t;s],[e;e+m]); % digraph adjacent matrix, where value is message index -mu = zeros(k,2*m)-log(k); % message -for iter = 1:epoch - mu0 = mu; - for i = 1:n - in = nonzeros(A(:,i)); % incoming message index - nb = -nodePot(:,i)+sum(mu(:,in),2); % product of incoming message - for l = in' - ep = edgePot(:,:,ud(l,m)); - mut = logsumexp(-ep+(nb-mu(:,l)),1); - mu(:,rd(l,m)) = mut-logsumexp(mut); - end - end - if max(abs(mu(:)-mu0(:))) < tol; break; end -end - -nodeBel = zeros(k,n); -for i = 1:n - nb = -nodePot(:,i)+sum(mu(:,nonzeros(A(:,i))),2); - nodeBel(:,i) = nb-logsumexp(nb); -end - -edgeBel = zeros(k,k,m); -for l = 1:m - eij = e(l); - eji = eij+m; - ep = edgePot(:,:,eij); - nbt = nodeBel(:,t(l))-mu(:,eij); 
- nbs = nodeBel(:,s(l))-mu(:,eji); - eb = (nbt+nbs')-ep; - edgeBel(:,:,eij) = eb-logsumexp(eb(:)); -end -nodeBel = exp(nodeBel); -edgeBel = exp(edgeBel); - -function i = rd(i, m) -% reverse direction edge index -i = mod(i+m-1,2*m)+1; - -function i = ud(i, m) -% undirected edge index -i = mod(i-1,m)+1; \ No newline at end of file diff --git a/chapter08/demo.m b/chapter08/demo.m index 04a10fc..fd24aab 100644 --- a/chapter08/demo.m +++ b/chapter08/demo.m @@ -30,46 +30,27 @@ lnZ0 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); maxdiff(lnZ, lnZ0) -subplot(2,3,3); +subplot(2,3,4); imagesc(reshape(nodeBel(1,:),size(img))); title('Mean Field'); axis image; colormap gray; %% Belief Propagation -% [nodeBel,edgeBel] = belProp(A, nodePot, edgePot, epoch); -% -% [nodeBel0,edgeBel0] = belProp0(A, nodePot, edgePot, epoch); -% maxdiff(nodeBel,nodeBel0) -% maxdiff(edgeBel,edgeBel0) -% -% subplot(2,3,4); -% imagesc(reshape(nodeBel(1,:),size(img))); -% title('BP'); -% axis image; -% colormap gray; -% %% Expectation Propagation -% [nodeBel,edgeBel] = expProp(A, nodePot, edgePot, epoch); -% -% lnZ0 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); -% -% [nodeBel0,edgeBel0] = expProp0(A, nodePot, edgePot, epoch); -% maxdiff(nodeBel,nodeBel0) -% maxdiff(edgeBel,edgeBel0) -% -% subplot(2,3,5); -% imagesc(reshape(nodeBel(1,:),size(img))); -% title('EP'); -% axis image; -% colormap gray; -% %% EP-BP -% [nodeBel,edgeBel] = expBelProp(A, nodePot, edgePot, epoch); -% -% [nodeBel0,edgeBel0] = expBelProp0(A, nodePot, edgePot, epoch); -% maxdiff(nodeBel,nodeBel0) -% maxdiff(edgeBel,edgeBel0) -% -% subplot(2,3,6); -% imagesc(reshape(nodeBel(1,:),size(img))); -% title('EBP'); -% axis image; -% colormap gray; +[nodeBel,edgeBel] = mrfBelProp(A, nodePot, edgePot, epoch); +lnZ = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); + +subplot(2,3,5); +imagesc(reshape(nodeBel(1,:),size(img))); +title('Belief propagation'); +axis image; +colormap gray; +%% Expectation Propagation 
+[nodeBel,edgeBel] = mrfExpProp(A, nodePot, edgePot, epoch); +lnZ0 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); +maxdiff(lnZ, lnZ0) + +subplot(2,3,6); +imagesc(reshape(nodeBel(1,:),size(img))); +title('Expectation Propagation'); +axis image; +colormap gray; diff --git a/chapter08/expProp0.m b/chapter08/expProp0.m deleted file mode 100644 index d6f2eb1..0000000 --- a/chapter08/expProp0.m +++ /dev/null @@ -1,60 +0,0 @@ -function [nodeBel, edgeBel] = expProp0(A, nodePot, edgePot, epoch) -% Expectation propagation for MRF, calculation in log scale -% Assuming egdePot is symmetric -% Another implementation with precompute nodeBel and update during iterations -% Input: -% A: n x n adjacent matrix of undirected graph, where value is edge index -% nodePot: k x n node potential -% edgePot: k x k x m edge potential -% Output: -% nodeBel: k x n node belief -% edgeBel: k x k x m edge belief -% L: variational lower bound (Bethe energy) -% Written by Mo Chen (sth4nth@gmail.com) -tol = 0; -if nargin < 4 - epoch = 10; - tol = 1e-4; -end -k = size(nodePot,1); -m = size(edgePot,3); - -[s,t,e] = find(tril(A)); -mu = zeros(k,2*m)-log(k); -nodeBel = -nodePot-logsumexp(-nodePot,1); -for iter = 1:epoch - mu0 = mu; - for l = 1:m - i = s(l); - j = t(l); - eij = e(l); - eji = eij+m; - ep = edgePot(:,:,eij); - - nodeBel(:,j) = nodeBel(:,j)-mu(:,eij); - mut = logsumexp(-ep+(nodeBel(:,i)-mu(:,eji)),1); - mu(:,eij) = mut-logsumexp(mut); - nb = nodeBel(:,j)+mu(:,eij); - nodeBel(:,j) = nb-logsumexp(nb); - - nodeBel(:,i) = nodeBel(:,i)-mu(:,eji); - mut = logsumexp(-ep+(nodeBel(:,j)-mu(:,eij)),1); - mu(:,eji) = mut-logsumexp(mut); - nb = nodeBel(:,i)+mu(:,eji); - nodeBel(:,i) = nb-logsumexp(nb); - end - if max(abs(mu(:)-mu0(:))) < tol; break; end -end - -edgeBel = zeros(k,k,m); -for l = 1:m - eij = e(l); - eji = eij+m; - ep = edgePot(:,:,eij); - nbt = nodeBel(:,t(l))-mu(:,eij); - nbs = nodeBel(:,s(l))-mu(:,eji); - eb = (nbt+nbs')-ep; - edgeBel(:,:,eij) = eb-logsumexp(eb(:)); -end -nodeBel 
= exp(nodeBel); -edgeBel = exp(edgeBel); \ No newline at end of file diff --git a/chapter08/belProp.m b/chapter08/mrfBelProp.m similarity index 88% rename from chapter08/belProp.m rename to chapter08/mrfBelProp.m index c9a73da..a556d9c 100644 --- a/chapter08/belProp.m +++ b/chapter08/mrfBelProp.m @@ -1,6 +1,5 @@ -function [nodeBel, edgeBel] = belProp(A, nodePot, edgePot, epoch) -% Belief propagation for MRF -% Assuming egdePot is symmetric +function [nodeBel, edgeBel] = mrfBelProp(A, nodePot, edgePot, epoch) +% Belief propagation for MRF (Assuming that egdePot is symmetric) % Input: % A: n x n adjacent matrix of undirected graph, where value is edge index % nodePot: k x n node potential @@ -8,16 +7,16 @@ % Output: % nodeBel: k x n node belief % edgeBel: k x k x m edge belief -% L: variational lower bound (Bethe energy) % Written by Mo Chen (sth4nth@gmail.com) -nodePot = exp(-nodePot); -edgePot = exp(-edgePot); - tol = 0; if nargin < 4 - epoch = 10; - tol = 1e-4; + epoch = 50; + tol = 1e-8; end + +nodePot = exp(-nodePot); +edgePot = exp(-edgePot); + [k,n] = size(nodePot); m = size(edgePot,3); diff --git a/chapter08/expProp.m b/chapter08/mrfExpProp.m similarity index 80% rename from chapter08/expProp.m rename to chapter08/mrfExpProp.m index a8f42b3..26969f2 100644 --- a/chapter08/expProp.m +++ b/chapter08/mrfExpProp.m @@ -1,7 +1,5 @@ -function [nodeBel, edgeBel] = expProp(A, nodePot, edgePot, epoch) -% Expectation propagation for MRF -% Assuming egdePot is symmetric -% Another implementation with precompute nodeBel and update during iterations +function [nodeBel, edgeBel] = mrfExpProp(A, nodePot, edgePot, epoch) +% Expectation propagation for MRF (Assuming that egdePot is symmetric) % Input: % A: n x n adjacent matrix of undirected graph, where value is edge index % nodePot: k x n node potential @@ -9,18 +7,16 @@ % Output: % nodeBel: k x n node belief % edgeBel: k x k x m edge belief -% L: variational lower bound (Bethe energy) % Written by Mo Chen 
(sth4nth@gmail.com) +tol = 0; +if nargin < 4 + epoch = 50; + tol = 1e-8; +end -% working in exp domain nodePot = exp(-nodePot); edgePot = exp(-edgePot); -tol = 0; -if nargin < 4 - epoch = 10; - tol = 1e-4; -end k = size(nodePot,1); m = size(edgePot,3); diff --git a/chapter08/mrfMeanField.m b/chapter08/mrfMeanField.m index 091c964..2f767cd 100644 --- a/chapter08/mrfMeanField.m +++ b/chapter08/mrfMeanField.m @@ -1,4 +1,4 @@ -function [nodeBel, edgeBel, lnZ] = mrfMeanField(A, nodePot, edgePot, epoch) +function [nodeBel, edgeBel] = mrfMeanField(A, nodePot, edgePot, epoch) % Mean field for MRF (Assuming that egdePot is symmetric) % p(x)=exp(-E(x))/Z, E(x)=\sum(edgePot)+sum(nodePot) % Input: @@ -9,16 +9,19 @@ % nodeBel: k x n node belief q(x_i) % edgeBel: k x k x m edge belief q(x_i,x_j) % Written by Mo Chen (sth4nth@gmail.com) +tol = 0; if nargin < 4 epoch = 50; + tol = 1e-8; end -lnZ = -inf(1,epoch+1); [nodeBel,L] = softmax(-nodePot,1); % init nodeBel for iter = 1:epoch + nodeBel0 = nodeBel; for i = 1:numel(L) [~,j,e] = find(A(i,:)); % neighbors nodeBel(:,i) = softmax(-nodePot(:,i)-reshape(edgePot(:,:,e),2,[])*reshape(nodeBel(:,j),[],1)); end + if max(abs(nodeBel(:)-nodeBel0(:))) < tol; break; end end [s,t,e] = find(tril(A)); From cc83a35fad2cdd358fbcbc79d573e3e97ac1eed8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 28 May 2017 15:13:31 +0800 Subject: [PATCH 058/119] add Ising mean field --- chapter08/demo.m | 23 ++++++++++++++++------- chapter08/imageMeanField.m | 18 ------------------ chapter08/isingMeanField.m | 14 +++++++++++++- chapter08/isingMeanField0.m | 18 ------------------ 4 files changed, 29 insertions(+), 44 deletions(-) delete mode 100644 chapter08/imageMeanField.m delete mode 100644 chapter08/isingMeanField0.m diff --git a/chapter08/demo.m b/chapter08/demo.m index fd24aab..5cede61 100644 --- a/chapter08/demo.m +++ b/chapter08/demo.m @@ -1,12 +1,6 @@ clear; close all; -% load letterA.mat; -% X = A; -load letterX.mat %% Original image -epoch = 50; -J = 
1; % ising parameter -sigma = 1; % noise level - +load letterX.mat img = double(X); img = sign(img-mean(img(:))); @@ -18,11 +12,16 @@ colormap gray; %% Noisy image y = img + sigma*randn(size(img)); % noisy signal + subplot(2,3,2); imagesc(y); title('Noisy image'); axis image; colormap gray; +%% Parameters +epoch = 50; +J = 1; % Ising parameter +sigma = 1; % noise level %% Mean Field [A, nodePot, edgePot] = im2mrf(y, J, sigma); [nodeBel, edgeBel] = mrfMeanField(A, nodePot, edgePot, epoch); @@ -35,6 +34,16 @@ title('Mean Field'); axis image; colormap gray; +%% Ising Mean Field +h = reshape(0.5*diff(nodePot),size(img)); +mu = isingMeanField(J, h, epoch); +maxdiff(reshape(mu,1,[]), [1,-1]*nodeBel) + +subplot(2,3,3); +imagesc(mu) +title('Ising Mean Field'); +axis image; +colormap gray; %% Belief Propagation [nodeBel,edgeBel] = mrfBelProp(A, nodePot, edgePot, epoch); lnZ = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); diff --git a/chapter08/imageMeanField.m b/chapter08/imageMeanField.m deleted file mode 100644 index a747f75..0000000 --- a/chapter08/imageMeanField.m +++ /dev/null @@ -1,18 +0,0 @@ -function nodeBel = imageMeanField(M, N, nodePot, edgePot, epoch) -if nargin < 5 - epoch = 10; -end -stride = [-1,1,-M,M]; -nodeBel = softmax(-nodePot,1); -for t = 1:epoch - for j = 1:N - for i = 1:M - pos = i + M*(j-1); - ne = pos + stride; - ne([i,i,j,j] == [1,M,1,N]) = []; - nodeBel(:,pos) = softmax(-edgePot*sum(nodeBel(:,ne),2)-nodePot(:,pos)); - end - end -end - - diff --git a/chapter08/isingMeanField.m b/chapter08/isingMeanField.m index 81a9887..ad7d286 100644 --- a/chapter08/isingMeanField.m +++ b/chapter08/isingMeanField.m @@ -1,11 +1,22 @@ function mu = isingMeanField(J, h, epoch) +% Mean field for 2d Ising model +% Input: +% J: scalar edge potential +% h: M X N image size node potential +% edgePot: k x k x m edge potential +% Output: +% mu: M x N image size expectation +% Written by Mo Chen (sth4nth@gmail.com) +tol = 0; if nargin < 3 - epoch = 10; + epoch = 50; + 
tol = 1e-8; end [M,N] = size(h); mu = tanh(h); stride = [-1,1,-M,M]; for t = 1:epoch + mu0 = mu; for j = 1:N for i = 1:M pos = i + M*(j-1); @@ -14,5 +25,6 @@ mu(i,j) = tanh(J*sum(mu(ne)) + h(i,j)); end end + if max(abs(mu(:)-mu0(:))) < tol; break; end end diff --git a/chapter08/isingMeanField0.m b/chapter08/isingMeanField0.m deleted file mode 100644 index f68cba0..0000000 --- a/chapter08/isingMeanField0.m +++ /dev/null @@ -1,18 +0,0 @@ -function mu = isingMeanField0(J, h, epoch) -% use padding trick -if nargin < 3 - epoch = 10; -end -mu = zeros(size(h)+2); % padding -[m,n] = size(mu); -mu(2:m-1,2:n-1) = tanh(h); % init -stride = [-1,1,-m,m]; -for t = 1:epoch - for j = 2:n-1 - for i = 2:m-1 - ne = i + m*(j-1) + stride; - mu(i,j) = tanh(J*sum(mu(ne))+h(i-1,j-1)); - end - end -end -mu = mu(2:m-1,2:n-1); \ No newline at end of file From 1a9cfa8c1a46fe99ffbd879496ce72c460c95fe3 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 28 May 2017 15:16:03 +0800 Subject: [PATCH 059/119] move demo --- chapter08/demo.m => demo/ch08/mrf_demo.m | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename chapter08/demo.m => demo/ch08/mrf_demo.m (100%) diff --git a/chapter08/demo.m b/demo/ch08/mrf_demo.m similarity index 100% rename from chapter08/demo.m rename to demo/ch08/mrf_demo.m From 81c8932c7d4df0db8dc9fc443257544906ce6295 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 28 May 2017 15:16:41 +0800 Subject: [PATCH 060/119] move data --- {chapter08 => demo/ch08}/letterX.mat | Bin 1 file changed, 0 insertions(+), 0 deletions(-) rename {chapter08 => demo/ch08}/letterX.mat (100%) diff --git a/chapter08/letterX.mat b/demo/ch08/letterX.mat similarity index 100% rename from chapter08/letterX.mat rename to demo/ch08/letterX.mat From f3c82fbc54d68f305acb36fd45d4d07fb8fd1956 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 31 May 2017 00:40:18 +0800 Subject: [PATCH 061/119] Update README.md --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff 
--git a/README.md b/README.md index 5f53e50..191a1f3 100644 --- a/README.md +++ b/README.md @@ -3,29 +3,29 @@ Introduction This package is a Matlab implementation of the algorithms described in the classical machine learning textbook: Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.microsoft.com/en-us/um/people/cmbishop/prml/)). -Note: this package requires Matlab R2016b or later, since it utilizes a new syntax of Matlab called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting in Python). +Note: this package requires Matlab **R2016b** or later, since it utilizes a new syntax of Matlab called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting in Python). Description ------- The design goal of the code are as follows: -1. Succinct: Code is extremely terse. Minimizing the number of line of code is one of the primal target. As a result, the core of the algorithms can be easily spot. -2. Efficient: Many tricks for making Matlab scripts fast were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementation. Usually, functions in this package are orders faster than Matlab builtin functions which provide the same functionality (eg. kmeans). If anyone found any Matlab implementation that is faster than mine, I am happy to further optimize. -3. Robust: Many numerical stability techniques are applied, such as probability computation in log scale to avoid numerical underflow and overflow, square root form update of symmetric matrix, etc. -4. Easy to learn: The code is heavily commented. Reference formulas in PRML book are indicated for corresponding code lines. Symbols are in sync with the book. -5. 
Practical: The package is designed not only to be easily read, but also to be easily used to facilitate ML research. Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). +* Succinct: The code is extremely terse. Minimizing the number of lines is a primal target. As a result, the core of the algorithms can be easily spot. +* Efficient: Many tricks for making Matlab scripts fast were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementations. Usually, functions in this package are orders faster than Matlab builtin ones which provide the same functionality (eg. kmeans). If anyone have found any Matlab implementation that is faster than mine, I am happy to further optimize. +* Robust: Many tricks for numerical stability are applied, such as probability computation in log scale and square root matrix update to enforce matrix symmetry, etc. +* Learnable: The code is heavily commented. Reference formulas in PRML book are indicated for corresponding code lines. Symbols are in sync with the book. +* Practical: The package is designed not only to be easily read, but also to be easily used to facilitate ML research. Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). Installation ------- -1. Download the package by running: `git clone https://github.com/PRML/PRMLT.git`. +1. Download the package to your local path (e.g. PRMLT/) by running: `git clone https://github.com/PRML/PRMLT.git`. -2. Run Matlab and navigate to package location as working directory, then run the init.m script. +2. Run Matlab and navigate to PRMLT/, then run the init.m script. -3. Run some demos in the your_location/demo directory. Enjoy! +3. Run some demos in PRMLT/demo directory. Enjoy! 
FeedBack ------- -If you found any bug or have any suggestion, please do fire issues. I am graceful for any feedback and will do my best to improve this package. +If you found any bug or have any suggestion, please do file issues. I am graceful for any feedback and will do my best to improve this package. License ------- From dc1acefcb8f2f0ff070dbe486d18f13371833e14 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 31 May 2017 00:45:27 +0800 Subject: [PATCH 062/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 191a1f3..73bb290 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Note: this package requires Matlab **R2016b** or later, since it utilizes a new Description ------- -The design goal of the code are as follows: +While developing this package, I stick to following prinples * Succinct: The code is extremely terse. Minimizing the number of lines is a primal target. As a result, the core of the algorithms can be easily spot. * Efficient: Many tricks for making Matlab scripts fast were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementations. Usually, functions in this package are orders faster than Matlab builtin ones which provide the same functionality (eg. kmeans). If anyone have found any Matlab implementation that is faster than mine, I am happy to further optimize. 
From b72f9a680c5bafe71d3a616270849e5d2c7867b3 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 31 May 2017 00:45:49 +0800 Subject: [PATCH 063/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 73bb290..1cd2db2 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Note: this package requires Matlab **R2016b** or later, since it utilizes a new Description ------- -While developing this package, I stick to following prinples +While developing this package, I stick to following principles * Succinct: The code is extremely terse. Minimizing the number of lines is a primal target. As a result, the core of the algorithms can be easily spot. * Efficient: Many tricks for making Matlab scripts fast were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementations. Usually, functions in this package are orders faster than Matlab builtin ones which provide the same functionality (eg. kmeans). If anyone have found any Matlab implementation that is faster than mine, I am happy to further optimize. 
From 75f3cdf5ecd224ca06d73ea69379e8dcc1850143 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 31 May 2017 15:00:26 +0800 Subject: [PATCH 064/119] fix a minor bug --- chapter05/mlp.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter05/mlp.m b/chapter05/mlp.m index e19105c..baf78a1 100644 --- a/chapter05/mlp.m +++ b/chapter05/mlp.m @@ -17,7 +17,7 @@ for l = 1:L-1 W{l} = randn(h(l),h(l+1)); end -Z = cell(L); +Z = cell(1,L); Z{1} = X; maxiter = 200; mse = zeros(1,maxiter); From 66a59ca44cd437f39d6e4e74084b6586633e7cc6 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 31 May 2017 18:39:20 +0800 Subject: [PATCH 065/119] refine mlp --- chapter05/mlp.m | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/chapter05/mlp.m b/chapter05/mlp.m index baf78a1..19e8b76 100644 --- a/chapter05/mlp.m +++ b/chapter05/mlp.m @@ -1,17 +1,15 @@ -function [model, mse] = mlp(X, Y, h, eta) +function [model, mse] = mlp(X, T, h) % Train a multilayer perceptron neural network % Input: % X: d x n data matrix -% Y: p x n response matrix +% T: p x n response matrix % h: L x 1 vector specify number of hidden nodes in each layer l % Ouput: % model: model structure % mse: mean square error % Written by Mo Chen (sth4nth@gmail.com). 
-if nargin < 4 - eta = 1/size(X,2); -end -h = [size(X,1);h(:);size(Y,1)]; +eta = 1/size(X,2); +h = [size(X,1);h(:);size(T,1)]; L = numel(h); W = cell(L-1); for l = 1:L-1 @@ -24,10 +22,10 @@ for iter = 1:maxiter % forward for l = 2:L - Z{l} = sigmoid(W{l-1}'*Z{l-1}); + Z{l} = sigmoid(W{l-1}'*Z{l-1}); % 5.10, 5.49 end % backward - E = Y-Z{L}; + E = T-Z{L}; mse(iter) = mean(dot(E,E),1); for l = L-1:-1:1 df = Z{l+1}.*(1-Z{l+1}); From 92c822088cf56b934c8c2afa3b344f57923ab325 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 2 Jun 2017 04:57:20 +0800 Subject: [PATCH 066/119] fix mlp --- chapter05/mlp.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter05/mlp.m b/chapter05/mlp.m index 19e8b76..df987b1 100644 --- a/chapter05/mlp.m +++ b/chapter05/mlp.m @@ -11,11 +11,11 @@ eta = 1/size(X,2); h = [size(X,1);h(:);size(T,1)]; L = numel(h); -W = cell(L-1); +W = cell(L-1,1); for l = 1:L-1 W{l} = randn(h(l),h(l+1)); end -Z = cell(1,L); +Z = cell(L,1); Z{1} = X; maxiter = 200; mse = zeros(1,maxiter); From dd161e46c87cb9baa61dd469f115241eb60ef453 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 2 Jun 2017 05:06:04 +0800 Subject: [PATCH 067/119] refine mlpPred.m --- chapter05/mlpPred.m | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/chapter05/mlpPred.m b/chapter05/mlpPred.m index 0b64ca1..e6fc280 100644 --- a/chapter05/mlpPred.m +++ b/chapter05/mlpPred.m @@ -1,4 +1,4 @@ -function y = mlpPred(model, X) +function Y = mlpPred(model, X) % Multilayer perceptron prediction % Input: % model: model structure @@ -8,9 +8,7 @@ % Written by Mo Chen (sth4nth@gmail.com). 
W = model.W; L = length(W)+1; -Z = cell(L); -Z{1} = X; +Y = X; for l = 2:L - Z{l} = sigmoid(W{l-1}'*Z{l-1}); -end -y = Z{L}; + Y = sigmoid(W{l-1}'*Y); +end \ No newline at end of file From e1c19a5a13949b00e6a1ebaaad64d69fa673bd2b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 2 Jun 2017 05:11:07 +0800 Subject: [PATCH 068/119] refine mlpPred.m --- chapter05/mlpPred.m | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/chapter05/mlpPred.m b/chapter05/mlpPred.m index e6fc280..0ce5fb1 100644 --- a/chapter05/mlpPred.m +++ b/chapter05/mlpPred.m @@ -7,8 +7,7 @@ % Y: p x n response matrix % Written by Mo Chen (sth4nth@gmail.com). W = model.W; -L = length(W)+1; Y = X; -for l = 2:L - Y = sigmoid(W{l-1}'*Y); +for l = 1:length(W) + Y = sigmoid(W{l}'*Y); end \ No newline at end of file From 272477eccc2dfc95d3bd1ba4c4962db8ced2df58 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 26 Aug 2017 13:55:17 +0800 Subject: [PATCH 069/119] add log1mexp.m --- common/log1mexp.m | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 common/log1mexp.m diff --git a/common/log1mexp.m b/common/log1mexp.m new file mode 100644 index 0000000..3892c70 --- /dev/null +++ b/common/log1mexp.m @@ -0,0 +1,7 @@ +function y = log1mexp(x) +% Accurately compute y = log(1-exp(-x)) +% reference: Accurately Computing log(1-exp(-|a|)) Martin Machler +y = x; +i = x > log(2); +y(i) = log1p(-exp(-x(i))); +y(~i) = log(-expm1(-x(~i))); From 159d5cd4ab2ae4ce873f14901fcd81706940e86d Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 26 Aug 2017 19:35:50 +0800 Subject: [PATCH 070/119] update log1pexp log1mexp --- common/log1mexp.m | 8 ++++---- common/log1pexp.m | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/common/log1mexp.m b/common/log1mexp.m index 3892c70..51918e1 100644 --- a/common/log1mexp.m +++ b/common/log1mexp.m @@ -1,7 +1,7 @@ function y = log1mexp(x) -% Accurately compute y = log(1-exp(-x)) +% Accurately compute y = log(1-exp(x)) % reference: 
Accurately Computing log(1-exp(-|a|)) Martin Machler y = x; -i = x > log(2); -y(i) = log1p(-exp(-x(i))); -y(~i) = log(-expm1(-x(~i))); +i = x < -log(2); +y(i) = log1p(-exp(x(i))); +y(~i) = log(-expm1(x(~i))); diff --git a/common/log1pexp.m b/common/log1pexp.m index 7ad0b9d..10096e5 100644 --- a/common/log1pexp.m +++ b/common/log1pexp.m @@ -1,7 +1,8 @@ function y = log1pexp(x) % Accurately compute y = log(1+exp(x)) -% reference: Accurately Computing log(1-exp(|a|)) Martin Machler -seed = 33.3; +% reference: Accurately Computing log(1-exp(-|a|)) Martin Machler y = x; -idx = x 18; +j = i & (x <= 33.3); +y(~i) = log1p(exp(x(~i))); +y(j) = x(j)+exp(-x(j)); From c4df199480fa844ffbb6f4f2687395fc1ddbf3e2 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 22 Oct 2017 17:09:35 +0800 Subject: [PATCH 071/119] improve entropy --- chapter01/entropy.m | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/chapter01/entropy.m b/chapter01/entropy.m index 8cfd5a0..c59c7bd 100644 --- a/chapter01/entropy.m +++ b/chapter01/entropy.m @@ -6,10 +6,7 @@ % z: entropy z=H(x) % Written by Mo Chen (sth4nth@gmail.com). n = numel(x); -[u,~,x] = unique(x); -k = numel(u); -idx = 1:n; -Mx = sparse(idx,x,1,n,k,n); -Px = nonzeros(mean(Mx,1)); +[~,~,x] = unique(x); +Px = accumarray(x, 1)/n; Hx = -dot(Px,log2(Px)); z = max(0,Hx); \ No newline at end of file From 0af5717978b5b2017c643fd1f3293c09cc497259 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 16 Nov 2017 05:21:08 +0800 Subject: [PATCH 072/119] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1cd2db2..cd1debf 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Introduction This package is a Matlab implementation of the algorithms described in the classical machine learning textbook: Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.microsoft.com/en-us/um/people/cmbishop/prml/)). 
-Note: this package requires Matlab **R2016b** or later, since it utilizes a new syntax of Matlab called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting in Python). +Note: this package requires Matlab **R2016b** or latter, since it utilizes a new syntax of Matlab called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting in Python). Description ------- @@ -12,7 +12,7 @@ While developing this package, I stick to following principles * Succinct: The code is extremely terse. Minimizing the number of lines is a primal target. As a result, the core of the algorithms can be easily spot. * Efficient: Many tricks for making Matlab scripts fast were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementations. Usually, functions in this package are orders faster than Matlab builtin ones which provide the same functionality (eg. kmeans). If anyone have found any Matlab implementation that is faster than mine, I am happy to further optimize. * Robust: Many tricks for numerical stability are applied, such as probability computation in log scale and square root matrix update to enforce matrix symmetry, etc. -* Learnable: The code is heavily commented. Reference formulas in PRML book are indicated for corresponding code lines. Symbols are in sync with the book. +* Readable: The code is heavily commented. Reference formulas in PRML book are indicated for corresponding code lines. Symbols are in sync with the book. * Practical: The package is designed not only to be easily read, but also to be easily used to facilitate ML research. 
Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). Installation @@ -21,7 +21,7 @@ Installation 2. Run Matlab and navigate to PRMLT/, then run the init.m script. -3. Run some demos in PRMLT/demo directory. Enjoy! +3. Try demos in PRMLT/demo directory to verify installation correctness. Enjoy! FeedBack ------- From 2e96df71b77ac5a4615ab8f0755ad25c95f33220 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 16 Nov 2017 05:23:04 +0800 Subject: [PATCH 073/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cd1debf..73840b2 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Description ------- While developing this package, I stick to following principles -* Succinct: The code is extremely terse. Minimizing the number of lines is a primal target. As a result, the core of the algorithms can be easily spot. +* Succinct: The code is extremely terse. Minimizing the number of lines is one of the primal goals. As a result, the core of the algorithms can be easily spot. * Efficient: Many tricks for making Matlab scripts fast were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementations. Usually, functions in this package are orders faster than Matlab builtin ones which provide the same functionality (eg. kmeans). If anyone have found any Matlab implementation that is faster than mine, I am happy to further optimize. * Robust: Many tricks for numerical stability are applied, such as probability computation in log scale and square root matrix update to enforce matrix symmetry, etc. * Readable: The code is heavily commented. Reference formulas in PRML book are indicated for corresponding code lines. Symbols are in sync with the book. 
From a71624867610c09fc9c76072faf4bc3abb20fdfa Mon Sep 17 00:00:00 2001 From: txingml Date: Sat, 10 Mar 2018 17:29:20 +0800 Subject: [PATCH 074/119] fix linRegVb and rvmRegVb --- chapter10/linRegVb.m | 2 +- chapter10/rvmRegVb.m | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter10/linRegVb.m b/chapter10/linRegVb.m index e3fe01f..0017e31 100644 --- a/chapter10/linRegVb.m +++ b/chapter10/linRegVb.m @@ -53,7 +53,7 @@ KLalpha = -a*log(b); % q(beta) e2 = sum((t-Ew'*X).^2); - invUX = U\X; + invUX = U'\X; trXSX = dot(invUX(:),invUX(:)); d = d0+0.5*(e2+trXSX); Ebeta = c/d; diff --git a/chapter10/rvmRegVb.m b/chapter10/rvmRegVb.m index 4616cba..2430662 100644 --- a/chapter10/rvmRegVb.m +++ b/chapter10/rvmRegVb.m @@ -56,7 +56,7 @@ KLalpha = -sum(a*log(b)); % q(beta) e2 = sum((t-Ew'*X).^2); - invUX = U\X; + invUX = U'\X; trXSX = dot(invUX(:),invUX(:)); d = d0+0.5*(e2+trXSX); Ebeta = c/d; From 7d7c3aab0585210fc2cf1b4298b4d92d902dd686 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 12 Mar 2018 02:55:19 +0800 Subject: [PATCH 075/119] make code consistent for linRegVb and rvmRegVb --- chapter10/linRegVb.m | 2 +- chapter10/rvmRegVb.m | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter10/linRegVb.m b/chapter10/linRegVb.m index 0017e31..49ba81e 100644 --- a/chapter10/linRegVb.m +++ b/chapter10/linRegVb.m @@ -46,7 +46,7 @@ KLw = -sum(log(diag(U))); % q(alpha) w2 = dot(Ew,Ew); - invU = U\I; + invU = U'\I; trS = dot(invU(:),invU(:)); b = b0+0.5*(w2+trS); % 10.95 Ealpha = a/b; % 10.102 diff --git a/chapter10/rvmRegVb.m b/chapter10/rvmRegVb.m index 2430662..2ced2d7 100644 --- a/chapter10/rvmRegVb.m +++ b/chapter10/rvmRegVb.m @@ -49,7 +49,7 @@ KLw = -sum(log(diag(U))); % q(alpha) w2 = Ew.*Ew; - invU = U\I; + invU = U'\I; dgS = dot(invU,invU,2); b = b0+0.5*(w2+dgS); Ealpha = a./b; From 7349e4a2548d5a22bd6f472cb76a7a8434167fc5 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 12 Mar 2018 03:11:38 +0800 Subject: [PATCH 076/119] minor 
improvements --- chapter09/linRegEm.m | 10 +++++----- chapter10/linRegVb.m | 2 +- chapter10/rvmRegVb.m | 2 +- chapter14/mixLinReg.m | 1 - 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/chapter09/linRegEm.m b/chapter09/linRegEm.m index 00280fe..5534bfa 100644 --- a/chapter09/linRegEm.m +++ b/chapter09/linRegEm.m @@ -14,7 +14,7 @@ beta = 0.5; end [d,n] = size(X); - +I = eye(d); xbar = mean(X,2); tbar = mean(t,2); @@ -39,12 +39,12 @@ llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*m2-beta*e2-logdetA-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end - V = inv(U); - trS = dot(V(:),V(:)); % A=inv(S) + invU = U'\I; + trS = dot(invU(:),invU(:)); % A=inv(S) alpha = d/(m2+trS); % 9.63 - UX = U'\X; - trXSX = dot(UX(:),UX(:)); + invUX = U'\X; + trXSX = dot(invUX(:),invUX(:)); beta = n/(e2+trXSX); % 9.68 is wrong end w0 = tbar-dot(m,xbar); diff --git a/chapter10/linRegVb.m b/chapter10/linRegVb.m index 49ba81e..809c3fa 100644 --- a/chapter10/linRegVb.m +++ b/chapter10/linRegVb.m @@ -8,7 +8,6 @@ % model: trained model structure % energy: variational lower bound % Written by Mo Chen (sth4nth@gmail.com). -[m,n] = size(X); if nargin < 3 a0 = 1e-4; b0 = 1e-4; @@ -20,6 +19,7 @@ c0 = prior.c; d0 = prior.d; end +[m,n] = size(X); I = eye(m); xbar = mean(X,2); tbar = mean(t,2); diff --git a/chapter10/rvmRegVb.m b/chapter10/rvmRegVb.m index 2ced2d7..91d073b 100644 --- a/chapter10/rvmRegVb.m +++ b/chapter10/rvmRegVb.m @@ -8,7 +8,6 @@ % model: trained model structure % energy: variational lower bound % Written by Mo Chen (sth4nth@gmail.com). 
-[m,n] = size(X); if nargin < 3 a0 = 1e-4; b0 = 1e-4; @@ -20,6 +19,7 @@ c0 = prior.c; d0 = prior.d; end +[m,n] = size(X); idx = (1:m)'; dg = sub2ind([m,m],idx,idx); I = eye(m); diff --git a/chapter14/mixLinReg.m b/chapter14/mixLinReg.m index aa530f5..7bf90cb 100644 --- a/chapter14/mixLinReg.m +++ b/chapter14/mixLinReg.m @@ -45,7 +45,6 @@ if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter)); break; end end llh = llh(2:iter); -label = max(R,[],1); model.alpha = alpha; % mixing coefficient model.beta = beta; % mixture component precision model.W = W; % linear model coefficent From 3f9d968e1ab90f879b9a5b4035fc72aecaad3dd9 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 19 Apr 2018 11:02:48 +0800 Subject: [PATCH 077/119] update MRF --- chapter08/MRF/mrfBethe.m | 12 ++++ chapter08/MRF/mrfBp.m | 56 +++++++++++++++++ chapter08/MRF/mrfGibbs.m | 11 ++++ chapter08/MRF/mrfIsGa.m | 21 +++++++ chapter08/MRF/mrfMf.m | 34 +++++++++++ chapter08/{ => NaiveBayes}/nbBern.m | 0 chapter08/{ => NaiveBayes}/nbBernPred.m | 0 chapter08/{ => NaiveBayes}/nbGauss.m | 0 chapter08/{ => NaiveBayes}/nbGaussPred.m | 0 chapter08/betheEnergy.m | 11 ---- chapter08/gibbsEnergy.m | 9 --- chapter08/im2mrf.m | 20 ------- chapter08/isingMeanField.m | 30 ---------- chapter08/mrfBelProp.m | 62 ------------------- chapter08/mrfExpProp.m | 55 ----------------- chapter08/mrfMeanField.m | 31 ---------- demo/ch08/mrf_demo.m | 76 +++++++++++------------- 17 files changed, 168 insertions(+), 260 deletions(-) create mode 100644 chapter08/MRF/mrfBethe.m create mode 100644 chapter08/MRF/mrfBp.m create mode 100644 chapter08/MRF/mrfGibbs.m create mode 100644 chapter08/MRF/mrfIsGa.m create mode 100644 chapter08/MRF/mrfMf.m rename chapter08/{ => NaiveBayes}/nbBern.m (100%) rename chapter08/{ => NaiveBayes}/nbBernPred.m (100%) rename chapter08/{ => NaiveBayes}/nbGauss.m (100%) rename chapter08/{ => NaiveBayes}/nbGaussPred.m (100%) delete mode 100644 chapter08/betheEnergy.m delete mode 100644 chapter08/gibbsEnergy.m delete 
mode 100644 chapter08/im2mrf.m delete mode 100644 chapter08/isingMeanField.m delete mode 100644 chapter08/mrfBelProp.m delete mode 100644 chapter08/mrfExpProp.m delete mode 100644 chapter08/mrfMeanField.m diff --git a/chapter08/MRF/mrfBethe.m b/chapter08/MRF/mrfBethe.m new file mode 100644 index 0000000..85b69d6 --- /dev/null +++ b/chapter08/MRF/mrfBethe.m @@ -0,0 +1,12 @@ +function lnZ = mrfBethe(A, nodePot, edgePot, nodeBel, edgeBel) +% Compute Bethe energy +[s,t,e] = find(triu(A)); +edgeCor = zeros(size(edgePot)); +for l = 1:numel(e) + edgeCor(:,:,e(l)) = edgeBel(:,:,e(l))./(nodeBel(:,s(l))*nodeBel(:,t(l))'); +end +Ex = dot(nodeBel(:),nodePot(:)); +Exy = dot(edgeBel(:),edgePot(:)); +Hx = -dot(nodeBel(:),log(nodeBel(:))); +Ixy = dot(edgeBel(:),log(edgeCor(:))); +lnZ = Ex+Exy+Hx-Ixy; \ No newline at end of file diff --git a/chapter08/MRF/mrfBp.m b/chapter08/MRF/mrfBp.m new file mode 100644 index 0000000..ff2746b --- /dev/null +++ b/chapter08/MRF/mrfBp.m @@ -0,0 +1,56 @@ +function [nodeBel, edgeBel, L] = mrfBp(A, nodePot, edgePot, epoch) +% Undirected graph belief propagation for MRF +% Assuming egdePot is symmetric +% Input: +% A: n x n adjacent matrix of undirected graph, where value is edge index +% nodePot: k x n node potential +% edgePot: k x k x m edge potential +% Output: +% nodeBel: k x n node belief +% edgeBel: k x k x m edge belief +% L: variational lower bound (Bethe energy) +% Written by Mo Chen (sth4nth@gmail.com) +if nargin < 4 + epoch = 10; +end +expNodePot = exp(nodePot); +expEdgePot = exp(edgePot); +[k,n] = size(nodePot); +m = size(edgePot,3); + +[s,t,e] = find(triu(A)); +A = sparse([s;t],[t;s],[e;e+m]); % digraph adjacent matrix, where value is message index +mu = ones(k,2*m)/k; % message factor to node + +nodeBel = zeros(k,n); +edgeBel = zeros(k,k,m); +L = -inf(1,epoch+1); +for iter = 1:epoch + for i = 1:n + in = nonzeros(A(:,i)); % incoming message index + nb = expNodePot(:,i).*prod(mu(:,in),2); % product of incoming message + for l = in' + ep = 
expEdgePot(:,:,ud(l,m)); + mu(:,rd(l,m)) = normalize(ep*(nb./mu(:,l))); + end + nodeBel(:,i) = nb/sum(nb); + end + + for l = 1:m + st = e(l); + nut = nodeBel(:,t(l))./mu(:,st); + nus = nodeBel(:,s(l))./mu(:,st+m); + eb = expEdgePot(:,:,st).*(nus*nut'); + edgeBel(:,:,st) = eb./sum(eb(:)); + end + L(iter+1) = mrfBethe(A,nodePot,edgePot,nodeBel,edgeBel); +end +L = L(1,2:iter+1); + +function i = rd(i, m) +% reverse direction edge index +i = mod(i+m-1,2*m)+1; + +function i = ud(i, m) +% undirected edge index +i = mod(i-1,m)+1; \ No newline at end of file diff --git a/chapter08/MRF/mrfGibbs.m b/chapter08/MRF/mrfGibbs.m new file mode 100644 index 0000000..60f75cd --- /dev/null +++ b/chapter08/MRF/mrfGibbs.m @@ -0,0 +1,11 @@ +function lnZ = mrfGibbs(A, nodePot, edgePot, nodeBel) +% Compute Gibbs energy +[s,t,e] = find(triu(A)); +edgeBel = zeros(size(edgePot)); +for l = 1:numel(e) + edgeBel(:,:,e(l)) = nodeBel(:,s(l))*nodeBel(:,t(l))'; +end +Ex = dot(nodeBel(:),nodePot(:)); +Exy = dot(edgeBel(:),edgePot(:)); +Hx = -dot(nodeBel(:),log(nodeBel(:))); +lnZ = Ex+Exy+Hx; \ No newline at end of file diff --git a/chapter08/MRF/mrfIsGa.m b/chapter08/MRF/mrfIsGa.m new file mode 100644 index 0000000..a303119 --- /dev/null +++ b/chapter08/MRF/mrfIsGa.m @@ -0,0 +1,21 @@ +function [A, nodePot, edgePot] = mrfIsGa(im, sigma, J) +% Contruct a latent Ising MRF with Gaussian observation +% Input: +% im: row x col image +% sigma: variance of Gaussian node potential +% J: parameter of Ising edge +% Output: +% A: n x n adjacent matrix +% nodePot: 2 x n node potential +% edgePot: 2 x 2 x m edge potential +% Written by Mo Chen (sth4nth@gmail.com) +A = lattice(size(im)); +[s,t,e] = find(triu(A)); +m = numel(e); +e(:) = 1:m; +A = sparse([s;t],[t;s],[e;e]); + +z = [1;-1]; +x = reshape(im,1,[]); +nodePot = -(x-z).^2/(2*sigma^2); +edgePot = repmat(J*(z*z'),[1, 1, m]); \ No newline at end of file diff --git a/chapter08/MRF/mrfMf.m b/chapter08/MRF/mrfMf.m new file mode 100644 index 0000000..366164b --- 
/dev/null +++ b/chapter08/MRF/mrfMf.m @@ -0,0 +1,34 @@ +function [nodeBel, edgeBel, L] = mrfMf(A, nodePot, edgePot, epoch) +% Mean field for MRF +% Assuming egdePot is symmetric +% Input: +% A: n x n adjacent matrix of undirected graph, where value is edge index +% nodePot: k x n node potential +% edgePot: k x k x m edge potential +% Output: +% nodeBel: k x n node belief +% edgeBel: k x k x m edge belief +% Written by Mo Chen (sth4nth@gmail.com) +if nargin < 4 + epoch = 10; +end +L = -inf(1,epoch+1); +[nodeBel,lnZ] = softmax(nodePot,1); % initialization +for iter = 1:epoch + for i = 1:size(nodePot,2) + [~,j,e] = find(A(i,:)); % neighbors + [nodeBel(:,i),lnZ(i)] = softmax(nodePot(:,i)+reshape(edgePot(:,:,e),2,[])*reshape(nodeBel(:,j),[],1)); + end +% E = dot(nodeBel,nodePot,1); +% H = -dot(nodeBel,log(nodeBel),1); +% L(iter+1) = sum(lnZ+E+H)/2; + L(iter+1) = mrfGibbs(A,nodePot,edgePot,nodeBel); +% if abs(L(iter+1)-L(iter))/abs(L(iter)) < tol; break; end +end +L = L(1,2:iter+1); + +[s,t,e] = find(triu(A)); +edgeBel = zeros(size(edgePot)); +for l = 1:numel(e) + edgeBel(:,:,e(l)) = nodeBel(:,s(l))*nodeBel(:,t(l))'; +end \ No newline at end of file diff --git a/chapter08/nbBern.m b/chapter08/NaiveBayes/nbBern.m similarity index 100% rename from chapter08/nbBern.m rename to chapter08/NaiveBayes/nbBern.m diff --git a/chapter08/nbBernPred.m b/chapter08/NaiveBayes/nbBernPred.m similarity index 100% rename from chapter08/nbBernPred.m rename to chapter08/NaiveBayes/nbBernPred.m diff --git a/chapter08/nbGauss.m b/chapter08/NaiveBayes/nbGauss.m similarity index 100% rename from chapter08/nbGauss.m rename to chapter08/NaiveBayes/nbGauss.m diff --git a/chapter08/nbGaussPred.m b/chapter08/NaiveBayes/nbGaussPred.m similarity index 100% rename from chapter08/nbGaussPred.m rename to chapter08/NaiveBayes/nbGaussPred.m diff --git a/chapter08/betheEnergy.m b/chapter08/betheEnergy.m deleted file mode 100644 index d663e8b..0000000 --- a/chapter08/betheEnergy.m +++ /dev/null @@ -1,11 +0,0 
@@ -function lnZ = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel) -% Compute Bethe free energy -% TBD: deal with log(0) for entropy -edgePot = reshape(edgePot,[],size(edgePot,3)); -edgeBel = reshape(edgeBel,[],size(edgeBel,3)); -Ex = dot(nodeBel,nodePot,1); -Exy = dot(edgeBel,edgePot,1); -Hx = -dot(nodeBel,log(nodeBel),1); -Hxy = -dot(edgeBel,log(edgeBel),1); -d = full(sum(logical(A),1)); -lnZ = -sum(Ex)-sum(Exy)-sum((d-1).*Hx)+sum(Hxy); diff --git a/chapter08/gibbsEnergy.m b/chapter08/gibbsEnergy.m deleted file mode 100644 index b4c0aec..0000000 --- a/chapter08/gibbsEnergy.m +++ /dev/null @@ -1,9 +0,0 @@ -function lnZ = gibbsEnergy(nodePot, edgePot, nodeBel, edgeBel) -% Compute Gibbs free energy -% TBD: deal with log(0) for entropy -edgePot = reshape(edgePot,[],size(edgePot,3)); -edgeBel = reshape(edgeBel,[],size(edgeBel,3)); -Ex = dot(nodeBel,nodePot,1); -Exy = dot(edgeBel,edgePot,1); -Hx = dot(nodeBel,log(nodeBel),1); -lnZ = -(sum(Ex)+sum(Exy)+sum(Hx)); \ No newline at end of file diff --git a/chapter08/im2mrf.m b/chapter08/im2mrf.m deleted file mode 100644 index 3d9e173..0000000 --- a/chapter08/im2mrf.m +++ /dev/null @@ -1,20 +0,0 @@ -function [A, nodePot, edgePot] = im2mrf(im, J, sigma) -% Convert a image to Ising MRF with distribution p(x)=exp(-sum(nodePot)-sum(edgePot)-lnZ) -% Input: -% im: row x col image -% sigma: variance of Gaussian node potential -% J: parameter of Ising edge -% Output: -% nodePot: 2 x n node potential -% edgePot: 2 x 2 x m edge potential - -A = lattice(size(im)); -[s,t,e] = find(tril(A)); -nEdge = numel(e); -e(:) = 1:nEdge; -A = sparse([s;t],[t;s],[e;e]); - -z = [1;-1]; -y = reshape(im,1,[]); -nodePot = (y-z).^2/(2*sigma^2); -edgePot = repmat(-J*(z*z'),[1, 1, nEdge]); \ No newline at end of file diff --git a/chapter08/isingMeanField.m b/chapter08/isingMeanField.m deleted file mode 100644 index ad7d286..0000000 --- a/chapter08/isingMeanField.m +++ /dev/null @@ -1,30 +0,0 @@ -function mu = isingMeanField(J, h, epoch) -% Mean field 
for 2d Ising model -% Input: -% J: scalar edge potential -% h: M X N image size node potential -% edgePot: k x k x m edge potential -% Output: -% mu: M x N image size expectation -% Written by Mo Chen (sth4nth@gmail.com) -tol = 0; -if nargin < 3 - epoch = 50; - tol = 1e-8; -end -[M,N] = size(h); -mu = tanh(h); -stride = [-1,1,-M,M]; -for t = 1:epoch - mu0 = mu; - for j = 1:N - for i = 1:M - pos = i + M*(j-1); - ne = pos + stride; - ne([i,i,j,j] == [1,M,1,N]) = []; - mu(i,j) = tanh(J*sum(mu(ne)) + h(i,j)); - end - end - if max(abs(mu(:)-mu0(:))) < tol; break; end -end - diff --git a/chapter08/mrfBelProp.m b/chapter08/mrfBelProp.m deleted file mode 100644 index a556d9c..0000000 --- a/chapter08/mrfBelProp.m +++ /dev/null @@ -1,62 +0,0 @@ -function [nodeBel, edgeBel] = mrfBelProp(A, nodePot, edgePot, epoch) -% Belief propagation for MRF (Assuming that egdePot is symmetric) -% Input: -% A: n x n adjacent matrix of undirected graph, where value is edge index -% nodePot: k x n node potential -% edgePot: k x k x m edge potential -% Output: -% nodeBel: k x n node belief -% edgeBel: k x k x m edge belief -% Written by Mo Chen (sth4nth@gmail.com) -tol = 0; -if nargin < 4 - epoch = 50; - tol = 1e-8; -end - -nodePot = exp(-nodePot); -edgePot = exp(-edgePot); - -[k,n] = size(nodePot); -m = size(edgePot,3); - -[s,t,e] = find(tril(A)); -A = sparse([s;t],[t;s],[e;e+m]); % digraph adjacent matrix, where value is message index -mu = ones(k,2*m)/k; % message -for iter = 1:epoch - mu0 = mu; - for i = 1:n - in = nonzeros(A(:,i)); % incoming message index - nb = nodePot(:,i).*prod(mu(:,in),2); % product of incoming message - for l = in' - ep = edgePot(:,:,ud(l,m)); - mu(:,rd(l,m)) = normalize(ep*(nb./mu(:,l))); - end - end - if max(abs(mu(:)-mu0(:))) < tol; break; end -end - -nodeBel = zeros(k,n); -for i = 1:n - nodeBel(:,i) = nodePot(:,i).*prod(mu(:,nonzeros(A(:,i))),2); -end -nodeBel = normalize(nodeBel,1); - -edgeBel = zeros(k,k,m); -for l = 1:m - eij = e(l); - eji = eij+m; - ep = 
edgePot(:,:,eij); - nbt = nodeBel(:,t(l))./mu(:,eij); - nbs = nodeBel(:,s(l))./mu(:,eji); - eb = (nbt*nbs').*ep; - edgeBel(:,:,eij) = eb./sum(eb(:)); -end - -function i = rd(i, m) -% reverse direction edge index -i = mod(i+m-1,2*m)+1; - -function i = ud(i, m) -% undirected edge index -i = mod(i-1,m)+1; \ No newline at end of file diff --git a/chapter08/mrfExpProp.m b/chapter08/mrfExpProp.m deleted file mode 100644 index 26969f2..0000000 --- a/chapter08/mrfExpProp.m +++ /dev/null @@ -1,55 +0,0 @@ -function [nodeBel, edgeBel] = mrfExpProp(A, nodePot, edgePot, epoch) -% Expectation propagation for MRF (Assuming that egdePot is symmetric) -% Input: -% A: n x n adjacent matrix of undirected graph, where value is edge index -% nodePot: k x n node potential -% edgePot: k x k x m edge potential -% Output: -% nodeBel: k x n node belief -% edgeBel: k x k x m edge belief -% Written by Mo Chen (sth4nth@gmail.com) -tol = 0; -if nargin < 4 - epoch = 50; - tol = 1e-8; -end - -nodePot = exp(-nodePot); -edgePot = exp(-edgePot); - -k = size(nodePot,1); -m = size(edgePot,3); - -[s,t,e] = find(tril(A)); -mu = ones(k,2*m)/k; % message -nodeBel = normalize(nodePot,1); -for iter = 1:epoch - mu0 = mu; - for l = 1:m - i = s(l); - j = t(l); - eij = e(l); - eji = eij+m; - ep = edgePot(:,:,eij); - - nodeBel(:,j) = nodeBel(:,j)./mu(:,eij); - mu(:,eij) = normalize(ep*(nodeBel(:,i)./mu(:,eji))); - nodeBel(:,j) = normalize(nodeBel(:,j).*mu(:,eij)); - - nodeBel(:,i) = nodeBel(:,i)./mu(:,eji); - mu(:,eji) = normalize(ep*(nodeBel(:,j)./mu(:,eij))); - nodeBel(:,i) = normalize(nodeBel(:,i).*mu(:,eji)); - end - if max(abs(mu(:)-mu0(:))) < tol; break; end -end - -edgeBel = zeros(k,k,m); -for l = 1:m - eij = e(l); - eji = eij+m; - ep = edgePot(:,:,eij); - nbt = nodeBel(:,t(l))./mu(:,eij); - nbs = nodeBel(:,s(l))./mu(:,eji); - eb = (nbt*nbs').*ep; - edgeBel(:,:,eij) = eb./sum(eb(:)); -end diff --git a/chapter08/mrfMeanField.m b/chapter08/mrfMeanField.m deleted file mode 100644 index 2f767cd..0000000 --- 
a/chapter08/mrfMeanField.m +++ /dev/null @@ -1,31 +0,0 @@ -function [nodeBel, edgeBel] = mrfMeanField(A, nodePot, edgePot, epoch) -% Mean field for MRF (Assuming that egdePot is symmetric) -% p(x)=exp(-E(x))/Z, E(x)=\sum(edgePot)+sum(nodePot) -% Input: -% A: n x n adjacent matrix of undirected graph, where value is edge index -% nodePot: k x n node potential -% edgePot: k x k x m edge potential -% Output: -% nodeBel: k x n node belief q(x_i) -% edgeBel: k x k x m edge belief q(x_i,x_j) -% Written by Mo Chen (sth4nth@gmail.com) -tol = 0; -if nargin < 4 - epoch = 50; - tol = 1e-8; -end -[nodeBel,L] = softmax(-nodePot,1); % init nodeBel -for iter = 1:epoch - nodeBel0 = nodeBel; - for i = 1:numel(L) - [~,j,e] = find(A(i,:)); % neighbors - nodeBel(:,i) = softmax(-nodePot(:,i)-reshape(edgePot(:,:,e),2,[])*reshape(nodeBel(:,j),[],1)); - end - if max(abs(nodeBel(:)-nodeBel0(:))) < tol; break; end -end - -[s,t,e] = find(tril(A)); -edgeBel = zeros(size(edgePot)); -for l = 1:numel(e) - edgeBel(:,:,e(l)) = nodeBel(:,s(l))*nodeBel(:,t(l))'; -end \ No newline at end of file diff --git a/demo/ch08/mrf_demo.m b/demo/ch08/mrf_demo.m index 5cede61..b15942f 100644 --- a/demo/ch08/mrf_demo.m +++ b/demo/ch08/mrf_demo.m @@ -1,65 +1,57 @@ +% Done! 
clear; close all; -%% Original image +% load letterA.mat; +% X = A; load letterX.mat +%% Original image img = double(X); img = sign(img-mean(img(:))); figure; -subplot(2,3,1); +subplot(2,2,1); imagesc(img); title('Original image'); axis image; colormap gray; %% Noisy image -y = img + sigma*randn(size(img)); % noisy signal - -subplot(2,3,2); -imagesc(y); +sigma = 1; % noise level +x = img + sigma*randn(size(img)); % noisy signal +subplot(2,2,2); +imagesc(x); title('Noisy image'); axis image; colormap gray; -%% Parameters -epoch = 50; -J = 1; % Ising parameter -sigma = 1; % noise level +%% Construct MRF data +epoch = 20; +J = 1; % ising parameter +[A,nodePot,edgePot] = mrfIsGa(x,sigma,J); %% Mean Field -[A, nodePot, edgePot] = im2mrf(y, J, sigma); -[nodeBel, edgeBel] = mrfMeanField(A, nodePot, edgePot, epoch); -lnZ = gibbsEnergy(nodePot, edgePot, nodeBel, edgeBel); -lnZ0 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); -maxdiff(lnZ, lnZ0) +[nodeBel0,edgeBel0,lnZ0] = mrfMf(A,nodePot,edgePot,epoch); -subplot(2,3,4); -imagesc(reshape(nodeBel(1,:),size(img))); -title('Mean Field'); -axis image; -colormap gray; -%% Ising Mean Field -h = reshape(0.5*diff(nodePot),size(img)); -mu = isingMeanField(J, h, epoch); -maxdiff(reshape(mu,1,[]), [1,-1]*nodeBel) +L0 = mrfGibbs(A,nodePot,edgePot,nodeBel0); +L1 = mrfBethe(A,nodePot,edgePot,nodeBel0,edgeBel0); +maxdiff(L0,lnZ0(end)) +maxdiff(L0,L1) -subplot(2,3,3); -imagesc(mu) -title('Ising Mean Field'); +subplot(2,2,3); +imagesc(reshape(nodeBel0(1,:),size(img))); +title('Mean Field'); axis image; colormap gray; %% Belief Propagation -[nodeBel,edgeBel] = mrfBelProp(A, nodePot, edgePot, epoch); -lnZ = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); - -subplot(2,3,5); -imagesc(reshape(nodeBel(1,:),size(img))); -title('Belief propagation'); -axis image; -colormap gray; -%% Expectation Propagation -[nodeBel,edgeBel] = mrfExpProp(A, nodePot, edgePot, epoch); -lnZ0 = betheEnergy(A, nodePot, edgePot, nodeBel, edgeBel); -maxdiff(lnZ, 
lnZ0) +[nodeBel1,edgeBel1,lnZ1] = mrfBp(A,nodePot,edgePot,epoch); -subplot(2,3,6); -imagesc(reshape(nodeBel(1,:),size(img))); -title('Expectation Propagation'); +subplot(2,2,4); +imagesc(reshape(nodeBel1(1,:),size(img))); +title('Belief Propagation'); axis image; colormap gray; +%% Energy comparation +figure +epochs = 1:epoch; +plot( epochs,lnZ0,'-', ... + epochs,lnZ1,'-'); +xlabel('epoch'); % add axis labels and plot title +ylabel('energy'); +title('Energy Comparation'); +legend('MF','BP'); \ No newline at end of file From d858d5d692091a31f25f786200513bec4a506fe0 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 15 Nov 2018 19:58:52 +0800 Subject: [PATCH 078/119] fix some demos --- demo/ch04/logitBin_demo.m | 6 +++--- demo/ch07/rvmBinEm_demo.m | 2 +- demo/ch07/rvmBinFp_demo.m | 2 +- demo/ch09/rvmBinEm_demo.m | 13 ------------- 4 files changed, 5 insertions(+), 18 deletions(-) delete mode 100644 demo/ch09/rvmBinEm_demo.m diff --git a/demo/ch04/logitBin_demo.m b/demo/ch04/logitBin_demo.m index dd2c020..502aedf 100644 --- a/demo/ch04/logitBin_demo.m +++ b/demo/ch04/logitBin_demo.m @@ -6,9 +6,9 @@ d = 2; k = 2; n = 1000; -[X,y] = kmeansRnd(d,k,n); -[model, llh] = logitBin(X,y-1); +[X,t] = kmeansRnd(d,k,n); +[model, llh] = logitBin(X,t-1); plot(llh); -t = logitBinPred(model,X)+1; +y = logitBinPred(model,X)+1; figure binPlot(model,X,y) \ No newline at end of file diff --git a/demo/ch07/rvmBinEm_demo.m b/demo/ch07/rvmBinEm_demo.m index 039e856..54ed1bb 100644 --- a/demo/ch07/rvmBinEm_demo.m +++ b/demo/ch07/rvmBinEm_demo.m @@ -9,4 +9,4 @@ plot(llh); y = rvmBinPred(model,X)+1; figure; -binPlot(model,X,y); +plotClass(X,y); diff --git a/demo/ch07/rvmBinFp_demo.m b/demo/ch07/rvmBinFp_demo.m index 2dcb2ae..ff1f823 100644 --- a/demo/ch07/rvmBinFp_demo.m +++ b/demo/ch07/rvmBinFp_demo.m @@ -9,4 +9,4 @@ plot(llh); y = rvmBinPred(model,X)+1; figure; -binPlot(model,X,y); +plotClass(X,y); diff --git a/demo/ch09/rvmBinEm_demo.m b/demo/ch09/rvmBinEm_demo.m deleted file mode 100644 
index f15ae6e..0000000 --- a/demo/ch09/rvmBinEm_demo.m +++ /dev/null @@ -1,13 +0,0 @@ -%% RVM classification via EM -clear; close all -k = 2; -d = 2; -n = 1000; -[X,t] = kmeansRnd(d,k,n); -[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); - -[model, llh] = rvmBinEm(X,t-1); -plot(llh); -y = rvmBinPred(model,X)+1; -figure; -binPlot(model,X,y); \ No newline at end of file From 75933f7a1dfab3a31f50e46ec900c78fbcab3cc8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 15 Nov 2018 22:33:56 +0800 Subject: [PATCH 079/119] minor tweak --- chapter12/ppcaVb.m | 10 +++++----- demo/ch12/ppcaVb_demo.m | 6 ++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/chapter12/ppcaVb.m b/chapter12/ppcaVb.m index 2c6b249..a07b4e8 100644 --- a/chapter12/ppcaVb.m +++ b/chapter12/ppcaVb.m @@ -1,4 +1,4 @@ -function [model, energy] = ppcaVb(X, q, prior) +function [model, L] = ppcaVb(X, q, prior) % Perform variatioanl Bayeisan inference for probabilistic PCA model. 
% Input: % X: d x n data matrix @@ -27,7 +27,7 @@ end tol = 1e-6; maxIter = 500; -energy = -inf(1,maxIter); +L = -inf(1,maxIter); mu = mean(X,2); Xo = bsxfun(@minus, X, mu); @@ -67,10 +67,10 @@ % Emu = Ebeta/(lambda+n*Ebeta)*sum(X-WZ,2); % lower bound - energy(iter) = KLalpha+KLbeta+KLW+KLZ; - if energy(iter)-energy(iter-1) < tol*abs(energy(iter-1)); break; end + L(iter) = KLalpha+KLbeta+KLW+KLZ; + if L(iter)-L(iter-1) < tol*abs(L(iter-1)); break; end end -energy = energy(2:iter); +L = L(2:iter); model.Z = EZ; model.W = EW; diff --git a/demo/ch12/ppcaVb_demo.m b/demo/ch12/ppcaVb_demo.m index 074fce3..ac43da3 100644 --- a/demo/ch12/ppcaVb_demo.m +++ b/demo/ch12/ppcaVb_demo.m @@ -1,5 +1,4 @@ % demos for ch12 - clear; close all; d = 3; m = 2; @@ -7,7 +6,6 @@ X = ppcaRnd(m,d,n); plotClass(X); - %% Variational Bayesian probabilistic PCA -[model, energy] = ppcaVb(X); -plot(energy); +[model, L] = ppcaVb(X); +plot(L); From fcf7e0d285260eddf0dcec1a62e39d387c15234f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 15 Nov 2018 22:35:49 +0800 Subject: [PATCH 080/119] fix typo --- chapter12/ppcaVb.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter12/ppcaVb.m b/chapter12/ppcaVb.m index a07b4e8..e60d523 100644 --- a/chapter12/ppcaVb.m +++ b/chapter12/ppcaVb.m @@ -5,7 +5,7 @@ % q: dimension of target space % Output: % model: trained model structure -% ernergy: variantional lower bound +% L: variantional lower bound % Reference: % Pattern Recognition and Machine Learning by Christopher M. Bishop % Written by Mo Chen (sth4nth@gmail.com). 
From 2fb33395dd6061b87955a855ef799ad88d359d0d Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 20 Nov 2018 19:11:42 +0800 Subject: [PATCH 081/119] rewrite mlp and backpropagation for regression --- chapter05/mlp.m | 39 ------------------ chapter05/mlpReg.m | 59 +++++++++++++++++++++++++++ chapter05/{mlpPred.m => mlpRegPred.m} | 14 ++++--- demo/ch05/mlp_demo.m | 22 ++++++---- 4 files changed, 82 insertions(+), 52 deletions(-) delete mode 100644 chapter05/mlp.m create mode 100644 chapter05/mlpReg.m rename chapter05/{mlpPred.m => mlpRegPred.m} (53%) diff --git a/chapter05/mlp.m b/chapter05/mlp.m deleted file mode 100644 index df987b1..0000000 --- a/chapter05/mlp.m +++ /dev/null @@ -1,39 +0,0 @@ -function [model, mse] = mlp(X, T, h) -% Train a multilayer perceptron neural network -% Input: -% X: d x n data matrix -% T: p x n response matrix -% h: L x 1 vector specify number of hidden nodes in each layer l -% Ouput: -% model: model structure -% mse: mean square error -% Written by Mo Chen (sth4nth@gmail.com). 
-eta = 1/size(X,2); -h = [size(X,1);h(:);size(T,1)]; -L = numel(h); -W = cell(L-1,1); -for l = 1:L-1 - W{l} = randn(h(l),h(l+1)); -end -Z = cell(L,1); -Z{1} = X; -maxiter = 200; -mse = zeros(1,maxiter); -for iter = 1:maxiter -% forward - for l = 2:L - Z{l} = sigmoid(W{l-1}'*Z{l-1}); % 5.10, 5.49 - end -% backward - E = T-Z{L}; - mse(iter) = mean(dot(E,E),1); - for l = L-1:-1:1 - df = Z{l+1}.*(1-Z{l+1}); - dG = df.*E; - dW = Z{l}*dG'; - W{l} = W{l}+eta*dW; - E = W{l}*dG; - end -end -mse = mse(1:iter); -model.W = W; \ No newline at end of file diff --git a/chapter05/mlpReg.m b/chapter05/mlpReg.m new file mode 100644 index 0000000..caf42d1 --- /dev/null +++ b/chapter05/mlpReg.m @@ -0,0 +1,59 @@ +function [model, L] = mlpReg(X,Y,k,lambda) +% Train a multilayer perceptron neural network +% Input: +% X: d x n data matrix +% Y: p x n response matrix +% k: T x 1 vector to specify number of hidden nodes in each layer +% lambda: regularization parameter +% Ouput: +% model: model structure +% L: loss +% Written by Mo Chen (sth4nth@gmail.com). 
+if nargin < 4 + lambda = 1e-2; +end +eta = 1e-3; +maxiter = 50000; +L = inf(1,maxiter); + +k = [size(X,1);k(:);size(Y,1)]; +T = numel(k)-1; +W = cell(T,1); +b = cell(T,1); +for t = 1:T + W{t} = randn(k(t),k(t+1)); + b{t} = randn(k(t+1),1); +end +R = cell(T,1); +Z = cell(T+1,1); +Z{1} = X; +for iter = 2:maxiter +% forward + for t = 1:T-1 + Z{t+1} = tanh(W{t}'*Z{t}+b{t}); + end + Z{T+1} = W{T}'*Z{T}+b{T}; + +% loss + E = Z{T+1}-Y; + Wn = cellfun(@(x) dot(x(:),x(:)),W); % |W|^2 + L(iter) = dot(E(:),E(:))+lambda*sum(Wn); + +% backward + R{T} = E; % delta + for t = T-1:-1:1 + df = 1-Z{t+1}.^2; % h'(a) + R{t} = df.*(W{t+1}*R{t+1}); % delta + end + +% gradient descent + for t=1:T + dW = Z{t}*R{t}'+lambda*W{t}; + db = sum(R{t},2); + W{t} = W{t}-eta*dW; + b{t} = b{t}-eta*db; + end +end +L = L(1,2:iter); +model.W = W; +model.b = b; diff --git a/chapter05/mlpPred.m b/chapter05/mlpRegPred.m similarity index 53% rename from chapter05/mlpPred.m rename to chapter05/mlpRegPred.m index 0ce5fb1..ce71bc5 100644 --- a/chapter05/mlpPred.m +++ b/chapter05/mlpRegPred.m @@ -1,4 +1,4 @@ -function Y = mlpPred(model, X) +function Y = mlpRegPred(model, X) % Multilayer perceptron prediction % Input: % model: model structure @@ -7,7 +7,11 @@ % Y: p x n response matrix % Written by Mo Chen (sth4nth@gmail.com). 
W = model.W; -Y = X; -for l = 1:length(W) - Y = sigmoid(W{l}'*Y); -end \ No newline at end of file +b = model.b; +T = length(W); +Z = cell(T+1,1); +Z{1} = X; +for t = 1:T-1 + Z{t+1} = tanh(W{t}'*Z{t}+b{t}); +end +Y = W{T}'*Z{T}+b{T}; \ No newline at end of file diff --git a/demo/ch05/mlp_demo.m b/demo/ch05/mlp_demo.m index 9e55c26..75c170a 100644 --- a/demo/ch05/mlp_demo.m +++ b/demo/ch05/mlp_demo.m @@ -1,9 +1,15 @@ clear; close all; -h = [4,5]; -X = [0 0 1 1;0 1 0 1]; -T = [0 1 1 0]; -[model,mse] = mlp(X,T,h); -plot(mse); -disp(['T = [' num2str(T) ']']); -Y = mlpPred(model,X); -disp(['Y = [' num2str(Y) ']']); \ No newline at end of file +n = 200; +x = linspace(0,2*pi,n); +y = sin(x); + +k = [3,4]; % two hidden layers with 3 and 4 hidden nodes +lambda = 1e-2; +[model, L] = mlpReg(x,y,k); +t = mlpRegPred(model,x); +plot(L); +figure; +hold on +plot(x,y,'.'); +plot(x,t); +hold off \ No newline at end of file From b63676b131d4b672f08a6b29e3cb59ebc9a7e28a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 20 Nov 2018 19:13:27 +0800 Subject: [PATCH 082/119] tweak mlpRegPred.m --- chapter05/mlpRegPred.m | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/chapter05/mlpRegPred.m b/chapter05/mlpRegPred.m index ce71bc5..e3bba3f 100644 --- a/chapter05/mlpRegPred.m +++ b/chapter05/mlpRegPred.m @@ -9,9 +9,8 @@ W = model.W; b = model.b; T = length(W); -Z = cell(T+1,1); -Z{1} = X; +Y = X; for t = 1:T-1 - Z{t+1} = tanh(W{t}'*Z{t}+b{t}); + Y = tanh(W{t}'*Y+b{t}); end -Y = W{T}'*Z{T}+b{T}; \ No newline at end of file +Y = W{T}'*Y+b{T}; \ No newline at end of file From 48cf34b2f014453bf5643a6192850ba64438cb87 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 24 Nov 2018 01:30:19 +0800 Subject: [PATCH 083/119] Create LICENSE --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e37e360 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright 
(c) 2018 Mo Chen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. From bd8c51c0ae01c10c8894087cb743c9002de4e7cc Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 24 Nov 2018 01:31:12 +0800 Subject: [PATCH 084/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 73840b2..8f71920 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ If you found any bug or have any suggestion, please do file issues. 
I am gracefu License ------- -Currently Released Under GPLv3 +Released Under MIT License Contact From e21c48947c2ac369716c34744abeca3d97ccd9e6 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 24 Nov 2018 01:51:50 +0800 Subject: [PATCH 085/119] Update README.md --- README.md | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 8f71920..9a8ea43 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,33 @@ Introduction ------- -This package is a Matlab implementation of the algorithms described in the classical machine learning textbook: +This package is a Matlab implementation of the algorithms described in the machine learning textbook: Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.microsoft.com/en-us/um/people/cmbishop/prml/)). -Note: this package requires Matlab **R2016b** or latter, since it utilizes a new syntax of Matlab called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting in Python). +Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Matlab syntax called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting). -Description +Design Goal ------- -While developing this package, I stick to following principles - -* Succinct: The code is extremely terse. Minimizing the number of lines is one of the primal goals. As a result, the core of the algorithms can be easily spot. -* Efficient: Many tricks for making Matlab scripts fast were applied (eg. vectorization and matrix factorization). Many functions are even comparable with C implementations. 
Usually, functions in this package are orders faster than Matlab builtin ones which provide the same functionality (eg. kmeans). If anyone have found any Matlab implementation that is faster than mine, I am happy to further optimize. -* Robust: Many tricks for numerical stability are applied, such as probability computation in log scale and square root matrix update to enforce matrix symmetry, etc. -* Readable: The code is heavily commented. Reference formulas in PRML book are indicated for corresponding code lines. Symbols are in sync with the book. -* Practical: The package is designed not only to be easily read, but also to be easily used to facilitate ML research. Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). +* Succinct: The code is extremely compact. Minimizing code length is a major goal. As a result, the core of the algorithms can be easily spotted. +* Efficient: Many tricks to speedup Matlab code were applied (eg. vectorization, matrix factorization, etc.). Usually, functions in this package are orders faster than Matlab builtin ones (eg. kmeans). +* Robust: Many tricks for numerical stability are applied, such as computing probability in log domain, square root matrix update to enforce matrix symmetry\PD, etc. +* Readable: The code is heavily commented. Corresponding formulas in PRML are annoted. Symbols are in sync with the book. +* Practical: The package is not only readable, but also meant to be easily used and modified to facilitate ML research. Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). Installation ------- -1. Download the package to your local path (e.g. PRMLT/) by running: `git clone https://github.com/PRML/PRMLT.git`. +1. Download the package to a local folder (e.g. 
~/PRMLT/) by running: `git clone https://github.com/PRML/PRMLT.git`. -2. Run Matlab and navigate to PRMLT/, then run the init.m script. +2. Run Matlab and navigate to the folder (~/PRMLT/), then run the init.m script. -3. Try demos in PRMLT/demo directory to verify installation correctness. Enjoy! +3. Run some demos in ~/PRMLT/demo folder. Enjoy! FeedBack ------- -If you found any bug or have any suggestion, please do file issues. I am graceful for any feedback and will do my best to improve this package. +If you find any bug or have any suggestion, please do file issues. I am graceful for any feedback and will do my best to improve this package. License ------- -Released Under MIT License - +Released under MIT license Contact ------- From 83dbea0ad3e39b074d0cf54977fe4496d20e0c1e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 24 Nov 2018 01:59:54 +0800 Subject: [PATCH 086/119] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9a8ea43..703c0f4 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ Introduction ------- -This package is a Matlab implementation of the algorithms described in the machine learning textbook: +This Matlab package implementes machine learning algorithms described in the great textbook: Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.microsoft.com/en-us/um/people/cmbishop/prml/)). -Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Matlab syntax called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting). +It is written purely in Matlab language. It is self-contained. There is no outside denpency. 
+ +Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Matlab syntax called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting). It also requires statistical toolbox (for some simple random number generator) and image processing box (for reading image data). Design Goal ------- From 0c2a768b32262f62a839afb61ee9d7459ddc3ff0 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 24 Nov 2018 02:00:55 +0800 Subject: [PATCH 087/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 703c0f4..a4645c5 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.mi It is written purely in Matlab language. It is self-contained. There is no outside denpency. -Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Matlab syntax called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting). It also requires statistical toolbox (for some simple random number generator) and image processing box (for reading image data). +Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Matlab syntax called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting). It also requires statistical toolbox (for some simple random number generator) and image processing toolbox (for reading image data). 
Design Goal ------- From 364201b5f2cdbfe81fd9df2272f7bd457cbdac5f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 24 Nov 2018 02:02:47 +0800 Subject: [PATCH 088/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a4645c5..c618080 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.mi It is written purely in Matlab language. It is self-contained. There is no outside denpency. -Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Matlab syntax called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting). It also requires statistical toolbox (for some simple random number generator) and image processing toolbox (for reading image data). +Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Matlab syntax called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting). It also requires Statistics Toolbox (for some simple random number generator) and Image Processing Toolbox (for reading image data). 
Design Goal ------- From bb05b4c5e523131dd01fad85baf2692cf1c9b95e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 24 Nov 2018 02:03:57 +0800 Subject: [PATCH 089/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c618080..7223bd0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ Introduction ------- -This Matlab package implementes machine learning algorithms described in the great textbook: +This Matlab package implements machine learning algorithms described in the great textbook: Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.microsoft.com/en-us/um/people/cmbishop/prml/)). It is written purely in Matlab language. It is self-contained. There is no outside denpency. From 86686b86be7cd8c499780d087fc5aaaaa950150d Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 24 Nov 2018 02:08:59 +0800 Subject: [PATCH 090/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7223bd0..1817d05 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Introduction This Matlab package implements machine learning algorithms described in the great textbook: Pattern Recognition and Machine Learning by C. Bishop ([PRML](http://research.microsoft.com/en-us/um/people/cmbishop/prml/)). -It is written purely in Matlab language. It is self-contained. There is no outside denpency. +It is written purely in Matlab language. It is self-contained. There is no external dependency. Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Matlab syntax called [Implicit expansion](https://cn.mathworks.com/help/matlab/release-notes.html?rntext=implicit+expansion&startrelease=R2016b&endrelease=R2016b&groupby=release&sortby=descending) (a.k.a. broadcasting). It also requires Statistics Toolbox (for some simple random number generator) and Image Processing Toolbox (for reading image data). 
From 25a456ed4c20db65863532cf63f55f82c18e934a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 27 Nov 2018 15:54:22 +0800 Subject: [PATCH 091/119] Set theme jekyll-theme-midnight --- _config.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 _config.yml diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..1885487 --- /dev/null +++ b/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-midnight \ No newline at end of file From 700df90820b1239f4e2739a778dbc59bcb697cfc Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 27 Nov 2018 15:55:58 +0800 Subject: [PATCH 092/119] Set theme jekyll-theme-modernist --- _config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_config.yml b/_config.yml index 1885487..cc35c1d 100644 --- a/_config.yml +++ b/_config.yml @@ -1 +1 @@ -theme: jekyll-theme-midnight \ No newline at end of file +theme: jekyll-theme-modernist \ No newline at end of file From 96db6fb5049a234df0bca71192a1e719a588fd02 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 27 Nov 2018 15:57:54 +0800 Subject: [PATCH 093/119] Set theme jekyll-theme-midnight --- _config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_config.yml b/_config.yml index cc35c1d..1885487 100644 --- a/_config.yml +++ b/_config.yml @@ -1 +1 @@ -theme: jekyll-theme-modernist \ No newline at end of file +theme: jekyll-theme-midnight \ No newline at end of file From 0763001c9491b6efd2ffb1f9990f6f5d61b7f84b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 27 Nov 2018 17:00:57 +0800 Subject: [PATCH 094/119] Delete _config.yml --- _config.yml | 1 - 1 file changed, 1 deletion(-) delete mode 100644 _config.yml diff --git a/_config.yml b/_config.yml deleted file mode 100644 index 1885487..0000000 --- a/_config.yml +++ /dev/null @@ -1 +0,0 @@ -theme: jekyll-theme-midnight \ No newline at end of file From 2025472ae9eac4d7a386c9b9e385b5acd5da3ed3 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 27 Nov 2018 20:02:09 +0800 Subject: [PATCH 
095/119] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1817d05..ddd02c6 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,10 @@ Design Goal Installation ------- -1. Download the package to a local folder (e.g. ~/PRMLT/) by running: `git clone https://github.com/PRML/PRMLT.git`. - +1. Download the package to a local folder (e.g. ~/PRMLT/) by running: +```console +git clone https://github.com/PRML/PRMLT.git +``` 2. Run Matlab and navigate to the folder (~/PRMLT/), then run the init.m script. 3. Run some demos in ~/PRMLT/demo folder. Enjoy! From 0523c2c8ab7ebf3a22d7616b91643e147c235071 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 29 Nov 2018 02:55:51 +0800 Subject: [PATCH 096/119] refactor kalmanFilter and fix kalmanSmoother --- chapter13/LDS/kalmanFilter.m | 4 ++-- chapter13/LDS/kalmanSmoother.m | 28 ++++++++++++++-------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/chapter13/LDS/kalmanFilter.m b/chapter13/LDS/kalmanFilter.m index 0005ee6..19a4137 100644 --- a/chapter13/LDS/kalmanFilter.m +++ b/chapter13/LDS/kalmanFilter.m @@ -32,11 +32,11 @@ llh(1) = logGauss(X(:,1),C*mu0,R); for i = 2:n [mu(:,i), V(:,:,i), llh(i)] = ... 
- forwardStep(X(:,i), mu(:,i-1), V(:,:,i-1), A, G, C, S, I); + forwardUpdate(X(:,i), mu(:,i-1), V(:,:,i-1), A, G, C, S, I); end llh = sum(llh); -function [mu, V, llh] = forwardStep(x, mu, V, A, G, C, S, I) +function [mu, V, llh] = forwardUpdate(x, mu, V, A, G, C, S, I) P = A*V*A'+G; % 13.88 PC = P*C'; R = C*PC+S; diff --git a/chapter13/LDS/kalmanSmoother.m b/chapter13/LDS/kalmanSmoother.m index 8254230..f6ed2e9 100644 --- a/chapter13/LDS/kalmanSmoother.m +++ b/chapter13/LDS/kalmanSmoother.m @@ -1,4 +1,4 @@ -function [nu, U, Ezz, Ezy, llh] = kalmanSmoother(model, X) +function [nu, U, llh, Ezz, Ezy] = kalmanSmoother(model, X) % Kalman smoother (forward-backward algorithm for linear dynamic system) % NOTE: This is the exact implementation of the Kalman smoother algorithm in PRML. % However, this algorithm is not practical. It is numerical unstable. @@ -26,20 +26,19 @@ P = zeros(q,q,n); % C_{t+1|t} Amu = zeros(q,n); % u_{t+1|t} llh = zeros(1,n); -I = eye(q); % forward PC = P0*C'; R = C*PC+S; K = PC/R; mu(:,1) = mu0+K*(X(:,1)-C*mu0); -V(:,:,1) = (I-K*C)*P0; +V(:,:,1) = (eye(q)-K*C)*P0; P(:,:,1) = P0; % useless, just make a point Amu(:,1) = mu0; % useless, just make a point llh(1) = logGauss(X(:,1),C*mu0,R); for i = 2:n [mu(:,i), V(:,:,i), Amu(:,i), P(:,:,i), llh(i)] = ... - forwardStep(X(:,i), mu(:,i-1), V(:,:,i-1), A, G, C, S, I); + forwardUpdate(X(:,i), mu(:,i-1), V(:,:,i-1), A, G, C, S); end llh = sum(llh); % backward @@ -53,24 +52,25 @@ Ezz(:,:,n) = U(:,:,n)+nu(:,n)*nu(:,n)'; for i = n-1:-1:1 [nu(:,i), U(:,:,i), Ezz(:,:,i), Ezy(:,:,i)] = ... 
- backwardStep(nu(:,i+1), U(:,:,i+1), mu(:,i), V(:,:,i), Amu(:,i+1), P(:,:,i+1), A); + backwardUpdate(nu(:,i+1), U(:,:,i+1), mu(:,i), V(:,:,i), Amu(:,i+1), P(:,:,i+1), A); end -function [mu, V, Amu, P, llh] = forwardStep(x, mu0, V0, A, G, C, S, I) +function [mu1, V1, Amu, P, llh] = forwardUpdate(x, mu0, V0, A, G, C, S) +k = numel(mu0); P = A*V0*A'+G; % 13.88 PC = P*C'; R = C*PC+S; K = PC/R; % 13.92 Amu = A*mu0; CAmu = C*Amu; -mu = Amu+K*(x-CAmu); % 13.89 -V = (I-K*C)*P; % 13.90 +mu1 = Amu+K*(x-CAmu); % 13.89 +V1 = (eye(k)-K*C)*P; % 13.90 llh = logGauss(x,CAmu,R); % 13.91 -function [nu, U, Ezz, Ezy] = backwardStep(nu0, U0, mu, V, Amu, P, A) -J = V*A'/P; % 13.102 -nu = mu+J*(nu0-Amu); % 13.100 -U = V+J*(U0-P)*J'; % 13.101 -Ezy = J*U0+nu0*nu'; % 13.106 -Ezz = U+nu*nu'; % 13.107 \ No newline at end of file +function [nu0, U0, E00, E10] = backwardUpdate(nu1, U1, mu, V, Amu, P, A) +J = V*A'/P; % 13.102 +nu0 = mu+J*(nu1-Amu); % 13.100 +U0 = V+J*(U1-P)*J'; % 13.101 +E00 = U0+nu0*nu0'; % 13.107 +E10 = U1*J'+nu1*nu0'; % 13.106 From 469aa06bce9c89c50c55dceb6b901810cc1aa181 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 29 Nov 2018 03:01:30 +0800 Subject: [PATCH 097/119] fix ldsEm --- chapter13/LDS/kalmanSmoother.m | 8 ++-- chapter13/LDS/ldsEm.m | 76 +++++++++++++++++----------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/chapter13/LDS/kalmanSmoother.m b/chapter13/LDS/kalmanSmoother.m index f6ed2e9..5a8ee7b 100644 --- a/chapter13/LDS/kalmanSmoother.m +++ b/chapter13/LDS/kalmanSmoother.m @@ -57,15 +57,15 @@ function [mu1, V1, Amu, P, llh] = forwardUpdate(x, mu0, V0, A, G, C, S) k = numel(mu0); -P = A*V0*A'+G; % 13.88 +P = A*V0*A'+G; % 13.88 PC = P*C'; R = C*PC+S; -K = PC/R; % 13.92 +K = PC/R; % 13.92 Amu = A*mu0; CAmu = C*Amu; mu1 = Amu+K*(x-CAmu); % 13.89 -V1 = (eye(k)-K*C)*P; % 13.90 -llh = logGauss(x,CAmu,R); % 13.91 +V1 = (eye(k)-K*C)*P; % 13.90 +llh = logGauss(x,CAmu,R); % 13.91 function [nu0, U0, E00, E10] = backwardUpdate(nu1, U1, mu, V, Amu, 
P, A) diff --git a/chapter13/LDS/ldsEm.m b/chapter13/LDS/ldsEm.m index 7f283e4..0187a54 100644 --- a/chapter13/LDS/ldsEm.m +++ b/chapter13/LDS/ldsEm.m @@ -1,60 +1,60 @@ -function [model, llh] = ldsEm(X, init) +function [model, llh] = ldsEm(X, m) % EM algorithm for parameter estimation of linear dynamic system. -% NOTE: This is the exact implementation of the EM algorithm in PRML. -% However, this algorithm is not practical. It is numerical unstable and -% there is too much redundant degree of freedom. +% NOTE: This is an exact implementation of the algorithm in PRML. +% However, this algorithm is numerical unstable and there is much redundant degree of freedom. % Input: % X: d x n data matrix -% model: prior model structure +% m: initilaization parameter, either a integer for dimension of z or +% initi model structure. % Output: % model: trained model structure % llh: loglikelihood +% reference: Bayesian Reasoning and Machine Learning (BRML) % Written by Mo Chen (sth4nth@gmail.com). -d = size(X,1); -if isstruct(init) % init with a model - model = init; -elseif numel(init) == 1 % random init with latent k - k = init; - model.A = randn(k,k); - model.G = iwishrnd(eye(k),k); - model.C = randn(d,k); - model.S = iwishrnd(eye(d),d); - model.mu0 = randn(k,1); - model.P0 = iwishrnd(eye(k),k); +if isstruct(m) % init with a model + model = m; +elseif numel(m) == 1 % random init with latent dimension m + model = init(X,m); end -tol = 1e-2; -maxIter = 100; +tol = 1e-4; +maxIter = 1000; llh = -inf(1,maxIter); for iter = 2:maxIter % E-step - [nu, U, Ezz, Ezy, llh(iter)] = kalmanSmoother(model,X); - if llh(iter)-llh(iter-1) < tol*abs(llh(iter-1)); break; end % check likelihood for convergence + [nu, U, llh(iter),Ezz, Ezy] = kalmanSmoother(model,X); + if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end % check likelihood for convergence % M-step - model = maximization(X, nu, U, Ezz, Ezy); + model = maximization(model, X, nu, U, Ezz, Ezy); end llh = llh(2:iter); 
-function model = maximization(X ,nu, U, Ezz, Ezy) +function model = init(X, k) +d = size(X,1); +model.mu0 = randn(k,1); +model.P0 = iwishrnd(eye(k),k); +model.A = randn(k,k); +model.G = iwishrnd(eye(k),k); +model.C = randn(d,k); +model.S = iwishrnd(eye(d),d); + + +function model = maximization(model, X ,nu, U, Ezz, Ezy) n = size(X,2); -mu0 = nu(:,1); -P0 = U(:,:,1); +mu0 = nu(:,1); % 13.110 +P0 = U(:,:,1); % 13.111, 13.107 -Ezzn = sum(Ezz,3); -Ezz1 = Ezzn-Ezz(:,:,n); -Ezz2 = Ezzn-Ezz(:,:,1); -Ezy = sum(Ezy,3); +EZZ = sum(Ezz,3); +EZY = sum(Ezy,3); +A = EZY/(EZZ-Ezz(:,:,n)); % 13.113 +G = (EZZ-Ezz(:,:,1)-EZY*A')/(n-1); % 13.114, BRML 24.5.12 -A = Ezy/Ezz1; % 13.113 -EzyA = Ezy*A'; -G = (Ezz2-(EzyA+EzyA')+A*Ezz1*A')/(n-1); % 13.114 Xnu = X*nu'; -C = Xnu/Ezzn; % 13.115 -XnuC = Xnu*C'; -S = (X*X'-(XnuC+XnuC')+C*Ezzn*C')/n; % 13.116 +C = Xnu/EZZ; % 13.115 +S = (X*X'-Xnu*C')/n; % 13.116, BRML 24.5.11 -model.A = A; -model.G = G; -model.C = C; -model.S = S; model.mu0 = mu0; model.P0 = P0; +model.A = A; +model.G = (G+G')/2; +model.C = C; +model.S = (S+S')/2; \ No newline at end of file From ccbf6eaca546c8de9cf6d6e2579388973ceaadf1 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 29 Nov 2018 03:08:05 +0800 Subject: [PATCH 098/119] rewrite ldsRnd and lds_demo --- chapter13/LDS/ldsRnd.m | 28 ++++++++-------- demo/ch13/lds_demo.m | 76 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 80 insertions(+), 24 deletions(-) diff --git a/chapter13/LDS/ldsRnd.m b/chapter13/LDS/ldsRnd.m index 1f4cb12..db73531 100644 --- a/chapter13/LDS/ldsRnd.m +++ b/chapter13/LDS/ldsRnd.m @@ -1,4 +1,4 @@ -function [X, Z, model] = ldsRnd(d, k, n) +function [Z, X] = ldsRnd(model, n) % Generate a data sequence from linear dynamic system. % Input: % d: dimension of data @@ -8,25 +8,27 @@ % X: d x n data matrix % model: model structure % Written by Mo Chen (sth4nth@gmail.com). 
-A = randn(k,k); -G = iwishrnd(eye(k),k); -C = randn(d,k); -S = iwishrnd(eye(d),d); -mu0 = randn(k,1); -P0 = iwishrnd(eye(k),k); +mu0 = model.mu0; +P0 = model.P0; +A = model.A; +G = model.G; +C = model.C; +S = model.S; + +k = size(G,1); +d = size(S,1); X = zeros(d,n); Z = zeros(k,n); -Z(:,1) = gaussRnd(mu0,P0); % 13.80 +Z(:,1) = gaussRnd(mu0,P0); % 13.80 X(:,1) = gaussRnd(C*Z(:,1),S); for i = 2:n - Z(:,i) = gaussRnd(A*Z(:,i-1),G); % 13.75, 13.78 - X(:,i) = gaussRnd(C*Z(:,i),S); % 13.76, 13.79 + Z(:,i) = gaussRnd(A*Z(:,i-1),G); % 13.75, 13.78 + X(:,i) = gaussRnd(C*Z(:,i),S); % 13.76, 13.79 end - +model.mu0 = mu0; % prior mean +model.P0 = P0; % prior covairance model.A = A; % transition matrix model.G = G; % transition covariance model.C = C; % emission matrix model.S = S; % emision covariance -model.mu0 = mu0; % prior mean -model.P0 = P0; % prior covairance diff --git a/demo/ch13/lds_demo.m b/demo/ch13/lds_demo.m index 8c0b30e..ba9bc50 100644 --- a/demo/ch13/lds_demo.m +++ b/demo/ch13/lds_demo.m @@ -1,14 +1,68 @@ -% demos for LDS in ch13 +close all; +%% generate data +clear; +d = 2; +k = 4; +n = 50; -clear; close all; -d = 3; -k = 2; -n = 100; +A = [1 0 1 0; + 0 1 0 1; + 0 0 1 0; + 0 0 0 1]; +G = 0.001*eye(k); -[X,Z,model] = ldsRnd(d,k,n); -[mu, V, llh] = kalmanFilter(model, X); +C = [1 0 0 0; + 0 1 0 0]; +S = eye(d); -[nu, U, Ezz, Ezy, llh] = kalmanSmoother(model, X); -% [model, llh] = ldsEm(X,k); -% plot(llh); -% +mu0 = [8; 10; 1; 0]; +P0 = eye(k); + +model.A = A; +model.G = G; +model.C = C; +model.S = S; +model.mu0 = mu0; +model.P0 = P0; + +[z,x] = ldsRnd(model, n); +figure; +hold on +plot(x(1,:), x(2,:), 'ro'); +plot(z(1,:), z(2,:), 'b*-'); +legend('observed', 'latent') +axis equal +hold off + +%% filter +[mu, V, llh] = kalmanFilter(model, x); +figure +hold on +plot(x(1,:), x(2,:), 'ro'); +plot(mu(1,:), mu(2,:), 'b*-'); +legend('observed', 'filtered') +axis equal +hold off + +%% smoother +[nu, U, llh] = kalmanSmoother(model, x); +figure +hold on +plot(x(1,:), 
x(2,:), 'ro'); +plot(nu(1,:), nu(2,:), 'b*-'); +legend('observed', 'smoothed') +axis equal +hold off + +%% EM +[model, llh] = ldsEm(x,model); +nu = kalmanSmoother(model, x); +figure +hold on +plot(x(1,:), x(2,:), 'ro'); +plot(nu(1,:), nu(2,:), 'b*-'); +legend('observed', 'smoothed with fitted model') +axis equal +hold off +figure; +plot(llh); From 11687b3a261504a3a738d099b07b92c61db86ddb Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 29 Nov 2018 03:12:11 +0800 Subject: [PATCH 099/119] improve ldsEm --- chapter13/LDS/ldsEm.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter13/LDS/ldsEm.m b/chapter13/LDS/ldsEm.m index 0187a54..5d5b421 100644 --- a/chapter13/LDS/ldsEm.m +++ b/chapter13/LDS/ldsEm.m @@ -24,7 +24,7 @@ [nu, U, llh(iter),Ezz, Ezy] = kalmanSmoother(model,X); if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end % check likelihood for convergence % M-step - model = maximization(model, X, nu, U, Ezz, Ezy); + model = maximization(X, nu, U, Ezz, Ezy); end llh = llh(2:iter); @@ -38,7 +38,7 @@ model.S = iwishrnd(eye(d),d); -function model = maximization(model, X ,nu, U, Ezz, Ezy) +function model = maximization(X ,nu, U, Ezz, Ezy) n = size(X,2); mu0 = nu(:,1); % 13.110 P0 = U(:,:,1); % 13.111, 13.107 From 9a248dab2b872cedbaa6e8a5234828e16298a10b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 29 Nov 2018 03:37:54 +0800 Subject: [PATCH 100/119] tweak hmmEm a little --- chapter13/HMM/hmmEm.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter13/HMM/hmmEm.m b/chapter13/HMM/hmmEm.m index 455106b..691409c 100644 --- a/chapter13/HMM/hmmEm.m +++ b/chapter13/HMM/hmmEm.m @@ -21,14 +21,14 @@ E = normalize(rand(k,d),2); end tol = 1e-4; -maxIter = 100; +maxIter = 1000; llh = -inf(1,maxIter); for iter = 2:maxIter M = E*X; % E-step [gamma,alpha,beta,c] = hmmSmoother(M,A,s); llh(iter) = mean(log(c)); - if llh(iter)-llh(iter-1) < tol*abs(llh(iter-1)); break; end % check likelihood for convergence + if 
abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end % check likelihood for convergence % M-step s = gamma(:,1); % 13.18 A = normalize(A.*(alpha(:,1:n-1)*(beta(:,2:n).*M(:,2:n)./c(2:n))'),2); % 13.19 13.43 13.65 From fb04ac210add4ca4d958d58bcdde921c2bd84444 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 29 Nov 2018 03:38:27 +0800 Subject: [PATCH 101/119] add empty ldsPca --- chapter13/LDS/ldsPca.m | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 chapter13/LDS/ldsPca.m diff --git a/chapter13/LDS/ldsPca.m b/chapter13/LDS/ldsPca.m new file mode 100644 index 0000000..8ed035a --- /dev/null +++ b/chapter13/LDS/ldsPca.m @@ -0,0 +1,12 @@ +function [A, C, Z] = ldsPca(X, k, m) +% Subspace method for learning linear dynamic system. +% Input: +% X: d x n data matrix +% k: dimension of hidden variable +% m: +% Output: +% A: +% C: +% Z: +% reference: Bayesian Reasoning and Machine Learning (BRML) chapter 24.5.3 p.507 +% Written by Mo Chen (sth4nth@gmail.com). \ No newline at end of file From d1a3ae66bd6fe8d97141ff09b454ab155f1a240e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 29 Nov 2018 04:26:10 +0800 Subject: [PATCH 102/119] tweak ldsEm a little --- chapter13/LDS/ldsEm.m | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/chapter13/LDS/ldsEm.m b/chapter13/LDS/ldsEm.m index 5d5b421..0ce4d3e 100644 --- a/chapter13/LDS/ldsEm.m +++ b/chapter13/LDS/ldsEm.m @@ -40,8 +40,6 @@ function model = maximization(X ,nu, U, Ezz, Ezy) n = size(X,2); -mu0 = nu(:,1); % 13.110 -P0 = U(:,:,1); % 13.111, 13.107 EZZ = sum(Ezz,3); EZY = sum(Ezy,3); @@ -52,8 +50,8 @@ C = Xnu/EZZ; % 13.115 S = (X*X'-Xnu*C')/n; % 13.116, BRML 24.5.11 -model.mu0 = mu0; -model.P0 = P0; +model.mu0 = nu(:,1); % 13.110 +model.P0 = U(:,:,1); % 13.111, 13.107 model.A = A; model.G = (G+G')/2; model.C = C; From 91cefe8c9643a1ff5faa41dd9e3262b9878cd023 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 30 Nov 2018 00:51:37 +0800 Subject: [PATCH 103/119] add ldsPca --- 
chapter13/LDS/ldsPca.m | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/chapter13/LDS/ldsPca.m b/chapter13/LDS/ldsPca.m index 8ed035a..d484a8c 100644 --- a/chapter13/LDS/ldsPca.m +++ b/chapter13/LDS/ldsPca.m @@ -3,10 +3,18 @@ % Input: % X: d x n data matrix % k: dimension of hidden variable -% m: +% m: stacking order for the Hankel matrix % Output: -% A: -% C: -% Z: +% A: k x k transition matrix +% C: k x d emission matrix +% Z: k x n latent variable +% Y: d x n reconstructed data % reference: Bayesian Reasoning and Machine Learning (BRML) chapter 24.5.3 p.507 -% Written by Mo Chen (sth4nth@gmail.com). \ No newline at end of file +% Written by Mo Chen (sth4nth@gmail.com). +[d,n] = size(X); +H = reshape(X(:,hankel(1:m,m:n)),d*m,[]); +[U,S,V] = svd(H,'econ'); +C = U(1:d,1:k); +Z = S(1:k,1:k)*V(:,1:k)'; +A = Z(:,2:end)/Z(:,1:end-1); % estimated transition +% Y = C*Z; % reconstructions \ No newline at end of file From ca599be9687287c0e0a3db20aeeacc9025f82515 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 30 Nov 2018 01:22:41 +0800 Subject: [PATCH 104/119] update lds_demo. 
TODO: init with ldsPCA in ldsEM --- chapter13/LDS/ldsEm.m | 8 +++++- demo/ch13/lds_demo.m | 61 ++++++++++++++++++++++++++----------------- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/chapter13/LDS/ldsEm.m b/chapter13/LDS/ldsEm.m index 0ce4d3e..7611595 100644 --- a/chapter13/LDS/ldsEm.m +++ b/chapter13/LDS/ldsEm.m @@ -36,7 +36,13 @@ model.G = iwishrnd(eye(k),k); model.C = randn(d,k); model.S = iwishrnd(eye(d),d); - +% [A,C,Z] = ldsPca(X,k,3*k); +% model.mu0 = Z(:,1); +% model.P0 = ; +% model.A = A; +% model.C = C; +% model.G = ; +% model.S = ; function model = maximization(X ,nu, U, Ezz, Ezy) n = size(X,2); diff --git a/demo/ch13/lds_demo.m b/demo/ch13/lds_demo.m index ba9bc50..fe9e421 100644 --- a/demo/ch13/lds_demo.m +++ b/demo/ch13/lds_demo.m @@ -1,21 +1,19 @@ close all; -%% generate data +%% Parameter clear; d = 2; -k = 4; +k = 2; n = 50; -A = [1 0 1 0; - 0 1 0 1; - 0 0 1 0; - 0 0 0 1]; -G = 0.001*eye(k); +A = [1,1; + 0 1]; +G = eye(k)*1e-3; -C = [1 0 0 0; - 0 1 0 0]; -S = eye(d); +C = [1 0; + 0 1]; +S = eye(d)*1e-1; -mu0 = [8; 10; 1; 0]; +mu0 = [0; 0]; P0 = eye(k); model.A = A; @@ -25,43 +23,58 @@ model.mu0 = mu0; model.P0 = P0; -[z,x] = ldsRnd(model, n); +%% Generate data +[z,x] = ldsRnd(model,n); figure; hold on plot(x(1,:), x(2,:), 'ro'); plot(z(1,:), z(2,:), 'b*-'); legend('observed', 'latent') +title('Generated Data') axis equal hold off - -%% filter -[mu, V, llh] = kalmanFilter(model, x); +%% Kalman filter +[mu, V, llh] = kalmanFilter(model,x); figure hold on plot(x(1,:), x(2,:), 'ro'); plot(mu(1,:), mu(2,:), 'b*-'); legend('observed', 'filtered') +title('Kalman filter') axis equal hold off - -%% smoother -[nu, U, llh] = kalmanSmoother(model, x); +%% Kalman smoother +[nu, U, llh] = kalmanSmoother(model,x); figure hold on plot(x(1,:), x(2,:), 'ro'); plot(nu(1,:), nu(2,:), 'b*-'); legend('observed', 'smoothed') +title('Kalman smoother') axis equal hold off - -%% EM -[model, llh] = ldsEm(x,model); -nu = kalmanSmoother(model, x); +%% LDS 
Subspace +[A,C,z] = ldsPca(x,k,3*k); +y = C*z; +t = size(z,2); +figure; +hold on +plot(x(1,1:t), x(2,1:t), 'ro'); +plot(y(1,1:t), y(2,1:t), 'b*-'); +legend('observed', 'projected') +title('LDS subspace learning') +axis equal +hold off +%% LDS EM +[model, llh] = ldsEm(x,k); +nu = kalmanSmoother(model,x); +y = model.C*nu; figure hold on plot(x(1,:), x(2,:), 'ro'); -plot(nu(1,:), nu(2,:), 'b*-'); -legend('observed', 'smoothed with fitted model') +plot(y(1,:), y(2,:), 'b*-'); +legend('observed', 'learned') +title('LDS EM learning') axis equal hold off figure; From 62279de8275618a79ccabec77cd90a484021bd61 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 30 Nov 2018 05:55:49 +0800 Subject: [PATCH 105/119] modify ldsEm to use ldsPca as initialization --- chapter13/LDS/ldsEm.m | 33 ++++++++++++++++++--------------- demo/ch13/lds_demo.m | 29 +++++++++++++++-------------- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/chapter13/LDS/ldsEm.m b/chapter13/LDS/ldsEm.m index 7611595..d07620a 100644 --- a/chapter13/LDS/ldsEm.m +++ b/chapter13/LDS/ldsEm.m @@ -17,7 +17,7 @@ model = init(X,m); end tol = 1e-4; -maxIter = 1000; +maxIter = 2000; llh = -inf(1,maxIter); for iter = 2:maxIter % E-step @@ -29,20 +29,23 @@ llh = llh(2:iter); function model = init(X, k) -d = size(X,1); -model.mu0 = randn(k,1); -model.P0 = iwishrnd(eye(k),k); -model.A = randn(k,k); -model.G = iwishrnd(eye(k),k); -model.C = randn(d,k); -model.S = iwishrnd(eye(d),d); -% [A,C,Z] = ldsPca(X,k,3*k); -% model.mu0 = Z(:,1); -% model.P0 = ; -% model.A = A; -% model.C = C; -% model.G = ; -% model.S = ; +% d = size(X,1); +% model.mu0 = randn(k,1); +% model.P0 = iwishrnd(eye(k),k); +% model.A = randn(k,k); +% model.G = iwishrnd(eye(k),k); +% model.C = randn(d,k); +% model.S = iwishrnd(eye(d),d); +[A,C,Z] = ldsPca(X,k,3*k); +model.mu0 = Z(:,1); +E = Z(:,1:end-1)-Z(:,2:end); +model.P0 = (dot(E(:),E(:))/(k*size(E,2)))*eye(k); +model.A = A; +E = A*Z(:,1:end-1)-Z(:,2:end); +model.G = E*E'/size(E,2); +model.C = 
C; +E = C*Z-X(:,1:size(Z,2)); +model.S = E*E'/size(E,2); function model = maximization(X ,nu, U, Ezz, Ezy) n = size(X,2); diff --git a/demo/ch13/lds_demo.m b/demo/ch13/lds_demo.m index fe9e421..42742ae 100644 --- a/demo/ch13/lds_demo.m +++ b/demo/ch13/lds_demo.m @@ -1,19 +1,20 @@ close all; -%% Parameter +% Parameter clear; d = 2; -k = 2; -n = 50; +k = 3; +n = 100; -A = [1,1; - 0 1]; +A = [1,0,1; + 0 1,0; + 0,0,1]; G = eye(k)*1e-3; -C = [1 0; - 0 1]; +C = [1,0,0; + 0 1,0]; S = eye(d)*1e-1; -mu0 = [0; 0]; +mu0 = [0;0;0]; P0 = eye(k); model.A = A; @@ -54,9 +55,9 @@ axis equal hold off %% LDS Subspace -[A,C,z] = ldsPca(x,k,3*k); -y = C*z; -t = size(z,2); +[A,C,nu] = ldsPca(x,k,3*k); +y = C*nu; +t = size(y,2); figure; hold on plot(x(1,1:t), x(2,1:t), 'ro'); @@ -66,9 +67,9 @@ axis equal hold off %% LDS EM -[model, llh] = ldsEm(x,k); -nu = kalmanSmoother(model,x); -y = model.C*nu; +[tmodel, llh] = ldsEm(x,k); +nu = kalmanSmoother(tmodel,x); +y = tmodel.C*nu; figure hold on plot(x(1,:), x(2,:), 'ro'); From 0e1acae939e9ea5554cff3319287199e7bbd892f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 30 Nov 2018 19:13:22 +0800 Subject: [PATCH 106/119] add Contents.m --- Contents.m | 124 +++++++++++++++++++++++++++++++++ chapter07/rvmRegSeq.m | 1 - chapter08/NaiveBayes/nbGauss.m | 4 +- 3 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 Contents.m diff --git a/Contents.m b/Contents.m new file mode 100644 index 0000000..f3c492c --- /dev/null +++ b/Contents.m @@ -0,0 +1,124 @@ +% CHAPTER01 +% condEntropy - Compute conditional entropy z=H(x|y) of two discrete variables x and y. +% entropy - Compute entropy z=H(x) of a discrete variable x. +% jointEntropy - Compute joint entropy z=H(x,y) of two discrete variables x and y. +% mutInfo - Compute mutual information I(x,y) of two discrete variables x and y. +% nmi - Compute normalized mutual information I(x,y)/sqrt(H(x)*H(y)) of two discrete variables x and y. 
+% nvi - Compute normalized variation information z=(1-I(x,y)/H(x,y)) of two discrete variables x and y. +% relatEntropy - Compute relative entropy (a.k.a KL divergence) z=KL(p(x)||p(y)) of two discrete variables x and y. +% CHAPTER02 +% logDirichlet - Compute log pdf of a Dirichlet distribution. +% logGauss - Compute log pdf of a Gaussian distribution. +% logKde - Compute log pdf of kernel density estimator. +% logMn - Compute log pdf of a multinomial distribution. +% logMvGamma - Compute logarithm multivariate Gamma function +% logSt - Compute log pdf of a Student's t distribution. +% logVmf - Compute log pdf of a von Mises-Fisher distribution. +% logWishart - Compute log pdf of a Wishart distribution. +% CHAPTER03 +% linReg - Fit linear regression model y=w'x+w0 +% linRegFp - Fit empirical Bayesian linear model with Mackay fixed point method (p.168) +% linRegPred - Compute linear regression model reponse y = w'*X+w0 and likelihood +% linRnd - Generate data from a linear model p(t|w,x)=G(w'x+w0,sigma), sigma=sqrt(1/beta) +% CHAPTER04 +% binPlot - Plot binary classification result for 2d data +% fda - Fisher (linear) discriminant analysis +% logitBin - Logistic regression for binary classification optimized by Newton-Raphson method. +% logitBinPred - Prediction of binary logistic regression model +% logitMn - Multinomial regression for multiclass problem (Multinomial likelihood) +% logitMnPred - Prediction of multiclass (multinomial) logistic regression model +% sigmoid - Sigmod function +% softmax - Softmax function +% CHAPTER05 +% mlpReg - Train a multilayer perceptron neural network +% mlpRegPred - Multilayer perceptron prediction +% CHAPTER06 +% kn2sd - Transform a kernel matrix (or inner product matrix) to a squared distance matrix +% knCenter - Centerize the data in the kernel space +% knGauss - Gaussian (RBF) kernel K = exp(-|x-y|/(2s)); +% knKmeans - Perform kernel kmeans clustering. 
+% knKmeansPred - Prediction for kernel kmeans clusterng +% knLin - Linear kernel (inner product) +% knPca - Kernel PCA +% knPcaPred - Prediction for kernel PCA +% knPoly - Polynomial kernel k(x,y)=(x'y+c)^o +% knReg - Gaussian process (kernel) regression +% knRegPred - Prediction for Gaussian Process (kernel) regression model +% sd2kn - Transform a squared distance matrix to a kernel matrix. +% CHAPTER07 +% rvmBinFp - Relevance Vector Machine (ARD sparse prior) for binary classification. +% rvmBinPred - Prodict the label for binary logistic regression model +% rvmRegFp - Relevance Vector Machine (ARD sparse prior) for regression +% rvmRegPred - Compute RVM regression model reponse y = w'*X+w0 and likelihood +% rvmRegSeq - Sparse Bayesian Regression (RVM) using sequential algorithm +% CHAPTER08 +% MRF +% mrfBethe - Compute Bethe energy +% mrfBp - Undirected graph belief propagation for MRF +% mrfGibbs - Compute Gibbs energy +% mrfIsGa - Contruct a latent Ising MRF with Gaussian observation +% mrfMf - Mean field for MRF +% NaiveBayes +% nbBern - Naive bayes classifier with indepenet Bernoulli. +% nbBernPred - Prediction of naive Bayes classifier with independent Bernoulli. +% nbGauss - Naive bayes classifier with indepenet Gaussian +% nbGaussPred - Prediction of naive Bayes classifier with independent Gaussian. +% CHAPTER09 +% kmeans - Perform kmeans clustering. +% kmeansPred - Prediction for kmeans clusterng +% kmeansRnd - Generate samples from a Gaussian mixture distribution with common variances (kmeans model). +% kmedoids - Perform k-medoids clustering. +% kseeds - Perform kmeans++ seeding +% linRegEm - Fit empirical Bayesian linear regression model with EM (p.448 chapter 9.3.4) +% mixBernEm - Perform EM algorithm for fitting the Bernoulli mixture model. +% mixBernRnd - Generate samples from a Bernoulli mixture distribution. +% mixGaussEm - Perform EM algorithm for fitting the Gaussian mixture model. 
+% mixGaussPred - Predict label and responsibility for Gaussian mixture model. +% mixGaussRnd - Genarate samples form a Gaussian mixture model. +% rvmBinEm - Relevance Vector Machine (ARD sparse prior) for binary classification. +% rvmRegEm - Relevance Vector Machine (ARD sparse prior) for regression +% CHAPTER10 +% linRegVb - Variational Bayesian inference for linear regression. +% mixGaussEvidence - Variational lower bound of the model evidence (log of marginal likelihood) +% mixGaussVb - Variational Bayesian inference for Gaussian mixture. +% mixGaussVbPred - Predict label and responsibility for Gaussian mixture model trained by VB. +% rvmRegVb - Variational Bayesian inference for RVM regression. +% CHAPTER11 +% dirichletRnd - Generate samples from a Dirichlet distribution. +% discreteRnd - Generate samples from a discrete distribution (multinomial). +% Gauss - Class for Gaussian distribution used by Dirichlet process +% gaussRnd - Generate samples from a Gaussian distribution. +% GaussWishart - Class for Gaussian-Wishart distribution used by Dirichlet process +% mixDpGb - Collapsed Gibbs sampling for Dirichlet process (infinite) mixture model. +% mixDpGbOl - Online collapsed Gibbs sampling for Dirichlet process (infinite) mixture model. +% mixGaussGb - Collapsed Gibbs sampling for Dirichlet process (infinite) Gaussian mixture model (a.k.a. DPGM). +% mixGaussSample - Genarate samples form a Gaussian mixture model with GaussianWishart prior. +% CHAPTER12 +% fa - Perform EM algorithm for factor analysis model +% pca - Principal component analysis +% pcaEm - Perform EM-like algorithm for PCA (by Sam Roweis). +% pcaEmC - Perform Constrained EM like algorithm for PCA. +% ppcaEm - Perform EM algorithm to maiximize likelihood of probabilistic PCA model. +% ppcaRnd - Generate data from probabilistic PCA model +% ppcaVb - Perform variatioanl Bayeisan inference for probabilistic PCA model. 
+% CHAPTER13 +% HMM +% hmmEm - EM algorithm to fit the parameters of HMM model (a.k.a Baum-Welch algorithm) +% hmmFilter - HMM forward filtering algorithm. +% hmmRnd - Generate a data sequence from a hidden Markov model. +% hmmSmoother - HMM smoothing alogrithm (normalized forward-backward or normalized alpha-beta algorithm). +% hmmViterbi - Viterbi algorithm (calculated in log scale to improve numerical stability). +% LDS +% kalmanFilter - Kalman filter (forward algorithm for linear dynamic system) +% kalmanSmoother - Kalman smoother (forward-backward algorithm for linear dynamic system) +% ldsEm - EM algorithm for parameter estimation of linear dynamic system. +% ldsPca - Subspace method for learning linear dynamic system. +% ldsRnd - Generate a data sequence from linear dynamic system. +% CHAPTER14 +% adaboostBin - Adaboost for binary classification (weak learner: kmeans) +% adaboostBinPred - Prediction of binary Adaboost +% mixLinPred - Prediction function for mxiture of linear regression +% mixLinReg - Mixture of linear regression +% mixLinRnd - Generate data from mixture of linear model +% mixLogitBin - Mixture of logistic regression model for binary classification optimized by Newton-Raphson method +% mixLogitBinPred - Prediction function for mixture of logistic regression diff --git a/chapter07/rvmRegSeq.m b/chapter07/rvmRegSeq.m index 97b93db..7fcad31 100644 --- a/chapter07/rvmRegSeq.m +++ b/chapter07/rvmRegSeq.m @@ -1,5 +1,4 @@ function [model, llh] = rvmRegSeq(X, t) -% TODO: beta is not updated. 
% Sparse Bayesian Regression (RVM) using sequential algorithm % Input: % X: d x n data diff --git a/chapter08/NaiveBayes/nbGauss.m b/chapter08/NaiveBayes/nbGauss.m index 6b4e8b2..c8061e4 100644 --- a/chapter08/NaiveBayes/nbGauss.m +++ b/chapter08/NaiveBayes/nbGauss.m @@ -1,6 +1,6 @@ function model = nbGauss(X, t) -% Naive bayes classifier with indepenet Gaussian, each dimension of data is -% assumed from a 1d Gaussian distribution with independent mean and variance. +% Naive bayes classifier with indepenet Gaussian +% Each dimension of data is assumed from a 1d Gaussian distribution with independent mean and variance. % Input: % X: d x n data matrix % t: 1 x n label (1~k) From a117a4364fef63d24e3e34403e69a522771ee25a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 4 Dec 2018 00:56:24 +0800 Subject: [PATCH 107/119] add MLP classification --- chapter05/mlpClass.m | 63 ++++++++++++++++++++++++++++++++++++++++ chapter05/mlpClassPred.m | 19 ++++++++++++ chapter05/mlpReg.m | 33 +++++++++++---------- chapter05/mlpRegPred.m | 3 +- demo/ch05/mlp_demo.m | 25 +++++++++++++--- 5 files changed, 123 insertions(+), 20 deletions(-) create mode 100644 chapter05/mlpClass.m create mode 100644 chapter05/mlpClassPred.m diff --git a/chapter05/mlpClass.m b/chapter05/mlpClass.m new file mode 100644 index 0000000..0a5d645 --- /dev/null +++ b/chapter05/mlpClass.m @@ -0,0 +1,63 @@ +function [model, L] = mlpClass(X,y,k,lambda) +% Train a multilayer perceptron neural network for classification with backpropagation +% logistic activation function is used. +% Input: +% X: d x n data matrix +% Y: p x n response matrix +% k: T x 1 vector to specify number of hidden nodes in each layer +% lambda: regularization parameter +% Ouput: +% model: model structure +% L: (regularized cross entropy) loss +% Written by Mo Chen (sth4nth@gmail.com). 
+if nargin < 4 + lambda = 1e-2; +end +eta = 1e-3; +tol = 1e-4; +maxiter = 50000; +L = inf(1,maxiter); + +Y = sparse(y,1:numel(y),1); +k = [size(X,1);k(:);size(Y,1)]; +T = numel(k)-1; +W = cell(T,1); +b = cell(T,1); +for t = 1:T + W{t} = randn(k(t),k(t+1)); + b{t} = randn(k(t+1),1); +end +R = cell(T,1); +Z = cell(T+1,1); +Z{1} = X; +for iter = 2:maxiter +% forward + for t = 1:T-1 + Z{t+1} = sigmoid(W{t}'*Z{t}+b{t}); % 5.10 5.113 + end + Z{T+1} = softmax(W{T}'*Z{T}+b{T}); + +% loss + E = Z{T+1}; + Wn = cellfun(@(x) dot(x(:),x(:)),W); % |W|^2 + L(iter) = -dot(Y(:),log(E(:)))+0.5*lambda*sum(Wn); + if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end + +% backward + R{T} = Z{T+1}-Y; + for t = T-1:-1:1 + df = Z{t+1}.*(1-Z{t+1}); % h'(a) + R{t} = df.*(W{t+1}*R{t+1}); % 5.66 + end + +% gradient descent + for t=1:T + dW = Z{t}*R{t}'+lambda*W{t}; % 5.67 + db = sum(R{t},2); + W{t} = W{t}-eta*dW; % 5.43 + b{t} = b{t}-eta*db; + end +end +L = L(2:iter); +model.W = W; +model.b = b; diff --git a/chapter05/mlpClassPred.m b/chapter05/mlpClassPred.m new file mode 100644 index 0000000..0c94742 --- /dev/null +++ b/chapter05/mlpClassPred.m @@ -0,0 +1,19 @@ +function [y, P] = mlpClassPred(model, X) +% Multilayer perceptron classification prediction +% logistic activation function is used. +% Input: +% model: model structure +% X: d x n data matrix +% Ouput: +% y: 1 x n label vector +% P: k x n probability matrix +% Written by Mo Chen (sth4nth@gmail.com). 
+W = model.W; +b = model.b; +T = length(W); +Z = X; +for t = 1:T-1 + Z = sigmoid(W{t}'*Z+b{t}); +end +P = softmax(W{T}'*Z+b{T}); +[~,y] = max(P,[],1); \ No newline at end of file diff --git a/chapter05/mlpReg.m b/chapter05/mlpReg.m index caf42d1..d3759eb 100644 --- a/chapter05/mlpReg.m +++ b/chapter05/mlpReg.m @@ -1,22 +1,24 @@ -function [model, L] = mlpReg(X,Y,k,lambda) -% Train a multilayer perceptron neural network +function [model, L] = mlpReg(X,y,k,lambda) +% Train a multilayer perceptron neural network for regression with backpropagation +% tanh activation function is used % Input: % X: d x n data matrix -% Y: p x n response matrix +% y: p x n response matrix % k: T x 1 vector to specify number of hidden nodes in each layer % lambda: regularization parameter % Ouput: % model: model structure -% L: loss +% L: (regularized least square) loss % Written by Mo Chen (sth4nth@gmail.com). if nargin < 4 lambda = 1e-2; end -eta = 1e-3; +eta = 1e-5; +tol = 1e-5; maxiter = 50000; L = inf(1,maxiter); -k = [size(X,1);k(:);size(Y,1)]; +k = [size(X,1);k(:);size(y,1)]; T = numel(k)-1; W = cell(T,1); b = cell(T,1); @@ -30,30 +32,31 @@ for iter = 2:maxiter % forward for t = 1:T-1 - Z{t+1} = tanh(W{t}'*Z{t}+b{t}); + Z{t+1} = tanh(W{t}'*Z{t}+b{t}); % 5.10 5.113 end - Z{T+1} = W{T}'*Z{T}+b{T}; + Z{T+1} = W{T}'*Z{T}+b{T}; % 5.114 % loss - E = Z{T+1}-Y; + E = Z{T+1}-y; Wn = cellfun(@(x) dot(x(:),x(:)),W); % |W|^2 L(iter) = dot(E(:),E(:))+lambda*sum(Wn); - + if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end + % backward - R{T} = E; % delta + R{T} = E; for t = T-1:-1:1 df = 1-Z{t+1}.^2; % h'(a) - R{t} = df.*(W{t+1}*R{t+1}); % delta + R{t} = df.*(W{t+1}*R{t+1}); % 5.66 end % gradient descent for t=1:T - dW = Z{t}*R{t}'+lambda*W{t}; + dW = Z{t}*R{t}'+lambda*W{t}; % 5.67 db = sum(R{t},2); - W{t} = W{t}-eta*dW; + W{t} = W{t}-eta*dW; % 5.43 b{t} = b{t}-eta*db; end end -L = L(1,2:iter); +L = L(2:iter); model.W = W; model.b = b; diff --git a/chapter05/mlpRegPred.m 
b/chapter05/mlpRegPred.m index e3bba3f..d2e67f9 100644 --- a/chapter05/mlpRegPred.m +++ b/chapter05/mlpRegPred.m @@ -1,5 +1,6 @@ function Y = mlpRegPred(model, X) -% Multilayer perceptron prediction +% Multilayer perceptron regression prediction +% tanh activation function is used. % Input: % model: model structure % X: d x n data matrix diff --git a/demo/ch05/mlp_demo.m b/demo/ch05/mlp_demo.m index 75c170a..70b57b3 100644 --- a/demo/ch05/mlp_demo.m +++ b/demo/ch05/mlp_demo.m @@ -1,15 +1,32 @@ -clear; close all; +clear; close all +%% Regression n = 200; x = linspace(0,2*pi,n); y = sin(x); -k = [3,4]; % two hidden layers with 3 and 4 hidden nodes +h = [10,6]; % two hidden layers with 10 and 6 neurons lambda = 1e-2; -[model, L] = mlpReg(x,y,k); +[model, L] = mlpReg(x,y,h,lambda); t = mlpRegPred(model,x); plot(L); figure; hold on plot(x,y,'.'); plot(x,t); -hold off \ No newline at end of file +hold off +%% Classification +clear; +k = 2; +n = 200; +[X,y] = kmeansRnd(2,k,n); +figure; +plotClass(X,y); + +h = 3; +lambda = 1e-2; +[model, llh] = mlpClass(X,y,h,lambda); +[t,p] = mlpClassPred(model,X); +figure; +plotClass(X,t); +figure; +plot(llh); \ No newline at end of file From 36900bd74681dc529e02974010ace70f7625f04b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 4 Dec 2018 00:58:48 +0800 Subject: [PATCH 108/119] update Contents.m --- Contents.m | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Contents.m b/Contents.m index f3c492c..bd96bba 100644 --- a/Contents.m +++ b/Contents.m @@ -29,9 +29,11 @@ % logitMnPred - Prediction of multiclass (multinomial) logistic regression model % sigmoid - Sigmod function % softmax - Softmax function -% CHAPTER05 -% mlpReg - Train a multilayer perceptron neural network -% mlpRegPred - Multilayer perceptron prediction +% CHAPTER05 +% mlpClass - Train a multilayer perceptron neural network for classification with backpropagation +% mlpClassPred - Multilayer perceptron classification prediction +% mlpReg - Train a 
multilayer perceptron neural network for regression with backpropagation +% mlpRegPred - Multilayer perceptron regression prediction % CHAPTER06 % kn2sd - Transform a kernel matrix (or inner product matrix) to a squared distance matrix % knCenter - Centerize the data in the kernel space From f2d8141d0fa749af5c186921cd0281a9fd529309 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 7 Dec 2018 00:03:18 +0800 Subject: [PATCH 109/119] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ddd02c6..ec97ac6 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Design Goal ------- * Succinct: The code is extremely compact. Minimizing code length is a major goal. As a result, the core of the algorithms can be easily spotted. -* Efficient: Many tricks to speedup Matlab code were applied (eg. vectorization, matrix factorization, etc.). Usually, functions in this package are orders faster than Matlab builtin ones (eg. kmeans). +* Efficient: Many tricks to speedup Matlab code are applied (eg. vectorization, matrix factorization, etc.). Usually, functions in this package are orders faster than Matlab builtin ones (e.g. kmeans). * Robust: Many tricks for numerical stability are applied, such as computing probability in log domain, square root matrix update to enforce matrix symmetry\PD, etc. * Readable: The code is heavily commented. Corresponding formulas in PRML are annoted. Symbols are in sync with the book. * Practical: The package is not only readable, but also meant to be easily used and modified to facilitate ML research. Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). 
From 87260d58048a656a270207a9f7f21050df72efce Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 24 Jan 2019 16:05:57 +0800 Subject: [PATCH 110/119] fix doc --- chapter05/mlpClass.m | 6 +++--- chapter05/mlpReg.m | 4 ++-- demo/ch05/mlp_demo.m | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chapter05/mlpClass.m b/chapter05/mlpClass.m index 0a5d645..6e626de 100644 --- a/chapter05/mlpClass.m +++ b/chapter05/mlpClass.m @@ -1,9 +1,9 @@ -function [model, L] = mlpClass(X,y,k,lambda) -% Train a multilayer perceptron neural network for classification with backpropagation +function [model, L] = mlpClass(X, y, k, lambda) +% Train a multilayer perceptron neural network for multiclass classification with backpropagation % logistic activation function is used. % Input: % X: d x n data matrix -% Y: p x n response matrix +% y: 1 x n label vector % k: T x 1 vector to specify number of hidden nodes in each layer % lambda: regularization parameter % Ouput: diff --git a/chapter05/mlpReg.m b/chapter05/mlpReg.m index d3759eb..b6b3378 100644 --- a/chapter05/mlpReg.m +++ b/chapter05/mlpReg.m @@ -1,9 +1,9 @@ -function [model, L] = mlpReg(X,y,k,lambda) +function [model, L] = mlpReg(X, y, k, lambda) % Train a multilayer perceptron neural network for regression with backpropagation % tanh activation function is used % Input: % X: d x n data matrix -% y: p x n response matrix +% y: 1 x n real value response vector % k: T x 1 vector to specify number of hidden nodes in each layer % lambda: regularization parameter % Ouput: diff --git a/demo/ch05/mlp_demo.m b/demo/ch05/mlp_demo.m index 70b57b3..33f77b3 100644 --- a/demo/ch05/mlp_demo.m +++ b/demo/ch05/mlp_demo.m @@ -26,7 +26,7 @@ lambda = 1e-2; [model, llh] = mlpClass(X,y,h,lambda); [t,p] = mlpClassPred(model,X); +plot(llh); figure; plotClass(X,t); figure; -plot(llh); \ No newline at end of file From 0635e51e906ad5a6fbe14a44f1c711333c7a8903 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 24 Jan 2019 16:06:07 +0800 Subject: 
[PATCH 111/119] fix kmedoids --- chapter09/kmedoids.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter09/kmedoids.m b/chapter09/kmedoids.m index ff94a60..2dcc0d8 100644 --- a/chapter09/kmedoids.m +++ b/chapter09/kmedoids.m @@ -18,7 +18,7 @@ X = X-mean(X,2); % reduce chance of numerical problems v = dot(X,X,1); D = v+v'-2*(X'*X); % Euclidean distance matrix -D(sub2ind([d,d],1:d,1:d)) = 0; % reduce chance of numerical problems +D(sub2ind([n,n],1:n,1:n)) = 0; % reduce chance of numerical problems last = zeros(1,n); while any(label ~= last) [~,~,last(:)] = unique(label); % remove empty clusters From 314f4756103c28d362d10946aa820a78d216e007 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Wed, 30 Jan 2019 14:17:00 +0800 Subject: [PATCH 112/119] tweak logistic regression --- chapter04/logitBin.m | 15 +++++++-------- chapter09/kmeansRnd.m | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/chapter04/logitBin.m b/chapter04/logitBin.m index 80584a9..aa3eed7 100644 --- a/chapter04/logitBin.m +++ b/chapter04/logitBin.m @@ -1,16 +1,16 @@ -function [model, llh] = logitBin(X, y, lambda, eta) +function [model, llh] = logitBin(X, y, lambda) % Logistic regression for binary classification optimized by Newton-Raphson method. % Input: % X: d x n data matrix -% z: 1 x n label (0/1) +% y: 1 x n label (0/1) % lambda: regularization parameter -% eta: step size +% alpha: step size % Output: % model: trained model structure % llh: loglikelihood % Written by Mo Chen (sth4nth@gmail.com). 
if nargin < 4 - eta = 1e-1; + alpha = 1e-1; end if nargin < 3 lambda = 1e-4; @@ -20,18 +20,17 @@ tol = 1e-4; epoch = 200; llh = -inf(1,epoch); -h = 2*y-1; w = rand(d,1); for t = 2:epoch a = w'*X; - llh(t) = -(sum(log1pexp(-h.*a))+0.5*lambda*dot(w,w))/n; % 4.89 - if llh(t)-llh(t-1) < tol; break; end + llh(t) = (dot(a,y)-sum(log1pexp(a))-0.5*lambda*dot(w,w))/n; % 4.90 + if abs(llh(t)-llh(t-1)) < tol; break; end z = sigmoid(a); % 4.87 g = X*(z-y)'+lambda*w; % 4.96 r = z.*(1-z); % 4.98 Xw = bsxfun(@times, X, sqrt(r)); H = Xw*Xw'+lambda*eye(d); % 4.97 - w = w-eta*(H\g); + w = w-alpha*(H\g); % 4.92 end llh = llh(2:t); model.w = w; diff --git a/chapter09/kmeansRnd.m b/chapter09/kmeansRnd.m index d48013f..b02f98f 100644 --- a/chapter09/kmeansRnd.m +++ b/chapter09/kmeansRnd.m @@ -10,7 +10,7 @@ % mu: d x k centers of clusters % Written by Mo Chen (sth4nth@gmail.com). alpha = 1; -beta = nthroot(k,d); % in volume x^d there is k points: x^d=k +beta = nthroot(k,d); % k points in volume x^d : x^d=k X = randn(d,n); w = dirichletRnd(alpha,ones(1,k)/k); From 11e81d33dfcb26540df18fc7ecd661a420393533 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 25 Jul 2019 16:01:42 +0800 Subject: [PATCH 113/119] add add demo for kernel kmeans --- chapter06/knKmeans.m | 10 +++------- demo/ch06/knKmeans_demo.m | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 demo/ch06/knKmeans_demo.m diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 49c6c15..2265e83 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -1,7 +1,7 @@ -function [label, model, energy] = knKmeans(X, init, kn) +function [label, model, energy] = knKmeans(K, init) % Perform kernel kmeans clustering. 
% Input: -% K: n x n kernel matrix +% K: n x n data matrix % init: either number of clusters (k) or initial label (1xn) % Output: % label: 1 x n sample labels @@ -10,17 +10,13 @@ % Reference: Kernel Methods for Pattern Analysis % by John Shawe-Taylor, Nello Cristianini % Written by Mo Chen (sth4nth@gmail.com). -n = size(X,2); +n = size(K,2); if numel(init)==1 k = init; label = ceil(k*rand(1,n)); elseif numel(init)==n label = init; end -if nargin < 3 - kn = @knGauss; -end -K = kn(X,X); last = zeros(1,n); while any(label ~= last) [~,~,last(:)] = unique(label); % remove empty clusters diff --git a/demo/ch06/knKmeans_demo.m b/demo/ch06/knKmeans_demo.m new file mode 100644 index 0000000..4d1882a --- /dev/null +++ b/demo/ch06/knKmeans_demo.m @@ -0,0 +1,23 @@ +%% Kernel kmeans with linear kernel is equivalent to kmeans +close all; clear; +d = 2; +k = 3; +n = 200; +[X, y] = kmeansRnd(d,k,n); +init = ceil(k*rand(1,n)); +K = knLin(X,X); +label = knKmeans(K,init); + +label0 = kmeans(X,init); +maxdiff(label,label0) +plotClass(X,label); +%% Kernel kmeans with Gaussian Kernel for nonlinear data +x1 = linspace(0,pi,n/2); +x2 = sin(x1); +X = [x1,x1+pi/2; + x2,-x2]; + +K = knGauss(X,X,0.4); +label = knKmeans(K,2); +figure; +plotClass(X,label); \ No newline at end of file From b9eef951ccb81ca63bf255dc4aef2623bb2d037c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 25 Jul 2019 16:01:42 +0800 Subject: [PATCH 114/119] add demo for kernel kmeans --- chapter06/knKmeans.m | 10 +++------- demo/ch06/knKmeans_demo.m | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 demo/ch06/knKmeans_demo.m diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 49c6c15..2265e83 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -1,7 +1,7 @@ -function [label, model, energy] = knKmeans(X, init, kn) +function [label, model, energy] = knKmeans(K, init) % Perform kernel kmeans clustering. 
% Input: -% K: n x n kernel matrix +% K: n x n data matrix % init: either number of clusters (k) or initial label (1xn) % Output: % label: 1 x n sample labels @@ -10,17 +10,13 @@ % Reference: Kernel Methods for Pattern Analysis % by John Shawe-Taylor, Nello Cristianini % Written by Mo Chen (sth4nth@gmail.com). -n = size(X,2); +n = size(K,2); if numel(init)==1 k = init; label = ceil(k*rand(1,n)); elseif numel(init)==n label = init; end -if nargin < 3 - kn = @knGauss; -end -K = kn(X,X); last = zeros(1,n); while any(label ~= last) [~,~,last(:)] = unique(label); % remove empty clusters diff --git a/demo/ch06/knKmeans_demo.m b/demo/ch06/knKmeans_demo.m new file mode 100644 index 0000000..4d1882a --- /dev/null +++ b/demo/ch06/knKmeans_demo.m @@ -0,0 +1,23 @@ +%% Kernel kmeans with linear kernel is equivalent to kmeans +close all; clear; +d = 2; +k = 3; +n = 200; +[X, y] = kmeansRnd(d,k,n); +init = ceil(k*rand(1,n)); +K = knLin(X,X); +label = knKmeans(K,init); + +label0 = kmeans(X,init); +maxdiff(label,label0) +plotClass(X,label); +%% Kernel kmeans with Gaussian Kernel for nonlinear data +x1 = linspace(0,pi,n/2); +x2 = sin(x1); +X = [x1,x1+pi/2; + x2,-x2]; + +K = knGauss(X,X,0.4); +label = knKmeans(K,2); +figure; +plotClass(X,label); \ No newline at end of file From 50a654cd556d4f33a34132a1163bbaf47b1ebc2b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 25 Jul 2019 16:12:35 +0800 Subject: [PATCH 115/119] tweak knkmeans --- chapter06/knKmeans.m | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 2265e83..60e9032 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -1,4 +1,4 @@ -function [label, model, energy] = knKmeans(K, init) +function [label, energy] = knKmeans(K, init) % Perform kernel kmeans clustering. 
% Input: % K: n x n data matrix @@ -26,8 +26,4 @@ [val, label] = max(T-dot(T,E,2)/2,[],1); end energy = trace(K)-2*sum(val); -if nargout == 3 - model.X = X; - model.label = label; - model.kn = kn; -end + From f20e50ebb86ffcf144a7acfdc39bb1377ac6bba8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 17 Sep 2019 19:54:02 +0800 Subject: [PATCH 116/119] tweak discreteRnd --- chapter11/discreteRnd.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter11/discreteRnd.m b/chapter11/discreteRnd.m index d942783..5b811c2 100644 --- a/chapter11/discreteRnd.m +++ b/chapter11/discreteRnd.m @@ -11,4 +11,4 @@ end r = rand(1,n); p = cumsum(p(:)); -[~,x] = histc(r,[0;p/p(end)]); +[~,~,x] = histcounts(r,[0;p/p(end)]); From 1501973c6d7632da70b424cbd28a7ec3b9529c23 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 17 Sep 2019 20:04:12 +0800 Subject: [PATCH 117/119] tweak discreteRnd --- chapter11/discreteRnd.m | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/chapter11/discreteRnd.m b/chapter11/discreteRnd.m index 5b811c2..93bcc63 100644 --- a/chapter11/discreteRnd.m +++ b/chapter11/discreteRnd.m @@ -9,6 +9,4 @@ if nargin == 1 n = 1; end -r = rand(1,n); -p = cumsum(p(:)); -[~,~,x] = histcounts(r,[0;p/p(end)]); +[~,~,x] = histcounts(rand(1,n),[0;cumsum(p(:))]); From d86f1a92e53ded0d8b73b518eaf8c0dde6ecca2e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 19 Dec 2019 13:40:21 +0800 Subject: [PATCH 118/119] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ec97ac6..9043b3c 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ Note: this package requires Matlab **R2016b** or latter, since it utilizes a new Design Goal ------- * Succinct: The code is extremely compact. Minimizing code length is a major goal. As a result, the core of the algorithms can be easily spotted. -* Efficient: Many tricks to speedup Matlab code are applied (eg. 
vectorization, matrix factorization, etc.). Usually, functions in this package are orders faster than Matlab builtin ones (e.g. kmeans). -* Robust: Many tricks for numerical stability are applied, such as computing probability in log domain, square root matrix update to enforce matrix symmetry\PD, etc. +* Efficient: Many tricks for speeding up Matlab code are applied (e.g. vectorization, matrix factorization, etc.). Usually, functions in this package are orders faster than Matlab builtin ones (e.g. kmeans). +* Robust: Many tricks for numerical stability are applied, such as computing probability in logarithm domain, square root matrix update to enforce matrix symmetry\PD, etc. * Readable: The code is heavily commented. Corresponding formulas in PRML are annoted. Symbols are in sync with the book. * Practical: The package is not only readable, but also meant to be easily used and modified to facilitate ML research. Many functions in this package are already widely used (see [Matlab file exchange](http://www.mathworks.com/matlabcentral/fileexchange/?term=authorid%3A49739)). From baac49f643db6b39e75307d3b21307b32b29a7a9 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 4 Mar 2020 21:42:10 +0800 Subject: [PATCH 119/119] fix kernel kmeans --- chapter06/knGauss.m | 2 +- chapter06/knKmeans.m | 7 ++++++- demo/ch06/knKmeans_demo.m | 7 +++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/chapter06/knGauss.m b/chapter06/knGauss.m index d19820b..4832e8d 100755 --- a/chapter06/knGauss.m +++ b/chapter06/knGauss.m @@ -8,7 +8,7 @@ % K: nx x ny kernel matrix % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 - s = 1; + s = 0.4; end if nargin < 2 || isempty(Y) diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 60e9032..c3af5a1 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -1,4 +1,4 @@ -function [label, energy] = knKmeans(K, init) +function [label, model, energy] = knKmeans(X, init, kn) % Perform kernel kmeans clustering.
% Input: % K: n x n data matrix @@ -10,6 +10,7 @@ % Reference: Kernel Methods for Pattern Analysis % by John Shawe-Taylor, Nello Cristianini % Written by Mo Chen (sth4nth@gmail.com). +K = kn(X,X); n = size(K,2); if numel(init)==1 k = init; @@ -26,4 +27,8 @@ [val, label] = max(T-dot(T,E,2)/2,[],1); end energy = trace(K)-2*sum(val); +model.kn = kn; +model.label = label; +model.X = X; + diff --git a/demo/ch06/knKmeans_demo.m b/demo/ch06/knKmeans_demo.m index 4d1882a..50e7bfc 100644 --- a/demo/ch06/knKmeans_demo.m +++ b/demo/ch06/knKmeans_demo.m @@ -5,8 +5,8 @@ n = 200; [X, y] = kmeansRnd(d,k,n); init = ceil(k*rand(1,n)); -K = knLin(X,X); -label = knKmeans(K,init); + +label = knKmeans(X,init,@knLin); label0 = kmeans(X,init); maxdiff(label,label0) @@ -17,7 +17,6 @@ X = [x1,x1+pi/2; x2,-x2]; -K = knGauss(X,X,0.4); -label = knKmeans(K,2); +label = knKmeans(X,2,@knGauss); figure; plotClass(X,label); \ No newline at end of file