Last active
August 29, 2015 14:07
-
-
Save wanirepo/d196c90becd1c1a4f723 to your computer and use it in GitHub Desktop.
Uses the Kernighan-Lin (KL) heuristic to optimize any partition score function.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [bestL, bestP, info] = KL_heuristic_k2(A, varargin) | |
% usage: [bestL, bestP, info] = KL_heuristic_k2(A, varargin) | |
% | |
% feature: use the Kernighan-Lin (KL) heuristic to optimize any partition | |
% score function, e.g., modularity Q or stochastic block model's | |
% likelihood function. This works only for 2 partitioning problem. | |
% | |
% input: A adjacency matrix | |
% | |
% output: bestL maximum likelihood over all j, n-j paritioning. | |
% bestP the best partitioning that provides maximum likelihood | |
% info.L likelihood over t rounds. | |
% | |
% optional input: | |
% 'modularity' This option uses modularity Q as a partitioing | |
% score function. | |
% 'fixed' This option assigns a fixed partitioning membership | |
% to certain vertices (e.g., known vertices). | |
% 'fixed' should be followed by two vectors. One is the | |
% vertex index/number that to be fixed. Also you need to | |
% provide the partitioning. | |
% e.g., 'fixed', fixed, z (see the below example) | |
% 'dcsbm' This option uses degree-corrected SBM likelihood as | |
% a partitioing score function. | |
% | |
% ** TERMINATION OPTIONS ** | |
% | |
% 'run_to_n'{default} The algorithm is repeated n times. | |
% 'early_stop' The algorithm is terminated at the point where | |
% the score (e.g., log-likelihood) doesn't increase | |
% 'lag' The algorithm is terminated when the score does | |
% not increase lagn (which should be specified) | |
% times. e.g., KL_heuristic_k2(A, 'lag', 5) | |
% | |
% % example | |
% A = [0 1 1 0 0 0; 1 0 1 0 0 0; 1 1 0 1 0 0; 0 0 1 0 1 1; ... | |
% 0 0 0 1 0 1; 0 0 0 1 1 0]; | |
% fixed = [1,3]; | |
% z = [1,2]; | |
% [bestL, bestP] = KL_heuristic_k2(A, 'fixed', fixed, z) | |
% | |
% All calculations are based on the lecture note of Aaron Clauset's | |
% Network analysis and modeling class (Fall 2014). | |
% see http://tuvalu.santafe.edu/~aaronc/courses/5352/ | |
n = size(A,1); | |
P0 = ones(1,n); | |
fixed = false(n,1); | |
scoref = @(A,P) SBM_likelihood(A,P); % SBM is a default | |
run_to_n = true; % default | |
do_earlystop = false; | |
do_lag = false; | |
for i = 1:length(varargin) | |
if ischar(varargin{i}) | |
switch varargin{i} | |
% score function | |
case {'modularity'} | |
scoref = @(A,P) modularity_wani(A,P); | |
case {'fixed'} | |
fixed(varargin{i+1}) = true; % fixed some vertices | |
P0(varargin{i+1}) = varargin{i+2}; % assign groups | |
case {'dcsbm'} | |
scoref = @(A,P) SBM_likelihood(A,P, 'dcsbm'); | |
case {'run_to_n'} | |
% do nothing | |
case {'early_stop'} | |
% stop at the point where L does not increase | |
do_earlystop = true; run_to_n = false; | |
case {'lag'} % stop if L does not increase lagn times. | |
do_lag = true; lagn = varargin{i+1}; | |
run_to_n = false; | |
end | |
end | |
end | |
for j = 1:((n-sum(fixed))/2) | |
% t = 1 | |
P{j,1} = rand_partition(n,j,fixed,P0); % random (j,n-j) partition | |
L(j,1) = scoref(A,P{j,1}); % initial log-likelihood of P | |
% t = from 2 to ... | |
t = 1; | |
continuing = true; | |
testn = []; | |
testm = -Inf; | |
while continuing | |
P1 = P{j,t}; | |
t = t + 1; | |
[P{j,t}, L(j,t)] = swapping(A, P1, fixed, scoref); | |
if do_earlystop % stop at the point where L does not increase | |
continuing = L(j,t) > L(j,t-1); | |
elseif do_lag % stop if L does not increase lagn times. | |
testm = max(testm, L(j,t)); | |
if L(j,t) < testm || L(j,t) == L(j,t-1) | |
testn(end+1) = 1; | |
else | |
testn = []; | |
end | |
continuing = (sum(testn) < lagn) && (t < n); % upper limit is n | |
elseif run_to_n | |
continuing = t < n; | |
end | |
end | |
end | |
L(L==0) = -Inf; | |
bestL = max(max(L)); | |
[i,j] = find(L == bestL); | |
bestP = P{i,j}; | |
% save P and L for t (the number of rounds) | |
% info.P = P; | |
info.L = L; | |
end | |
% =============== SUBFUNCTIONS =============== | |
function [P, P_idx] = rand_partition(n,j,fixed,P) | |
% CHOOSE a random (j, n-j)-partition | |
fn = sum(fixed); | |
temp_P_idx = datasample(1:(n-fn),j,'Replace',false); | |
P_idx = find(~fixed); | |
P_idx = P_idx(temp_P_idx); | |
P(P_idx) = 2; | |
end | |
function [P, L] = swapping(A, P0, fixed_s, scoref) | |
% GET possible pairs that can be swapped. | |
[~, min_idx] = min([sum(P0(~fixed_s)==1), sum(P0(~fixed_s)==2)]); | |
for i = 1:sum(P0(~fixed_s) == min_idx) | |
nonfix_idx = find(~fixed_s); | |
eta_i = datasample(nonfix_idx(P0(~fixed_s)==1), 1); | |
eta_j = datasample(nonfix_idx(P0(~fixed_s)==2), 1); | |
P0(eta_i)=2; | |
P0(eta_j)=1; | |
fixed_s(eta_i) = true; | |
fixed_s(eta_j) = true; | |
P_temp(i,:) = P0; | |
L_temp(i) = scoref(A, P_temp(i,:)); | |
end | |
[~, max_idx] = max(L_temp); | |
L = L_temp(max_idx); | |
P = P_temp(max_idx,:); | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment