wanirepo · August 29, 2015 14:07
diff --git a/KL_heuristic_k2.m b/KL_heuristic_k2.m
 function [bestL, bestP, info] = KL_heuristic_k2(A, varargin)

 % usage: [bestL, bestP, info] = KL_heuristic_k2(A, varargin)
 %
 % feature: use the Kernighan-Lin (KL) heuristic to optimize any partition 
 %          score function, e.g., modularity Q or stochastic block model's
 %          likelihood function. This works only for 2 partitioning problem.
 %
 % input:   A        adjacency matrix
 %
 % output:  bestL    maximum likelihood over all j, n-j paritioning.
 %          bestP    the best partitioning that provides maximum likelihood
 %          info.L   likelihood over t rounds. 
 %
 % optional input:   
 %    'modularity'   This option uses modularity Q as a partitioing 
 %                   score function. 
 %    'fixed'        This option assigns a fixed partitioning membership 
 %                   to certain vertices (e.g., known vertices). 
 %                'fixed' should be followed by two vectors. One is the 
 %                   vertex index/number that to be fixed. Also you need to
 %                   provide the partitioning. 
 %                   e.g., 'fixed', fixed, z (see the below example)
 %    'dcsbm'        This option uses degree-corrected SBM likelihood as 
 %                   a partitioing score function. 
 %
 % ** TERMINATION OPTIONS **
 %
 %    'run_to_n'{default}  The algorithm is repeated n times.
 %    'early_stop'         The algorithm is terminated at the point where 
 %                         the score (e.g., log-likelihood) doesn't increase
 %    'lag'                The algorithm is terminated when the score does 
 %                         not increase lagn (which should be specified) 
 %                         times. e.g., KL_heuristic_k2(A, 'lag', 5)
 % 
 % % example
 % A = [0 1 1 0 0 0; 1 0 1 0 0 0; 1 1 0 1 0 0; 0 0 1 0 1 1; ...
 %      0 0 0 1 0 1; 0 0 0 1 1 0];
 % fixed = [1,3];
 % z = [1,2];
 % [bestL, bestP] = KL_heuristic_k2(A, 'fixed', fixed, z)
 %
 % All calculations are based on the lecture note of Aaron Clauset's 
 % Network analysis and modeling class (Fall 2014).
 % see  http://tuvalu.santafe.edu/~aaronc/courses/5352/

 n = size(A,1);
 P0 = ones(1,n);
 fixed = false(n,1);
 scoref = @(A,P) SBM_likelihood(A,P); % SBM is a default
 run_to_n = true; % default
 do_earlystop = false;
 do_lag = false;

 for i = 1:length(varargin)
    if ischar(varargin{i})
        switch varargin{i}
            % score function
            case {'modularity'}
                scoref = @(A,P) modularity_wani(A,P);
            case {'fixed'}
                fixed(varargin{i+1}) = true;       % fixed some vertices
                P0(varargin{i+1}) = varargin{i+2}; % assign groups
            case {'dcsbm'}
                scoref = @(A,P) SBM_likelihood(A,P, 'dcsbm'); 
            case {'run_to_n'}  
                % do nothing
            case {'early_stop'}  
                            % stop at the point where L does not increase
                do_earlystop = true; run_to_n = false;
            case {'lag'}    % stop if L does not increase lagn times. 
                do_lag = true; lagn = varargin{i+1}; 
                run_to_n = false;
        end
    end
 end

 for j = 1:((n-sum(fixed))/2)
    % t = 1
    P{j,1} = rand_partition(n,j,fixed,P0); % random (j,n-j) partition
    L(j,1) = scoref(A,P{j,1});             % initial log-likelihood of P
    
    % t = from 2 to ...
    t = 1;
    continuing = true;
    testn = [];
    testm = -Inf;
    while continuing
        P1 = P{j,t};
        t = t + 1;
        [P{j,t}, L(j,t)] = swapping(A, P1, fixed, scoref);
        
        if do_earlystop  % stop at the point where L does not increase
            continuing = L(j,t) > L(j,t-1);
        elseif do_lag    % stop if L does not increase lagn times. 
            testm = max(testm, L(j,t));
            if L(j,t) < testm || L(j,t) == L(j,t-1)
                testn(end+1) = 1;
            else
                testn = [];
            end
            continuing = (sum(testn) < lagn) && (t < n); % upper limit is n
        elseif run_to_n 
            continuing = t < n;
        end

    end
    
 end

 L(L==0) = -Inf;

 bestL = max(max(L));
 [i,j] = find(L == bestL);
 bestP = P{i,j};

 % save P and L for t (the number of rounds)
 % info.P = P;
 info.L = L;

 end

 % =============== SUBFUNCTIONS ===============

 function [P, P_idx] = rand_partition(n,j,fixed,P)

 % CHOOSE a random (j, n-j)-partition

 fn = sum(fixed);
 temp_P_idx = datasample(1:(n-fn),j,'Replace',false);
 P_idx = find(~fixed);
 P_idx = P_idx(temp_P_idx);
 P(P_idx) = 2;

 end

 function [P, L] = swapping(A, P0, fixed_s, scoref)

 % GET possible pairs that can be swapped.

 [~, min_idx] = min([sum(P0(~fixed_s)==1), sum(P0(~fixed_s)==2)]);

 for i = 1:sum(P0(~fixed_s) == min_idx)

    nonfix_idx = find(~fixed_s);
    eta_i = datasample(nonfix_idx(P0(~fixed_s)==1), 1);
    eta_j = datasample(nonfix_idx(P0(~fixed_s)==2), 1);
    
    P0(eta_i)=2;
    P0(eta_j)=1;
    
    fixed_s(eta_i) = true;
    fixed_s(eta_j) = true;
    
    P_temp(i,:) = P0;
    L_temp(i) = scoref(A, P_temp(i,:));
 end

 [~, max_idx] = max(L_temp);
 L = L_temp(max_idx);
 P = P_temp(max_idx,:);

 end
	function [bestL, bestP, info] = KL_heuristic_k2(A, varargin)

	% usage: [bestL, bestP, info] = KL_heuristic_k2(A, varargin)
	%
	% feature: use the Kernighan-Lin (KL) heuristic to optimize any partition
	% score function, e.g., modularity Q or stochastic block model's
	% likelihood function. This works only for 2 partitioning problem.
	%
	% input: A adjacency matrix
	%
	% output: bestL maximum likelihood over all j, n-j paritioning.
	% bestP the best partitioning that provides maximum likelihood
	% info.L likelihood over t rounds.
	%
	% optional input:
	% 'modularity' This option uses modularity Q as a partitioing
	% score function.
	% 'fixed' This option assigns a fixed partitioning membership
	% to certain vertices (e.g., known vertices).
	% 'fixed' should be followed by two vectors. One is the
	% vertex index/number that to be fixed. Also you need to
	% provide the partitioning.
	% e.g., 'fixed', fixed, z (see the below example)
	% 'dcsbm' This option uses degree-corrected SBM likelihood as
	% a partitioing score function.
	%
	% TERMINATION OPTIONS
	%
	% 'run_to_n'{default} The algorithm is repeated n times.
	% 'early_stop' The algorithm is terminated at the point where
	% the score (e.g., log-likelihood) doesn't increase
	% 'lag' The algorithm is terminated when the score does
	% not increase lagn (which should be specified)
	% times. e.g., KL_heuristic_k2(A, 'lag', 5)
	%
	% % example
	% A = [0 1 1 0 0 0; 1 0 1 0 0 0; 1 1 0 1 0 0; 0 0 1 0 1 1; ...
	% 0 0 0 1 0 1; 0 0 0 1 1 0];
	% fixed = [1,3];
	% z = [1,2];
	% [bestL, bestP] = KL_heuristic_k2(A, 'fixed', fixed, z)
	%
	% All calculations are based on the lecture note of Aaron Clauset's
	% Network analysis and modeling class (Fall 2014).
	% see http://tuvalu.santafe.edu/~aaronc/courses/5352/

	n = size(A,1);
	P0 = ones(1,n);
	fixed = false(n,1);
	scoref = @(A,P) SBM_likelihood(A,P); % SBM is a default
	run_to_n = true; % default
	do_earlystop = false;
	do_lag = false;

	for i = 1:length(varargin)
	if ischar(varargin{i})
	switch varargin{i}
	% score function
	case {'modularity'}
	scoref = @(A,P) modularity_wani(A,P);
	case {'fixed'}
	fixed(varargin{i+1}) = true; % fixed some vertices
	P0(varargin{i+1}) = varargin{i+2}; % assign groups
	case {'dcsbm'}
	scoref = @(A,P) SBM_likelihood(A,P, 'dcsbm');
	case {'run_to_n'}
	% do nothing
	case {'early_stop'}
	% stop at the point where L does not increase
	do_earlystop = true; run_to_n = false;
	case {'lag'} % stop if L does not increase lagn times.
	do_lag = true; lagn = varargin{i+1};
	run_to_n = false;
	end
	end
	end

	for j = 1:((n-sum(fixed))/2)
	% t = 1
	P{j,1} = rand_partition(n,j,fixed,P0); % random (j,n-j) partition
	L(j,1) = scoref(A,P{j,1}); % initial log-likelihood of P

	% t = from 2 to ...
	t = 1;
	continuing = true;
	testn = [];
	testm = -Inf;
	while continuing
	P1 = P{j,t};
	t = t + 1;
	[P{j,t}, L(j,t)] = swapping(A, P1, fixed, scoref);

	if do_earlystop % stop at the point where L does not increase
	continuing = L(j,t) > L(j,t-1);
	elseif do_lag % stop if L does not increase lagn times.
	testm = max(testm, L(j,t));
	if L(j,t) < testm \|\| L(j,t) == L(j,t-1)
	testn(end+1) = 1;
	else
	testn = [];
	end
	continuing = (sum(testn) < lagn) && (t < n); % upper limit is n
	elseif run_to_n
	continuing = t < n;
	end

	end

	end

	L(L==0) = -Inf;

	bestL = max(max(L));
	[i,j] = find(L == bestL);
	bestP = P{i,j};

	% save P and L for t (the number of rounds)
	% info.P = P;
	info.L = L;

	end

	% =============== SUBFUNCTIONS ===============

	function [P, P_idx] = rand_partition(n,j,fixed,P)

	% CHOOSE a random (j, n-j)-partition

	fn = sum(fixed);
	temp_P_idx = datasample(1:(n-fn),j,'Replace',false);
	P_idx = find(~fixed);
	P_idx = P_idx(temp_P_idx);
	P(P_idx) = 2;

	end

	function [P, L] = swapping(A, P0, fixed_s, scoref)

	% GET possible pairs that can be swapped.

	[~, min_idx] = min([sum(P0(~fixed_s)==1), sum(P0(~fixed_s)==2)]);

	for i = 1:sum(P0(~fixed_s) == min_idx)

	nonfix_idx = find(~fixed_s);
	eta_i = datasample(nonfix_idx(P0(~fixed_s)==1), 1);
	eta_j = datasample(nonfix_idx(P0(~fixed_s)==2), 1);

	P0(eta_i)=2;
	P0(eta_j)=1;

	fixed_s(eta_i) = true;
	fixed_s(eta_j) = true;

	P_temp(i,:) = P0;
	L_temp(i) = scoref(A, P_temp(i,:));
	end

	[~, max_idx] = max(L_temp);
	L = L_temp(max_idx);
	P = P_temp(max_idx,:);

	end
No results found