Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/alistairewj_at_gmail.com/entry6/NicForest_CalculateGroupPower.m (3,028 bytes)
function [ pwr ] = NicForest_CalculateGroupPower(xtrain,ytrain,opt)
%NICFOREST_CALCULATEGROUPPOWER	Calculate the power of the exponent for the
%groupings, if provided
%	[ width ] = NicForest_CalculateGroupPower(xtrain,ytrain,opt) calculates a 
%   reasonable starting value for the width parameter for the ensemble 
%   forest development.
%
%	Inputs:
%		xtrain      - Training features
%		ytrain      - Training targets
%       opt         - Number of trees to be used in development
%
%	Outputs:
%		opt.Width   - Scalar initial value for the intercept prior's width
%		
%	Example
%		[ width ] = NicForest_CalculateGroupPower(xtrain)
%	
%	See also NICFOREST NICFOREST_TRAIN

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-30 12:21:30 +0100 (Wed, 30 May 2012) $
%	$Revision: 21 $
%	Originally written on GLNXA64 by Alistair Johnson, 24-May-2012 13:32:13
%	Contact: alistairewj@gmail.com

if isempty(opt.Group) || numel(unique(opt.Group))==1
    %=== No groups input, do not calculate power // set it to 0
    pwr=0;
end
Ntrees = opt.Trees;

if strcmp(opt.Family,'binomial')
    likelihoodFcn = @ll;
else
    likelihoodFcn = @normll;
end

%=== Do a quick MCMC to find a reasonable width
opt = forest_opt_set(opt,...
    'Iterations',20000,...
    'Save',2000,...
    'Resets', 1,...
    'UpdatedTrees', 2,...
    'BurnIn', 20);

%=== Split into 2 folds + train 2 models
group_num = unique(opt.Group);
group_num = group_num(randperm(numel(group_num),numel(group_num)));
idxSplit = false(size(ytrain,1),1);
for k = 1: floor(numel(group_num)/2)
    idxSplit(opt.Group == group_num(k)) = true;
end

opt1 = opt; opt2 = opt;
opt1.Group = opt.Group(idxSplit);
opt2.Group = opt.Group(~idxSplit);

%=== Loop across different powers
pwr = 0:0.25:1;
Npwr = numel(pwr);
ll1 = zeros(Npwr,1);
ll2 = zeros(Npwr,1);
ypred = cell(2,Npwr);
for k=1:Npwr
    fprintf('Beginning power value %g - repetition %g of %g\n',pwr(k),k,Npwr);
    %=== Select a new value for power
    opt1.GroupPower = pwr(k);
    opt2.GroupPower = pwr(k);
    
    %=== Train models
    [ forests1  ] = NicForest_train(xtrain(idxSplit,:),ytrain(idxSplit,:),opt1);
    [ forests2  ] = NicForest_train(xtrain(~idxSplit,:),ytrain(~idxSplit,:),opt2);
    
    %=== Evaluate models
    [ ypred1 ] = NicForest_apply_quick( forests1 , xtrain(~idxSplit,:) );
    [ ypred2 ] = NicForest_apply_quick( forests2 , xtrain(idxSplit,:) );
    
    %=== Save likelihoods
    ll1(k) = feval(likelihoodFcn,ypred1,ytrain(~idxSplit),opt2.Group);
    ll2(k) = feval(likelihoodFcn,ypred2,ytrain(idxSplit),opt1.Group);
    ypred{1,k} = ypred1;
    ypred{2,k} = ypred2;
end
ll1
ll2
[minLL,idxMin] = min(ll1+ll2);
pwr = pwr(idxMin);
end


function [p] = invlogit(p) % inverse logit
p = 1./(1+exp(-p));
end

function [p] = ll(pred,tar,group) % log likelihood
pred = invlogit(pred);
p = sum((-tar.*log(pred)-(1-tar).*log(1-pred)).*group);
end

function [p] = normll(pred,tar,group) % log likelihood
sigma=std(tar);
p = log(sigma)*sum(group) + 0.5*sum(((tar-pred).^2).*group)/(sigma^2);
end