Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/alistairewj_at_gmail.com/entry6/NicForest_CalculateWidth.m (2,439 bytes)
function [ width ] = NicForest_CalculateWidth(xtrain,ytrain,opt)
%NICFOREST_CALCULATEWIDTH	Calculate width using a mini-forest or
%regression
%	[ width ] = NicForest_CalculateWidth(xtrain,ytrain,opt) calculates a 
%   reasonable starting value for the width parameter for the ensemble 
%   forest development.
%
%   The number of distinct values in ytrain selects the method:
%       1 value   - an error is raised.
%       2 values  - (binary classification) the data are split into two
%                   interleaved folds, a short forest is trained on each
%                   fold and applied to the other, and the width is
%                   derived from the variance of the logit of those
%                   held-out predictions.
%       otherwise - the closed form 2/sqrt(opt.Trees) is returned.
%
%	Inputs:
%		xtrain      - Training features, one row per observation
%		ytrain      - Training targets, one row per observation
%       opt         - Options structure. opt.Trees (number of trees) is
%                     read here; in the binary case the structure is
%                     also updated through forest_opt_set and handed to
%                     NicForest_train.
%
%	Outputs:
%		width       - Scalar initial value for the intercept prior's width
%		
%	Example
%		[ width ] = NicForest_CalculateWidth(xtrain,ytrain,opt)
%	
%	See also NICFOREST NICFOREST_TRAIN

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-30 12:21:30 +0100 (Wed, 30 May 2012) $
%	$Revision: 21 $
%	Originally written on GLNXA64 by Alistair Johnson, 09-May-2012 16:26:13
%	Contact: alistairewj@gmail.com

Ntrees = opt.Trees;
num_tar = numel(unique(ytrain));
if num_tar==1
    error('Only one class provided');
elseif num_tar==2 % binary classification
    %=== Do a quick MCMC to find a reasonable width
    % Width is cleared here because it is the quantity being estimated.
    opt = forest_opt_set(opt,...
        'Iterations',20000,...
        'Save',2000,...
        'Resets', 1,...
        'UpdatedTrees', 2,...
        'BurnIn', 20, ...
        'Width', []);
    
    %=== Split into 2 folds + train 2 models
    % Odd-numbered rows form fold 1, even-numbered rows form fold 2.
    idxSplit = false(size(xtrain,1),1);
    idxSplit(1:2:end) = true;
    
    %=== if there is more than 1 group, ignore it for width estimation
    % this is probably temporary
    % Every observation in a fold is assigned to the single group 1.
    opt1 = opt; opt2 = opt;
    N1 = sum(idxSplit); N2 = numel(idxSplit)-N1;
    opt1.Group = ones(N1,1);
    opt2.Group = ones(N2,1);
    
    [ forests1  ] = NicForest_train(xtrain(idxSplit,:),ytrain(idxSplit,:),opt1);
    [ forests2  ] = NicForest_train(xtrain(~idxSplit,:),ytrain(~idxSplit,:),opt2);
    
    % Each model predicts only the fold it was NOT trained on.
    ypred = zeros(size(ytrain,1),1);
    [ ypred1 ] = NicForest_apply_quick( forests1 , xtrain(~idxSplit,:) );
    ypred(~idxSplit) = ypred1;
    [ ypred2 ] = NicForest_apply_quick( forests2 , xtrain(idxSplit,:) );
    ypred(idxSplit) = ypred2;
    
    % A prediction of exactly 0 or 1 has an infinite logit, which would
    % turn the variance (and therefore the width) into NaN. Pull such
    % values just inside the open interval (0,1). Logical indexing is used
    % rather than min/max so that NaN predictions are left as NaN instead
    % of being silently replaced.
    ypred(ypred < eps)     = eps;
    ypred(ypred > 1 - eps) = 1 - eps;
    
    % width^2*Ntrees/4 ~ var(logit(Pi)) where Pi is the pred from reasonable model.
    width = 2*sqrt(var(logit(ypred))/Ntrees);
else
    
    % Original author's note: ytrain variance is normalized to be 1 in fcn
    % (not verifiable from this file), so the general form
    %   width = sqrt(4*var(ytrain)/Ntrees)
    % reduces to the expression below.
    width = 2*Ntrees^-0.5;
    
end

end

function logOdds = logit(prob)
%LOGIT	Element-wise log-odds: log(prob) - log(1 - prob).
%   Returns -Inf where prob is 0 and +Inf where prob is 1.
logOdds = log(prob) - log(1 - prob);
end