Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0
(2,439 bytes)
function [ width ] = NicForest_CalculateWidth(xtrain,ytrain,opt)
%NICFOREST_CALCULATEWIDTH Calculate width using a mini-forest or
%regression
%   [ width ] = NicForest_CalculateWidth(xtrain,ytrain,opt) calculates a
%   reasonable starting value for the width parameter for the ensemble
%   forest development.
%
%   For binary targets the data is split into two interleaved folds
%   (odd rows / even rows), a short forest is trained on each fold and
%   applied to the other, and the width is derived from the variance of
%   the logit of the held-out predictions:
%       width = 2*sqrt(var(logit(ypred))/opt.Trees)
%   For any other number of unique targets (> 2, or an empty ytrain) the
%   closed form width = 2/sqrt(opt.Trees) is returned.
%
%	Inputs:
%		xtrain  - Training features, one observation per row
%		ytrain  - Training targets, one row per observation
%		opt     - Options structure. opt.Trees (number of trees) is read
%                 here; the structure is also passed through
%                 forest_opt_set and on to NicForest_train. Any existing
%                 opt.Group is replaced by a single group per fold.
%
%	Outputs:
%		width   - Scalar initial value for the intercept prior's width
%
%   Errors:
%       Raises an error if ytrain contains exactly one unique value.
%
%	Example
%		[ width ] = NicForest_CalculateWidth(xtrain,ytrain,opt)
%
%	See also NICFOREST NICFOREST_TRAIN

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-30 12:21:30 +0100 (Wed, 30 May 2012) $
%	$Revision: 21 $
%	Originally written on GLNXA64 by Alistair Johnson, 09-May-2012 16:26:13
%	Contact: alistairewj@gmail.com

Ntrees = opt.Trees;

num_tar = numel(unique(ytrain));
if num_tar==1
    error('Only one class provided');
elseif num_tar==2 % binary classification
    %=== Do a quick MCMC to find a reasonable width
    % 'Width' is cleared so the short training runs do not depend on a
    % previously stored width.
    opt = forest_opt_set(opt,...
        'Iterations',20000,...
        'Save',2000,...
        'Resets', 1,...
        'UpdatedTrees', 2,...
        'BurnIn', 20, ...
        'Width', []);

    %=== Split into 2 folds + train 2 models
    % Odd rows form fold 1, even rows form fold 2.
    idxSplit = false(size(xtrain,1),1);
    idxSplit(1:2:end) = true;

    %=== if there is more than 1 group, ignore it for width estimation
    % this is probably temporary
    opt1 = opt; opt2 = opt;
    N1 = sum(idxSplit); N2 = numel(idxSplit)-N1;
    opt1.Group = ones(N1,1);
    opt2.Group = ones(N2,1);

    [ forests1 ] = NicForest_train(xtrain(idxSplit,:),ytrain(idxSplit,:),opt1);
    [ forests2 ] = NicForest_train(xtrain(~idxSplit,:),ytrain(~idxSplit,:),opt2);

    %=== Cross-apply: each model predicts the fold it was not trained on
    ypred = zeros(size(ytrain,1),1);
    [ ypred1 ] = NicForest_apply_quick( forests1 , xtrain(~idxSplit,:) );
    ypred(~idxSplit) = ypred1;
    [ ypred2 ] = NicForest_apply_quick( forests2 , xtrain(idxSplit,:) );
    ypred(idxSplit) = ypred2;

    % Guard the logit: a prediction of exactly 0 or 1 maps to -Inf/+Inf,
    % which makes var() return NaN and the width unusable. Predictions
    % strictly inside [eps, 1-eps] are left untouched.
    % NOTE(review): assumes NicForest_apply_quick returns probabilities
    % in [0,1] - confirm against its implementation.
    ypred = min(max(ypred, eps), 1 - eps);

    % width^2*Ntrees/4 ~ var(logit(Pi)) where Pi is the pred from reasonable model.
    width = 2*sqrt(var(logit(ypred))/Ntrees);
else
    width = 2*Ntrees^-0.5; % ytrain variance is normalized to be 1 in fcn
    % width = sqrt(4*var(ytrain)/Ntrees);
end

end
function [p] = logit(p)
%LOGIT Element-wise log-odds transform.
%   p = logit(p) returns log(p) - log(1-p) for every element of p.
%   The input is overwritten by its transform and returned.
logOdds = log(p) - log(1 - p);
p = logOdds;
end