Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/alistairewj_at_gmail.com/entry8/NicForest_apply.m (4,254 bytes)
function [ ypred ] = NicForest_apply( f_out, xtest )
%NICFOREST_APPLY	Apply a trained forest model to new observations
%	[ ypred ] = NicForest_apply( f_out, xtest ) ranks the rows of xtest
%	against the training data stored in the model, evaluates every tree of
%	every forest on those ranks, and averages the per-forest outputs.
%
%	Inputs:
%		f_out - model structure. Fields read by this function:
%		          forests  - 3-D array indexed as forests(i,:,j) for tree j
%		                     of forest i; the last page along the third
%		                     dimension holds the intercept in (i,1,end)
%		          xtrain   - training data, used as the ranking reference
%		          TrainNaN - number of non-NaNs in the training data set
%		                     (forwarded to apply_tree)
%		          Family   - 'binomial' selects the classification path,
%		                     anything else the regression path
%		          ynum, yrk, ytrain - used on the regression path only
%		xtest - data to predict, one observation per row, with the same
%		        number of columns as f_out.xtrain
%
%	Outputs:
%		ypred - column vector with one prediction per row of xtest.
%		        Binomial: mean over forests of the inverse-logit output.
%		        Otherwise: the mean forest output is mapped through normcdf,
%		        scaled by f_out.ynum, and converted back to the scale of
%		        f_out.ytrain by interpolating between the training ranks.

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-16 13:48:37 +0100 (Wed, 16 May 2012) $
%	$Revision: 10 $
%	Originally written on MACI64 by Louis Mayaud, 25-April-2012 14:05:26
%	Contact: alistairewj@gmail.com

forests = f_out.forests;
xtrain = f_out.xtrain;
NaNNbre = f_out.TrainNaN; % number of non-nans in training data set
N = size(xtest,1);

%=== RANK TEST DATA ===%
% Test rows are stacked on top of the training rows; idxTraining flags the
% training rows for tiedrankrelative.
% NOTE(review): presumably the ranks are computed relative to the flagged
% rows only - confirm against tiedrankrelative.
idxTraining = [false(N,1);true(size(xtrain,1),1)]; % elements used for ranking
xtest_rk = tiedrankrelative([xtest;xtrain],idxTraining);
xtest_rk = xtest_rk(1:N,:); % remove training set

% NOTE(review): earlier revisions also built a normalised copy of the ranks
% (norminv(xtest_rk./NaNNbre)) that was never read; apply_tree receives the
% raw ranks together with NaNNbre. The unused computation has been removed.

[NForests , ~ , Ntrees]  = size(forests);
Ntrees = Ntrees -1 ; % remove the intercept

% create prediction vector for each forest
ypred=zeros(N,NForests);
% For each forest
for i=1:NForests
    % start from the forest's intercept (stored after the last tree)
    ypred(:,i)=ypred(:,i)+forests(i,1,Ntrees+1);
    val = zeros(N,1);
    for j=1:Ntrees % For each tree
        val= val + apply_tree( forests(i,:,j) , xtest_rk , NaNNbre );
    end
    ypred(:,i)=ypred(:,i)+val;
end

%=== invlogit if binomial case
%=== method: regression or classification
if strcmp(f_out.Family,'binomial')
    ypred = invlogit(ypred);
    %=== sum across forests and normalize by number of forests
    ypred = sum(ypred,2)/NForests;
else
    %=== average across forests first, then un-normalize using normcdf
    ypred = sum(ypred,2)/NForests;
    
    %=== re-scale to original values
    ypred = normcdf(ypred,0,1)*f_out.ynum;
    
    %=== use inverse ranking mapping to find actual predicted values
    %=== first get the ranks of the predictions/training values
    yrk = [f_out.yrk;ypred]; 
    idxPred = [false(size(f_out.yrk));true(size(ypred))];
    
    [yrk,idxSort] = sort(yrk,1,'ascend');
    idxPred = idxPred(idxSort); % idxPred now follows the sorted order
    
    %=== now order the actual y values in the training set similarly
    %=== (prediction slots stay 0 and are filled in by propogateValues)
    ytrain = zeros(numel(yrk),1);
    ytrain(~idxPred) = f_out.ytrain(idxSort(~idxPred));
    
    %=== now calculate predictions using weighted average
    ypred_sorted = propogateValues(ytrain,yrk);
    
    %=== invert the sort permutation to get back to [training;prediction]
    idxUnsort = 1:numel(idxSort);
    idxUnsort(idxSort) = idxUnsort;
    
    ypred_sorted = ypred_sorted(idxUnsort);
    ypred = ypred_sorted(idxPred(idxUnsort));
end

end

function [x] = invlogit(x)
%INVLOGIT	Element-wise inverse logit (logistic) transform
%	[ x ] = invlogit(x) returns 1./(1+exp(-x)), the same size as the input.
expNeg = exp(-x);
x = 1./(1+expNeg);
end


function [ x ] = propogateValues(x,y)
%PROPOGATEVALUES	Fill zero entries by rank-weighted linear interpolation
%	[ x ] = propogateValues(x,y) replaces every zero in x with a value
%	interpolated linearly between the nearest non-zero entry before it and
%	the nearest non-zero entry after it, using the ranks in y as the
%	interpolation coordinate.
%
%	Inputs:
%		x   - A column vector containing 0s and non-zeroes. Zeros mark the
%		      entries to be filled in; a genuine value of exactly 0 cannot
%		      be told apart from such a placeholder.
%		y   - A column vector, the same size as x, containing the ranks of
%		      x, with no 0s. Callers in this file pass it sorted ascending.
%
%	Outputs:
%		x   - The input with every 0 replaced by
%		        ((y0-y1).*x2 + (y2-y0).*x1)./(y2-y1)
%		      where (y1,x1) is the previous non-zero entry, (y2,x2) the
%		      next one, and y0 the rank of the entry being filled.
%
%	Edge behaviour (a consequence of the formula, kept unchanged):
%		- zeros before the first non-zero entry use (y1,x1) = (0,0), giving
%		  y0.*x2./y2
%		- zeros after the last non-zero entry use (y2,x2) = (0,0), giving
%		  y0.*x1./y1
%		- if x contains no non-zero entry at all the result is NaN (0/0)
%		- if x contains no zeros it is returned unchanged
%
%	Example
%       x = [0;1;0;0;6;0;0;0;5;0];
%       y = (1:10)';
%		[ x ] = propogateValues(x,y) 
%       
%	See also FIND DIFF CUMSUM FLIPUD


%	Copyright 2012 Alistair Johnson

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-16 13:48:37 +0100 (Wed, 16 May 2012) $
%	$Revision: 10 $
%	Originally written on GLNXA64 by Alistair Johnson, 08-May-2012 11:27:49
%	Contact: alistairewj@gmail.com

% NOTE: earlier revisions carried two empty "handle the extremum" stubs
% here that indexed the result of FIND without checking it for emptiness,
% which raised an indexing error whenever x had no zeros. They had no other
% effect and have been removed.

idx0 = x==0;
% flipud only reverses along the first dimension, hence the column-vector
% requirement on x and y
x1 = x; x2 = flipud(x);
valind1 = find(x1);
valind2 = find(x2);
%=== x1 = propogate values forward
% Replace each non-zero (after the first) by its increment over the
% previous non-zero; the running sum then holds the last seen value.
x1(valind1(2:end)) = diff(x1(valind1));
x1 = cumsum(x1);
%=== x2 = propogate values backward (same trick on the reversed vector)
x2(valind2(2:end)) = diff(x2(valind2));
x2 = cumsum(x2);
x2=flipud(x2);
%=== y1/y2 are the same but using the ranks
y1=y; y1(idx0) = 0; 
y2=y; y2(idx0) = 0; y2=flipud(y2);
y1(valind1(2:end)) = diff(y1(valind1));
y1 = cumsum(y1);
y2(valind2(2:end)) = diff(y2(valind2));
y2 = cumsum(y2);
y2=flipud(y2);

%=== calculate weights using y values which are ranks
y1=y1(idx0); y2=y2(idx0); x1=x1(idx0); x2=x2(idx0);
y0 = y(idx0);
x(idx0) = ((y0-y1).*x2 + (y2-y0).*x1)./(y2-y1);

end