Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0
(3,203 bytes)
function pred = apply_tree( tree , xtrain , NaNNbre, xtrain_normalized )
% APPLY_TREE calculates a single tree's contribution to the prediction
%
%	pred = apply_tree( tree, xtrain, NaNNbre) calculates the contributions
%	from tree on data in xtrain. NaNNbre is the number of non-NaN
%	entries in each column of xtrain.
%
%	pred = apply_tree( tree, xtrain, NaNNbre, xtrain_normalized) allows the
%	user to input xtrain_normalized, which is the value of xtrain if it
%	were used to generate a normal variable with zero mean and unit
%	standard deviation. This will speed up the function (and avoids the
%	call to norminv).
%
%	Inputs:
%       tree              - vector with the following parameters
%           1-3 : variable (column) indices for the three nodes
%           4-5 : thresholds for nodes 1 and 2, expressed as a fraction
%                 that is multiplied by NaNNbre of the node's variable
%           6   : slope applied to the normalized node-3 variable
%           7-9 : missing value parameters for nodes 1 to 3
%           10  : architecture type (1,2,3,4), i.e. location of the final
%                 node in the tree
%           11  : intercept
%       xtrain            - N-by-P data matrix; the thresholds are compared
%                           against its values directly (the original
%                           comments describe these values as ranks)
%       NaNNbre           - per-column count, indexed by variable index
%       xtrain_normalized - (optional) matrix indexed like xtrain
%
%	Output:
%       pred - N-by-1 vector. Rows that do not fall in the final leaf are 0.
%              Rows in the leaf with an observed node-3 variable receive
%              intercept + slope * normalized value. Rows in the leaf with
%              a missing node-3 variable receive intercept + slope * an
%              order statistic of the observed normalized values selected
%              by tree(9). Any NaN prediction is replaced by 0.

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-16 13:48:37 +0100 (Wed, 16 May 2012) $
%	$Revision: 10 $
%	Originally written on GLNXA64 by Alistair Johnson, 09-May-2012 16:26:13
%	Contact: alistairewj@gmail.com

N = size(xtrain,1);
pred = zeros(N,1);

%% First Node - split the data based upon tree(4), which represents a
% fractional rank (i.e., data ranks are from 1-10, tree(4) = 0.4, then the
% split is 4)
obs1 = xtrain(:,tree(1)) > tree(4)*NaNNbre(tree(1));

% NOTE(review): the assignment below never changes obs1. obs1 is a logical
% array, so isnan(obs1) is always false, and rows whose split variable is
% NaN compare as false above. As written, tree(7) therefore has no effect.
% The apparent intent was obs1(isnan(xtrain(:,tree(1)))) = true, but making
% that change would alter the output of trees fitted against the current
% behaviour, so it is deliberately left as-is here.
if NaNNbre(tree(1))*tree(7) < sum(obs1)
    obs1(isnan(obs1)) = 1;
end

% Architectures 1 and 2 use the other side of the first split
if tree(10) < 3
    obs1 = ~obs1;
end

%% Second Node - get the indices of the observations in the 2nd split
obs2 = xtrain(:,tree(2)) > tree(5)*NaNNbre(tree(2));

% NOTE(review): same no-op as for the first node; tree(8) has no effect.
if NaNNbre(tree(2))*tree(8) < sum(obs2)
    obs2(isnan(obs2)) = 1;
end

% Even architectures (2 and 4) use the other side of the second split
if mod(tree(10),2) == 0
    obs2 = ~obs2;
end

%% Third node (final leaf) - only use observations that satisfy both splits
obs1 = obs1 & obs2; % take the intersection

% Partition the leaf into rows with a missing / observed node-3 variable
% (both are logical index vectors)
leafVarMissing = isnan(xtrain(:,tree(3)));
obs1nans = obs1 & leafVarMissing;
obsTot   = obs1 & ~leafVarMissing;

% Take the ranks, assume a normal distribution and synthesize the data;
% the actual values are not used.
% THIS STEP SHOULD BE DONE BEFORE THE MCMC LOOP.
if nargin < 4
    xtrain_normalized = norminv(xtrain(obsTot,tree(3))/NaNNbre(tree(3)));
else
    xtrain_normalized = xtrain_normalized(obsTot,tree(3));
end

% %=== Center the normalization on the patients in the regression subset
% xtrain_normalized = xtrain_normalized - mean(xtrain_normalized);

nObs = sum(obsTot);
if nObs > 0 % need at least one non-missing value to impute from
    % Missing values get intercept + slope * synthetic value, where the
    % synthetic value is the order statistic of the observed normalized
    % data located at fraction tree(9).
    sortedNorm = sort(xtrain_normalized);
    idx = round(0.5 + tree(9)*nObs);
    % round() rounds halves away from zero, so tree(9) == 1 yields nObs+1.
    % Clamp to the valid range so the boundary value selects the largest
    % element instead of raising an index-out-of-range error.
    idx = min(max(idx, 1), nObs);
    pred(obs1nans) = tree(11) + tree(6)*sortedNorm(idx);
end

% non-missing
pred(obsTot) = tree(11) + tree(6)*xtrain_normalized;

% Any prediction that ended up NaN contributes nothing
pred(isnan(pred)) = 0;