Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/alistairewj_at_gmail.com/entry4/pniClassifyF.m (2,775 bytes)
function [ pred ] = pniClassifyA(data, target)
%PNICLASSIFYA	Train SVM, GLM and random-forest mortality classifiers
%	[ pred ] = pniClassifyA(data, target) extracts the lowest and highest
%	value of each selected feature over a 60*24 minute window, splits the
%	subjects into a training and a test partition, imputes missing values
%	with the training-set median, fits three classifiers on the training
%	partition and returns their predictions for the test partition.
%
%   The models use the low and high values of the following variables:
%		Platelets, BUN, Creatinine, PaO2, PaCO2, pH, HR, Temp, Age, FiO2,
%		NIMAP, MAP
%
%	Inputs:
%		data    - Cell array of data.
%           Column 1 - Subject IDs
%           Column 2 - Time stamp vectors for each subject
%           Column 3 - Feature name vectors for each subject
%           Column 4 - Data value vectors for each subject
%		target  - Vector of outcomes, one element per row of data.
%           Assumed to be coded 0/1 with 1 = positive class (the binomial
%           GLM and the probability-column selection below rely on it).
%
%	Outputs:
%		pred   - Structure of predictions for the TEST partition only
%           pred.svm - probability of class 1 from the LIBSVM model
%           pred.glm - fitted probability from the logistic regression
%           pred.rf  - second score column from the TreeBagger ensemble
%
%	Example
%       %=== Load data in (target must be supplied by the caller)
%       load('data_processed_cell.mat');
%
%       %=== Calculate prediction
%       [ pred ] = pniClassifyA(data, target);
%
%	See also PNMAIN PNPREPROCESSDATA

%	References:
%       Physionet Challenge 2012
%       http://physionet.org/challenge/2012/

%	Copyright 2012 Alistair Johnson

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-04-24 17:46:42 +0100 (Tue, 24 Apr 2012) $
%	$Revision: 339 $
%	Originally written on GLNXA64 by Alistair Johnson, 15-Apr-2012 14:40:03
%	Contact: alistairewj@gmail.com

%	NOTE(review): the archive listing names this file pniClassifyF.m while
%	the function is declared as pniClassifyA. MATLAB dispatches on the file
%	name, so confirm which name callers are expected to use.

%=== pred is a structure with one field per model. It must not be
%    preallocated as a numeric array, because field assignment on a
%    non-struct array is an error.
pred = struct('svm', [], 'glm', [], 'rf', []);

%=== LIBSVM requires labels as a double column vector; the other models
%    accept that form as well.
target = double(target(:));

header_extract = {'Urine','Platelets','BUN','Creatinine','PaO2',...
    'PaCO2','pH','HR','Temp','Age',...
    'FiO2','NIMAP','MAP'}; % fields to extract high/low data from

% 'Urine' (first entry) is skipped for the low/high extraction, as in the
% original code. NOTE(review): presumably intentional - confirm.
featureNames = header_extract(2:end);

[low,  ~] = pnSubsampleData(data, 60*24, 'low',  featureNames); % Lowest data for 24 hours
[high, ~] = pnSubsampleData(data, 60*24, 'high', featureNames); % Highest data for 24 hours

%=== Split into training + test
[ ~, idxTraining, idxTest ] = pnCreateIndices;
% Column 4 of the index matrices is hard-coded here.
% NOTE(review): presumably selects one cross-validation fold - confirm.
idxTraining = idxTraining(:,4); idxTest = idxTest(:,4);

train = [low(idxTraining,:), high(idxTraining,:)];
test  = [low(idxTest,:),     high(idxTest,:)];

trainTarget = target(idxTraining);
testTarget  = target(idxTest);

%=== Impute data as needed
% Column-wise medians are taken from the training partition only and are
% applied to both partitions, so no test-set information leaks into the fit.
trainMedian = nanmedian(train,1);
for k=1:size(train,2)
    train(isnan(train(:,k)),k) = trainMedian(k);
    test(isnan(test(:,k)),k)   = trainMedian(k);
end

%=== Develop model
[trainNormalized,testNormalized] = normalizeData(train,test);
% svmtrain/svmpredict are called with the LIBSVM MATLAB interface signature
% (labels, instances, options); '-b 1' enables probability estimates.
mdl.svm = svmtrain(trainTarget,trainNormalized,'-b 1');
mdl.glm = glmfit(train,trainTarget,'binomial');
mdl.rf = TreeBagger(200,train,trainTarget);

%=== Output predictions
% The first output of svmpredict is the predicted label, even with '-b 1'.
% The probability estimates are the third output, with columns ordered as
% in mdl.svm.Label, so select the column that belongs to class 1.
[~,~,svmProb] = svmpredict(testTarget,testNormalized,mdl.svm,'-b 1');
pred.svm = svmProb(:, mdl.svm.Label == 1);
pred.glm = glmval(mdl.glm,test,'logit');
% Second score column of the ensemble, as in the original code.
% NOTE(review): this is class 1 only if TreeBagger orders the classes 0, 1.
[~,rfScores] = predict(mdl.rf,test);
pred.rf = rfScores(:,2);

%=== Evaluate each model on the test partition
% 'UniformOutput' is false so stat_calc_struct may return a non-scalar value
% (e.g. a structure); the result is kept local and is not returned.
stats = structfun(@(x) stat_calc_struct(x,testTarget), pred, ...
    'UniformOutput', false); %#ok<NASGU>
end