Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0
(9,715 bytes)
function [ score ] = pniAndrew(data)
%PNIANDREW Andrew's initial entry - severity of illness score
%   [ score ] = pniAndrew(data) calculates a severity of illness score for
%   each row (observation) in data
%
%   The score uses the following variables:
%       urine, platelets, BUN, creatinine, PaFi ratio, PaO2, PaCO2, pH,
%       heart_rate, temperature, BP, and age.
%
%   Inputs:
%       data - Cell array of data.
%           Column 1 - Subject IDs
%           Column 2 - Time stamp vectors for each subject
%           Column 3 - Feature name vectors for each subject
%           Column 4 - Data value vectors for each subject
%
%   Outputs:
%       score - Column vector of severity scores (sum of the 12 component
%               weights, one value per row of data)
%
%   Example
%       %=== Load data in
%       load('data_processed_cell.mat');
%
%       %=== Calculate score
%       [ score ] = pniAndrew(data);
%
%   See also PNMAIN PNPREPROCESSDATA

%   References:
%       Physionet Challenge 2012
%       http://physionet.org/challenge/2012/
%
%       APACHE III (1991)
%       Knaus WA, Wagner DP, Draper EA, Zimmerman JE, Bergner M,
%       Bastos PG, Sirio CA, Murphy DJ, Lotring T, Damiano A, et al.
%       The APACHE III prognostic system. Risk prediction of hospital
%       mortality for critically ill hospitalized adults.
%       Chest 100(6):1619-1636 (1991 Dec).

%   Copyright 2012 Alistair Johnson and Andrew Kramer

%   $LastChangedBy: alistair $
%   $LastChangedDate: 2012-04-24 22:39:24 +0100 (Tue, 24 Apr 2012) $
%   $Revision: 342 $
%   Originally written on GLNXA64 by Alistair Johnson, 15-Apr-2012 14:39:14
%   Contact: alistairewj@gmail.com

%=== Variables used in severity of illness score
% 2. There are 12 components: urine, platelets, BUN, creatinine, PaFi ratio, PaO2, PaCO2, pH,
%    heart_rate, temperature, BP, and age.
header = {'Urine','Platelets','BUN','Creatinine','PaFi','PaO2',...
    'PaC2','pH','HR','Temp','BP','Age'}; % Fields used in score
nVar = numel(header);
N = size(data,1);

% 1. The severity of illness score is initialized at zero. All component weights are set = 0;
score = zeros(N,nVar);

header_extract = {'Urine','Platelets','BUN','Creatinine','PaO2',...
    'PaCO2','pH','HR','Temp','Age',...
    'FiO2','NIMAP','MAP'}; % Fields to extract high/low data from

%=======================%
%=== DATA EXTRACTION ===%
%=======================%
[low,h_L]  = pnSubsampleData(data, 60*48,'low', header_extract(2:end)); % Lowest data for 48 hours
[high,h_H] = pnSubsampleData(data, 60*48,'high',header_extract(2:end)); % Highest data for 48 hours
[urine_L,u_L] = pnSubsampleData(data,60*24,'low', header_extract(1));
[urine_H,u_H] = pnSubsampleData(data,60*24,'high',header_extract(1));

%=== Only use data from first 24 hours for urine
low  = [urine_L(:,1),low];   h_L = [u_L(1),h_L];
high = [urine_H(:,1),high];  h_H = [u_H(1),h_H];

%=== Strip the '_0' suffix so column names can be matched against header
h_L = cellfun(@(x) strrep(x,'_0',''), h_L, 'UniformOutput',false);
h_H = cellfun(@(x) strrep(x,'_0',''), h_H, 'UniformOutput',false);

%=== Load data
% dataL / dataH hold, per score component, the low / high value per subject
dataL = zeros(size(low,1),numel(header));
dataH = zeros(size(high,1),numel(header));
for k=1:nVar
    switch header{k}
        case {'Urine','Platelets','BUN','Creatinine','PaO2',...
                'HR','Temp','Age'}
            % Columns of 'low' are described by h_L, columns of 'high' by h_H
            dataL(:,k) = low(:,strcmp(h_L,header{k}));
            dataH(:,k) = high(:,strcmp(h_H,header{k}));
        case 'PaFi'
            %=== Extract data for PaO2 and FiO2
            pafi = pnPaFi(data);
            pafi = pafi(:,4);

            %=== Impute NaNs
            idxEmpty = cellfun(@isempty, pafi);
            pafi(idxEmpty) = {NaN};

            dataL(:,k) = cellfun(@(x) min(x), pafi);
            dataH(:,k) = cellfun(@(x) max(x), pafi);
        case 'PaC2'
            %=== Extract data for PaCO2 / FiO2
            pac2 = pnPaC2(data);
            pac2 = pac2(:,4);

            %=== Impute NaNs
            idxEmpty = cellfun(@isempty, pac2);
            pac2(idxEmpty) = {NaN};

            dataL(:,k) = cellfun(@(x) min(x), pac2);
            dataH(:,k) = cellfun(@(x) max(x), pac2);
        case 'BP'
            dataL(:,k) = low(:,strcmp(h_L,'NIMAP'));
            dataH(:,k) = high(:,strcmp(h_H,'NIMAP'));
            % 3. For the variables BP_min & BP_max, they were derived from NiMAP_min & NiMAP_max
            %    unless those variables were missing, in which case BP_min & BP_max came from MAP_min &
            %    MAP_max.
            idxMissing = isnan(dataL(:,k));
            dataL(idxMissing,k) = low(idxMissing,strcmp(h_L,'MAP'));
            idxMissing = isnan(dataH(:,k));
            dataH(idxMissing,k) = high(idxMissing,strcmp(h_H,'MAP'));
        case 'pH'
            % pH is scored on pH_num, the number of times pH is measured
            % (see the weights in the CALCULATE SCORE section below).
            % Only dataH is filled with the count; dataL stays at zero.
            pH = pnExtractField(data,'pH');
            dataH(:,k) = cellfun(@(x) numel(x), pH(:,2));
    end
end

%=== clear unneeded vars
clear h_H h_L u_H u_L urine_H urine_L header_extract low high

%=======================%
%=== CALCULATE SCORE ===%
%=======================%
for k=1:nVar
    vLow  = dataL(:,k);
    vHigh = dataH(:,k);
    idxNotMissing = ~isnan(vLow) & ~isnan(vHigh);
    switch header{k}
        % 4. Urine, BUN, creatinine, PaO2, heart_rate, temperature, BP, & age use the same weighting
        %    structure as in APACHE.
        case {'Urine','BUN','Creatinine','PaO2','HR','Temp','BP','Age'}
            % lower_limit(m) is the inclusive lower edge of bin m; bin m ends
            % where bin m+1 starts and the last bin is unbounded above.
            switch header{k}
                case 'Urine'
                    lower_limit = [0,400,600,900,1500,2000,4000];
                    temp_score  = [15,8,7,5,4,0,1];
                case 'BUN' % mg/dL
                    lower_limit = [0,17,20,40,80];
                    temp_score  = [0,2,7,11,12];
                case 'Creatinine'
                    lower_limit = [0,0.5,1.5,1.95];
                    temp_score  = [3,0,4,7];
                case 'PaO2' % mmHg
                    lower_limit = [0,50,70,80];
                    temp_score  = [15,5,2,0];
                case 'HR'
                    lower_limit = [0,40,50,100,110,120,140,155];
                    temp_score  = [8,5,0,1,5,7,13,17];
                case 'Temp'
                    lower_limit = [0,33,33.5,34,35,36,40];
                    temp_score  = [28,16,13,8,2,0,4];
                case 'BP'
                    lower_limit = [0,40,60,70,80,110,120,130,140];
                    temp_score  = [23,15,7,6,0,4,7,9,10];
                case 'Age'
                    lower_limit = [0,45,60,65,70,75,85];
                    temp_score  = [0,5,11,13,16,17,24];
            end

            scoreL = pniBinScore(vLow,  lower_limit, temp_score);
            scoreH = pniBinScore(vHigh, lower_limit, temp_score);

            % The component weight is the worse (larger) of the weights of
            % the lowest and the highest observed value.
            score(idxNotMissing,k) = max([scoreL(idxNotMissing),scoreH(idxNotMissing)],[],2);
        case 'pH'
            % 5. pH is based on pH_num, the number of times pH is measured.
            %   a. If pH_num = 0, pH_weight = 0
            %   b. If pH_num = {1, 2, 3, 4, 5, 6} then pH_weight = 4
            %   c. If pH_num > 6 then pH_weight = 2
            score(vHigh>6,k) = 2;
            score(vHigh>0 & vHigh<=6,k) = 4;
        case 'PaFi'
            % 6. The weight for PaFi ratio is calculated thus:
            %   a. If PaFi_min ne missing & PaFi_min <= 64 then PaFi_weight = 10
            %   b. If PaFi_min is in (64, 86] then PaFi_weight = 6
            score((idxNotMissing & vLow>64 & vLow<=86),k) = 6;
            score((idxNotMissing & vLow<=64),k) = 10;
        case 'Platelets'
            % 7. The weight for platelets is calculated thus:
            %   a. If Platelets_max > 380 then Platelets_weight = 3
            %   b. If Platelets_min ne missing & Platelets_min < 100 then Platelets_weight = 8
            %   (b takes precedence over a because it is assigned last)
            score((idxNotMissing & vHigh>380),k) = 3;
            score((idxNotMissing & vLow<100),k) = 8;
        case 'PaC2'
            % 8. The weight for PaCO2 is calculated thus:
            %   a. If PaCO2_min ne missing and PaCO2_min < 2.241 then PaCO2_weight = 10
            %   b. If PaCO2_min is between 2.412 and 3.580 then PaCO2_weight = 4
            % NOTE(review): the original specification comment gave a weight
            % of 2 for band (b) while the code has always assigned 4, and the
            % thresholds 2.241 / 2.412 leave values in between unscored (one
            % of them looks like a digit transposition). Behaviour is kept
            % as-is here - confirm against the score definition.
            score((idxNotMissing & vLow>=2.412 & vLow<=3.580),k) = 4;
            score((idxNotMissing & vLow<2.241),k) = 10;
    end
end

% 9. The weights for the 12 components get summed to arrive at the severity of illness score.
score = sum(score,2);

end

function [ binScore ] = pniBinScore(values, lower_limit, temp_score)
%PNIBINSCORE Weight of the bin that each value falls into
%   values      - Column vector of observations (NaN allowed)
%   lower_limit - Ascending vector of inclusive lower bin edges
%   temp_score  - Weight assigned to each bin (same length as lower_limit)
%
%   Bins are contiguous: bin m covers [lower_limit(m), lower_limit(m+1)) and
%   the last bin covers [lower_limit(end), Inf]. This avoids the gaps that
%   separate inclusive lower/upper limits leave for non-integer values
%   (e.g. a mean arterial pressure of 79.33 lying between 79 and 80).
%   NaN values and values below lower_limit(1) receive a weight of 0.
binScore = zeros(size(values));
nBin = numel(temp_score);
for m=1:nBin
    if m < nBin
        idxBin = values>=lower_limit(m) & values<lower_limit(m+1);
    else
        idxBin = values>=lower_limit(m);
    end
    binScore(idxBin) = temp_score(m);
end
end