Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/alistairewj_at_gmail.com/entry1/pniAndrew.m (9,715 bytes)
function [ score ] = pniAndrew(data)
%PNIANDREW	Andrew's initial entry - severity of illness score
%	[ score ] = pniAndrew(data) calculates a severity of illness score for
%	each row (observation) in data
%
%   The score uses the following variables:
%		urine, platelets, BUN, creatinine, PaFi ratio, PaO2, PaCO2, pH,
%       heart_rate, temperature, BP, and age.
%
%	Inputs:
%		data    - Cell array of data.
%           Column 1 - Subject IDs
%           Column 2 - Time stamp vectors for each subject
%           Column 3 - Feature name vectors for each subject
%           Column 4 - Data value vectors for each subject
%
%	Outputs:
%		score   - Column vector of severity scores (one per row of data),
%                 the sum of the 12 component weights.
%
%	Example
%       %=== Load data in
%       load('data_processed_cell.mat');
%
%       %=== Calculate score
%       [ score ] = pniAndrew(data);
%
%	See also PNMAIN PNPREPROCESSDATA

%	References:
%       Physionet Challenge 2012
%       http://physionet.org/challenge/2012/
%
%       APACHE III (1991)
%       Knaus WA, Wagner DP, Draper EA, Zimmerman JE, Bergner M,
%       Bastos PG, Sirio CA, Murphy DJ, Lotring T, Damiano A, et al.
%       The APACHE III prognostic system. Risk prediction of hospital
%       mortality for critically ill hospitalized adults.
%       Chest 100(6):1619-1636 (1991 Dec).

%	Copyright 2012 Alistair Johnson and Andrew Kramer

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-04-24 22:39:24 +0100 (Tue, 24 Apr 2012) $
%	$Revision: 342 $
%	Originally written on GLNXA64 by Alistair Johnson, 15-Apr-2012 14:39:14
%	Contact: alistairewj@gmail.com

%=== Variables used in severity of illness score
% 2. There are 12 components: urine, platelets, BUN, creatinine, PaFi ratio, PaO2, PaCO2, pH,
% heart_rate, temperature, BP, and age.
% The order of this list fixes the column order of dataL, dataH and the
% per-component weight matrix below.
header = {'Urine','Platelets','BUN','Creatinine','PaFi','PaO2',...
    'PaC2','pH','HR','Temp','BP','Age'}; % Fields used in score
header_extract = {'Urine','Platelets','BUN','Creatinine','PaO2',...
    'PaCO2','pH','HR','Temp','Age',...
    'FiO2','NIMAP','MAP'}; % Fields to extract high/low data from
nVar = numel(header);
N = size(data,1);
% 1. The severity of illness score is initialized at zero. All component weights are set = 0;
score = zeros(N,nVar);

%=======================%
%=== DATA EXTRACTION ===%
%=======================%

[low,h_L] = pnSubsampleData(data, 60*48,'low',header_extract(2:end)); % Lowest data for 48 hours
[high,h_H] = pnSubsampleData(data, 60*48,'high',header_extract(2:end)); % Highest data for 48 hours

[urine_L,u_L] = pnSubsampleData(data,60*24,'low',header_extract(1));
[urine_H,u_H] = pnSubsampleData(data,60*24,'high',header_extract(1));

%=== Only use data from first 24 hours for urine
low = [urine_L(:,1),low]; h_L = [u_L(1),h_L];
high = [urine_H(:,1),high]; h_H = [u_H(1),h_H];

% Strip the '_0' suffix so the returned column names can be matched
% against the plain names in header.
h_L = cellfun(@(x) strrep(x,'_0',''), h_L, 'UniformOutput',false);
h_H = cellfun(@(x) strrep(x,'_0',''), h_H, 'UniformOutput',false);
%=== Load data
dataL = zeros(size(low,1),numel(header));
dataH = zeros(size(high,1),numel(header));
for k=1:nVar
    switch header{k}
        case {'Urine','Platelets','BUN','Creatinine','PaO2',...
                'HR','Temp','Age'}
            % Columns of 'high' are looked up in its own header list (h_H),
            % so the result does not rely on h_L and h_H sharing an order.
            dataL(:,k) = low(:,strcmp(h_L,header{k}));
            dataH(:,k) = high(:,strcmp(h_H,header{k}));
        case 'PaFi'
            %=== Extract data for PaO2 and FiO2
            pafi = pnPaFi(data);
            pafi = pafi(:,4);
            %=== Impute NaNs
            idxEmpty = cellfun(@isempty, pafi);
            pafi(idxEmpty) = {NaN};
            
            dataL(:,k) = cellfun(@(x) min(x), pafi);
            dataH(:,k) = cellfun(@(x) max(x), pafi);
        case 'PaC2'
            %=== Extract data for PaCO2 / FiO2
            pac2 = pnPaC2(data);
            pac2 = pac2(:,4);
            %=== Impute NaNs
            idxEmpty = cellfun(@isempty, pac2);
            pac2(idxEmpty) = {NaN};
            
            dataL(:,k) = cellfun(@(x) min(x), pac2);
            dataH(:,k) = cellfun(@(x) max(x), pac2);
            
        case 'BP'
            dataL(:,k) = low(:,strcmp(h_L,'NIMAP'));
            dataH(:,k) = high(:,strcmp(h_H,'NIMAP'));
            % 3. For the variables BP_min & BP_max, they were derived from NiMAP_min & NiMAP_max
            % unless those variables were missing, in which case BP_min & BP_max came from MAP_min &
            % MAP_max.
            idxMissing = isnan(dataL(:,k));
            dataL(idxMissing,k) = low(idxMissing,strcmp(h_L,'MAP'));
            idxMissing = isnan(dataH(:,k));
            dataH(idxMissing,k) = high(idxMissing,strcmp(h_H,'MAP'));
        case 'pH'
            % pH_num, the number of times pH is measured.
            % a. If pH_num = 0, pH_weight = 0
            % b. If pH_num = {1, 2, 3, 4, 5, 6} then pH_weight = 4
            % c. If pH_num > 6 then pH_weight = 2
            % Only the count is stored (in dataH); dataL keeps its zero
            % initialisation for this component.
            pH = pnExtractField(data,'pH');
            dataH(:,k) = cellfun(@(x) numel(x), pH(:,2));
    end
end
%=== clear unneeded vars
clear h_H h_L u_H u_L urine_H urine_L header_extract
%=======================%
%=== CALCULATE SCORE ===%
%=======================%
for k=1:nVar
    low = dataL(:,k);
    high = dataH(:,k);
    % A component is only scored when both its minimum and maximum exist.
    idxNotMissing = ~isnan(low) & ~isnan(high);
    switch header{k}
        % 4. Urine, BUN, creatinine, PaO2, heart_rate, temperature, BP, & age use the same weighting
        % structure as in APACHE.
        %
        % Each table is given as the ascending lower limits of its bins and
        % the weight of each bin. Bin m covers
        % [lower_limit(m), lower_limit(m+1)), and the last bin is unbounded
        % above. Treating the bins as contiguous means fractional values
        % (e.g. a BP of 59.67 or a creatinine of 1.945) receive the weight
        % of the bin below them rather than falling between two bins and
        % silently scoring 0.
        case {'Urine','BUN','Creatinine','PaO2','HR','Temp','BP','Age'}
            switch header{k}
                case 'Urine'
                    lower_limit = [0,400,600,900,1500,2000,4000];
                    temp_score = [15,8,7,5,4,0,1];
                case 'BUN' % mg/dL
                    lower_limit = [0,17,20,40,80];
                    temp_score = [0,2,7,11,12];
                case 'Creatinine'
                    lower_limit = [0,0.5,1.5,1.95];
                    temp_score = [3,0,4,7];
                case 'PaO2' % mmHg
                    lower_limit = [0,50,70,80];
                    temp_score = [15,5,2,0];
                case 'HR'
                    lower_limit = [0,40,50,100,110,120,140,155];
                    temp_score = [8,5,0,1,5,7,13,17];
                case 'Temp'
                    lower_limit = [0,33,33.5,34,35,36,40];
                    temp_score = [28,16,13,8,2,0,4];
                case 'BP'
                    lower_limit = [0,40,60,70,80,110,120,130,140];
                    temp_score = [23,15,7,6,0,4,7,9,10];
                case 'Age'
                    lower_limit = [0,45,60,65,70,75,85];
                    temp_score = [0,5,11,13,16,17,24];
            end
            scoreL = binWeights(low, lower_limit, temp_score);
            scoreH = binWeights(high, lower_limit, temp_score);
            % The component weight is the worse (larger) of the weights
            % earned by the minimum and the maximum value.
            score(idxNotMissing,k) = max([scoreL(idxNotMissing),scoreH(idxNotMissing)],[],2);
        case 'pH'
            % 5. pH is based on pH_num, the number of times pH is measured.
            % a. If pH_num = 0, pH_weight = 0
            % b. If pH_num = {1, 2, 3, 4, 5, 6} then pH_weight = 4
            % c. If pH_num > 6 then pH_weight = 2
            % Here 'high' holds the measurement count stored during extraction.
            score(high>6,k) = 2;
            score(high>0 & high<=6,k) = 4;
            
        case 'PaFi'
            % 6. The weight for PaFi ratio is calculated thus:
            % a. If PaFi_min ne missing & PaFi_min <= 64 then PaFi_weight = 10
            % b. If PaFi_min is between 65 to 86 then PaFi_weight = 6
            score((idxNotMissing & low>64 & low <= 86),k) = 6;
            score((idxNotMissing & low<=64),k) = 10;
            
        case 'Platelets'
            % 7. The weight for platelets is calculated thus:
            % a. If Platelets_max > 380 then Platelets_weight = 3
            % b. If Platelets_min ne missing & Platelets_min < 100 then Platelets_weight = 8
            % Rule b is applied last so it takes precedence when both hold.
            score((idxNotMissing & high>380),k) = 3;
            score((idxNotMissing & low<100),k) = 8;
            
            
        case 'PaC2'
            % 8. The weight for PaCO2 is calculated thus:
            % a. If PaCO2_min ne missing and PaCO2_min < 2.241 then PaCO2_weight = 10
            % b. If PaCO2_min is between 2.412 and 3.580 then PaCO2_weight = 2
            %
            % NOTE(review): the code below disagrees with rule b as written:
            % it assigns a weight of 4, not 2. In addition, values in
            % [2.241, 2.412) receive no weight; 2.241 and 2.412 look like a
            % digit transposition of one intended threshold. The original
            % behaviour is kept unchanged here - confirm the intended
            % weight and threshold against the score specification.
            % NOTE(review): the units/scale of the pnPaC2 output are not
            % visible from this file - confirm before interpreting thresholds.
            score((idxNotMissing & low>=2.412 & low <=3.580),k) = 4;
            score((idxNotMissing & low<2.241),k) = 10;
            
    end
end

% 9. The weights for the 12 components get summed to arrive at the severity of illness score.
score = sum(score,2);

end

function [ w ] = binWeights(values, lower_limit, temp_score)
%BINWEIGHTS	Map each value to the weight of the contiguous bin containing it
%	values      - array of measurements (NaN allowed)
%	lower_limit - ascending lower limits of the bins
%	temp_score  - weight of each bin, same length as lower_limit
%
%	w has the size of values. Values below lower_limit(1) and NaN values
%	get a weight of 0.
w = zeros(size(values));
% Because lower_limit is ascending, later iterations overwrite earlier
% ones, so each value ends up with the weight of the highest bin whose
% lower limit it reaches.
for m=1:numel(temp_score)
    w(values>=lower_limit(m)) = temp_score(m);
end
end