Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/alistairewj_at_gmail.com/entry1/pniExtractFeaturesC.m (8,338 bytes)
function [ d1,d2 ] = pniExtractFeaturesC(data)
%PNIEXTRACTFEATURESC	Day-1 / day-2 mean features for each subject
%	[ d1,d2 ] = pniExtractFeaturesC(data) collects a set of raw, transformed
%	and derived time series for each row (observation/subject) in data,
%	splits them into day 1 and day 2 by time stamp, and returns the mean of
%	each series over the respective day.
%
%	Inputs:
%		data    - Cell array of data.
%           Column 1 - Subject IDs
%           Column 2 - Time stamp vectors for each subject
%           Column 3 - Feature name vectors for each subject
%           Column 4 - Data value vectors for each subject
%
%	Outputs:
%		d1      - N x 7 matrix of day-1 means (time stamp <= 1440). Columns:
%                   HR, Mg, Na, Weight, WBC, H2Odeficit, HCT_sqrt
%		d2      - N x 13 matrix of day-2 means (time stamp >= 1441). Columns:
%                   GCS, dUrineDt, Creatinine, Glucose, WBC, BUN_logabs,
%                   dUrineDt, Temp_abs, BUNCreatR_sqrt, Platelets_sqrt,
%                   OOR_dUrineDt, Na_abs, H2Odeficit
%                 (dUrineDt appears twice; the layout is kept as-is so that
%                 column positions do not shift for existing callers.)
%       An entry is NaN when the subject has no samples of that feature
%       within that day.
%
%	Example
%       %=== Load data in
%       load('data_processed_cell.mat');
%
%       %=== Extract features
%       [ d1,d2 ] = pniExtractFeaturesC(data);
%
%	See also PNMAIN PNPREPROCESSDATA

%	References:
%       Physionet Challenge 2012
%       http://physionet.org/challenge/2012/
%

%	Copyright 2012 Alistair Johnson

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-04-25 01:26:50 +0100 (Wed, 25 Apr 2012) $
%	$Revision: 344 $
%	Originally written on GLNXA64 by Alistair Johnson, 15-Apr-2012 14:40:13
%	Contact: alistairewj@gmail.com

%=== Gather every described variable into one struct per subject
records = data(:,1);
N = numel(records); % number of observations
[dataDesc,dataFixed]  = pnDataDescriptions; % Output data descriptions
varNames = [dataFixed(:,1);dataDesc(:,1)]';
nVars = numel(varNames);

% Data{r,2} becomes a struct with one field per variable present for
% subject r; each field is stored as [time, value].
% NOTE(review): this relies on pnExtractField returning time stamps in
% column 2 and values in column 4 as column vectors - confirm there.
Data=cell(N,2);
Data(:,1) = records;

for v=1:nVars
    fieldName = varNames{v};
    
    s = pnExtractField(data,fieldName);
    idxImpute = ~cellfun(@isempty, s(:,2)); % subjects that have this variable
    Data(idxImpute,2) = cellfun(@(x,y,z) setfield(x,fieldName,[y,z]),...
        Data(idxImpute,2), s(idxImpute,2), s(idxImpute,4),...
        'UniformOutput',false);
    
    %=== Print percent completion
    checkpoint = mod(v,ceil(nVars*0.25));
    if checkpoint==0
        fprintf('%2.0f%% complete.\n',floor(v/nVars * 100));
    end
    
end

%=== Feature lists
% Variables copied unchanged from Data
d1h = {'HR','Mg','Na','Weight','WBC'};
d2h = {'GCS','dUrineDt','Creatinine','Glucose','WBC'};
% Full column layout of the outputs (copied + transformed + derived)
d1h_full = [d1h,'H2Odeficit','HCT_sqrt'];
d2h_full = [d2h,'BUN_logabs','dUrineDt','Temp_abs','BUNCreatR_sqrt',...
    'Platelets_sqrt','OOR_dUrineDt','Na_abs','H2Odeficit'];

Feats1 = cell(size(Data)); Feats1(:,1) = Data(:,1); % day 1
Feats2 = cell(size(Data)); Feats2(:,1) = Data(:,1); % day 2

% [lower, upper] limits for the out-of-range urine-rate count. The lower
% limit is NaN, so the "< lower" comparison never fires and only the
% upper limit is effective.
oorLimits = [NaN 30];
oorWinStarts = (1:15:(24*60*2))'; % window start every 15 over 2 days
oorWinLength = 120;               % 2hrs time window

for r = 1:N
    rec = Data{r,2}; % [] when the subject has no variables at all
    
    %=== Easy to extract features
    for k=1:numel(d1h)
        if isfield(rec,d1h{k})
            Feats1{r,2}.(d1h{k}) = rec.(d1h{k});
        end
    end
    for k=1:numel(d2h)
        if isfield(rec,d2h{k})
            Feats2{r,2}.(d2h{k}) = rec.(d2h{k});
        end
    end
    
    %=== Features which require just a bit of transformation
    % Day 1
    if isfield(rec,'HCT')
        Feats1{r,2}.HCT_sqrt = [rec.HCT(:,1),sqrt(rec.HCT(:,2))];
    end
    
    % Day 2
    if isfield(rec,'BUN')
        Feats2{r,2}.BUN_logabs = [rec.BUN(:,1),log(abs(rec.BUN(:,2)))];
    end
    if isfield(rec,'Temp')
        Feats2{r,2}.Temp_abs = [rec.Temp(:,1),abs(rec.Temp(:,2))];
    end
    if isfield(rec,'Platelets')
        Feats2{r,2}.Platelets_sqrt = [rec.Platelets(:,1),sqrt(rec.Platelets(:,2))];
    end
    if isfield(rec,'Na')
        Feats2{r,2}.Na_abs = [rec.Na(:,1),abs(rec.Na(:,2))];
    end
    
    %=== New physiological features
    
    % H2Odeficit: one value per Na sample, using the first Weight sample
    if isfield(rec,'Weight') && isfield(rec,'Na')
        h2oDeficit = [rec.Na(:,1), ...
            0.6 * rec.Weight(1,2) * ((rec.Na(:,2) / 140) - 1)];
        Feats1{r,2}.H2Odeficit = h2oDeficit;
        Feats2{r,2}.H2Odeficit = h2oDeficit;
    end
    
    % dUrineDt: cumulative trapezoidal integral of urine divided by the
    % time stamp. Only computed when there are more than 2 samples.
    % The first sample is not removed; it is 0 (or NaN if its time stamp
    % is 0) because cumtrapz starts at 0.
    if isfield(rec,'Urine') && size(rec.Urine,1)>2
        urineTime = rec.Urine(:,1);
        Feats2{r,2}.dUrineDt = ...
            [urineTime, cumtrapz(urineTime,rec.Urine(:,2))./urineTime];
    end
    
    % BUNCreatR (sqr)
    % Stored under the name listed in d2h_full so that it is day-filtered
    % and averaged like every other feature; skipped when no ratio could
    % be computed.
    bunCreat = extractBUNCreatRate(rec);
    if ~isempty(bunCreat)
        Feats2{r,2}.BUNCreatR_sqrt = bunCreat;
    end
    
    % OOR_dUrineDt: for each sliding window, the NUMBER of dUrineDt
    % samples strictly inside the window that fall outside oorLimits.
    % A row is produced for every window, including windows with no
    % samples (count 0).
    if isfield(Feats2{r,2},'dUrineDt')
        rateTime = Feats2{r,2}.dUrineDt(:,1);
        rateVal  = Feats2{r,2}.dUrineDt(:,2);
        oorCount = zeros(numel(oorWinStarts),1);
        for w=1:numel(oorWinStarts)
            t = oorWinStarts(w);
            inWin = rateTime > t & rateTime < t + oorWinLength;
            oorCount(w) = nnz( rateVal(inWin) < oorLimits(1) ...
                | rateVal(inWin) > oorLimits(2) );
        end
        Feats2{r,2}.OOR_dUrineDt = [oorWinStarts, oorCount];
    end
    
    %=== Remove data from Feats1 from day2, and vice versa
    % A feature left with no samples is dropped so it shows up as NaN.
    for k=1:numel(d1h_full)
        if isfield(Feats1{r,2},d1h_full{k})
            idxRem = Feats1{r,2}.(d1h_full{k})(:,1) > 1440;
            Feats1{r,2}.(d1h_full{k})(idxRem,:) = [];
            if isempty(Feats1{r,2}.(d1h_full{k}))
                Feats1{r,2} = rmfield(Feats1{r,2},d1h_full{k});
            end
        end
    end
    for k=1:numel(d2h_full)
        if isfield(Feats2{r,2},d2h_full{k})
            idxRem = Feats2{r,2}.(d2h_full{k})(:,1) < 1441;
            Feats2{r,2}.(d2h_full{k})(idxRem,:) = [];
            if isempty(Feats2{r,2}.(d2h_full{k}))
                Feats2{r,2} = rmfield(Feats2{r,2},d2h_full{k});
            end
        end
    end
    
    %=== Print percent completion
    checkpoint = mod(r,ceil(N*0.25));
    if checkpoint==0
        fprintf('%2.0f%% complete.\n',floor(r/N * 100));
    end
end

%=== Extract mean day1 and day2 values for each feature
nD1 = numel(d1h_full);
nD2 = numel(d2h_full);
d1=nan(N,nD1);
d2=nan(N,nD2); % sized by the day-2 list so missing entries stay NaN
for r=1:N
    for k=1:nD1
        if isfield(Feats1{r,2},d1h_full{k})
            d1(r,k) = mean(Feats1{r,2}.(d1h_full{k})(:,2));
        end
    end
    for k=1:nD2
        if isfield(Feats2{r,2},d2h_full{k})
            d2(r,k) = mean(Feats2{r,2}.(d2h_full{k})(:,2));
        end
    end
end

end

function [BUNCreatR] = extractBUNCreatRate(record)
%EXTRACTBUNCREATRATE	Square root of the BUN/creatinine ratio on a time grid
%	Returns a K x 2 matrix [t, sqrt(ratio)] with one row per grid time at
%	which neither the BUN nor the Creatinine value obtained from pinterp
%	is NaN, or [] when there is no such time (including when record lacks
%	either field).
%	NOTE(review): the meaning of pinterp's third argument (28*60) is
%	defined in pinterp, which is not visible here - confirm.

% Field presence does not change while scanning, so check it once.
hasBUN   = isfield(record,'BUN');
hasCreat = isfield(record,'Creatinine');

gridTimes = 1:(60*4):(24*60*2); % every 4 hours, over 2 days
pinterpArg = 28*60;

BUNCreatR = [];
for g = 1:numel(gridTimes)
    tNow = gridTimes(g);
    
    bunNow = NaN;
    if hasBUN
        bunNow = pinterp(record.BUN,tNow,pinterpArg);
    end
    
    creatNow = NaN;
    if hasCreat
        creatNow = pinterp(record.Creatinine,tNow,pinterpArg);
    end
    
    % Skip this grid time unless both values are available
    pair = [bunNow creatNow];
    if any(isnan(pair(:)))
        continue;
    end
    
    BUNCreatR(end+1,:) = [tNow get_BUNCreatR(bunNow,creatNow)]; %#ok<AGROW>
end

if isempty(BUNCreatR)
    return;
end

% Transform the ratio column in place
BUNCreatR(:,2) = sqrt(BUNCreatR(:,2));
end

function BUNCreatR = get_BUNCreatR(BUN,Creat)
%GET_BUNCREATR	Ratio of BUN to creatinine
%	BUNCreatR = get_BUNCreatR(BUN,Creat) returns BUN divided by Creat.
%	Element-wise division is used so that equally sized (or scalar-
%	expandable) array inputs give one ratio per element; matrix right
%	division (/) would instead return a least-squares solution for
%	non-scalar inputs. For scalar inputs the result is unchanged.
    BUNCreatR = BUN./Creat;
end