% Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0
% (8,364 bytes)
function [ d1,d2 ] = pniExtractFeaturesC(data)
%PNIEXTRACTFEATURESC Per-subject mean features for day 1 and day 2
%   [ d1,d2 ] = pniExtractFeaturesC(data) computes, for each row
%   (observation/subject) in data, the mean of a set of (optionally
%   transformed) variables over day 1 (time stamps <= 1440 minutes) and
%   over day 2 (time stamps >= 1441 minutes).
%
%   Inputs:
%       data    - Cell array of data.
%                   Column 1 - Subject IDs
%                   Column 2 - Time stamp vectors for each subject
%                   Column 3 - Feature name vectors for each subject
%                   Column 4 - Data value vectors for each subject
%
%   Outputs:
%       d1      - N-by-7 matrix of day-1 means, columns in this order:
%                   HR, Mg, Na, Weight, WBC, H2Odeficit, HCT_sqrt
%       d2      - N-by-13 matrix of day-2 means, columns in this order:
%                   GCS, dUrineDt, Creatinine, Glucose, WBC, BUN_logabs,
%                   dUrineDt, Temp_abs, BUNCreatR_sqrt, Platelets_sqrt,
%                   OOR_dUrineDt, Na_abs, H2Odeficit
%                 (dUrineDt appears twice; the duplicate column is kept so
%                 that existing column positions do not move.)
%
%   An entry is NaN when the subject has no samples of that feature in the
%   corresponding day.
%
%   Example
%       %=== Load data in
%       load('data_processed_cell.mat');
%
%       %=== Extract features
%       [ d1,d2 ] = pniExtractFeaturesC(data);
%
%   See also PNMAIN PNPREPROCESSDATA

%   References:
%       Physionet Challenge 2012
%       http://physionet.org/challenge/2012/
%
%   Copyright 2012 Alistair Johnson

%   $LastChangedBy: alistair $
%   $LastChangedDate: 2012-04-25 01:26:50 +0100 (Wed, 25 Apr 2012) $
%   $Revision: 344 $
%   Originally written on GLNXA64 by Alistair Johnson, 15-Apr-2012 14:40:13
%   Contact: alistairewj@gmail.com

%=== Constants (time stamps are handled in minutes throughout)
DAY1_LAST_MIN   = 1440;     % rows with time > this are dropped from day 1
DAY2_FIRST_MIN  = 1441;     % rows with time < this are dropped from day 2
TWO_DAYS_MIN    = 24*60*2;
OOR_STEP_MIN    = 15;       % stride of the sliding out-of-range window
OOR_WINDOW_MIN  = 120;      % width of the sliding out-of-range window
OOR_LOW         = NaN;      % NaN lower bound: "< NaN" is never true
OOR_HIGH        = 30;

records = data(:,1);
N = numel(records); % number of observations

%=== Gather every known variable into one struct per subject.
% Data{r,2}.(name) is [time, value]; it stays [] for subjects with no data.
% NOTE(review): assumes pnExtractField returns time stamps in column 2 and
% values in column 4 of its output - confirm against that helper.
[dataDesc,dataFixed] = pnDataDescriptions; % Output data descriptions
varNames = [dataFixed(:,1);dataDesc(:,1)]';

Data = cell(N,2);
Data(:,1) = records;
for v=1:length(varNames)
    fieldName = varNames{v};
    s = pnExtractField(data,fieldName);
    idxImpute = ~cellfun(@isempty, s(:,2));
    Data(idxImpute,2) = cellfun(@(x,y,z) setfield(x,fieldName,[y,z]),...
        Data(idxImpute,2), s(idxImpute,2), s(idxImpute,4),...
        'UniformOutput',false);
end

%=== Feature lists
% Variables copied unchanged from the subject struct
day1Raw = {'HR','Mg','Na','Weight','WBC'};
day2Raw = {'GCS','dUrineDt','Creatinine','Glucose','WBC'};

% Full, ordered column lists of the two outputs
day1Names = [day1Raw,'H2Odeficit','HCT_sqrt'];
day2Names = [day2Raw,'BUN_logabs','dUrineDt','Temp_abs','BUNCreatR_sqrt',...
    'Platelets_sqrt','OOR_dUrineDt','Na_abs','H2Odeficit'];
nDay1 = numel(day1Names);
nDay2 = numel(day2Names);

% Preallocate each output with its OWN width. Sizing d2 with the day-1
% width would let MATLAB grow it on assignment and pad with zeros, turning
% "missing" into 0 instead of NaN.
d1 = nan(N,nDay1);
d2 = nan(N,nDay2);

for r = 1:N
    rec = Data{r,2};    % struct of [time,value] matrices, or [] if no data
    f1 = struct();      % day-1 candidate features for this subject
    f2 = struct();      % day-2 candidate features for this subject

    %=== Features copied straight across
    for k=1:numel(day1Raw)
        if isfield(rec,day1Raw{k})
            f1.(day1Raw{k}) = rec.(day1Raw{k});
        end
    end
    for k=1:numel(day2Raw)
        if isfield(rec,day2Raw{k})
            f2.(day2Raw{k}) = rec.(day2Raw{k});
        end
    end

    %=== Features which require just a bit of transformation
    % Day 1
    if isfield(rec,'HCT')
        f1.HCT_sqrt = [rec.HCT(:,1),sqrt(rec.HCT(:,2))];
    end
    % Day 2
    if isfield(rec,'BUN')
        f2.BUN_logabs = [rec.BUN(:,1),log(abs(rec.BUN(:,2)))];
    end
    if isfield(rec,'Temp')
        f2.Temp_abs = [rec.Temp(:,1),abs(rec.Temp(:,2))];
    end
    if isfield(rec,'Platelets')
        f2.Platelets_sqrt = [rec.Platelets(:,1),sqrt(rec.Platelets(:,2))];
    end
    if isfield(rec,'Na')
        f2.Na_abs = [rec.Na(:,1),abs(rec.Na(:,2))];
    end

    %=== New physiological features
    % H2Odeficit: 0.6 * (first recorded weight) * (Na/140 - 1), evaluated
    % at every Na sample and time-stamped with the Na sample time.
    if isfield(rec,'Weight') && isfield(rec,'Na')
        h2oDeficit = [rec.Na(:,1), ...
            0.6 * rec.Weight(1,2) * ((rec.Na(:,2) / 140) - 1)];
        f1.H2Odeficit = h2oDeficit;
        f2.H2Odeficit = h2oDeficit;
    end

    % dUrineDt: cumulative trapezoidal integral of urine divided by the
    % sample time. Needs at least 3 samples. A sample at time 0 yields a
    % non-finite value; such rows are before day 2 and get filtered below.
    if isfield(rec,'Urine') && size(rec.Urine,1) > 2
        urineT = rec.Urine(:,1);
        % Assign the whole matrix at once so that a dUrineDt copied from
        % the raw variables (if any) is replaced rather than partially
        % overwritten, which would fail on a row-count mismatch.
        f2.dUrineDt = [urineT, cumtrapz(urineT,rec.Urine(:,2))./urineT];
    end

    % BUNCreatR_sqrt: square-rooted BUN/creatinine ratio on a 4-hour grid.
    % Stored under the same name used in day2Names so that it is actually
    % day-filtered and averaged.
    bunCreat = extractBUNCreatRate(rec);
    if ~isempty(bunCreat)
        f2.BUNCreatR_sqrt = bunCreat;
    end

    % OOR_dUrineDt: sliding 2-hour window, stepped every 15 minutes over
    % two days, over all dUrineDt samples (before the day-2 filter).
    % NOTE(review): the statistic is sum(find(...)), i.e. the SUM OF THE
    % WINDOW-RELATIVE INDICES of samples above OOR_HIGH, not their count,
    % and it is always a scalar so the isempty test never rejects a window.
    % This looks unintended, but it is kept as-is because changing it would
    % alter the feature values - confirm the intent before touching it.
    OOR = [];
    if isfield(f2,'dUrineDt')
        for t=1:OOR_STEP_MIN:TWO_DAYS_MIN
            timeWin = find( f2.dUrineDt(:,1) > t & f2.dUrineDt(:,1) < t + OOR_WINDOW_MIN );
            OORtmp = sum(find( f2.dUrineDt(timeWin,2) < OOR_LOW ...
                | f2.dUrineDt(timeWin,2) > OOR_HIGH ) );
            if ~isempty(OORtmp)
                OOR(end+1,:) = [t OORtmp]; %#ok<AGROW>
            end
        end
    end
    if ~isempty(OOR)
        f2.OOR_dUrineDt = OOR;
    end

    %=== Keep only day-1 rows in f1 and day-2 rows in f2, then average
    for k=1:nDay1
        name = day1Names{k};
        if isfield(f1,name)
            f1.(name)(f1.(name)(:,1) > DAY1_LAST_MIN,:) = [];
            if isempty(f1.(name))
                f1 = rmfield(f1,name);
            end
        end
    end
    for k=1:nDay2
        name = day2Names{k};
        % The second occurrence of dUrineDt is a no-op here: the field is
        % either already filtered or already removed.
        if isfield(f2,name)
            f2.(name)(f2.(name)(:,1) < DAY2_FIRST_MIN,:) = [];
            if isempty(f2.(name))
                f2 = rmfield(f2,name);
            end
        end
    end

    %=== Mean value per feature; entries stay NaN when the feature is absent
    for k=1:nDay1
        if isfield(f1,day1Names{k})
            d1(r,k) = mean(f1.(day1Names{k})(:,2));
        end
    end
    for k=1:nDay2
        if isfield(f2,day2Names{k})
            d2(r,k) = mean(f2.(day2Names{k})(:,2));
        end
    end
end
end
function [BUNCreatR] = extractBUNCreatRate(record)
%EXTRACTBUNCREATRATE Square-rooted BUN/creatinine ratio on a 4-hour grid
%   BUNCreatR = extractBUNCreatRate(record) walks over two days in 4-hour
%   steps (t = 1, 241, 481, ... minutes). At each step it asks pinterp for
%   a BUN value and a creatinine value; when neither is NaN, a row
%   [t, ratio] is appended. Finally the ratio column is square-rooted.
%
%   record    - struct that may carry the fields 'BUN' and 'Creatinine'.
%   BUNCreatR - K-by-2 matrix [time, sqrt(BUN/creatinine)], or [] when no
%               grid point had both values.
%
%   NOTE(review): the third pinterp argument (28*60) is presumably a
%   look-back/validity horizon in minutes - confirm against pinterp.

STEP_MIN    = 60*4;     % every 4 hours
HORIZON_MIN = 24*60*2;  % scroll over 2 days
PINTERP_ARG = 28*60;

% The record does not change inside the loop, so test for the fields once.
hasBUN   = isfield(record,'BUN');
hasCreat = isfield(record,'Creatinine');

BUNCreatR = [];
for t = 1:STEP_MIN:HORIZON_MIN
    bunNow = NaN;
    if hasBUN
        bunNow = pinterp(record.BUN,t,PINTERP_ARG);
    end

    creatNow = NaN;
    if hasCreat
        creatNow = pinterp(record.Creatinine,t,PINTERP_ARG);
    end

    % Only emit a row when both measurements are available
    if ~any(isnan([bunNow creatNow]))
        BUNCreatR(end+1,:) = [t get_BUNCreatR(bunNow,creatNow)]; %#ok<AGROW>
    end
end

if isempty(BUNCreatR)
    return
end
BUNCreatR(:,2) = sqrt(BUNCreatR(:,2));
end
function BUNCreatR = get_BUNCreatR(BUN,Creat)
%GET_BUNCREATR Ratio of BUN to creatinine
%   BUNCreatR = get_BUNCreatR(BUN,Creat) returns BUN divided by Creat,
%   element by element.
%
%   Element-wise division (./) is used rather than matrix right division
%   (/): the two agree for scalar inputs, but for non-scalar inputs "/"
%   would solve a least-squares system instead of forming a ratio.
BUNCreatR = BUN./Creat;
end