% Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0
% (5,582 bytes)
function [ X, header ] = pnExtractData(data, rule, T, fields)
%PNEXTRACTDATA Extract data using the given rule for a given window
%   [ X ] = pnExtractData(data, rule, T) extracts one summary value per
%   subject and feature from the measurements whose time stamps fall in
%   the window T. rule selects the summary (e.g., 'max') and may be a
%   string or a cell array of strings; T is a 2 element vector with the
%   minimum and maximum time values to consider (both inclusive). If T is
%   one element, only measurements at exactly that time are considered.
%
%   [ X ] = pnExtractData(data, rule, T, fields) limits the data
%   extracted to the given fields (string or cell array of strings).
%
%   [ X, header ] = pnExtractData( ... ) also outputs a header vector for
%   the data matrix X.
%
%	Inputs:
%		data    - Cell array of data.
%               Column 1 - Subject IDs
%               Column 2 - Time stamp vectors for each subject
%               Column 3 - Feature name vectors for each subject
%               Column 4 - Data value vectors for each subject
%
%       rule    - String (or cell array of strings) specifying the rule(s)
%                 to use when selecting data. Matching is case-insensitive.
%               max, highest  - Maximum value
%               min, lowest   - Minimum value
%               mean, average - Mean value
%               median        - Median value
%               sum           - Sum of values
%
%               first         - First measurement recorded (default)
%               last          - Last measurement recorded
%               Any unrecognised rule behaves like 'first'.
%
%       T       - Window used - 1 or 2 element vector. Defaults to
%                 [0 2880] when omitted, empty, or non-numeric. Only the
%                 first two elements are used if more are given.
%
%       fields  - (optional) feature names to keep.
%
%	Outputs:
%		X       - Data, Nx(D*R), where N is the number of subjects (rows of
%                 data), D is the number of distinct feature names found in
%                 data and R is the number of rules. Columns are ordered
%                 feature 1 rule 1, feature 1 rule 2, ..., feature D rule R.
%                 Entries with no measurement in the window are NaN.
%       header  - 1x(D*R) cell array of strings, one per column of X:
%                 feature name, capitalised rule name, and the window
%                 (omitted for the default window [0 2880]).
%
%	Example
%       bpath = './set-a/';
%       data = pnLoadTextFilesCell(bpath);
%       X = pnExtractData(data,'min',[0 2880]); % extract minimum value across 2 days
%
%	See also PNGENERATEFEATURES

%	References:
%       Physionet Challenge 2012

%	Copyright 2012 Alistair Johnson

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-08-23 18:39:12 -0400 (Thu, 23 Aug 2012) $
%	$Revision: 152 $
%	Originally written on GLNXA64 by Alistair Johnson, 15-May-2012 15:08:24
%	Contact: alistairewj@gmail.com

%=== Always define both outputs so that [X,header] = pnExtractData() works
X = [];
header = {};
if nargin<1 || isempty(data)
    return;
end

%=== Normalise rule into a column cell array of lower-case strings
if nargin<2
    rule = {'first'};
elseif ischar(rule)
    rule = {lower(rule)};
elseif iscell(rule)
    rule = lower(rule(:));
else
    rule = {'first'};
end

%=== Default window; an empty T would otherwise fail on T(1:2) below
if nargin<3 || ~isnumeric(T) || isempty(T)
    T = [0,2880];
end

%=== All feature names present in the data (sorted, as a column)
feats = unique(vertcat(data{:,3}));
feats = feats(:);

if nargin<4
    %=== Use all features
    data_used = data;
else
    %=== Extract only given fields
    if ischar(fields)
        fields = {fields}; % encapsulate in cell array of strings
    end
    fields = sort(fields(:)); % column, so it can be compared against feats

    % Filter when the requested fields differ from the full feature list.
    % The size test comes first because strcmp on two cell arrays requires
    % equal sizes; || short-circuits before that comparison is attempted.
    if numel(fields)~=numel(feats) || ~all(strcmp(feats,fields))
        %=== Preallocate
        data_used = cell(size(data));

        %=== Loop through fields and input them into data_used
        % NOTE(review): pnExtractField/pnImputeField are defined outside
        % this file; presumably they pull one field and merge it into the
        % accumulated cell array - confirm against their implementation.
        for k=1:numel(fields)
            data_temp = pnExtractField(data,fields{k});
            data_used = pnImputeField(data_used,data_temp);
        end
    else
        %=== save time by skipping extract/impute
        data_used = data;
    end
end

%=== Use rule to set evaluation function
R = numel(rule);
rfcn = cell(1,R);
for r=1:R
    switch rule{r}
        case {'min','lowest'}
            rfcn{r} = @min; rule{r} = 'min';
        case {'max','highest'}
            rfcn{r} = @max; rule{r} = 'max';
        case 'median'
            rfcn{r} = @median;
        case {'mean','average'}
            rfcn{r} = @mean; rule{r} = 'mean';
        case 'first'
            rfcn{r} = @(x) x(1);
        case 'last'
            rfcn{r} = @(x) x(end);
        case 'sum'
            rfcn{r} = @sum;
        otherwise % default first value (the rule name is kept for the header)
            rfcn{r} = @(x) x(1);
    end
end

%=== Check window
if numel(T)==1
    windowFcn = @(x,win) x==win;
else
    T = sort(T(1:2)); % ignore anything beyond the first two elements
    windowFcn = @(x,win) x>=win(1) & x<=win(2);
end

%=== Allocate output: R consecutive columns per feature, NaN = no data
N = size(data_used,1);
D = numel(feats);
X = nan(N, D*R);

%=== Remove data outside the window from each subject
% Done row by row so each mask is applied to the row it was computed
% from, even when some rows have no data at all.
for n=1:N
    if isempty(data_used{n,3})
        continue;
    end
    inWindow = windowFcn(data_used{n,2}, T);
    data_used{n,2} = data_used{n,2}(inWindow);
    data_used{n,3} = data_used{n,3}(inWindow);
    data_used{n,4} = data_used{n,4}(inWindow);
end

for f=1:D
    %=== Locate this feature's measurements for every subject
    idxUsed = cellfun(@(x) strcmp(x,feats{f}), data_used(:,3), 'UniformOutput',false);
    idxUsedExist = cellfun(@(m) any(m(:)), idxUsed);
    if ~any(idxUsedExist)
        continue; % nobody has this feature inside the window
    end

    idxData = (f-1)*R; % Columns idxData+1 .. idxData+R belong to feature f
    for r=1:R
        v = cellfun(@(x,y) rfcn{r}(x(y)), data_used(idxUsedExist,4), idxUsed(idxUsedExist));
        X(idxUsedExist,idxData+r) = v;
    end

    %=== Remove used features from data_used to speed up function
    data_used(idxUsedExist,2:4) = cellfun(@(x,y) x(~y),...
        data_used(idxUsedExist,2:4), repmat(idxUsed(idxUsedExist),1,3),...
        'UniformOutput',false);
end

%=== Generate header
for r=1:R
    if ~isempty(rule{r})
        rule{r} = [upper(rule{r}(1)), rule{r}(2:end)]; % capitalize first letter
    end
end

if numel(T)==1
    Tstr = num2str(T(1));
elseif T(1) == 0 && T(2) == 2880
    %=== don't output anything for simplicity
    Tstr = '';
else
    Tstr = [num2str(T(1)) 'to' num2str(T(2))];
end

% Reshape features to match data (feature 1 rule 1, feature 1 rule 2, etc)
featsRep = repmat(feats',R,1); featsRep = featsRep(:);
ruleRep = repmat(rule(:),D,1);
header = strcat(featsRep,ruleRep);
if ~isempty(Tstr)
    header = strcat(header,Tstr);
end
header = header';

end