Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/alistairewj_at_gmail.com/entry6/pnExtractData.m (5,580 bytes)
function [ X, header ] = pnExtractData(data, rule, T, fields)
%PNEXTRACTDATA	Extract data using the given rule(s) for a given window
%	[ X ] = pnExtractData(data, rule, T) extracts one summary value per
%	subject, per feature and per rule, using only the measurements whose
%	time stamp falls inside the window T.
%
%	[ X ] = pnExtractData(data, rule, T, fields) limits the data
%	extracted to the given fields. The column layout of X is unchanged
%	(one group of columns per feature found in DATA); it is up to the
%	helper functions pnExtractField/pnImputeField which measurements remain.
%
%	[ X, header ] = pnExtractData( ... ) also outputs a header (1 x D*R
%	cell array of strings) naming each column of X as
%	<feature><Rule><window>, e.g. 'HRMin' or 'HRMax0to1440'. The window
%	suffix is omitted for the default window [0 2880].
%
%	Inputs:
%		data	- Cell array of data, one row per subject.
%				Column 1 - Subject IDs
%				Column 2 - Time stamp vectors for each subject
%				Column 3 - Feature name vectors for each subject
%				Column 4 - Data value vectors for each subject
%
%		rule	- String, or cell array of strings, specifying the rule(s)
%				to use when selecting data (case insensitive)
%				max, highest  - Maximum value
%				min, lowest   - Minimum value
%				mean, average - Mean value
%				median        - Median value
%				sum           - Sum of the values
%				first         - First measurement recorded
%				last          - Last measurement recorded
%				Unrecognised rules fall back to 'first'. Default: 'first'.
%
%		T		- Window used (default [0 2880]).
%				1 element  - only measurements at exactly that time
%				2 elements - inclusive range [min(T) max(T)]
%				>2 elements - first two elements, sorted, used as an
%				              exclusive range
%
%		fields	- (optional) string or cell array of feature names to
%				keep. Empty or omitted means all features.
%
%	Outputs:
%		X      - Data, N x (D*R), where N is the number of subjects, D the
%		         number of distinct features in DATA and R the number of
%		         rules. Columns are ordered feature 1 rule 1, feature 1
%		         rule 2, ..., feature D rule R. Entries with no measurement
%		         in the window are NaN.
%		header - 1 x (D*R) cell array of column names.
%
%	Example
%		bpath = './set-a/';
%		data = pnLoadTextFilesCell(bpath);
%		X = pnExtractData(data,'min',[0 2880]); % extract minimum value across 2 days
%	See also PNGENERATEFEATURES

%	References:
%		Physionet Challenge 2012

%	Copyright 2012 Alistair Johnson

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-06-18 11:35:16 -0400 (Mon, 18 Jun 2012) $
%	$Revision: 100 $
%	Originally written on GLNXA64 by Alistair Johnson, 15-May-2012 15:08:24
%	Contact: alistairewj@gmail.com

%=== Assign both outputs up front so the early return never leaves
%    'header' undefined when the caller asks for two outputs
header = {};
if nargin<1
    X=[]; return;
end

%=== Normalise rule to a column cell array of lower-case strings
if nargin<2
    rule = {'first'};
elseif ischar(rule)
    rule = {lower(rule)};
elseif iscell(rule)
    rule = lower(rule(:));
else
    rule = {'first'};
end
if nargin<3 || ~isnumeric(T) || isempty(T)
    T = [0,2880];
end

%=== All distinct feature names present in the data (defines columns of X)
feats = unique(vertcat(data{:,3}));
if nargin<4 || isempty(fields)
    %=== Use all features
    data_used = data;
else
    %=== Extract only given fields
    if ischar(fields)
        fields = {fields}; % encapsulate in cell array of strings
    end

    % Column orientation so the comparison does not depend on how the
    % caller shaped the list
    fields = sort(fields(:));
    if ~isequal(fields, feats(:)) % requested fields differ from the full set
        %=== Preallocate
        data_used = cell(size(data));
        %=== Loop through fields and input them into data_used
        for k=1:numel(fields)
            data_temp = pnExtractField(data,fields{k});
            data_used = pnImputeField(data_used,data_temp);
        end
    else
        %=== save time by skipping extract/impute
        data_used = data;
    end
end


%=== Use rule to set evaluation function
R = numel(rule);
rfcn = cell(1,R);
for r=1:R
    switch rule{r}
        case {'min','lowest'}
            rfcn{r} = @min; rule{r} = 'min';
        case {'max','highest'}
            rfcn{r} = @max; rule{r} = 'max';
        case 'median'
            rfcn{r} = @median;
        case {'mean','average'}
            rfcn{r} = @mean; rule{r} = 'mean';
        case 'first'
            rfcn{r} = @(x) x(1);
        case 'last'
            rfcn{r} = @(x) x(end);
        case 'sum'
            rfcn{r} = @sum;
        otherwise % default first value (the header keeps the given name)
            rfcn{r} = @(x) x(1);
    end
end

%=== Check window
if numel(T)==1
    windowFcn = @(x,win) x==win;
elseif numel(T)==2
    T = sort(T);
    windowFcn = @(x,win) x>=win(1) & x<=win(2);
else
    % More than two elements: only the first two are used, exclusive bounds
    T = sort(T(1:2));
    windowFcn = @(x,win) x>win(1) & x<win(2);
end

%=== One column per (feature, rule) pair; NaN marks "no measurement"
D = numel(feats);
X = nan(size(data_used,1), D*R);

%=== Keep only the measurements inside the window, for every subject
%    that has any data. The mask is built and applied over the same rows.
hasData = cellfun(@(x) ~isempty(x), data_used(:,3));
inWindow = cellfun(@(x) windowFcn(x,T), data_used(hasData,2),'UniformOutput',false);
for c=2:4
    data_used(hasData,c) = cellfun(@(x,y) x(y), data_used(hasData,c), inWindow,'UniformOutput',false);
end

for f=1:D
    %=== Locate this feature's measurements within each subject
    idxFeat = cellfun(@(x) strcmp(x,feats{f}), data_used(:,3), 'UniformOutput',false);
    hasFeat = cellfun(@any, idxFeat);

    idxData = (f-1)*R; % columns idxData+1 .. idxData+R belong to feature f
    for r=1:R
        v = cellfun(@(x,y) rfcn{r}(x(y)), data_used(hasFeat,4), idxFeat(hasFeat));
        X(hasFeat,idxData+r) = v;
    end
    %=== Remove used features from data_used to speed up function
    data_used(hasFeat,2:4) = cellfun(@(x,y) x(~y),...
        data_used(hasFeat,2:4), repmat(idxFeat(hasFeat),1,3),...
        'UniformOutput',false);
end

%=== Generate header
if D==0
    % No features at all: X is N-by-0 and there is nothing to name
    return;
end

for r=1:R
    if ~isempty(rule{r})
        rule{r} = [upper(rule{r}(1)), rule{r}(2:end)]; % capitalize first letter
    end
end

if numel(T)==1
    Tstr = num2str(T(1));
else
    if T(1) == 0 && T(2) == 2880
        %=== don't output anything for simplicity
        Tstr = '';
    else
        Tstr = [num2str(T(1)) 'to' num2str(T(2))];
    end
end

% Reshape features to match data (feature 1 rule 1, feature 1 rule 2, etc)
featNames = repmat(feats(:)',R,1); featNames = featNames(:);
ruleNames = repmat(rule(:),D,1);
header = strcat(featNames,ruleNames);
if ~isempty(Tstr)
    header = strcat(header,Tstr);
end
header = header';

end