Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/mlipoff_at_gmail.com/entry2/TabulatePatientRecordData_binsChangeable2.m (10,530 bytes)
function [cleanData, columnHeaders] = TabulatePatientRecordData_binsChangeable2(allPatientStruct, spreads)
%TABULATEPATIENTRECORDDATA_BINSCHANGEABLE2 Tabulate patient records as time-binned means.
%
%   [cleanData, columnHeaders] = ...
%       TabulatePatientRecordData_binsChangeable2(allPatientStruct, spreads)
%
%   Inputs
%     allPatientStruct - struct array, one element per patient. The fields
%                        read are Gender, Age, Height, Weight, every name in
%                        binableFields below, NIDiasABP, DiasABP, NISysABP,
%                        SysABP, BUN, Creatinine, PaO2 and FiO2. Series
%                        fields are indexed as [time, value] columns; an
%                        empty field means "no measurements".
%     spreads          - N-by-2 cell array: column 1 holds a variable name,
%                        column 2 a vector of bin widths for that variable.
%                        Bin widths are in the same unit as the time column
%                        (hours, according to the original author; the
%                        widths are expected to sum to 48). For example
%                        [8 8 8 8 8 8] gives six 8-hour bins and
%                        [8 8 8 6 6 6 6] gives 8-hour bins for the first
%                        day and 6-hour bins for the second.
%
%   Outputs
%     cleanData     - one row per patient, one column per header, after the
%                     missing bins have been filled by removeNaNs (which
%                     reads 'ICU Ranges.csv').
%     columnHeaders - 1-by-M cell array of column names: 'Gender', 'Age',
%                     'BMI', then '<name>_Bin <low> to <high>' per bin.
%
%   Column order: Gender, Age, BMI, the binable fields, BPDias, BPSys,
%   BUN to Creatinine, PaO2FiO2.

% Variables that are simply averaged inside each bin.
binableFields = {'Albumin';'ALP';'ALT';'AST';'Bilirubin';...
    'Cholesterol';'GCS';...
    'Glucose';'HCO3';'HCT';'HR';'K';'Lactate';'Mg';...
    'Na';'PaCO2';'pH';'Platelets';'RespRate';...
    'SaO2';'Temp';'Urine';'WBC'};

% Variables that are derived from other fields before binning.
calcNames = {'BPDias', 'BPSys', 'BUN to Creatinine', 'PaO2FiO2'};

binSpreads = spreads;

cleanData = [];
columnHeaders = {};

nPatients = length(allPatientStruct);
nBinable = length(binableFields);
binableEdges = cell(1, nBinable);
calcEdges = cell(1, length(calcNames));

if nPatients > 0
    % The bin layout does not depend on the patient, so resolve the bin
    % edges and the column headers once instead of once per patient.
    columnHeaders = {'Gender', 'Age', 'BMI'};
    for j = 1:nBinable
        binableEdges{j} = binEdges(lookupBins(binSpreads, binableFields{j}));
        columnHeaders = [columnHeaders, ...
            binHeaders(binableFields{j}, binableEdges{j})]; %#ok<AGROW>
    end

    % Each calculated field uses its own entry in spreads. Older spreads
    % tables that lack these entries fall back to the spread of the last
    % binable field, which is what every calculated field used before.
    fallbackField = binableFields{end};
    for c = 1:length(calcNames)
        calcEdges{c} = binEdges(lookupBins(binSpreads, calcNames{c}, fallbackField));
        columnHeaders = [columnHeaders, ...
            binHeaders(calcNames{c}, calcEdges{c})]; %#ok<AGROW>
    end

    cleanData = nan(nPatients, length(columnHeaders));
end

for i = 1:nPatients
    disp(i);
    patient = allPatientStruct(i);

    %---------
    %fields with no bins
    %---------

    %gender: negative codes are mapped to 0
    gender = patient.Gender(2);
    if gender < 0
        gender = 0;
    end

    %age
    age = patient.Age(2);

    %BMI = weight / (height in metres)^2, using the value column of both
    %series (column 1 is the time stamp)
    if ~isempty(patient.Height) && ~isempty(patient.Weight)
        bmi = mean(patient.Weight(:,2)) ./ mean((patient.Height(:,2)/100).^2);
    else
        bmi = 21.75;
    end

    %make sure it is reasonable; written as a negated range test so that a
    %NaN result is replaced by the default as well
    if ~(bmi >= 10 && bmi <= 70)
        bmi = 21.75;
    end

    row = [gender, age, bmi];

    %---------
    %fields with bins
    %---------
    for j = 1:nBinable
        [times, values] = splitSeries(patient.(binableFields{j}));
        row = [row, binMeans(times, values, binableEdges{j})]; %#ok<AGROW>
    end

    %diastolic blood pressure: non-invasive reading preferred, invasive
    %reading used when the non-invasive series is empty
    dias = patient.NIDiasABP;
    if isempty(dias)
        dias = patient.DiasABP;
    end
    [times, values] = splitSeries(dias);
    row = [row, binMeans(times, values, calcEdges{1})];

    %systolic blood pressure: same preference order
    sys = patient.NISysABP;
    if isempty(sys)
        sys = patient.SysABP;
    end
    [times, values] = splitSeries(sys);
    row = [row, binMeans(times, values, calcEdges{2})];

    %BUN to creatinine ratio
    [times, values] = ratioSeries(patient.BUN, patient.Creatinine);
    row = [row, binMeans(times, values, calcEdges{3})];

    %PaO2 / FiO2 ratio
    [times, values] = ratioSeries(patient.PaO2, patient.FiO2);
    row = [row, binMeans(times, values, calcEdges{4})];

    %store the finished row
    disp(size(row))
    cleanData(i, :) = row;
end

cleanData = removeNaNs(cleanData, columnHeaders);

end

%--------------------
%HELPER FUNCTION
% Return the bin-width vector stored for fieldName in the spreads table,
% optionally falling back to the entry of fallbackName.
function bins = lookupBins(binSpreads, fieldName, fallbackName)
row = find(strcmp(binSpreads(:, 1), fieldName), 1);
if isempty(row) && nargin > 2
    row = find(strcmp(binSpreads(:, 1), fallbackName), 1);
    if ~isempty(row)
        warning('TabulatePatientRecordData:fallbackBinSpread', ...
            'No bin spread defined for "%s"; using the spread of "%s".', ...
            fieldName, fallbackName);
    end
end
if isempty(row)
    error('TabulatePatientRecordData:missingBinSpread', ...
        'No bin spread defined for "%s".', fieldName);
end
bins = binSpreads{row, 2};
end

%--------------------
%HELPER FUNCTION
% Convert bin widths to bin edges: bin k spans [edges(k), edges(k+1)).
function edges = binEdges(bins)
edges = [0, cumsum(bins(:)')];
end

%--------------------
%HELPER FUNCTION
% Build the '<name>_Bin <low> to <high>' header of every bin.
function headers = binHeaders(name, edges)
nBins = length(edges) - 1;
headers = cell(1, nBins);
for k = 1:nBins
    headers{k} = [name, '_Bin ', num2str(edges(k)), ' to ', num2str(edges(k+1))];
end
end

%--------------------
%HELPER FUNCTION
% Mean of the values whose time falls in each bin; NaN for an empty bin.
function means = binMeans(times, values, edges)
nBins = length(edges) - 1;
means = nan(1, nBins);
for k = 1:nBins
    inBin = (times >= edges(k)) & (times < edges(k+1));
    selected = values(inBin);
    if ~isempty(selected)
        means(k) = mean(selected);
    end
end
end

%--------------------
%HELPER FUNCTION
% Split a [time, value] series into its columns; an empty series becomes a
% single NaN sample so that every bin evaluates to NaN.
function [times, values] = splitSeries(series)
if isempty(series)
    times = NaN;
    values = NaN;
else
    times = series(:, 1);
    values = series(:, 2);
end
end

%--------------------
%HELPER FUNCTION
% Ratio of two [time, value] series, evaluated on the merged, sorted time
% stamps of both series.
function [times, ratio] = ratioSeries(numerator, denominator)
if isempty(numerator) || isempty(denominator)
    times = NaN;
    ratio = NaN;
    return
end
times = sort([numerator(:, 1); denominator(:, 1)]);
ratio = nearestSample(numerator, times) ./ nearestSample(denominator, times);
end

%--------------------
%HELPER FUNCTION
% Nearest-neighbour resampling of a [time, value] series at queryTimes.
function y = nearestSample(series, queryTimes)
try
    y = interp1(series(:, 1), series(:, 2), queryTimes, 'nearest', 'extrap');
catch
    % interp1 can refuse the series (for example a single sample or
    % repeated sample times); use the series mean at every query time.
    y = mean(series(:, 2)) * ones(size(queryTimes));
end
end

%helper function-----------------------------
function out = cellContains(cellArray, searchValue)
%CELLCONTAINS Index of the first cell that matches searchValue.
%   Walks cellArray{1} .. cellArray{length(cellArray)} in order and
%   compares each entry with searchValue using strcmp. Returns the index
%   of the first match, or -1 when no entry matches.

out = -1;
nEntries = length(cellArray);
position = 1;
while position <= nEntries
    if strcmp(cellArray{position}, searchValue)
        out = position;
        break
    end
    position = position + 1;
end

end

%--------------------
%HELPER FUNCTION

function X = removeNaNs(Xold, colHeaders)
%REMOVENANS Fill the NaN entries of the binned columns, row by row.
%
%   X = removeNaNs(Xold, colHeaders)
%
%   A column is treated as binned when its header contains '_'; the text
%   before the first '_' is the variable the column belongs to. Columns
%   without '_' are left untouched. For each row and each variable:
%     * every bin is NaN  -> all bins get the midpoint of the low/high
%       range read from 'ICU Ranges.csv'. The range columns are selected
%       by the value in X(i,1): columns 1-2 when it is 1, columns 3-4 when
%       it is 0. For 'Urine' both range limits are first multiplied by the
%       number of bins.
%     * exactly one bin is known -> that value is copied to every bin.
%     * otherwise -> linear interpolation between the known bins and
%       nearest-neighbour extension outside them.
%
%   Raises an error when a variable that needs a default has no usable row
%   in 'ICU Ranges.csv'.

normRangFile = importdata('ICU Ranges.csv', ',');
normalRanges = normRangFile.data;
normalRangesNames = normRangFile.textdata;

X = Xold;

%map every column to its variable name ('NONE' for unbinned columns)
nCols = length(colHeaders);
assocVariable = cell(1, nCols);
for c = 1:nCols
    header = colHeaders{c};
    underscoreAt = strfind(header, '_');
    if isempty(underscoreAt)
        assocVariable{c} = 'NONE';
    else
        assocVariable{c} = header(1:underscoreAt(1)-1);
    end
end

variables = unique(assocVariable);
variables(strcmp(variables, 'NONE')) = [];

%the column mask of a variable is the same for every row, so build it once
nVars = length(variables);
colMasks = cell(1, nVars);
for j = 1:nVars
    colMasks{j} = strcmp(assocVariable, variables{j});
end

for i = 1:size(X, 1)
    disp(i)
    gender = X(i,1);
    for j = 1:nVars
        mask = colMasks{j};
        binVals = X(i, mask);
        known = ~isnan(binVals);
        nKnown = sum(known);

        if nKnown == 0
            %nothing measured: use the midpoint of the reference range
            % NOTE(review): assumes row k of textdata describes row k of
            % data in 'ICU Ranges.csv' - confirm against the file layout.
            loc = cellContains(normalRangesNames, variables{j});
            if loc < 1 || loc > size(normalRanges, 1)
                error('removeNaNs:missingNormalRange', ...
                    'No normal range found in ''ICU Ranges.csv'' for "%s".', ...
                    variables{j});
            end
            highRange = normalRanges(loc, (1-gender)*2+2);
            lowRange = normalRanges(loc, (1-gender)*2+1);
            if strcmp(variables{j}, 'Urine')
                highRange = highRange*length(binVals);
                lowRange = lowRange*length(binVals);
            end

            X(i, mask) = mean([highRange, lowRange]);
        elseif nKnown == 1
            %a single measurement: repeat it in every bin
            X(i, mask) = binVals(known);
        else
            %two or more measurements: interpolate inside the known span,
            %then extend the first/last value outwards
            times = 1:length(binVals);
            interped = interp1(times(known), binVals(known), times, 'linear', nan);
            filled = ~isnan(interped);
            exterped = interp1(times(filled), interped(filled), times, 'nearest', 'extrap');
            disp(variables(j))
            disp(exterped')
            X(i, mask) = exterped;
        end

    end
end

end