ECG-Kit 1.0
(3,839 bytes)
%% (Internal) Replace NaN from PRdatasets
%
% dsOut = deNaN_dataset(dsIn, type)
%
% Arguments:
%
% + dsIn: the PRdataset to clean (it may contain NaN or other invalid values)
%
% + type: Choose the type of replacement:
%
% - 'remove', remove the whole vector which includes NaN
%
% - 'change', replace each NaN with a value resampled (with
% replacement) from the valid values of the same feature, plus a small
% random perturbation whose scale is estimated ignoring the NaN values.
%
% - 'change_same_class', replace each NaN with a value resampled (with
% replacement) from the valid values of the same feature and the same
% class, plus a small random perturbation whose scale is estimated
% ignoring the NaN values.
%
% + func_ptr: A handle to an isX-style function (e.g. @isnan, @isinf) that flags the values to be treated as missing. Default @isnan
%
% Output:
%
% + dsOut: the clean PRdataset
%
% Example:
%
% Author: Mariano Llamedo Soria llamedom@electron.frba.utn.edu.ar
% Version: 0.1 beta
% Last update: 14/5/2014
% Birthdate : 21/4/2015
% Copyright 2008-2015
%
function dsOut = deNaN_dataset(dsIn, type, func_ptr)
% Remove or replace the entries of dsIn flagged by func_ptr.
%
%   dsIn     : dataset to clean; unary plus (+dsIn) must yield its numeric data.
%   type     : 'remove' (default), 'change' or 'change_same_class'.
%   func_ptr : element-wise predicate flagging missing entries (default @isnan).
%   dsOut    : cleaned dataset. When no entry is flagged, dsIn is returned
%              untouched regardless of type.
%
% Errors:
%   deNaN_dataset:unknownType      - type is not one of the supported modes.
%   deNaN_dataset:noDonors         - a feature (or a class within a feature) has
%                                    no valid value to resample from.
%   deNaN_dataset:unlabeledObjects - seldat changed the number of objects, so
%                                    rows can no longer be matched.

if( nargin < 2 || isempty(type) )
    type = 'remove';
end

if( nargin < 3 || isempty(func_ptr) )
    func_ptr = @isnan;
end

% Evaluate the predicate only once on the raw numeric data.
data = +dsIn;
missing_mask = func_ptr(data);

% Explicit dimensions: without them a single-row dataset would be collapsed
% to a scalar by any() and the wrong feature would be reported.
features_with_nans = find(any(missing_mask, 1));
rows_without_nans = ~any(missing_mask, 2);

if( all(rows_without_nans) )
    % Nothing to clean.
    dsOut = dsIn;
    return
end

rows_without_nans_idx = find(rows_without_nans);

switch(type)

    case 'remove'
        % Drop every object that has at least one flagged feature.
        dsOut = dsIn(rows_without_nans_idx,:);

    case 'change'
        % Replace flagged entries with resampled valid values of the same
        % feature plus a small positive jitter (1% of the robust spread).
        dsOut = dsIn;

        % Spread estimated on the fully valid objects only.
        robust_std = nanmeda(+dsIn(rows_without_nans_idx,:));

        for kk = features_with_nans
            col_missing = missing_mask(:,kk);
            nan_idx = find(col_missing);
            n_missing = length(nan_idx);

            donors = data(~col_missing, kk);

            dsOut(nan_idx,kk) = draw_with_replacement(donors, n_missing, kk) + 1/100*robust_std(kk)*rand(n_missing,1);
        end

    case 'change_same_class'
        % Same as 'change', but donors are restricted to objects of the same
        % class and the jitter is scaled by the class covariance estimated by qdc.
        dsOut = dsIn;

        dsIn = seldat(dsIn);

        % NOTE(review): seldat presumably drops unlabeled objects. If it does,
        % the row indices computed above and dsOut no longer match dsIn, so
        % fail explicitly instead of indexing the wrong rows. TODO confirm.
        if( size(+dsIn, 1) ~= size(data, 1) )
            error('deNaN_dataset:unlabeledObjects', ...
                  'seldat changed the number of objects (%d -> %d); ''change_same_class'' requires every object to be labeled.', ...
                  size(data, 1), size(+dsIn, 1));
        end

        ds_aux = dsIn(rows_without_nans_idx,:);
        ds_aux = setprior(ds_aux, 0);
        w_aux = qdc(ds_aux);
        w_aux = +w_aux;

        Class_indexes = getnlab(dsIn);

        for kk = features_with_nans

            column = +dsIn(:,kk);
            nan_idx = find(func_ptr(column));
            class_of_missing = Class_indexes(nan_idx);
            classes_involved = unique(class_of_missing);

            % (:).' forces a row so the for-loop visits one class at a time.
            for ll = classes_involved(:).'

                % Objects of class ll whose feature kk is flagged.
                target_idx = nan_idx(class_of_missing == ll);
                % Objects of class ll with a valid feature kk.
                donor_idx = setdiff(find(Class_indexes == ll), target_idx);
                n_missing = length(target_idx);

                dsIn(target_idx,kk) = draw_with_replacement(column(donor_idx), n_missing, kk) + 1/100*w_aux.cov(kk,kk,ll)*rand(n_missing,1);

            end

            dsOut(:,kk) = dsIn(:,kk);

        end

    otherwise
        error('deNaN_dataset:unknownType', ...
              'Unknown replacement type ''%s''. Valid types are ''remove'', ''change'' and ''change_same_class''.', ...
              type);

end


function samples = draw_with_replacement(population, k, feature_idx)
% Draw k values uniformly, with replacement, from the vector population.
%
% randsample(population, k, true) is deliberately avoided: when population
% is a scalar it is interpreted as the integer range 1:population, which
% would draw replacement values from the wrong set.

population = population(:);

if( isempty(population) )
    error('deNaN_dataset:noDonors', ...
          'Feature %d has no valid values to draw replacements from.', ...
          feature_idx);
end

samples = population(randi(numel(population), k, 1));