Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

function [J,G] = objFuncLR(w,X,y,alpha,lambda,we)

% Logistic regression objective function and gradient
%
% Inputs:
%
% w - vector of model parameters. In two-category case, this
% is (d+1)-dimensional vector, where d is the number of variables in the 
% data. In multicategory case, this is M*(d+1)-dimensional vector where
% M is the number of categories
%
% X - n*d data matrix with n observations and d variables  
%
% y - binary coded category labels. In two category case, this is n-dimensional
% vector of zeros and ones. In multicategory case, this is n*M matrix
% of zeros and ones (use "codeCategories.m" to convert labels to this format)
%
% alpha - controls the extent of L1- and L2-regularization. 
% alpha = 1 means pure L1-regularization 
% alpha = 0 means pure L2-regularization
% 0 < alpha < 1 means combination of L1- and L2-regularization
%
% lambda - controls the extent of regularization with respect to log-likelihood
% lambda = 0 means that regularization is not used at all, and increase of lambda
% increases the extent of regularization.
% 
% we - optional parameter to weight model parameters. Do not use in the current version.
%
% Outputs:
%
% J - value of objective function
%
% G -gradient of the objective function
%
%
%
% Jukka-Pekka Kauppi
% University of Helsinki, Department of Computer Science
%
% History:
% Multinomial logreg implemented 22.8.2012
% Binomial logreg implemented and comments added 23.8.2012



if nargin == 3
  alpha = 0;
  lambda = 1;
end


n = size(X,1);
d = size(X,2);
M = size(y,2);

% in two-category case, make sure that labels are given as column vector:
if min(size(y)) == 1
  y = y(:);
  M = 2;
end


% add column of ones to include bias to scalar products:
X = [X ones(size(X,1),1)];


if M > 2 % multi-category case:
  
  % order weights category-wise for easier handling:
  w = reshape(w,(d+1),M);
  
  % w = w.*we; % optional: "we" can be use to weight parameter
  
  q_tm = zeros(n,M); %n*M
  for m = 1:M
    q_tm(:,m) = X*w(:,m);
  end
    
  R = sum(exp(q_tm),2); % n*1;
  Ptm = exp(q_tm)./repmat(R,1,M); % n*M
  
  % COMPUTE GRADIENT:
  
  % gradient of log-likelihood:
  dw = zeros(d+1,M);
  for m = 1:M
    dw(:,m) = sum(X.*(repmat(y(:,m),1,d+1) - repmat(Ptm(:,m),1,d+1)),1)';
  end          
  
  % gradient of penalty:
  der_penalty = lambda*( alpha*( [sign(w(:))] ) + (1-alpha)*[w(:)] );
  
  % dw = dw.*we;
  
  % complete gradient:
  G = dw(:) - der_penalty;
  
  % flip signs because objective function sign is flipped (see below):
  G = -1*G;
  
  % COMPUTE OBJECTIVE FUNCTION:
  
  % log-likelihood:
  term1 = sum(y.*q_tm,2);
  term2 = -log(R);
  J = sum( term1 + term2 );
   
  % penalty term (prior):
  penalty = lambda*( alpha*( sum( abs(w(:)) ) ) + 0.5*(1-alpha)*( sum( w(:).^2 ) ) );
  
  % complete objective function:
  J = J - penalty;
  
  % convert maximization to minimization problem:
  J = -1*J;
  
  % disp(num2str(J)) % optional: display objective function value

else % two-category case:
                    
    q_t = X*w;
    exp_q_t = exp(q_t);
    R = 1 + exp_q_t;
    P_t = exp_q_t ./ R;
    
    % COMPUTE OBJECTIVE FUNCTION:
    
    % log-likelihood:
    J = sum( (y.*q_t) - log(R) );
    
    % penalty term (prior):
    penalty = lambda*( alpha*( sum( abs(w) ) ) + 0.5*(1-alpha)*( sum( w.^2 ) ) );
          
    % complete objective function:
    J = J - penalty;
    
    % convert maximization to minimization problem:
    J = -1*J;
            
    % COMPUTE GRADIENT:
    
    % gradient of log-likelihood:
      dw = sum(X.*(repmat(y,1,d+1) - repmat(P_t,1,d+1)), 1)';
    
    % gradient of penalty:
    der_penalty = lambda*( alpha*( [sign(w)] ) + (1-alpha)*w );
    
    % dw = dw.*we;
    
    % complete gradient:
    G = dw - der_penalty;
    
    % flip signs because objective function sign is flipped:
    G = -1*G;
    
end