diff --git a/lsq_classopath.m b/lsq_classopath.m index e363a2b..b43b818 100644 --- a/lsq_classopath.m +++ b/lsq_classopath.m @@ -9,8 +9,7 @@ % Aeq*beta = beq and linear inequality constraints A*beta <= b. The % result RHO_PATH contains the values of the tuning parameter rho along % the solution path. The result BETA_PATH has a vector of the estimated -% regression coefficients for each value of rho. If the design matrix is -% +% regression coefficients for each value of rho. % % [RHO_PATH, BETA_PATH] = LSQ_CLASSOPATH(X, y, A, b, Aeq, beq, ... % 'PARAM1', val1, 'PARAM2', val2, ...) allows you to specify optional @@ -28,10 +27,6 @@ % OPTIONAL NAME-VALUE PAIRS: % % 'qp_solver': 'matlab' (default), 'gurobi' currently is not working -% 'penidx': a logical vector indicating penalized coefficients -% 'init_method': 'qp' (default) or 'lp' method to initialize. 'lp is -% recommended only when it's reasonable to assume that all -% coefficient estimates initialize at zero. % 'epsilon': tuning parameter for ridge penalty. Default is 1e-4. % % OUTPUTS: @@ -64,16 +59,12 @@ argin.addRequired('Aeq', @(x) size(x,2)==p || isempty(x)); argin.addRequired('beq', @(x) isnumeric(x) || isempty(x)); argin.addParamValue('qp_solver', 'matlab', @ischar); -argin.addParamValue('init_method', 'qp', @ischar); -argin.addParamValue('penidx', true(p,1), @(x) islogical(x) && length(x)==p); argin.addParamValue('epsilon', 1e-4, @isnumeric); % parse inputs y = reshape(y, n, 1); argin.parse(X, y, A, b, Aeq, beq, varargin{:}); qp_solver = argin.Results.qp_solver; -init_method = argin.Results.init_method; -penidx = reshape(argin.Results.penidx,p,1); epsilon = argin.Results.epsilon; % check validity of qp_solver @@ -82,17 +73,6 @@ 'qp_solver not recognized'); end -% check validity of initialization method -if ~(strcmpi(init_method, 'qp') || strcmpi(init_method, 'lp')) - error('sparsereg:lsq_classopath:init_method', ... - 'init_method not recognized'); -end - -% issue warning if LP is used for initialization -if strcmpi(init_method, 'lp') - warning('LP used for initialization, assumes initial solution is unique') -end - % save original number of observations (for when ridge penalty is used) n_orig = n; @@ -150,110 +130,95 @@ violationsPath = Inf(1, maxiters); -%# intialization +%# intialization (LP based for all cases) H = X'*X; -% if strcmpi(qp_solver, 'matlab') - % using matlab - if strcmpi(init_method, 'lp') - % use Matlab lsqlin - [x,~,~,~,lambda] = ... - linprog(ones(2*p,1), [A -A], b, [Aeq -Aeq], beq, ... - zeros(2*p,1), inf(2*p,1)); - betaPath(:,1) = x(1:p) - x(p+1:end); - dualpathEq(:,1) = lambda.eqlin; - dualpathIneq(:,1) = lambda.ineqlin; - elseif strcmpi(init_method, 'qp') - %# First use LP to find rho_max - % solve LP problem - [x,~,~,~,lambda] = ... - linprog(ones(2*p,1),[A -A],b,[Aeq -Aeq],beq, ... - zeros(2*p,1), inf(2*p,1)); - betaPath(:,1) = x(1:p) - x(p+1:end); - dualpathEq(:,1) = lambda.eqlin; - dualpathIneq(:,1) = lambda.ineqlin; - % initialize sets - dualpathIneq(dualpathIneq(:,1) < 0,1) = 0; % fix negative dual variables - setActive = abs(betaPath(:,1))>1e-4 | ~penidx; - betaPath(~setActive,1) = 0; - % find the maximum rho and initialize subgradient vector + + % compute solution for rho->inf via LP + [x,~,~,~,lambda] = ... + linprog(ones(2*p,1), [A -A], b, [Aeq -Aeq], beq, ... + zeros(2*p,1), inf(2*p,1)); + betaPath(:,1) = x(1:p) - x(p+1:end); + + % if all coefficients initialize at zero, use Rosset and Zhu (2007) type + % initialization + numZero = 1e-6; % set threshold for numerically equal to zero + if sum(abs(betaPath(:,1))>numZero)==0 % + dualpathEq(:,1) = lambda.eqlin; %every real number works here + dualpathIneq(:,1) = lambda.ineqlin; % lambda.ineqlin from LP ensures compl. slackness so this choice works resid = y - X*betaPath(:, 1); subgrad = X'*resid - Aeq'*dualpathEq(:,1) - A'*dualpathIneq(:,1); - rho_max = max(abs(subgrad)); + rhoPath(1) = max(abs(subgrad)); + + else % if at least one coefficient initializes not at zero, use second LP to obtain + % valid Lagrange multipliers (LMs) + + % identify sets + idtBiggerZero = betaPath(:,1) > numZero; + idtSmallerZero = betaPath(:,1) < -numZero; + idtNotActive = abs(betaPath(:,1)) <= numZero; + idtActive = idtNotActive == 0; + sActive = idtBiggerZero - idtSmallerZero; + sActive(sActive==0)=[]; + nInactive = sum(idtNotActive); + + % identify inequalities that bind at rho->inf solution + idtBinding = abs(A * betaPath(:,1) - b) < numZero; + nBinding = sum(idtBinding); + + % set up LP over LMs + resid = y - X*betaPath(:, 1); + r = - X'*resid; + V = [Aeq' A(idtBinding,:)']; + fInit = zeros(size(dualpathEq,1)+nBinding,1); + Ainit = [V(idtActive,:);-V(idtActive,:); V(idtNotActive,:);-V(idtNotActive,:);zeros(nBinding,size(V(idtNotActive,:),2))]; + if nBinding>0 + Ainit(end-nBinding+1:end,end-nBinding+1:end) = -eye(nBinding); % Binding LMs for inequality constraints must be positive + end + + % set very large rhoStart and run LP. Repeat with larger rhoStart + % if no solution could be found that solves the stationarity + % condition + rhoStart = 1000; + statCondHolds = 0; + while statCondHolds == 0 + + % inside loop because it depends on rhoStart + binit = [-r(idtActive,1)-rhoStart*sActive;+r(idtActive,1)+rhoStart*sActive;rhoStart * ones(nInactive,1) - r(idtNotActive,1); rhoStart * ones(nInactive,1) + r(idtNotActive,1); zeros(nBinding,1)]; + + % run LP over LMs + g = linprog(fInit,Ainit,binit); + + % remeber LMs + dualpathEq(:,1) = g(1:size(Aeq,1)); + dualpathIneq(:,1) = zeros(size(A,1),1); + dualpathIneq(idtBinding,1) = g(size(Aeq,1)+1:end); + + % get errors in stat. cond. equations + SSEStatCon = norm(r(idtActive,1) + rhoStart * sActive + V(idtActive,:)*g,2); + + %check whether stationarity condition of active coefficients + %holds + statCondHolds = abs(SSEStatCon) < numZero; + if statCondHolds == 0 + rhoStart = rhoStart * 10; + end + end + + rhoPath(1) = rhoStart; - %# Use QP at rho_max to initialize - [betaPath(:,1), stats] = lsq_constrsparsereg(X, y, ... - (rho_max*1), ... - 'method', 'qp', 'qp_solver', 'matlab', 'Aeq', Aeq,... - 'beq', beq, 'A', A, 'b', b); - dualpathEq(:,1) = stats.qp_dualEq; - dualpathIneq(:,1) = stats.qp_dualIneq; end - -% elseif strcmpi(qp_solver, 'GUROBI') -% % use GUROBI solver if possible -% if strcmpi(init_method, 'lp') -% % linear programming -% gmodel.obj = ones(2*p,1); -% gmodel.A = sparse([A -A; Aeq -Aeq]); -% gmodel.sense = [repmat('<', m2, 1); repmat('=', m1, 1)]; -% gmodel.rhs = [b; beq]; -% gmodel.lb = zeros(2*p,1); -% gparam.OutputFlag = 0; -% gresult = gurobi(gmodel, gparam); -% betaPath(:,1) = gresult.x(1:p) - gresult.x(p+1:end); -% dualpathEq(:,1) = reshape(gresult.pi(m2+1:end), m1, 1); -% dualpathIneq(:,1) = reshape(gresult.pi(1:m2), m2, 1); -% elseif strcmpi(init_method, 'qp') -% %# First use LP to find rho_max -% % solve LP problem -% gmodel.obj = ones(2*p,1); -% gmodel.A = sparse([A -A; Aeq -Aeq]); -% gmodel.sense = [repmat('<', m2, 1); repmat('=', m1, 1)]; -% gmodel.rhs = [b; beq]; -% gmodel.lb = zeros(2*p,1); -% gparam.OutputFlag = 0; -% gresult = gurobi(gmodel, gparam); -% betaPath(:,1) = gresult.x(1:p) - gresult.x(p+1:end); -% dualpathEq(:,1) = reshape(gresult.pi(m2+1:end), m1, 1); -% dualpathIneq(:,1) = reshape(gresult.pi(1:m2), m2, 1); -% % initialize sets -% dualpathIneq(dualpathIneq(:,1) < 0,1) = 0; % fix negative dual variables -% setActive = abs(betaPath(:,1))>1e-4 | ~penidx; -% betaPath(~setActive,1) = 0; -% % find the maximum rho and initialize subgradient vector -% resid = y - X*betaPath(:, 1); -% subgrad = X'*resid - Aeq'*dualpathEq(:,1) - A'*dualpathIneq(:,1); -% rho_max = max(abs(subgrad)); -% -% -% % quadratic programming -% [betaPath(:,1), stats] = lsq_constrsparsereg(X, y, ... -% rho_max,... -% 'method','qp','qp_solver','gurobi','Aeq', Aeq,... -% 'beq', beq, 'A',A,'b',b); -% dualpathEq(:,1) = stats.qp_dualEq; -% dualpathIneq(:,1) = stats.qp_dualIneq; -% end -% end - -% may wanna switch this so the first rho isn't re-calculated? + % initialize sets dualpathIneq(dualpathIneq(:,1) < 0,1) = 0; % fix Gurobi negative dual variables -setActive = abs(betaPath(:,1))>1e-4 | ~penidx; +setActive = abs(betaPath(:,1)) > numZero; betaPath(~setActive,1) = 0; -% setIneqBorder = dualpathIneq(:,1)>0; +setIneqBorder = dualpathIneq(:,1) > numZero; residIneq = A*betaPath(:,1) - b; -setIneqBorder = residIneq == 0; nIneqBorder = nnz(setIneqBorder); -% find the maximum rho and initialize subgradient vector +% initialize subgradient vector resid = y - X*betaPath(:, 1); -subgrad = X'*resid - Aeq'*dualpathEq(:,1) - A'*dualpathIneq(:,1); -% subgrad(setActive) = 0; -[rhoPath(1), idx] = max(abs(subgrad)); -subgrad(setActive) = sign(betaPath(setActive,1)); -subgrad(~setActive) = subgrad(~setActive)/rhoPath(1); -setActive(idx) = true; +subgrad = 1/rhoPath(1) * (X'*resid - Aeq'*dualpathEq(:,1) - A'*dualpathIneq(:,1)); nActive = nnz(setActive); % calculate value for objective function @@ -263,7 +228,6 @@ rankAeq = rank(Aeq); % should do this more efficiently dfPath(1) = nActive - rankAeq - nIneqBorder; - % set initial violations counter to 0 violationsPath(1) = 0; @@ -620,7 +584,6 @@ % calculate derivative for residual inequality dirResidIneq = A(~setIneqBorder, setActive)*dir(1:nActive); - %### Determine rho for next event (via delta rho) ###% @@ -647,7 +610,7 @@ % choose smaller delta rho out of t1 and t2 nextrhoBeta(~setActive) = min(t1, t2); % ignore delta rhos numerically equal to zero - nextrhoBeta(nextrhoBeta <= 1e-8 | ~penidx) = inf; + nextrhoBeta(nextrhoBeta <= 1e-8) = inf; %## Events based inequality constraints ##% @@ -664,7 +627,7 @@ % ignore delta rhos equal to zero nextrhoIneq(nextrhoIneq <= 1e-8) = inf; - + %# determine next rho ## % find smallest rho chgrho = min([nextrhoBeta; nextrhoIneq]);