A
download lsselect.m
Language: Matlab
LOC: 169
Project Info
MatLinks/Chorus(matlinks)
Server: SourceForge
Type: cvs
...chest\copyleft\gpl\stixbox\
   bincoef.m
   cat2tbl.m
   ciboot.m
   ciquant.m
   cmpmod.m
   Contents.m
   contincy.m
   corr.m
   covboot.m
   covjack.m
   cvar.m
   datas1.m
   datas10.m
   datas11.m
   datas2.m
   datas3.m
   datas4.m
   datas5.m
   datas6.m
   datas7.m
   datas8.m
   datas9.m
   dbeta.m
   dbinom.m
   dchisq.m
   df.m
   dgamma.m
   dgumbel.m
   dhypg.m
   dlognorm.m
   dnorm.m
   dt.m
   dweib.m
   egumbel.m
   getdata.m
   histo.m
   identif5.m
   identify.m
   kaplamai.m
   ldiscrim.m
   linreg.m
   lodds.m
   loddsinv.m
   logitfit.m
   lsfit.m
   lsselect.m
   normmix.m
   octvinit.m
   pairs.m
   pbeta.m
   pbinom.m
   pchisq.m
   pf.m
   pgamma.m
   pgumbel.m
   phypg.m
   plognorm.m
   plotdens.m
   plotempd.m
   plotsym.m
   pnorm.m
   pt.m
   pweib.m
   qbeta.m
   qbinom.m
   qchisq.m
   qf.m
   qgamma.m
   qgumbel.m
   qhypg.m
   qlognorm.m
   qnorm.m
   qqgamma.m
   qqgumbel.m
   qqnorm.m
   qqplot.m
   qqweib.m
   qt.m
   quantile.m
   qweib.m
   ranktrf.m
   rbeta.m
   rbinom.m
   rboot.m
   rchisq.m
   Readme.m
   rf.m
   rgamma.m
   rgumbel.m
   rhypg.m
   rlognorm.m
   rnorm.m
   rt.m
   rweib.m
   spearman.m
   stdboot.m
   stdize.m
   stdjack.m
   stixdemo.m
   test1b.m
   test1n.m
   test1r.m
   test2n.m
   test2r.m

function [Q, I, B, BB] = lsselect(y,x,crit,how,pmax,level);
%LSSELECT Select a predictor subset for regression
%
%     	  [Q, I, B, BB] = lsselect(y,x,crit,how,pmax,level)
%
%	  Selects a good subset of regressors in a multiple linear 
%	  regression model. The criterion is one of the following 
%	  ones, determined by the third argument, crit. 
%
%	    'HT'   Hypothesis Test (default level = 0.05)
%	    'AIC'  Akaike's Information Criterion 
%	    'BIC'  Bayesian Information Criterion
%	    'CMV'  Cross Model Validation (inner criterion RSS)
%
%	  The forth argument, how, choses between 
%
%	    'AS'   All Subsets
%	    'FI'   Forward Inclusion
%	    'BE'   Backward Elimination
%
%	  The fifth argument, pmax, limits the number of included
%	  parameters. The returned Q is the criterion as a function of 
%	  the number of parameters; it might be interpreted as an 
%	  estimate of the prediction standard deviation. For the method
%	  'HT' the reported Q is instead the successive p-values for
%	  inclusion or elimination.
%
%	  The last column of the prediction matrix x must be an intercept 
%	  column, ie all elements are ones. This column is never excluded 
%	  in the search for a good model. If it is not present it is added.  
%	  The output I contains the index numbers of the included columns.
%	  For the method 'HT' the optional input argument "level" is the 
%	  p-value reference used for inclusion or deletion. Output B is
%	  the vector of coefficients, ie the suggested model is 
%	  Y = X*B. Column p of BB is the best B of parameter size p. 
%
%	  This function is not highly optimized for speed but rather for
%	  flexibility. It would be faster if 'all subsets' were in a 
%	  separate routine and 'forward' and 'backward' were in another
%	  routine, especially for CMV.
%
%         See also LSFIT and LINREG.

%       Anders Holtsberg, 14-12-94
%       Copyright (c) Anders Holtsberg

n = length(y);
nc = size(x,2);

if nargin<5 
   pmax = nc;
elseif isempty(pmax)
   pmax = nc;
end
if nargin<4
   how = 'FI';
   fprintf('   Default is forward selection\n');
end
if nargin<3
   crit = 'CMV';
   fprintf('   Default criterion is CMV\n');
end
if strcmp(how,'BE')
   pmax = nc;
end
if nargin<6 & strcmp(crit,'HT') 
   level = 0.05;
   fprintf('   Default level is 0.05\n');
end

if any(x(:,nc)~=1)
   fprintf('   An intercept column added')
   x = [x ones(n,1)];
   nc = nc + 1;
   pmax = pmax + 1;
end
if nc<2, disp('only one model'), return, end 

if ~(strcmp(crit,'HT')|strcmp(crit,'AIC')|...
     strcmp(crit,'BIC')|strcmp(crit,'CMV'))
  error('Third argument error');
end
if ~(strcmp(how,'BE')|strcmp(how,'FI')|strcmp(how,'AS'))
  error('Forth argument error');
end
      
Qsml = NaN*ones(pmax,1);

XX = x'*x;
XY = x'*y; 
YY = y'*y;  

% === If all subsets then set up an all-subsets-indicator-matrix ====

if strcmp(how,'AS')
   C = [];
   for i = 1:nc-1
      d = max(1,size(C,1));
      C = [zeros(d,1) C; ones(d,1) C];
      H = C * ones(size(C,2),1);
      J = find(H<pmax);
      C = C(J,:);
   end
   H = H(J);
   [S,I] = sort(H);
   C = C(I,:);
   C = [C ones(size(C,1),1)];
   AllSubsets = C;
   AllSubsetsH = sum(C')';
end

% === This is for CMV ===============================================

if strcmp(crit,'CMV')
   dataloopend = n + 1;
   Qcmv = zeros(pmax,1);
   XXs = XX;
   XYs = XY;
   YYs = YY;
else
   dataloopend = 1;
end

for idata = 1:dataloopend
   
   if strcmp(crit,'CMV')
      fprintf('\n %3.0f:', idata*(idata ~= dataloopend))
      if idata == dataloopend
         XX = XXs;
         XY = XYs;
         YY = YYs;
      else
         xi = x(idata,:);
         yi = y(idata);
         XX = XXs - xi'*xi;
         XY = XYs - xi'*yi;
         YY = YYs - yi^2; 
      end
   end

% === Now we begin to loop over model sizes =========================

   if strcmp(how,'BE')
      p = nc;
      loopto = 1;
      C = ones(1,nc);
   else
      p = 1;
      loopto = pmax;
      C = [zeros(1,nc-1) 1];
   end
   BB = zeros(nc,pmax);

   fprintf('  ');

   while 1;

% === The whole loop over the models of size p  ======================
     
      fprintf('%2.0f ', p)
      qmin = 1e99;
      for k = 1:size(C,1)
         Jk = find(C(k,:));
         q = YY - XY(Jk)'*(XX(Jk,Jk)\XY(Jk));               
         if q < qmin
            qmin = q;
            Cbest = C(k,:);
         end
      end
      Qsml(p) = qmin;
      I = find(Cbest);
      BB(I,p)  = XX(I,I)\XY(I);
   
% === And a piece for CMV only ======================================

      if strcmp(crit,'CMV') & idata < n + 1
         Jk = find(Cbest);
         q = yi - xi(Jk)*(XX(Jk,Jk)\XY(Jk));               
         Qcmv(p) = Qcmv(p) + q^2;
      end

% === Next parameter size ===========================================
   
      if p == loopto, break, end

      if strcmp(how,'FI')
         p = p + 1;
         C = [];
         for i = 1:nc-1
            if Cbest(i)==0
               Cnew = Cbest;
               Cnew(i) = 1;
               C = [C; Cnew];
            end
         end

      elseif strcmp(how,'BE')
         p = p - 1;
         C = [];
         for i = 1:nc-1
            if Cbest(i)==1
               Cnew = Cbest;
               Cnew(i) = 0;
               C = [C; Cnew];
            end
         end
   
      elseif strcmp(how,'AS')
         p = p + 1;
         C = AllSubsets(find(AllSubsetsH==p),:);
      end
   
   end
end

% === Finished at last ===============================================

if strcmp(crit,'CMV') 
   Q = sqrt(Qcmv/n);
end
if strcmp(crit,'AIC') 
   Q = sqrt(Qsml/n).*exp((1:pmax)'/n);
end;
if strcmp(crit,'BIC')
   Q = sqrt(Qsml/n).*exp((1:pmax)'/n*log(n)/2);
end

if strcmp(crit,'HT')
   Qsml = sqrt(Qsml/n);
   Fvals = (Qsml(1:pmax-1).^2 - Qsml(2:pmax).^2) ...
            ./ (Qsml(2:pmax).^2 ./ (n-(2:pmax))' );
   Q = 1-pf(Fvals,1,(n-(2:pmax))');
   i = find(Q>level);
   if strcmp(how,'BE')
      i = [1; Q<level];
      i = find(i);
      i = i(length(i));
   else
      i = [Q>level; 1];
      i = find(i);
      i = i(1);
   end
else
   [S,i] = sort(Q);
   i = i(1);
end

B = BB(:,i);
I = find(B);

fprintf('\n');

About Koders | Resources | Downloads | Support | Black Duck | Terms of Service | DMCA | Privacy Policy | Contact Us