MATLAB实现偏最小,偏最小二乘法 matlab程序

大家好，欢迎来到IT知识分享网。

函数：

function [T,P,U,Q,B,W] = pls (X,Y,tol2)

% PLS Partial Least Squares Regrassion

% [T,P,U,Q,B,Q] = pls(X,Y,tol) performs particial least squares

regrassion

% between the independent variables, X and dependent Y as

% X = T*P’ + E;

% Y = U*Q’ + F = T*B*Q’ + F1;

% Inputs:

% X data matrix of independent variables

% Y data matrix of dependent variables

% tol the tolerant of convergence (defaut 1e-10)

% Outputs:

% T score matrix of X

% P loading matrix of X

% U score matrix of Y

% Q loading matrix of Y

% B matrix of regression coefficient

% W weight matrix of X

% Using the PLS model, for new X1, Y1 can be predicted as

% Y1 = (X1*P)*B*Q’ = X1*(P*B*Q’)

% or

% Y1 = X1*(W*inv(P’*W)*inv(T’*T)*T’*Y)

% Without Y provided, the function will return the principal

components as

% X = T*P’ + E

% Example: taken from Geladi, P. and Kowalski, B.R., “An example of

2-block

% predictive partial least-squares regression with simulated

data”,

% Analytica Chemica Acta, 185(1996) 19–32.

x=[4 9 6 7 7 8 3 2;6 15 10 15 17 22 9 4;8 21 14 23 27 36 15

10 21 14 13 11 10 3 4; 12 27 18 21 21 24 9 6; 14 33 22 29 31 38 15

16 33 22 19 15 12 3 6; 18 39 26 27 25 26 9 8;20 45 30 35 35 40 15

10];

y=[1 1;3 1;5 1;1 3;3 3;5 3;1 5;3 5;5 5];

% leave the last sample for test

N=size(x,1);

x1=x(1:N-1,:);

y1=y(1:N-1,:);

x2=x(N,:);

y2=y(N,:);

% normalization

xmean=mean(x1);

xstd=std(x1);

ymean=mean(y1);

ystd=std(y);

X=(x1-xmean(ones(N-1,1),:))./xstd(ones(N-1,1),:);

Y=(y1-ymean(ones(N-1,1),:))./ystd(ones(N-1,1),:);

% PLS model

[T,P,U,Q,B,W]=pls(X,Y);

% Prediction and error

yp = (x2-xmean)./xstd * (P*B*Q’);

fprintf(‘Prediction error: %g/n’,norm(yp-(y2-ymean)./ystd));

% By Yi Cao at Cranfield University on 2nd Febuary 2008

% Reference:

% Geladi, P and Kowalski, B.R., “Partial Least-Squares Regression:

% Tutorial”, Analytica Chimica Acta, 185 (1986) 1–7.

Input check

error(nargchk(1,3,nargin));

error(nargoutchk(0,6,nargout));

if nargin<2

Y=X;

end

tol = 1e-10;

if nargin<3

tol2=1e-10;

end

Size of x and y

[rX,cX] = size(X);

[rY,cY] = size(Y);

assert(rX==rY,’Sizes of X and Y mismatch.’);

Allocate memory to the maximum size

n=max(cX,cY);

T=zeros(rX,n);

P=zeros(cX,n);

U=zeros(rY,n);

Q=zeros(cY,n);

B=zeros(n,n);

W=P;

k=0;

% iteration loop if residual is larger than specfied

while norm(Y)>tol2 && k% choose the column of x has the

largest square of sum as t.

% choose the column of y has the largest square of sum as u.

[dummy,tidx] = max(sum(X.*X));

[dummy,uidx] = max(sum(Y.*Y));

t1 = X(:,tidx);

u = Y(:,uidx);

t = zeros(rX,1);

iteration for outer modeling until convergence

while norm(t1-t) > tol

w = X’*u;

w = w/norm(w);

t = t1;

t1 = X*w;

q = Y’*t1;

q = q/norm(q);

u = Y*q;

end

% update p based on t

t=t1;

p=X’*t/(t’*t);

pnorm=norm(p);

p=p/pnorm;

t=t*pnorm;

w=w*pnorm;

% regression and residuals

b = u’*t/(t’*t);

X = X – t*p’;

Y = Y – b*t*q’;

% save iteration results to outputs:

k=k+1;

T(:,k)=t;

P(:,k)=p;

U(:,k)=u;

Q(:,k)=q;

W(:,k)=w;

B(k,k)=b;

% uncomment the following line if you wish to see the

convergence

% disp(norm(Y))

end

T(:,k+1:end)=[];

P(:,k+1:end)=[];

U(:,k+1:end)=[];

Q(:,k+1:end)=[];

W(:,k+1:end)=[];

B=B(1:k,1:k);

例子：——————————————————————————————————————————————————————————

Principal Component Analysis and Partial Least Squares

% Principal Component Analysis (PCA) and Partial Least Squares

(PLS) are

% widely used tools. This code is to show their relationship

through the

% Nonlinear Iterative PArtial Least Squares (NIPALS) algorithm.

The Eigenvalue and Power Method

% The NIPALS algorithm can be derived from the Power method to

solve the

% eigenvalue problem. Let x be the eigenvector of a square matrix,

% corresponding to the eignvalue s:

% $$Ax=sx$$

% Modifying both sides by A iteratively leads to

% $$A^kx=s^kx$$

% Now, consider another vectro y, which can be represented as a

linear

% combination of all eigenvectors:

% $$y=/sum_i^n b_ix_i=Xb$$

% where

% $$X=/left[x_1/,/,/, /cdots/,/,/, x_n /right]$$

% and

% $$b = /left[b_1/,/,/, /cdots/,/,/, b_n /right]^T$$

% Modifying y by A gives

% $$Ay=AXb=XSb$$

% Where S is a diagnal matrix consisting all eigenvalues.

Therefore, for

% a large enough k,

% $$A^ky=XS^kb/approx /alpha x_1$$

% That is the iteration will converge to the direction of x_1,

which is the

% eigenvector corresponding to the eigenvalue with the maximum

module.

% This leads to the following Power method to solve the eigenvalue

problem.

A=randn(10,5);

% sysmetric matrix to ensure real eigenvalues

B=A’*A;

%find the column which has the maximum norm

[dum,idx]=max(sum(A.*A));

x=A(:,idx);

%storage to judge convergence

x0=x-x;

%convergence tolerant

tol=1e-6;

%iteration if not converged

while norm(x-x0)>tol

%iteration to approach the eigenvector direction

y=A’*x;

%normalize the vector

y=y/norm(y);

%save previous x

x0=x;

%x is a product of eigenvalue and eigenvector

x=A*y;

end

% the largest eigen value corresponding eigenvector is y

s=x’*x;

% compare it with those obtained with eig

[V,D]=eig(B);

[d,idx]=max(diag(D));

v=V(:,idx);

disp(d-s)

% v and y may be different in signs

disp(min(norm(v-y),norm(v+y)))

The NIPALS Algorithm for PCA

% The PCA is a dimension reduction technique, which is based on

the

% following decomposition:

% $$X=TP^T+E$$

% Where X is the data matrix (m x n) to be analysed, T is the so

called

% score matrix (m x a), P the loading matrix (n x a) and E the

residual.

% For a given tolerance of residual, the number of principal

components, a,

% can be much smaller than the orginal variable dimension, n.

% The above power algorithm can be extended to get T and P by

iteratively

% subtracting A (in this case, X) by x*y’ (in this case, t*p’)

until the

% given tolerance satisfied. This is the so called NIPALS

algorithm.

% The

data matrix with normalization

A=randn(10,5);

meanx=mean(A);

stdx=std(A);

X=(A-meanx(ones(10,1),:))./stdx(ones(10,1),:);

B=X’*X;

% allocate T and P

T=zeros(10,5);

P=zeros(5);

% tol for convergence

tol=1e-6;

% tol for PC of 95 percent

tol2=(1-0.95)*5*(10-1);

for k=1:5

%find the column which has the maximum norm

[dum,idx]=max(sum(X.*X));

t=A(:,idx);

%storage to judge convergence

t0=t-t;

%iteration if not converged

while norm(t-t0)>tol

%iteration to approach the eigenvector direction

p=X’*t;

%normalize the vector

p=p/norm(p);

%save previous t

t0=t;

%t is a product of eigenvalue and eigenvector

t=X*p;

end

%subtracing PC identified

X=X-t*p’;

T(:,k)=t;

P(:,k)=p;

if norm(X)break

end

T(:,k+1:5)=[];

P(:,k+1:5)=[];

S=diag(T’*T);

% compare it with those obtained with eig

[V,D]=eig(B);

[D,idx]=sort(diag(D),’descend’);

D=D(1:k);

V=V(:,idx(1:k));

fprintf(‘The number of PC: %i/n’,k);

fprintf(‘norm of score difference between EIG and NIPALS:

%g/n’,norm(D-S));

fprintf(‘norm of loading difference between EIG and NIPALS:

%g/n’,norm(abs(V)-abs(P)));

The NIPALS Algorithm for PLS

% For PLS, we will have two sets of data: the independent X and

dependent

% Y. The NIPALS algorithm can be used to decomposes both X and Y so

that

% $$X=TP^T+E,/,/,/,/,Y=UQ^T+F,/,/,/,/,U=TB$$

% The regression, U=TB is solved through least sequares whilst

the

% decompsition may not include all components. That is why the

approach is

% called partial least squares. This algorithm is implemented in

the PLS

% function.

Example: Discriminant PLS using the NIPALS Algorithm

% From Chiang, Y.Q., Zhuang, Y.M and Yang, J.Y, “Optimal

Fisher

% discriminant analysis using the rank decomposition”, Pattern

Recognition,

% 25 (1992), 101–111.

Three classes data, each has 50 samples and 4 variables.

x1=[5.1 3.5 1.4 0.2; 4.9 3.0 1.4 0.2; 4.7 3.2 1.3 0.2; 4.6 3.1 1.5

0.2;…

5.0 3.6 1.4 0.2; 5.4 3.9 1.7 0.4; 4.6 3.4 1.4 0.3; 5.0 3.4 1.5 0.2;

…

4.4 2.9 1.4 0.2; 4.9 3.1 1.5 0.1; 5.4 3.7 1.5 0.2; 4.8 3.4 1.6 0.2;

…

4.8 3.0 1.4 0.1; 4.3 3.0 1.1 0.1; 5.8 4.0 1.2 0.2; 5.7 4.4 1.5 0.4;

…

5.4 3.9 1.3 0.4; 5.1 3.5 1.4 0.3; 5.7 3.8 1.7 0.3; 5.1 3.8 1.5 0.3;

…

5.4 3.4 1.7 0.2; 5.1 3.7 1.5 0.4; 4.6 3.6 1.0 0.2; 5.1 3.3 1.7 0.5;

…

4.8 3.4 1.9 0.2; 5.0 3.0 1.6 0.2; 5.0 3.4 1.6 0.4; 5.2 3.5 1.5 0.2;

…

5.2 3.4 1.4 0.2; 4.7 3.2 1.6 0.2; 4.8 3.1 1.6 0.2; 5.4 3.4 1.5 0.4;

…

5.2 4.1 1.5 0.1; 5.5 4.2 1.4 0.2; 4.9 3.1 1.5 0.2; 5.0 3.2 1.2 0.2;

…

5.5 3.5 1.3 0.2; 4.9 3.6 1.4 0.1; 4.4 3.0 1.3 0.2; 5.1 3.4 1.5 0.2;

…

5.0 3.5 1.3 0.3; 4.5 2.3 1.3 0.3; 4.4 3.2 1.3 0.2; 5.0 3.5 1.6 0.6;

…

5.1 3.8 1.9 0.4; 4.8 3.0 1.4 0.3; 5.1 3.8 1.6 0.2; 4.6 3.2 1.4 0.2;

…

5.3 3.7 1.5 0.2; 5.0 3.3 1.4 0.2];

x2=[7.0 3.2 4.7 1.4; 6.4 3.2 4.5 1.5; 6.9 3.1 4.9 1.5; 5.5 2.3 4.0

1.3; …

6.5 2.8 4.6 1.5; 5.7 2.8 4.5 1.3; 6.3 3.3 4.7 1.6; 4.9 2.4 3.3 1.0;

…

6.6 2.9 4.6 1.3; 5.2 2.7 3.9 1.4; 5.0 2.0 3.5 1.0; 5.9 3.0 4.2 1.5;

…

6.0 2.2 4.0 1.0; 6.1 2.9 4.7 1.4; 5.6 2.9 3.9 1.3; 6.7 3.1 4.4 1.4;

…

5.6 3.0 4.5 1.5; 5.8 2.7 4.1 1.0; 6.2 2.2 4.5 1.5; 5.6 2.5 3.9 1.1;

…

5.9 3.2 4.8 1.8; 6.1 2.8 4.0 1.3; 6.3 2.5 4.9 1.5; 6.1 2.8 4.7 1.2;

…

6.4 2.9 4.3 1.3; 6.6 3.0 4.4 1.4; 6.8 2.8 4.8 1.4; 6.7 3.0 5.0 1.7;

…

6.0 2.9 4.5 1.5; 5.7 2.6 3.5 1.0; 5.5 2.4 3.8 1.1; 5.5 2.4 3.7 1.0;

…

5.8 2.7 3.9 1.2; 6.0 2.7 5.1 1.6; 5.4 3.0 4.5 1.5; 6.0 3.4 4.5 1.6;

…

6.7 3.1 4.7 1.5; 6.3 2.3 4.4 1.3; 5.6 3.0 4.1 1.3; 5.5 2.5 5.0 1.3;

…

5.5 2.6 4.4 1.2; 6.1 3.0 4.6 1.4; 5.8 2.6 4.0 1.2; 5.0 2.3 3.3 1.0;

…

5.6 2.7 4.2 1.3; 5.7 3.0 4.2 1.2; 5.7 2.9 4.2 1.3; 6.2 2.9 4.3 1.3;

…

5.1 2.5 3.0 1.1; 5.7 2.8 4.1 1.3];

x3=[6.3 3.3 6.0 2.5; 5.8 2.7 5.1 1.9; 7.1 3.0 5.9 2.1; 6.3 2.9 5.6

1.8; …

6.5 3.0 5.8 2.2; 7.6 3.0 6.6 2.1; 4.9 2.5 4.5 1.7; 7.3 2.9 6.3 1.8;

…

6.7 2.5 5.8 1.8; 7.2 3.6 6.1 2.5; 6.5 3.2 5.1 2.0; 6.4 2.7 5.3 1.9;

…

6.8 3.0 5.5 2.1; 5.7 2.5 5.0 2.0; 5.8 2.8 5.1 2.4; 6.4 3.2 5.3 2.3;

…

6.5 3.0 5.5 1.8; 7.7 3.8 6.7 2.2; 7.7 2.6 6.9 2.3; 6.0 2.2 5.0 1.5;

…

6.9 3.2 5.7 2.3; 5.6 2.8 4.9 2.0; 7.7 2.8 6.7 2.0; 6.3 2.7 4.9 1.8;

…

6.7 3.3 5.7 2.1; 7.2 3.2 6.0 1.8; 6.2 2.8 4.8 1.8; 6.1 3.0 4.9 1.8;

…

6.4 2.8 5.6 2.1; 7.2 3.0 5.8 1.6; 7.4 2.8 6.1 1.9; 7.9 3.8 6.4 2.0;

…

6.4 2.8 5.6 2.2; 6.3 2.8 5.1 1.5; 6.1 2.6 5.6 1.4; 7.7 3.0 6.1 2.3;

…

6.3 3.4 5.6 2.4; 6.4 3.1 5.5 1.8; 6.0 3.0 4.8 1.8; 6.9 3.1 5.4 2.1;

…

6.7 3.1 5.6 2.4; 6.9 3.1 5.1 2.3; 5.8 2.7 5.1 1.9; 6.8 3.2 5.9 2.3;

…

6.7 3.3 5.7 2.5; 6.7 3.0 5.2 2.3; 6.3 2.5 5.0 1.9; 6.5 3.0 5.2 2.0;

…

6.2 3.4 5.4 2.3; 5.9 3.0 5.1 1.8];

%Split data set into training (1:25) and testing (26:50)

idxTrain = 1:25;

idxTest = 26:50;

Combine training data with normalization

X = [x1(idxTrain,:);x2(idxTrain,:);x3(idxTrain,:)];

% Define class indicator as Y

Y = kron(eye(3),ones(25,1));

% Normalization

xmean = mean(X);

xstd = std(X);

ymean = mean(Y);

ystd = std(Y);

X = (X – xmean(ones(75,1),:))./xstd(ones(75,1),:);

Y = (Y – ymean(ones(75,1),:))./ystd(ones(75,1),:);

% Tolerance for 90 percent score

tol = (1-0.9) * 25 * 4;

% Perform PLS

[T,P,U,Q,B] = pls(X,Y,tol);

% Results

fprintf(‘Number of components retained: %i/n’,size(B,1))

% Predicted classes

X1 = (x1(idxTest,:) –

xmean(ones(25,1),:))./xstd(ones(25,1),:);

X2 = (x2(idxTest,:) –

xmean(ones(25,1),:))./xstd(ones(25,1),:);

X3 = (x3(idxTest,:) –

xmean(ones(25,1),:))./xstd(ones(25,1),:);

Y1 = X1 * (P*B*Q’);

Y2 = X2 * (P*B*Q’);

Y3 = X3 * (P*B*Q’);

Y1 = Y1 .* ystd(ones(25,1),:) + ymean(ones(25,1),:);

Y2 = Y2 .* ystd(ones(25,1),:) + ymean(ones(25,1),:);

Y3 = Y3 .* ystd(ones(25,1),:) + ymean(ones(25,1),:);

% Class is determined from the cloumn which is most close to

[dum,classid1]=min(abs(Y1-1),[],2);

[dum,classid2]=min(abs(Y2-1),[],2);

[dum,classid3]=min(abs(Y3-1),[],2);

bar(1:25,classid1,’b’);

hold on

bar(26:50,classid2,’r’);

bar(51:75,classid3,’g’);

hold off

免责声明：本站所有文章内容,图片，视频等均是来源于用户投稿和互联网及文摘转载整编而成，不代表本站观点，不承担相关法律责任。其著作权各归其原作者或其出版社所有。如发现本站有涉嫌抄袭侵权/违法违规的内容,侵犯到您的权益，请在线联系站长,一经查实,本站将立刻删除。本文来自网络,若有侵权，请联系删除，如若转载，请注明出处：https://yundeesoft.com/10778.html

MATLAB实现偏最小,偏最小二乘法 matlab程序

相关推荐

发表回复