#-*- coding: utf-8 -*- # Author: Bob # Date: 2016.11.24 import numpy as np from matplotlib import pyplot as plt from scipy import io as spio from sklearn.decomposition import pca ''' 主成分分析_2维数据降维1维演示函数 ''' def PCA_2D(): data_2d = spio.loadmat("data.mat") X = data_2d['X'] m = X.shape[0] plt = plot_data_2d(X,'bo') # 显示二维的数据 plt.show() X_copy = X.copy() X_norm,mu,sigma = featureNormalize(X_copy) # 归一化数据 #plot_data_2d(X_norm) # 显示归一化后的数据 #plt.show() Sigma = np.dot(np.transpose(X_norm),X_norm)/m # 求Sigma U,S,V = np.linalg.svd(Sigma) # 求Sigma的奇异值分解 plt = plot_data_2d(X,'bo') # 显示原本数据 drawline(plt, mu, mu+S[0]*(U[:,0]), 'r-') # 线,为投影的方向 plt.axis('square') plt.show() K = 1 # 定义降维多少维(本来是2维的,这里降维1维) '''投影之后数据(降维之后)''' Z = projectData(X_norm,U,K) # 投影 '''恢复数据''' X_rec = recoverData(Z,U,K) # 恢复 '''作图-----原数据与恢复的数据''' plt = plot_data_2d(X_norm,'bo') plot_data_2d(X_rec,'ro') for i in range(X_norm.shape[0]): drawline(plt, X_norm[i,:], X_rec[i,:], '--k') plt.axis('square') plt.show() '''主成分分析_PCA图像数据降维''' def PCA_faceImage(): print (u'加载图像数据.....') data_image = spio.loadmat('data_faces.mat') X = data_image['X'] display_imageData(X[0:100,:]) m = X.shape[0] # 数据条数 print (u'运行PCA....') X_norm,mu,sigma = featureNormalize(X) # 归一化 Sigma = np.dot(np.transpose(X_norm),X_norm)/m # 求Sigma U,S,V = np.linalg.svd(Sigma) # 奇异值分解 display_imageData(np.transpose(U[:,0:36])) # 显示U的数据 print (u'对face数据降维.....') K = 100 # 降维100维(原先是32*32=1024维的) Z = projectData(X_norm, U, K) print (u'投影之后Z向量的大小:%d %d' %Z.shape) print (u'显示降维之后的数据......') X_rec = recoverData(Z, U, K) # 恢复数据 display_imageData(X_rec[0:100,:]) # 可视化二维数据 def plot_data_2d(X,marker): plt.plot(X[:,0],X[:,1],marker) return plt # 归一化数据 def featureNormalize(X): '''(每一个数据-当前列的均值)/当前列的标准差''' n = X.shape[1] mu = np.zeros((1,n)); sigma = np.zeros((1,n)) mu = np.mean(X,axis=0) # axis=0表示列 sigma = np.std(X,axis=0) for i in range(n): X[:,i] = (X[:,i]-mu[i])/sigma[i] return X,mu,sigma # 映射数据 def projectData(X_norm,U,K): Z = np.zeros((X_norm.shape[0],K)) U_reduce = U[:,0:K] # 取前K个 Z = np.dot(X_norm,U_reduce) return Z # 画一条线 def drawline(plt,p1,p2,line_type): plt.plot(np.array([p1[0],p2[0]]),np.array([p1[1],p2[1]]),line_type) # 恢复数据 def recoverData(Z,U,K): X_rec = np.zeros((Z.shape[0],U.shape[0])) U_recude = U[:,0:K] X_rec = np.dot(Z,np.transpose(U_recude)) # 还原数据(近似) return X_rec # 显示图片 def display_imageData(imgData): sum = 0 ''' 显示100个数(若是一个一个绘制将会非常慢,可以将要画的图片整理好,放到一个矩阵中,显示这个矩阵即可) - 初始化一个二维数组 - 将每行的数据调整成图像的矩阵,放进二维数组 - 显示即可 ''' m,n = imgData.shape width = np.int32(np.round(np.sqrt(n))) height = np.int32(n/width); rows_count = np.int32(np.floor(np.sqrt(m))) cols_count = np.int32(np.ceil(m/rows_count)) pad = 1 display_array = -np.ones((pad+rows_count*(height+pad),pad+cols_count*(width+pad))) for i in range(rows_count): for j in range(cols_count): max_val = np.max(np.abs(imgData[sum,:])) display_array[pad+i*(height+pad):pad+i*(height+pad)+height,pad+j*(width+pad):pad+j*(width+pad)+width] = imgData[sum,:].reshape(height,width,order="F")/max_val # order=F指定以列优先,在matlab中是这样的,python中需要指定,默认以行 sum += 1 plt.imshow(display_array,cmap='gray') #显示灰度图像 plt.axis('off') plt.show() if __name__ == "__main__": PCA_2D() PCA_faceImage()