R语言主成分分析之SVD

时间:2023-01-27 17:02:03
 # PCA (via SVD of the covariance matrix) fitted on the full training set.
    #
    # Reads (defined outside this block):
    #   data.learn.x - training features; must be a numeric matrix, because it
    #                  is multiplied with `%*%` below.
    #   REDUCTION    - number of principal components to keep.
    # Writes: all_col_mean, all_col_sd, cols, data.learn.cov, data.learn.svd,
    #   all_U (whitened projection matrix, cols x REDUCTION), lamda, and
    #   data.learn.PCAx (projected training data with columns V1..V<REDUCTION>).
    all_col_mean <- colMeans(data.learn.x)    # per-column mean of the training set
    all_col_sd <- apply(data.learn.x, 2, sd)  # per-column sample standard deviation
    cols <- ncol(data.learn.x)                # number of original features

    # Fail early with a clear message instead of a subscript error further down.
    stopifnot(
      is.numeric(REDUCTION), length(REDUCTION) == 1L,
      REDUCTION >= 1, REDUCTION <= cols
    )
    kept <- seq_len(REDUCTION)

    # Standardise every training column: subtract its mean, divide by its sd.
    # NOTE(review): a constant column has sd == 0 and yields NaN/Inf here.
    data.learn.PCAx <- sweep(data.learn.x, 2, all_col_mean, "-")
    data.learn.PCAx <- sweep(data.learn.PCAx, 2, all_col_sd, "/")

    # Covariance matrix of the standardised features.
    data.learn.cov <- cov(data.learn.PCAx, data.learn.PCAx)
    # SVD of the covariance matrix: u, d, v. The matrix is symmetric positive
    # semi-definite, so d holds its eigenvalues (variance along each component).
    data.learn.svd <- svd(data.learn.cov)

    # Keep the leading REDUCTION directions; drop = FALSE keeps a matrix even
    # when REDUCTION == 1.
    all_U <- data.learn.svd$u[, kept, drop = FALSE]
    # Reciprocal of the standard deviation along each kept component
    # (1 / sqrt(eigenvalue)), not the reciprocal of the variance.
    lamda <- 1 / sqrt(data.learn.svd$d[kept])
    # Scale each kept direction so the projected components have unit variance.
    # This is PCA whitening: the result is not rotated back with t(U), so it is
    # not ZCA whitening.
    all_U <- sweep(all_U, 2, lamda, "*")

    # Rotate the standardised features onto the kept components (dimension
    # reduction plus whitening).
    data.learn.PCAx <- data.learn.PCAx %*% all_U
    # Name the columns V1..V<REDUCTION> instead of assuming exactly four.
    colnames(data.learn.PCAx) <- paste0("V", kept)
    
    # Project the validation set with the transform fitted on the training set.
    #
    # Reads (defined outside this block): data.valid.x (numeric matrix of
    # validation features), all_col_mean and all_col_sd (training-set column
    # statistics), all_U (whitened projection matrix).
    # Writes: data.valid.PCAx, with one column per column of all_U.

    # The validation features must line up column-for-column with the training
    # statistics; otherwise the projection below would be non-conformable.
    stopifnot(
      ncol(data.valid.x) == length(all_col_mean),
      ncol(data.valid.x) == length(all_col_sd)
    )

    # Standardise every validation column using the training mean and sd.
    data.valid.PCAx <- sweep(data.valid.x, 2, all_col_mean, "-")
    data.valid.PCAx <- sweep(data.valid.PCAx, 2, all_col_sd, "/")

    # Rotate the standardised features onto the kept components.
    data.valid.PCAx <- data.valid.PCAx %*% all_U
    # Name the columns V1..Vk for however many components all_U keeps, instead
    # of assuming exactly four.
    colnames(data.valid.PCAx) <- paste0("V", seq_len(ncol(data.valid.PCAx)))