文章

基于SVD的PCA算法

import numpy as np
import matplotlib.pyplot as plt

# 以三维空间中的某个平面为中心,产生包含噪声的数据点
n = 100
A = np.random.randn(3, n)

# 标准化
A = (A - np.mean(A, axis=1, keepdims=True)) / np.std(A, axis=1, keepdims=True)

# 使用 A^T·A 计算特征值和特征向量
ATA = np.dot(A.T, A)
eigvals, eigvecs = np.linalg.eig(ATA)

# 根据特征值和特征向量计算奇异值和右奇异向量
sigma = np.sqrt(eigvals)
V = eigvecs

# 计算左奇异向量
U = np.dot(A, np.dot(V, np.diag(1/sigma)))

# 保留最大的两个奇异值和相应的奇异向量
sigma_2 = sigma[:2]
U_2 = U[:, :2]
V_2 = V[:, :2]

# 降维后的数据
A_2 = np.dot(U_2, np.dot(np.diag(sigma_2), V_2.T))
A_1 = np.dot(U[:, 0:1], np.dot(np.diag(sigma[0:1]), V[:, 0:1].T))

# 使用 A·A^T 计算特征值和特征向量
AAT = np.dot(A, A.T)
eigvals, eigvecs = np.linalg.eig(AAT)

# 根据特征值和特征向量计算奇异值和左奇异向量
sigma = np.sqrt(eigvals)
U = eigvecs

# 计算右奇异向量
V = np.dot(A.T, np.dot(U, np.diag(1/sigma)))

# 保留最大的两个奇异值和相应的奇异向量
sigma_2 = sigma[:2]
U_2 = U[:, :2]
V_2 = V[:, :2]

# 分别可视化 A, A_2 和 A_1 (A^T·A)
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(241, projection='3d')
ax.scatter(A[0], A[1], A[2])
ax.set_title('A (A^T·A)')

ax = fig.add_subplot(242, projection='3d')
ax.scatter(A_2[0], A_2[1], A_2[2])
ax.set_title('A_2 (A^T·A)')

ax = fig.add_subplot(243, projection='3d')
ax.scatter(A_1[0], A_1[1], A_1[2])
ax.set_title('A_1 (A^T·A)')

# 可视化 A, A_2 和 A_1 共图 (A^T·A)
ax = fig.add_subplot(244, projection='3d')
ax.scatter(A[0], A[1], A[2], label='A')
ax.scatter(A_2[0], A_2[1], A_2[2], label='A_2')
ax.scatter(A_1[0], A_1[1], A_1[2], label='A_1')
ax.set_title('A A_2 A_1 (A^T·A)')
ax.legend()

# 分别可视化 A, A_2 和 A_1 (A·A^T)
ax = fig.add_subplot(245, projection='3d')
ax.scatter(A[1], A[2], A[0])
ax.set_title('A (A·A^T)')

ax = fig.add_subplot(246, projection='3d')
ax.scatter(A_2[1], A_2[2], A_2[0])
ax.set_title('A_2 (A·A^T)')

ax = fig.add_subplot(247, projection='3d')
ax.scatter(A_1[1], A_1[2], A_1[0])
ax.set_title('A_1 (A·A^T)')

# 可视化 A, A_2 和 A_1 共图 (A·A^T)
ax = fig.add_subplot(248, projection='3d')
ax.scatter(A[1], A[2], A[0], label='A')
ax.scatter(A_2[1], A_2[2], A_2[0], label='A_2')
ax.scatter(A_1[1], A_1[2], A_1[0], label='A_1')
ax.set_title('A A_2 A_1 (A·A^T)')
ax.legend()

plt.tight_layout()
plt.show()

License:  CC BY 4.0