基于 SVD 的 PCA 降维实验

实验内容:以三维空间中的某个平面为中心(均值),产生一系列包含噪声的数据点,假设所有数据存储在一个矩阵 $$A∈R^{3×n}$$中,使用基于 SVD 的 PCA 算法对这批数据降维,降为二维数据 $$A_2∈R^{2×n}$$和一维数据 $$A_1∈R^{1×n}$$,并将原始数据和降维后数据可视化出来,同时将SVD的空间变换原理(即行空间的基变换为列空间的基;反之亦然)用可视化的手段展现出来。注意:不允许使用Python中相关的SVD包和PCA包,必须使用原生Python自己实现SVD和PCA。实验步骤:参考步骤:1. 生成数据A;2. 对A进行标准化;3. 计算$$A^TA$$的特征值和特征向量,从而获得奇异值Σ和右奇异向量V;4. 根据奇异值Σ和右奇异向量V,计算得到左奇异向量U,这样便得到 $$A = UΣV^T$$;5. 保留最大的两个(或一个)奇异值以及相应的奇异向量,产生降维后数据$$A_2∈R^{2×n} $$和 $$A_1∈R^{1×n}$$;6. 将A、A2和A1可视化出来,将“行空间的基变换为列空间的基”这一过程可视化出来;7. 从$$AA^T$$出发再次实现降维和可视化。

import numpy as np

import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D

# 生成数据 A

def generate_data(n):
	"""Draw n points from a 3-D unit-covariance Gaussian centred at (1, 2, 3).

	Returns a 3 x n array (one column per sample).  The RNG is reseeded on
	every call, so the output is deterministic.
	"""
	np.random.seed(0)
	centre = [1, 2, 3]
	identity_cov = np.eye(3)
	samples = np.random.multivariate_normal(centre, identity_cov, n)
	return samples.T

# 对 A 进行标准化

def normalize(A):
	"""Standardize each row (feature) of A to zero mean and unit variance.

	A: d x n data matrix (one sample per column).
	Returns the standardized d x n matrix.  Rows with zero standard
	deviation are only centred, instead of producing NaN via 0/0.
	"""
	mean = np.mean(A, axis=1, keepdims=True)
	std = np.std(A, axis=1, keepdims=True)
	# Guard: a constant row has std == 0; dividing by 1 leaves it centred.
	safe_std = np.where(std == 0, 1.0, std)
	return (A - mean) / safe_std
	
# 计算 A^TA 的特征值和特征向量

def calculate_eigenvectors(A):
	"""Eigen-decompose A^T A, returning eigenpairs in descending order.

	A: d x n data matrix.
	Returns (eigenvalues, eigenvectors): eigenvalues sorted descending and
	the matching orthonormal eigenvectors as columns.  Uses np.linalg.eigh
	because A^T A is symmetric PSD; plain eig can return complex values
	with round-off and gives no ordering or orthogonality guarantee.
	"""
	ATA = np.dot(A.T, A)
	eigenvalues, eigenvectors = np.linalg.eigh(ATA)
	# eigh returns ascending order; flip to descending.
	idx = np.argsort(eigenvalues)[::-1]
	return eigenvalues[idx], eigenvectors[:, idx]
	
# 获得奇异值Σ和右奇异向量V

def calculate_singular_values(eigenvalues, eigenvectors):
	"""Convert eigenpairs of A^T A into singular values and right vectors.

	Singular values are sqrt(eigenvalues); eigenvalues that are slightly
	negative due to floating-point round-off (A^T A is PSD, so the exact
	values are >= 0) are clipped to 0 instead of producing NaN.
	"""
	singular_values = np.sqrt(np.maximum(eigenvalues, 0.0))
	right_singular_vectors = eigenvectors
	return singular_values, right_singular_vectors

# 计算左奇异向量U

def calculate_left_singular_vectors(A, singular_values, right_singular_vectors):
	"""Compute the left singular vectors U from A = U S V^T.

	The correct relation is u_i = A v_i / sigma_i, i.e. U = A V S^{-1}
	(the original code computed A S^{-1} V^T, which is wrong, and also
	inverted a diagonal containing zeros: A^T A has rank <= d, so most of
	its n singular values vanish).  Only columns with sigma_i > 0 are
	returned; the result is d x r where r = rank(A).
	"""
	# Relative tolerance separates true singular values from round-off.
	tol = singular_values.max() * 1e-12 if singular_values.size else 0.0
	r = int(np.sum(singular_values > tol))
	# Broadcasting divides column i by sigma_i.
	U = np.dot(A, right_singular_vectors[:, :r]) / singular_values[:r]
	return U

# 保留最大的 k 个奇异值以及相应的奇异向量

def truncate_singular_values(singular_values, right_singular_vectors, k):
	"""Keep only the k largest singular values and their right vectors.

	Assumes the inputs are already sorted in descending order.
	Returns (sigma_k, V_k) with shapes (k,) and (n, k).
	"""
	return singular_values[:k], right_singular_vectors[:, :k]
	
# 降维

def reduce_dimension(A, singular_values, right_singular_vectors, k):
	"""Project the data A onto its top-k principal directions.

	Since A = U S V^T, the reduced coordinates are
	U_k^T A = Sigma_k V_k^T (a k x n matrix), so they can be formed from
	the singular values and right singular vectors alone.  (The original
	code attempted V_k^T @ A, a (k x n)(d x n) product whose shapes do
	not even align.)
	"""
	truncated_singular_values = singular_values[:k]
	truncated_right_singular_vectors = right_singular_vectors[:, :k]
	# Row i of the result is sigma_i * v_i^T.
	reduced_A = truncated_singular_values[:, np.newaxis] * truncated_right_singular_vectors.T
	return reduced_A
	
# 可视化

def visualize(A, reduced_A):
	"""Plot the original 3-D data next to its reduced representation.

	A: 3 x n original data.  reduced_A: k x n reduced data.  The original
	code indexed reduced_A[1, :] unconditionally and crashed for k = 1,
	even though the task also requires visualizing the 1-D reduction; 1-D
	data is now drawn along the x-axis.
	"""
	fig = plt.figure()
	ax1 = fig.add_subplot(121, projection='3d')
	ax1.scatter(A[0, :], A[1, :], A[2, :])
	ax1.set_title('Original Data')
	ax2 = fig.add_subplot(122)
	if reduced_A.shape[0] >= 2:
		ax2.scatter(reduced_A[0, :], reduced_A[1, :])
	else:
		# 1-D data: place every point on the line y = 0.
		ax2.scatter(reduced_A[0, :], np.zeros(reduced_A.shape[1]))
	ax2.set_title('Reduced Data')
	plt.show()

# 主函数

def main():
	"""Generate data, run the SVD-based PCA pipeline, and plot the result."""
	num_points = 100  # number of sample points
	target_dim = 2  # dimensionality after reduction
	data = generate_data(num_points)
	data_std = normalize(data)
	eigvals, eigvecs = calculate_eigenvectors(data_std)
	sigma, right_vectors = calculate_singular_values(eigvals, eigvecs)
	left_vectors = calculate_left_singular_vectors(data_std, sigma, right_vectors)
	projected = reduce_dimension(data_std, sigma, right_vectors, target_dim)
	visualize(data, projected)

if __name__ == '__main__':
	main()

import numpy as np

import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D

# 生成包含噪声的数据点

def generate_data_plane(center, num_points, noise_std):
    """Sample noisy points near a random plane passing through *center*.

    The indentation of this function had been destroyed (every body line
    was at column 0, an IndentationError); the structure is restored here
    with the original logic unchanged.

    center: length-3 point the plane passes through.
    num_points: number of samples to draw.
    noise_std: std-dev of isotropic Gaussian noise added to each point.
    Returns a 3 x num_points array (one column per point).
    """
    plane_normal = np.random.randn(3)
    plane_normal /= np.linalg.norm(plane_normal)
    plane_point = center
    plane_d = -np.dot(plane_normal, plane_point)
    points = []
    for _ in range(num_points):
        x = np.random.uniform(center[0] - 1, center[0] + 1)
        y = np.random.uniform(center[1] - 1, center[1] + 1)
        # Solve n·p + d = 0 for z.  NOTE(review): assumes the random
        # normal's z-component is not ~0 — confirm or resample if needed.
        z = (-plane_normal[0] * x - plane_normal[1] * y - plane_d) / plane_normal[2]
        point = np.array([x, y, z]) + np.random.randn(3) * noise_std
        points.append(point)
    return np.array(points).T

# 计算SVD

def svd(A):
    """Thin SVD of A (m x n, m <= n) built from eigh of A A^T.

    The assignment forbids calling np.linalg.svd, so the decomposition is
    derived from the eigen-decomposition of the small symmetric matrix
    A A^T: its eigenvectors are the left singular vectors U and its
    eigenvalues the squared singular values; then v_i = A^T u_i / s_i.

    Returns (U, S, V) with A == U @ diag(S) @ V.T:
    U is m x m orthonormal, S the m singular values (descending),
    V is n x m with orthonormal columns for every nonzero s_i.
    """
    AAT = A @ A.T
    eigvals, U = np.linalg.eigh(AAT)
    # eigh sorts ascending; flip to descending singular-value order.
    order = np.argsort(eigvals)[::-1]
    eigvals = eigvals[order]
    U = U[:, order]
    # Clip tiny negative round-off (A A^T is PSD) before the sqrt.
    S = np.sqrt(np.maximum(eigvals, 0.0))
    # Guard zero singular values: their V columns are irrelevant because
    # they are multiplied by s_i = 0 in the reconstruction.
    safe = np.where(S > 0, S, 1.0)
    V = (A.T @ U) / safe
    return U, S, V

# 使用SVD进行PCA降维

def pca(A, num_components):
    """PCA of A (3 x n) via SVD of the column-centered data.

    The indentation of this function had been destroyed (IndentationError
    as written); the structure is restored with the logic unchanged.

    Returns (A_2, A_1): the data projected onto the top num_components
    principal directions (num_components x n) and onto the single top
    direction (1 x n).
    """
    mean = np.mean(A, axis=1)
    centered_A = A - mean.reshape(-1, 1)
    U, S, V = svd(centered_A)
    # Left singular vectors are the principal directions; project onto them.
    A_2 = U[:, :num_components].T @ centered_A
    A_1 = U[:, :1].T @ centered_A
    return A_2, A_1

# 可视化数据

def visualize_data(A, title):
    """Scatter-plot the rows of A as 3-D coordinates under *title*.

    The indentation of this function had been destroyed (IndentationError
    as written); the structure is restored with the logic unchanged.
    NOTE(review): A is indexed as A[0..2], so a 1-row or 2-row input
    (the reduced data) relies on the missing rows being absent — the 3-D
    axes are only fully meaningful for 3 x n input.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(A[0], A[1], A[2])
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title(title)
    plt.show()

# --- Demo: build a noisy planar cloud and reduce it with PCA ---
np.random.seed(0)
center = np.array([1, 2, 3])
A = generate_data_plane(center, 100, 0.1)

# Show the raw 3-D cloud first.
visualize_data(A, 'Original Data')

# Project onto the top-2 and top-1 principal directions.
A_2, A_1 = pca(A, 2)

visualize_data(A_2, 'PCA Reduced to 2D')
visualize_data(A_1, 'PCA Reduced to 1D')

import numpy as np

import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D

# --- Step 1: generate data A (3 x n) ---
np.random.seed(0)

n = 1000

A = np.random.randn(3, n)

# --- Step 2: center A (subtract each row's mean) ---
A = A - np.mean(A, axis=1, keepdims=True)

# --- Step 3: eigen-decomposition of the symmetric matrix A^T A ---
# Its eigenvectors are the right singular vectors V of A and its
# eigenvalues are the squared singular values.  eigh (not eig) is used:
# it guarantees real eigenvalues and orthonormal eigenvectors.
C = np.dot(A.T, A)

eigenvalues, eigenvectors = np.linalg.eigh(C)

# Sort eigenpairs by descending eigenvalue.
sorted_indices = np.argsort(eigenvalues)[::-1]

sorted_eigenvalues = eigenvalues[sorted_indices]

sorted_eigenvectors = eigenvectors[:, sorted_indices]

# --- Step 4: singular values and left singular vectors ---
# Clip tiny negative round-off before the sqrt (A^T A is PSD).
S = np.sqrt(np.maximum(sorted_eigenvalues, 0.0))

VT = sorted_eigenvectors.T

# u_i = A v_i / sigma_i.  (The original U = A V lacked the 1/sigma
# normalization.)  A has rank <= 3, so only three sigmas are nonzero.
r = 3
U = A.dot(sorted_eigenvectors[:, :r]) / S[:r]

# --- Step 5: keep the largest k singular values ---
# The reduced data is U_k^T A = Sigma_k V_k^T, a k x n matrix.  (The
# original Ak = U[:, :k] diag(S[:k]) was 3 x k — it scattered k points
# instead of the n reduced samples.)
k = 2

Ak = np.diag(S[:k]).dot(VT[:k, :])

# --- Step 6: visualize original data and the 2-D reduction ---
fig = plt.figure()

ax1 = fig.add_subplot(121, projection='3d')

ax1.scatter(A[0], A[1], A[2])

ax1.set_title('Original Data')

ax2 = fig.add_subplot(122)

ax2.scatter(Ak[0], Ak[1])

ax2.set_title('PCA Data')

# --- Visualize "row-space basis -> column-space basis" ---
# A maps each right singular vector v_i (row-space basis, in R^n) onto
# sigma_i * u_i, a scaled column-space basis vector in R^3.  Only the
# 3-D side can be drawn: the unit u_i and the image A v_i.  (The
# original quiver calls unpacked 1000-dim eigenvectors as 3-D arrows,
# a ValueError, and V^T V = I carried no information.)
fig = plt.figure()

ax = fig.add_subplot(111, projection='3d')

origin = np.zeros(3)

for i, color in enumerate(('r', 'g', 'b')):
    ax.quiver(*origin, *U[:, i], color=color, label=f'Original Basis {i + 1}')

for i, color in enumerate(('m', 'y', 'c')):
    # A v_i = sigma_i u_i: arrow lengths show the singular values.
    image = A.dot(sorted_eigenvectors[:, i])
    ax.quiver(*origin, *image, color=color, label=f'Transformed Basis {i + 1}')

ax.set_xlabel('X')

ax.set_ylabel('Y')

ax.set_zlabel('Z')

ax.legend()

plt.show()