# PCA & PCR Code


# -*- coding: utf-8 -*-


# DO NOT CHANGE

from sklearn import datasets

import numpy as np

import matplotlib.pyplot as plt

from sklearn.decomposition import PCA

from sklearn.linear_model import LinearRegression


def cal_PC(X, n_components):
    """Compute the leading principal components of ``X``.

    Side effect: the columns of ``X`` are mean-centred IN PLACE, i.e. the
    caller's array is modified.  The rest of this file relies on that: the
    same array is passed to ``proj_PC`` afterwards, which does no centring
    of its own.

    Parameters
    ----------
    X : 2-D float ndarray
        Input data matrix, one sample per row and one feature per column.
    n_components : int
        Number of principal components to keep.

    Returns
    -------
    eigenvalue : ndarray, shape (n_components,)
        The largest eigenvalues of ``X.T @ X`` (computed on the centred
        data), in descending order.  The matrix is not divided by the number
        of samples, so these are scaled variances.
    eigenvector : ndarray, shape (n_components, n_features)
        The matching unit-length eigenvectors, one principal component per
        ROW (``proj_PC`` transposes this matrix before multiplying).
    """
    # Centre every feature column in place (see the side-effect note above).
    X -= X.mean(axis=0)

    scatter = np.matmul(X.T, X)

    # The matrix is symmetric, so use eigh: it returns real eigenpairs.
    # Neither eig nor eigh hands back the largest eigenvalues first, so the
    # order is made explicit here instead of trusting the solver's output.
    all_values, all_vectors = np.linalg.eigh(scatter)
    leading = np.argsort(all_values)[::-1][:n_components]

    eigenvalue = all_values[leading]
    # Eigenvectors are the columns of all_vectors; transpose so that each
    # selected component becomes one row of the returned matrix.
    eigenvector = all_vectors[:, leading].T

    return eigenvalue, eigenvector

    

    

def proj_PC(X, eigvec):
    """Project the rows of ``X`` onto the given principal components.

    X: n_data x n_features input data matrix.  No centring is done here.
    eigvec: component matrix; it is transposed before the product, so it
        must be laid out as n_components x n_features (one component per
        row), which is the layout ``cal_PC`` returns.

    Returns the n_data x n_components matrix ``X @ eigvec.T``.
    """
    components_as_columns = eigvec.T
    return np.matmul(X, components_as_columns)

    

def PCR(X, y, n_components):
    """Fit a principal component regression model.

    X: input data matrix (passed to ``cal_PC``, which centres it in place).
    y: output target vector.
    n_components: the number of principal components to regress on.

    Returns the ``LinearRegression`` model fitted on the projected data, so
    it must be given projected data (see ``proj_PC``) when predicting or
    scoring.
    """
    # Only the component matrix is needed; the eigenvalues are ignored.
    _, components = cal_PC(X, n_components)
    projected = proj_PC(X, components)
    return LinearRegression().fit(projected, y)


# PCA on the iris data: hand-written implementation vs. scikit-learn
iris = datasets.load_iris()
X1, y1 = iris.data, iris.target
n_components = 2

# Hand-written PCA: components first, then the projected samples.
eigval, eigvec = cal_PC(X1, n_components)
T1 = proj_PC(X1, eigvec)

# Reference projection from scikit-learn's PCA.
pca = PCA(n_components=2).fit(X1)
T = pca.transform(X1)

# Draw both projections on the same axes, coloured by iris.target:
# scikit-learn's result first, then the hand-written one.
for scores in (T, T1):
    plt.scatter(scores[:, 0], scores[:, 1], c=y1)


# Regression: principal component regression (PCR) vs. ordinary regression
n_components = 4

# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2, so this
# line needs an older scikit-learn release (or a replacement dataset).
boston = datasets.load_boston()
X2 = boston.data
y2 = boston.target

# NOTE(review): reg_pca is fitted exactly like reg_pcr below and is not used
# again in this section; kept in case the name is referenced elsewhere.
reg_pca = PCR(X2, y2, n_components)

# Ordinary linear regression using all features.
reg = LinearRegression()
reg.fit(X2, y2)

# PCR model, plus the projected data it has to be scored on (the model was
# fitted on projected data, not on the raw features).
reg_pcr = PCR(X2, y2, n_components)
eigval, eigvec = cal_PC(X2, n_components)
T2 = proj_PC(X2, eigvec)

# Compare R-square on all samples.  The values are stored and printed because
# the result of a bare expression statement is discarded when this file runs
# as a script.
r2_all_features = reg.score(X2, y2)
r2_pcr = reg_pcr.score(T2, y2)
print("R-square (all features):", r2_all_features)
print("R-square (PCR, %d components):" % n_components, r2_pcr)



# + Recent posts