# -*- coding: utf-8 -*-
"""Linear Discriminant Analysis (LDA) on the Iris dataset, done two ways.

Exported from the Colaboratory notebook ``Untitled14.ipynb``; the original
notebook is located at
    https://colab.research.google.com/drive/1tCT28q7D3YlMnGmVLEiFWsAdT2hlW-ZY

* ``run_sklearn_demo`` projects Iris with scikit-learn's
  ``LinearDiscriminantAnalysis`` and plots the result.
* ``LDA`` is a from-scratch NumPy implementation; ``run_scratch_demo``
  applies it to a random 80 % training split of Iris.

matplotlib and scikit-learn are imported inside the functions that need
them, so the ``LDA`` class itself only depends on NumPy.
"""

from itertools import cycle

import numpy as np

# One colour per Iris class; cycled when there are more classes than colours.
COLORS = ["navy", "turquoise", "darkorange"]


def run_sklearn_demo():
    """Plot the two-component scikit-learn LDA projection of Iris."""
    import matplotlib.pyplot as plt
    from sklearn import datasets
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    X_r2 = LinearDiscriminantAnalysis(n_components=2).fit(X, y).transform(X)

    plt.figure()
    for color, i, target_name in zip(COLORS, [0, 1, 2], iris.target_names):
        plt.scatter(
            X_r2[y == i, 0], X_r2[y == i, 1], alpha=0.8, color=color, label=target_name
        )
    plt.legend(loc="best", shadow=False, scatterpoints=1)
    plt.title("LDA of IRIS dataset")
    plt.show()


def _scatter(samples):
    """Return the scatter matrix (sum of outer products of centred rows).

    Equal to ``np.cov(samples.T) * (len(samples) - 1)`` but stays a finite
    2-D matrix for a single row (all zeros) and for a single feature.
    """
    centered = samples - samples.mean(axis=0)
    return centered.T.dot(centered)


class LDA:
    """From-scratch Linear Discriminant Analysis.

    Args:
        n_components: Number of discriminant axes to keep. ``None`` keeps
            every axis (one per input feature).

    Attributes:
        n_components: The value passed to the constructor.
        eig_vectors: ``None`` until ``transform`` is called; afterwards an
            ``(n_features, n_features)`` array whose columns are the
            eigenvectors of ``pinv(S_w) @ S_b`` ordered by decreasing
            eigenvalue.
    """

    def __init__(self, n_components=None):
        self.n_components = n_components
        self.eig_vectors = None

    def transform(self, X, y, *, plot=True):
        """Fit the discriminant axes on ``(X, y)`` and project ``X`` onto them.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` class labels.
            plot: When true (the default) and ``n_components == 2``, show a
                scatter plot of the projection coloured by class.

        Returns:
            ``X`` projected onto the leading ``n_components`` discriminant
            axes, shape ``(n_samples, n_components)``. The data is not
            centred before projection.

        Raises:
            ValueError: If ``y`` is ``None``, ``X`` is not 2-D, or ``X`` and
                ``y`` disagree on the number of samples.
        """
        if y is None:
            raise ValueError("LDA is supervised: y (class labels) must not be None")
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D, got {X.ndim}-D input")
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.shape[0]} labels"
            )

        scatter_t = _scatter(X)
        scatter_w = sum(_scatter(X[y == label]) for label in np.unique(y))
        scatter_b = scatter_t - scatter_w

        # pinv(S_w) @ S_b is not symmetric in general, so the symmetric-only
        # solver ``eigh`` would return wrong directions. Use the general
        # solver and sort explicitly; the spectrum is real in exact
        # arithmetic, so any imaginary part is round-off and is dropped.
        eig_values, eig_vectors = np.linalg.eig(
            np.linalg.pinv(scatter_w).dot(scatter_b)
        )
        order = np.argsort(eig_values.real)[::-1]
        self.eig_vectors = eig_vectors[:, order].real

        pc = X.dot(self.eig_vectors[:, : self.n_components])

        if plot and self.n_components == 2:
            self._plot(pc, y)
        return pc

    @staticmethod
    def _plot(pc, y):
        """Scatter-plot a two-column projection, one colour per class."""
        import matplotlib.pyplot as plt

        for color, label in zip(cycle(COLORS), np.unique(y)):
            class_data = pc[y == label]
            plt.scatter(class_data[:, 0], class_data[:, 1], c=color)
        plt.show()


def run_scratch_demo():
    """Apply the from-scratch ``LDA`` to a random training split of Iris."""
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    data = load_iris()
    X, y = data.data, data.target
    X_train, _, Y_train, _ = train_test_split(X, y, test_size=0.2)

    LDA_object = LDA(n_components=2)
    X_train_modified = LDA_object.transform(X_train, Y_train)

    print(
        "Original Data Size:", X_train.shape,
        "\nModified Data Size:", X_train_modified.shape,
    )


if __name__ == "__main__":
    run_sklearn_demo()
    run_scratch_demo()