diff --git a/Examples/Distance_metric_example.py b/Examples/Distance_metric_example.py new file mode 100644 index 0000000..90a6bf8 --- /dev/null +++ b/Examples/Distance_metric_example.py @@ -0,0 +1,66 @@ +from MLlib.distance_metrics import Distance_metrics +import numpy as np + +data = np.genfromtxt('dataset/salaryinp.csv', delimiter=',') +X1 = np.array(data[:len(data)//2]) +X2 = np.array(data[len(data)//2:]) + + +Euc = Distance_metrics.Euclidean_Distance(X1=X1, X2=X2) +print("Euclidean Distance: ", Euc) + +Mah = Distance_metrics.Manhattan_Distance(X1=X1, X2=X2) +print("Manhattan Distance: ", Mah) + +Che = Distance_metrics.Chebyshev_Distance(X1=X1, X2=X2) +print("Chebyshev Distance: ", Che) + +# order will be as per user's requirement +Mink = Distance_metrics.Minkowski_Distance(X1=X1, X2=X2, p=3) +print("Minkowski Distance: ", Mink) + +# order and weight will be as per user's requirement +WMink = Distance_metrics.WMinkowski_Distance(X1=X1, X2=X2, p=3, W=0.5) +print("Weighted Minkowski Distance: ", WMink) + +# Variance to be provided by user +V = np.random.randint(1, 10, size=(len(X1), 1)) +SEuc = Distance_metrics.sEuclidean_distance(X1=X1, X2=X2, V=V) +print("Standardized Euclidean Distance: ", SEuc) + +Maha = Distance_metrics.Mahalanobis_Distance(X=X1, d=X2) +print("Mahalanobis Distance: ", Maha) + +# Generating Boolean data +# User can generate boolean data as per requirement +X1 = np.random.randint(0, 2, size=(100, 5)) +X2 = np.random.randint(0, 2, size=(100, 5)) + +Ham = Distance_metrics.Hamming_Distance(X1=X1, X2=X2) +print("Hamming Distance: ", Ham) + +Dist_met = Distance_metrics() + +Jacc = Dist_met.Jaccard_Distance(X1=X1, X2=X2) +print("Jaccard Distance: ", Jacc) + +Match_dis = Dist_met.Matching_Distance(X1=X1, X2=X2) +print("Matching Distance: ", Match_dis) + +Dice = Dist_met.Dice_Distance(X1=X1, X2=X2) +print("Dice Distance: ", Dice) + +Kuls = Dist_met.Kulsinki_Distance(X1=X1, X2=X2) +print("Kulsinki Distance: ", Kuls) + +Rog = 
Dist_met.Rogers_Tanimoto_Distance(X1=X1, X2=X2) +print("Rogers Tanimoto Distance: ", Rog) + +Rus = Dist_met.Russell_Rao_Distance(X1=X1, X2=X2) +print("Russell Rao Distance: ", Rus) + +Sok = Dist_met.Sokal_Sneath_Distance(X1=X1, X2=X2) +print("Sokal Sneath Distance: ", Sok) + +Sok_M = Dist_met.Sokal_Michener_Distance(X1=X1, X2=X2) +print("Sokal Michener Distance: ", Sok_M) diff --git a/MLlib/distance_metrics.py b/MLlib/distance_metrics.py index 802aa80..dfb96fb 100644 --- a/MLlib/distance_metrics.py +++ b/MLlib/distance_metrics.py @@ -1,13 +1,15 @@ import numpy as np + + class Distance_metrics: """ Calculate distance between each corresponding points of two arrays using different distance metrics """ - def Eucledian_Distance(X1,X2): + def Euclidean_Distance(X1, X2): """" - Returns the list of eucledian distance - between two corresponding points of + Returns the list of euclidean distance + between two corresponding points of two arrays PARAMETERS @@ -22,18 +24,18 @@ def Eucledian_Distance(X1,X2): ========= distance:list - Returns the list of eucledian distance - between two corresponding points of + Returns the list of euclidean distance + between two corresponding points of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum((X1[i]-X2[i])**2) + single = 0 + single = np.sum((X1[i]-X2[i])**2) distance.append(np.sqrt(single)) - return(distance) + return (distance) - def Manhattan_Distance(X1,X2): + def Manhattan_Distance(X1, X2): """" Returns the list of manhattan distance between two corresponding points of @@ -52,17 +54,17 @@ def Manhattan_Distance(X1,X2): distance:list Returns the list of manhattan distance - between two corresponding points of + between two corresponding points of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum(abs(X1[i]-X2[i])) + single = 0 + single = np.sum(abs(X1[i]-X2[i])) distance.append(single) - return(distance) + return (distance) - def 
Chebyshev_Distance(X1,X2): + def Chebyshev_Distance(X1, X2): """" Returns the list of chebyshev distance between two corresponding points of @@ -84,14 +86,14 @@ def Chebyshev_Distance(X1,X2): between two corresponding points of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum(max(X1[i]-X2[i])) + single = 0 + single = np.max(np.abs(X1[i]-X2[i])) distance.append(single) - return(distance) + return (distance) - def Minkowski_Distance(X1,X2,p): + def Minkowski_Distance(X1, X2, p): """" Returns list of minkowski distance of order 'p' between two corresponding vectors of @@ -116,14 +118,14 @@ def Minkowski_Distance(X1,X2,p): between two corresponding vectors of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum((abs(X1[i]-X2[i]))**p) + single = 0 + single = np.sum((abs(X1[i]-X2[i]))**p) distance.append((single)**(1/p)) - return(distance) - - def WMinkowski_Distance(X1,X2,p,W): + return (distance) + + def WMinkowski_Distance(X1, X2, p, W): """" Returns list of weighted minkowski distance of order 'p' between two corresponding vectors weighted by W of @@ -151,14 +153,14 @@ def WMinkowski_Distance(X1,X2,p,W): between two corresponding vectors of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum((abs(W*(X1[i]-X2[i])))**p) + single = 0 + single = np.sum((abs(W*(X1[i]-X2[i])))**p) distance.append((single)**(1/p)) - return(distance) + return (distance) - def Hamming_Distance(X1,X2): + def Hamming_Distance(X1, X2): """ Returns the Hamming distance between two binary arrays @@ -178,7 +180,313 @@ def Hamming_Distance(X1,X2): two binary arrays """ s = 0 - for e1,e2 in zip(X1,X2): + for e1, e2 in zip(X1, X2): s += abs(e1-e2) distance = s/len(X1) return distance + + def sEuclidean_distance(X1, X2, V): + """ + Returns the list of standardized euclidean distance + between two corresponding points of + two arrays + + PARAMETERS + ========== +
X1:ndarray(dtype=int,axis=1) + input array with more than 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with more than 1 dimension + + V:list + input array with 1 dimension + + RETURNS + ========= + + distance:list + Returns the list of standardized euclidean distance + between two corresponding points of + two arrays + """ + distance = [] + for i in range(len(X1)): + single = 0 + single = np.sum(((X1[i]-X2[i])/V[i])**2) + distance.append(np.sqrt(single)) + return (distance) + + def Mahalanobis_Distance(X, d, V=None): + """ + Returns the mahalanobis distance between + points and distribution + + PARAMETERS + ========== + X:ndarray(dtype=int,axis=1) + input array with more than 1 dimension. + Represents the points + + d:ndarray(dtype=int,axis=1) + input array with more than 1 dimension.Represent + the distribution from which Mahalanobis + distance is to be calculated + + V:ndarray(dtype=float64,axis=1) + input array with more than 1 dimension.Represent + the covariance matrix.If None is given,then will + be computed from the data + + RETURNS + ========= + distance:list + Returns the list of mahalanobis distance + between points and given distribution + """ + distance = [] + for i in range(len(X)): + x_minus_mu = X[i]-np.mean(d, axis=0) + if V is None: + V = np.cov(d.T) + VI = np.linalg.inv(V) + dist = np.sqrt(np.dot(np.dot(x_minus_mu, VI), x_minus_mu.T)) + distance.append(dist) + return (distance) + + def __boolean_opr(self, X1, X2): + """ + Returns result of some binary operation + between two arrays.Any non zero value is + considered as 1 + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + result:tuple + + result[0]:Number of dimensions + result[1]:Number of dims in which both values are True + result[2]:Number of dims in which the first value is True and second is False + result[3]:Number of dims in which the first value is False
and second is True + result[4]:Number of dims in which both values are False + """ + if len(X1) != len(X2): + raise TypeError("X1 and X2 must have same length") + result = [] + for i in range(len(X1)): + if X1[i] != 0: + X1[i] = 1 + if X2[i] != 0: + X2[i] = 1 + result.append(len(X1)) + result.append(np.sum((X1 == 1) & (X2 == 1))) + result.append(np.sum((X1 == 1) & (X2 == 0))) + result.append(np.sum((X1 == 0) & (X2 == 1))) + result.append(np.sum((X1 == 0) & (X2 == 0))) + return (tuple(result)) + + def Jaccard_Distance(self, X1, X2): + """ + Returns the list of Jaccard distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Jaccard distance + """ + distance = [] + for i in range(len(X1)): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append((result[2]+result[3]) / + (result[1]+result[2]+result[3])) + return (distance) + + def Matching_Distance(self, X1, X2): + """ + Returns the list of Matching distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Matching distance + """ + distance = [] + for i in range(len(X1)): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append((result[2]+result[3])/result[0]) + return (distance) + + def Dice_Distance(self, X1, X2): + """ + Returns the list of Dice distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Dice distance + """ + distance = [] + for 
i in range(len(X1)): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append((result[2]+result[3]) / + (2*result[1]+result[2]+result[3])) + return (distance) + + def Kulsinki_Distance(self, X1, X2): + """ + Returns the list of Kulsinki distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Kulsinki distance + """ + distance = [] + for i in range(len(X1)): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append( + (result[2]+result[3]+result[0]-result[1])/(result[2]+result[3]+result[0])) + return (distance) + + def Rogers_Tanimoto_Distance(self, X1, X2): + """ + Returns the list of Rogers-Tanimoto distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Rogers-Tanimoto distance + """ + distance = [] + for i in range(len(X1)): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append(2*(result[2]+result[3]) / + (result[2]+result[3]+result[0])) + return (distance) + + def Russell_Rao_Distance(self, X1, X2): + """ + Returns the list of Russell-Rao distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Russell-Rao distance + """ + distance = [] + for i in range(len(X1)): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append((result[0]-result[1])/result[0]) + return (distance) + + def Sokal_Michener_Distance(self, X1, X2): + """ + Returns the list of Sokal-Michener distance between 
+ two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Sokal-Michener distance + """ + distance = [] + for i in range(len(X1)): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append(2*(result[2]+result[3]) / + (result[0]+result[2]+result[3])) + return (distance) + + def Sokal_Sneath_Distance(self, X1, X2): + """ + Returns the list of Sokal-Sneath distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Sokal-Sneath distance + """ + distance = [] + for i in range(len(X1)): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append((result[2]+result[3]) / + (0.5*result[1]+result[2]+result[3])) + return (distance) diff --git a/README.md b/README.md index 68c5a43..f587ab3 100644 --- a/README.md +++ b/README.md @@ -84,14 +84,21 @@ Follow the following steps to get started with contributing to the repository. 
| | | | | Z_Score | [models.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/models.py#L1637) | | | | | | Sequential Neural Network | [models.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/models.py#L1680) | -| Loss Functions | Location | Regularizer | Location | Metrics | Location | -| :------------ | ------------: | :------------ | ------------: | :------------ | ------------: | -|**LOSS FUNCTIONS**| |**REGULARIZER**| |**METRICS**| | -| Mean Squared Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L5) | L1_Regularizer| [regularizer.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/regularizer.py#L9) | Confusion Matrix | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L25) -| Logarithmic Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L57) | L2_Regularizer | [regularizer.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/regularizer.py#L58) | Precision | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L81) -| Absolute Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L113) | | | Accuracy | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L80) -| Cosine Similarity | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L173) | | | Recall | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L82) -| Log_cosh | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L248) | | | F1 Score | 
[metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L85) -| Huber | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L300) | | | F-B Theta | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L88) -| Mean Squared Log Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L367) | | | Specificity | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L86) -| Mean Absolute Percentage Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L399) +| Loss Functions | Location | Regularizer | Location | Metrics | Location | Distance Metrics | Location | +| :------------ | ------------: | :------------ | ------------: | :------------ | ------------: | :------------ | ------------: | +|**LOSS FUNCTIONS**| |**REGULARIZER**| |**METRICS**| |**DISTANCE METRICS**| | +| Mean Squared Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L5) | L1_Regularizer| [regularizer.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/regularizer.py#L9) | Confusion Matrix | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L25) | Euclidean Distance | [distance_metrics.py]() | +| Logarithmic Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L57) | L2_Regularizer | [regularizer.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/regularizer.py#L58) | Precision | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L81) | Manhattan Distance | [distance_metrics.py]() | +| Absolute Error | 
[loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L113) | | | Accuracy | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L80) | Minkowski Distance | [distance_metrics.py]() | +| Cosine Similarity | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L173) | | | Recall | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L82) | Chebyshev Distance | [distance_metrics.py]() | +| Log_cosh | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L248) | | | F1 Score | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L85) | WMinkowski Distance | [distance_metrics.py]() | +| Huber | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L300) | | | F-B Theta | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L88) | Hamming Distance | [distance_metrics.py]() | +| Mean Squared Log Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L367) | | | Specificity | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L86) | sEuclidean Distance | [distance_metrics.py]() | +| Mean Absolute Percentage Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L399) | | | | | Mahalanobis Distance | [distance_metrics.py]()| +| | | | | | | Jaccard Distance | [distance_metrics.py]() | +| | | | | | | Matching Distance | [distance_metrics.py]() | +| | | | | | | Dice Distance | [distance_metrics.py]() | +| | | | | | | Kulsinki Distance | [distance_metrics.py]() | +| | | | | | | Rogers-Tanimoto Distance | [distance_metrics.py]() | +| | | | | | | 
Russell-Rao Distance | [distance_metrics.py]() | +| | | | | | | Sokal-Michener Distance | [distance_metrics.py]() |