From 2216c2330c817f7da0154f69c158a324d3be8bbb Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 00:13:20 -0700 Subject: [PATCH 1/8] Remove old, obsolete code to improve code clarity Before this, we had a different trip format, in which the points for the trips were stored directly in the sections. We have since switched to a format in which the points are stored in a timeseries and the trips and sections are generated and stored as separate objects that simply point to the original data. The tour modelling code had functions that would consume both kinds of data, choosing between them by using an "old" flag. This made the code much harder to read, since the old code was not pulled out into a separate module but was interspersed into several functions. This removes all the obsolete code. In particular, it removes the `old` flag, all conditional checks on the `old` flag, and all references to the old trip format. Now, there are no usages of the old trip format in the tour model. 
``` bash-3.2$ grep -r trip_old emission/analysis/modelling/tour_model/ | wc -l 0 ``` --- .../modelling/tour_model/cluster_pipeline.py | 61 ++++--------- .../modelling/tour_model/featurization.py | 33 +++---- .../modelling/tour_model/representatives.py | 85 ++++++++++-------- .../modelling/tour_model/similarity.py | 90 +++++++------------ 4 files changed, 107 insertions(+), 162 deletions(-) diff --git a/emission/analysis/modelling/tour_model/cluster_pipeline.py b/emission/analysis/modelling/tour_model/cluster_pipeline.py index 2489ca8cb..a5bfd8a6a 100644 --- a/emission/analysis/modelling/tour_model/cluster_pipeline.py +++ b/emission/analysis/modelling/tour_model/cluster_pipeline.py @@ -1,6 +1,5 @@ # Standard imports import math -import datetime import uuid as uu import sys import logging @@ -10,13 +9,6 @@ import emission.analysis.modelling.tour_model.similarity as similarity import emission.analysis.modelling.tour_model.featurization as featurization import emission.analysis.modelling.tour_model.representatives as representatives - -from emission.core.wrapper.trip_old import Trip, Section, Fake_Trip - -import emission.core.wrapper.trip as ecwt -import emission.core.wrapper.section as ecws -import emission.storage.decorations.trip_queries as ecsdtq -import emission.storage.decorations.section_queries as ecsdsq import emission.storage.decorations.analysis_timeseries_queries as esda """ @@ -41,39 +33,18 @@ """ #read the data from the database. 
-def read_data(uuid=None, size=None, old=True): +def read_data(uuid=None): db = edb.get_trip_db() - if not old: - logging.debug("not old") - trips = esda.get_entries(esda.CLEANED_TRIP_KEY, uuid, - time_query=None, geo_query=None) - return trips - - if old: - data = [] - trip_db = db - if uuid: - trips = trip_db.find({'user_id' : uuid, 'type' : 'move'}) - else: - trips = trip_db.find({'type' : 'move'}) - for t in trips: - try: - trip = Trip.trip_from_json(t) - except: - continue - if not (trip.trip_start_location and trip.trip_end_location and trip.start_time): - continue - data.append(trip) - if size: - if len(data) == size: - break - return data + trips = esda.get_entries(esda.CLEANED_TRIP_KEY, uuid, + time_query=None, geo_query=None) + logging.info("After reading data, returning %s trips" % len(trips)) + return trips #put the data into bins and cut off the lower portion of the bins -def remove_noise(data, radius, old=True): +def remove_noise(data, radius): if not data: return [], [] - sim = similarity.similarity(data, radius, old) + sim = similarity.similarity(data, radius) sim.bin_data() logging.debug('number of bins before filtering: %d' % len(sim.bins)) sim.delete_bins() @@ -81,10 +52,10 @@ def remove_noise(data, radius, old=True): return sim.newdata, sim.bins #cluster the data using k-means -def cluster(data, bins, old=True): +def cluster(data, bins): if not data: return 0, [], [] - feat = featurization.featurization(data, old=old) + feat = featurization.featurization(data) min = bins max = int(math.ceil(1.5 * bins)) feat.cluster(min_clusters=min, max_clusters=max) @@ -92,10 +63,10 @@ def cluster(data, bins, old=True): return feat.clusters, feat.labels, feat.data #prepare the data for the tour model -def cluster_to_tour_model(data, labels, old=True): +def cluster_to_tour_model(data, labels): if not data: return [] - repy = representatives.representatives(data, labels, old=old) + repy = representatives.representatives(data, labels) repy.list_clusters() 
repy.get_reps() repy.locations() @@ -103,12 +74,12 @@ def cluster_to_tour_model(data, labels, old=True): repy.cluster_dict() return repy.tour_dict -def main(uuid=None, old=True): - data = read_data(uuid, old=old) +def main(uuid=None): + data = read_data(uuid) logging.debug("len(data) is %d" % len(data)) - data, bins = remove_noise(data, 300, old=old) - n, labels, data = cluster(data, len(bins), old=old) - tour_dict = cluster_to_tour_model(data, labels, old=old) + data, bins = remove_noise(data, 300) + n, labels, data = cluster(data, len(bins)) + tour_dict = cluster_to_tour_model(data, labels) return tour_dict if __name__=='__main__': diff --git a/emission/analysis/modelling/tour_model/featurization.py b/emission/analysis/modelling/tour_model/featurization.py index ba5ae3167..d4ad6783b 100644 --- a/emission/analysis/modelling/tour_model/featurization.py +++ b/emission/analysis/modelling/tour_model/featurization.py @@ -1,18 +1,13 @@ # Standard imports import logging -import matplotlib -# matplotlib.use('Agg') import matplotlib.pyplot as plt -import math import numpy from sklearn.cluster import KMeans from sklearn import metrics import sys # our imports -from emission.core.wrapper.trip_old import Trip, Coordinate from kmedoid import kmedoids -import emission.storage.decorations.trip_queries as esdtq """ @@ -25,9 +20,8 @@ """ class featurization: - def __init__(self, data, old=True): + def __init__(self, data): self.data = data - self.is_old = old if not self.data: self.data = [] self.calculate_points() @@ -41,21 +35,14 @@ def calculate_points(self): if not self.data: return for trip in self.data: - if self.is_old: - start = trip.trip_start_location - end = trip.trip_end_location - else: - try: - start = trip.data.start_loc["coordinates"] - end = trip.data.end_loc["coordinates"] - except: - continue + try: + start = trip.data.start_loc["coordinates"] + end = trip.data.end_loc["coordinates"] + except: + continue if not (start and end): raise AttributeError('each trip 
must have valid start and end locations') - if self.is_old: - self.points.append([start.lon, start.lat, end.lon, end.lat]) - else: - self.points.append([start[0], start[1], end[0], end[1]]) + self.points.append([start[0], start[1], end[0], end[1]]) #cluster the data. input options: # - name (optional): the clustering algorithm to use. Options are 'kmeans' or 'kmedoids'. Default is kmeans. @@ -68,7 +55,7 @@ def cluster(self, name='kmeans', min_clusters=2, max_clusters=None): logging.debug("min_clusters < 2, setting min_clusters = 2") min_clusters = 2 if min_clusters > len(self.points): - sys.stderr.write('Maximum number of clusters is the number of data points.\n') + sys.stderr.write('Minimum number of clusters %d is greater than the number of data points %d.\n' % (min_clusters, len(self.points))) min_clusters = len(self.points)-1 if max_clusters == None: logging.debug("max_clusters is None, setting max_clusters = %d" % (len(self.points) - 1)) @@ -138,8 +125,8 @@ def check_clusters(self): if not self.labels: logging.debug('Please cluster before analyzing clusters.') return - logging.debug('number of clusters is %d' % str(self.clusters)) - logging.debug('silhouette score is %d' % str(self.sil)) + logging.debug('number of clusters is %d' % self.clusters) + logging.debug('silhouette score is %s' % self.sil) #map the clusters #TODO - move this to a file in emission.analysis.plotting to map clusters from the database diff --git a/emission/analysis/modelling/tour_model/representatives.py b/emission/analysis/modelling/tour_model/representatives.py index 88e71dcf1..31a16c5e3 100755 --- a/emission/analysis/modelling/tour_model/representatives.py +++ b/emission/analysis/modelling/tour_model/representatives.py @@ -3,9 +3,11 @@ import numpy import math import copy +import geojson as gj # our imports -from emission.core.wrapper.trip_old import Trip, Coordinate +import emission.core.wrapper.trip as ecwt +import emission.core.wrapper.entry as ecwe import 
emission.storage.decorations.analysis_timeseries_queries as esda @@ -26,9 +28,8 @@ class representatives: - def __init__(self, data, labels, old=True): + def __init__(self, data, labels): self.data = data - self.is_old = old if not self.data: self.data = [] self.labels = labels @@ -52,37 +53,46 @@ def list_clusters(self): self.clusters[a].append(self.data[i]) #get the representatives for each cluster + #I don't understand wtf this does + # Why are we getting the mean of the start and end points in the cluster and + # creating a fake trip from it? Why not just pick a real representative of + # of the trips? Alternatively, why not create a new data structure to represent + # that this is a reconstructed trip that has no bearing in reality? What does + # it even mean that we have a trip with only a start and end point and no + # actual start or end times? def get_reps(self): self.reps = [] if not self.data: return - for cluster in self.clusters: + for i, cluster in enumerate(self.clusters): + logging.debug("Considering cluster %d = %s" % (i, cluster)) points = [[], [], [], []] - for c in cluster: - if self.is_old: - points[0].append(c.trip_start_location.lat) - points[1].append(c.trip_start_location.lon) - points[2].append(c.trip_end_location.lat) - points[3].append(c.trip_end_location.lon) - else: - # We want (lat, lon) to be consistent with old above. - # But in the new, our data is in geojson so it is (lon, lat). - # Fix it by flipping the order of the indices - # Note also that we want to use the locations of the start - # and end places, not of the start point of the trip, which - # may be some distance away due to geofencing. 
- start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, - c.data.start_place) - end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, - c.data.end_place) - points[0].append(start_place.data.location["coordinates"][1]) - points[1].append(start_place.data.location["coordinates"][0]) - points[2].append(end_place.data.location["coordinates"][1]) - points[3].append(end_place.data.location["coordinates"][0]) - logging.debug("in representatives, endpoints have len = %s" % - len(points)) + + # If this cluster has no points, we skip it + if len(cluster) == 0: + logging.info("Cluster %d = %s, has length %d, skipping" % + (i, cluster, len(cluster))) + continue + + for j, c in enumerate(cluster): + logging.debug("Consider point %d = %s" % (j, c)) + start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + c.data.start_place) + end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + c.data.end_place) + points[0].append(start_place.data.location["coordinates"][1]) # lat + points[1].append(start_place.data.location["coordinates"][0]) # lng + points[2].append(end_place.data.location["coordinates"][1]) # lat + points[3].append(end_place.data.location["coordinates"][0]) # lng + logging.debug("in representatives, endpoints have len = %s" % + len(points)) centers = numpy.mean(points, axis=1) - a = Trip(None, None, None, None, None, None, Coordinate(centers[0], centers[1]), Coordinate(centers[2], centers[3])) + logging.debug("For cluster %d, centers are %s" % (i, centers)) + t = ecwt.Trip({ + "start_loc": gj.Point([centers[1], centers[0]]), + "end_loc": gj.Point([centers[3], centers[2]]) + }) + a = ecwe.Entry.create_entry(c.user_id, "analysis/cleaned_trip", t) self.reps.append(a) #map the representatives @@ -134,12 +144,12 @@ def locations(self): locs = [] for b in bin: if b[0] == 'start': - point = self.reps[b[1]].trip_start_location + point = self.reps[b[1]].data.start_loc if b[0] == 'end': - point = self.reps[b[1]].trip_end_location - locs.append([point.lat, point.lon]) + point = 
self.reps[b[1]].data.end_loc + locs.append(point.coordinates) locs = numpy.mean(locs, axis=0) - coord = Coordinate(locs[0], locs[1]) + coord = [locs[0], locs[1]] self.locs.append(coord) #create the input to the tour graph @@ -198,15 +208,16 @@ def cluster_dict(self): #check whether a point is close to all points in a bin def match(self, label, a, bin): if label == 'start': - pointa = self.reps[a].trip_start_location + pointa = self.reps[a].data.start_loc elif label == 'end': - pointa = self.reps[a].trip_end_location + pointa = self.reps[a].data.end_loc for b in bin: if b[0] == 'start': - pointb = self.reps[b[1]].trip_start_location + pointb = self.reps[b[1]].data.start_loc elif b[0] == 'end': - pointb = self.reps[b[1]].trip_end_location - if self.distance(pointa.lat, pointa.lon, pointb.lat, pointb.lon) > 300: + pointb = self.reps[b[1]].data.end_loc + if self.distance(pointa.coordinates[1], pointa.coordinates[0], + pointb.coordinates[1], pointb.coordinates[0]) > 300: return False return True diff --git a/emission/analysis/modelling/tour_model/similarity.py b/emission/analysis/modelling/tour_model/similarity.py index 77d1a83ba..c806d512d 100644 --- a/emission/analysis/modelling/tour_model/similarity.py +++ b/emission/analysis/modelling/tour_model/similarity.py @@ -6,11 +6,7 @@ import matplotlib.pyplot as plt import numpy from sklearn import metrics -import sys -from numpy import cross from numpy.linalg import norm -import emission.storage.decorations.trip_queries as esdtq -import emission.storage.decorations.section_queries as esdsq import emission.storage.decorations.analysis_timeseries_queries as esda """ @@ -30,41 +26,30 @@ """ class similarity: - def __init__(self, data, radius, old=True): + def __init__(self, data, radius): self.data = data if not data: self.data = [] self.bins = [] self.radius = float(radius) - self.old = old - if not old: - for a in self.data: - # print "a is %s" % a - t = a - try: - start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, - 
t.data.start_place) - end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, - t.data.end_place) - start_lon = start_place.data.location["coordinates"][0] - start_lat = start_place.data.location["coordinates"][1] - end_lon = end_place.data.location["coordinates"][0] - end_lat = end_place.data.location["coordinates"][1] - logging.debug("endpoints are = (%s, %s) and (%s, %s)" % - (start_lon, start_lat, end_lon, end_lat)) - if self.distance(start_lat, start_lon, end_lat, end_lon): - self.data.remove(a) - except: - logging.exception("exception while getting start and end places for %s" % t) - self.data.remove(a) - else: - for a in range(len(self.data)-1, -1, -1): - start_lat = self.data[a].trip_start_location.lat - start_lon = self.data[a].trip_start_location.lon - end_lat = self.data[a].trip_end_location.lat - end_lon = self.data[a].trip_end_location.lon + for t in self.data: + logging.debug("Considering trip %s" % t) + try: + start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + t.data.start_place) + end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + t.data.end_place) + start_lon = start_place.data.location["coordinates"][0] + start_lat = start_place.data.location["coordinates"][1] + end_lon = end_place.data.location["coordinates"][0] + end_lat = end_place.data.location["coordinates"][1] + logging.debug("endpoints are = (%s, %s) and (%s, %s)" % + (start_lon, start_lat, end_lon, end_lat)) if self.distance(start_lat, start_lon, end_lat, end_lon): - self.data.pop(a) + self.data.remove(t) + except: + logging.exception("exception while getting start and end places for %s" % t) + self.data.remove(t) logging.debug('After removing trips that are points, there are %s data points' % len(self.data)) self.size = len(self.data) @@ -146,12 +131,8 @@ def elbow_distance(self): #check if two trips match def match(self,a,bin): for b in bin: - if not self.old: - if not self.distance_helper_new(a,b): - return False - else: - if not self.distance_helper(a,b): - return False + if not 
self.distance_helper(a, b): + return False return True #create the histogram @@ -187,12 +168,19 @@ def evaluate_bins(self): points = [] for bin in self.bins: for b in bin: - start_lat = self.data[b].trip_start_location.lat - start_lon = self.data[b].trip_start_location.lon - end_lat = self.data[b].trip_end_location.lat - end_lon = self.data[b].trip_end_location.lon + tb = self.data[b] + start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + tb.data.start_place) + end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + tb.data.end_place) + start_lon = start_place.data.location["coordinates"][0] + start_lat = start_place.data.location["coordinates"][1] + end_lon = end_place.data.location["coordinates"][0] + end_lat = end_place.data.location["coordinates"][1] path = [start_lat, start_lon, end_lat, end_lon] points.append(path) + logging.debug("number of labels are %d, number of points are = %d" % + (len(labels), len(points))) a = metrics.silhouette_score(numpy.array(points), labels) logging.debug('number of bins is %d' % len(self.bins)) logging.debug('silhouette score is %d' % a) @@ -200,20 +188,8 @@ def evaluate_bins(self): #calculate the distance between two trips def distance_helper(self, a, b): - starta = self.data[a].trip_start_location - startb = self.data[b].trip_start_location - enda = self.data[a].trip_end_location - endb = self.data[b].trip_end_location - - start = self.distance(starta.lat, starta.lon, startb.lat, startb.lon) - end = self.distance(enda.lat, enda.lon, endb.lat, endb.lon) - if start and end: - return True - return False - - def distance_helper_new(self, a, b): - tripa = self.data[a] - tripb = self.data[b] + tripa = self.data[a].data + tripb = self.data[b].data starta = tripa.start_loc["coordinates"] startb = tripb.start_loc["coordinates"] From 28e6ff22665c4c4ddd82e1f074a3cf83d9d79de8 Mon Sep 17 00:00:00 2001 From: "K. 
Shankari" Date: Tue, 28 Jun 2016 00:25:55 -0700 Subject: [PATCH 2/8] Clean up unused imports in other tour model files While we are in here, we might as well clean up unused imports and ensure that nobody else is tempted to re-add the old trip wrappers --- emission/analysis/modelling/tour_model/K_medoid.py | 8 +------- .../modelling/tour_model/create_tour_model_matrix.py | 4 +--- .../analysis/modelling/tour_model/tour_model_matrix.py | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/emission/analysis/modelling/tour_model/K_medoid.py b/emission/analysis/modelling/tour_model/K_medoid.py index 7435a2224..602761c71 100644 --- a/emission/analysis/modelling/tour_model/K_medoid.py +++ b/emission/analysis/modelling/tour_model/K_medoid.py @@ -1,16 +1,10 @@ # Standard imports from __future__ import division -import logging -import numpy as np -import math import random -import time # Our imports -from emission.core.get_database import get_routeDistanceMatrix_db,get_routeCluster_db,get_section_db -from emission.core.common import calDistance, getDisplayModes +from emission.core.get_database import get_routeDistanceMatrix_db,get_section_db from emission.analysis.modelling.tour_model.trajectory_matching.route_matching import fullMatchDistance,getRoute -from emission.analysis.modelling.tour_model.trajectory_matching.LCS import lcsScore Sections=get_section_db() diff --git a/emission/analysis/modelling/tour_model/create_tour_model_matrix.py b/emission/analysis/modelling/tour_model/create_tour_model_matrix.py index 64bacf6ec..4af6c47eb 100644 --- a/emission/analysis/modelling/tour_model/create_tour_model_matrix.py +++ b/emission/analysis/modelling/tour_model/create_tour_model_matrix.py @@ -1,8 +1,6 @@ import logging -import emission.analysis.modelling.tour_model.tour_model_matrix as tm ##here -import emission.core.get_database as edb -import emission.core.wrapper.trip_old as trip +import emission.analysis.modelling.tour_model.tour_model_matrix as tm import 
emission.analysis.modelling.tour_model.cluster_pipeline as eamtcp from uuid import UUID import random, datetime, sys diff --git a/emission/analysis/modelling/tour_model/tour_model_matrix.py b/emission/analysis/modelling/tour_model/tour_model_matrix.py index 3b5f6af13..59b173b1f 100644 --- a/emission/analysis/modelling/tour_model/tour_model_matrix.py +++ b/emission/analysis/modelling/tour_model/tour_model_matrix.py @@ -5,7 +5,7 @@ # Standard imports import numpy as np -import math, datetime, heapq +import datetime, heapq import networkx as nx import matplotlib.pyplot as plt From 3840d795d02c364c7fba13fbf837e069175d3417 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 00:29:08 -0700 Subject: [PATCH 3/8] Change the invocations of the tour model code to match the new signature This is fairly straightforward, and was automatically done by PyCharm --- emission/storage/decorations/tour_model_queries.py | 2 +- emission/tests/analysisTests/TestUserModel.py | 4 ---- emission/tests/storageTests/TestCommonPlaceQueries.py | 2 +- emission/tests/storageTests/TestCommonTripQueries.py | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/emission/storage/decorations/tour_model_queries.py b/emission/storage/decorations/tour_model_queries.py index 9890b0d58..6f169c6b0 100644 --- a/emission/storage/decorations/tour_model_queries.py +++ b/emission/storage/decorations/tour_model_queries.py @@ -47,7 +47,7 @@ def get_common_trips(user_id): def make_tour_model_from_raw_user_data(user_id): try: - list_of_cluster_data = eamtmcp.main(user_id, False) + list_of_cluster_data = eamtmcp.main(user_id) esdcpq.create_places(list_of_cluster_data, user_id) esdctq.set_up_trips(list_of_cluster_data, user_id) except ValueError as e: diff --git a/emission/tests/analysisTests/TestUserModel.py b/emission/tests/analysisTests/TestUserModel.py index 154d71d3c..30c135d61 100644 --- a/emission/tests/analysisTests/TestUserModel.py +++ 
b/emission/tests/analysisTests/TestUserModel.py @@ -1,10 +1,6 @@ import unittest import emission.user_model_josh.utility_model as eum -import googlemaps import emission.net.ext_service.otp.otp as otp -import emission.net.ext_service.gmaps.googlemaps as gmaps -import emission.net.ext_service.gmaps.common as gmcommon -import emission.net.api.utility_model_api as umapi import datetime diff --git a/emission/tests/storageTests/TestCommonPlaceQueries.py b/emission/tests/storageTests/TestCommonPlaceQueries.py index e330bd184..c0563ae94 100644 --- a/emission/tests/storageTests/TestCommonPlaceQueries.py +++ b/emission/tests/storageTests/TestCommonPlaceQueries.py @@ -52,7 +52,7 @@ def testCreatePlace(self): estfm.move_all_filters_to_data() eaist.segment_current_trips(self.testUUID) eaiss.segment_current_sections(self.testUUID) - data = eamtcp.main(self.testUUID, False) + data = eamtcp.main(self.testUUID) esdcpq.create_places(data, self.testUUID) places = esdcpq.get_all_common_places_for_user(self.testUUID) places_list = [] diff --git a/emission/tests/storageTests/TestCommonTripQueries.py b/emission/tests/storageTests/TestCommonTripQueries.py index de7c37267..273321f5c 100644 --- a/emission/tests/storageTests/TestCommonTripQueries.py +++ b/emission/tests/storageTests/TestCommonTripQueries.py @@ -106,7 +106,7 @@ def testCreateFromData(self): def get_fake_data(user_name): # Call with a username unique to your database tg.create_fake_trips(user_name, True) - return eamtcp.main(user_name, old=False) + return eamtcp.main(user_name) if __name__ == "__main__": From 9af3d4e518bd2b2b883f36c1ee6ae39564d4581d Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 00:39:37 -0700 Subject: [PATCH 4/8] Fix import in the utility model There is no googlemaps in the standard path but we have checked in a version of the google maps with some minor modifications, and are already importing it. So we can just use it instead of the failed import. 
I am not quite sure how this test was working earlier. --- emission/user_model_josh/utility_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/emission/user_model_josh/utility_model.py b/emission/user_model_josh/utility_model.py index a8184f6f9..7c09fd95c 100644 --- a/emission/user_model_josh/utility_model.py +++ b/emission/user_model_josh/utility_model.py @@ -13,7 +13,6 @@ import json import heapq import time -import googlemaps import requests import random @@ -127,7 +126,7 @@ def get_top_choice_places(self, start_place, end_place): return self.get_top_choices_lat_lng(start, end) def get_all_trips(self, start, end, curr_time=None): - c = googlemaps.client.Client(GOOGLE_MAPS_KEY) + c = gmaps.client.Client(GOOGLE_MAPS_KEY) if curr_time is None: curr_time = datetime.datetime.now() curr_month = curr_time.month From 17736b9270ebd149af0eb0fc249448cfa8155fc5 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 00:57:36 -0700 Subject: [PATCH 5/8] Convert all tests to the new format as well It turns out that the tests were not ported over when the code was, so they were not actually testing the current code. This was exposed when the crutch for the old code was knocked away, and the tests all started failing. Ported over the tests to use the new data structures. Also did some minor refactoring/cleaning to move commonly used code into a common module so that it could be reused. The common code included setup, cleanup and sample trip creation functions. 
--- .../analysisTests/TestClusterPipeline.py | 32 +++---- .../tests/analysisTests/TestFeaturization.py | 46 +++++----- .../analysisTests/TestRepresentatives.py | 87 +++++++++--------- .../tests/analysisTests/TestSimilarity.py | 89 ++++++++++--------- .../analysisTests/tourModelTests/__init__.py | 0 .../analysisTests/tourModelTests/common.py | 46 ++++++++++ 6 files changed, 180 insertions(+), 120 deletions(-) create mode 100644 emission/tests/analysisTests/tourModelTests/__init__.py create mode 100644 emission/tests/analysisTests/tourModelTests/common.py diff --git a/emission/tests/analysisTests/TestClusterPipeline.py b/emission/tests/analysisTests/TestClusterPipeline.py index 6bd00146f..fecdd9cb3 100644 --- a/emission/tests/analysisTests/TestClusterPipeline.py +++ b/emission/tests/analysisTests/TestClusterPipeline.py @@ -32,59 +32,59 @@ def import_test_info(self): eaicr.clean_and_resample(self.testUUID) def testSanity(self): - cp.main(self.testUUID, False) + cp.main(self.testUUID) def testReadData(self): - data = cp.read_data(uuid=self.testUUID, old=False) + data = cp.read_data(uuid=self.testUUID) # Test to make sure something is happening self.assertTrue(len(data) > 5) # Test to make sure that the trips are mapped to the correct uuid - bad_data = cp.read_data(uuid="FakeUUID", old=False) + bad_data = cp.read_data(uuid="FakeUUID") self.assertEqual(len(bad_data), 0) def testRemoveNoise(self): - data = cp.read_data(uuid=self.testUUID, old=False) + data = cp.read_data(uuid=self.testUUID) # Test to make sure the code doesn't break on an empty dataset - new_data, bins = cp.remove_noise(None, self.RADIUS, False) + new_data, bins = cp.remove_noise(None, self.RADIUS) self.assertTrue(len(new_data) == len(bins) == 0) #Test to make sure some or no data was filtered out, but that nothing was added after filtering - new_data, bins = cp.remove_noise(None, self.RADIUS, False) + new_data, bins = cp.remove_noise(None, self.RADIUS) self.assertTrue(len(new_data) <= len(data)) # Make 
sure there are not more bins than data; that wouldnt make sense self.assertTrue(len(bins) <= len(data)) def testCluster(self): - data = cp.read_data(uuid=self.testUUID, old=False) + data = cp.read_data(uuid=self.testUUID) # Test to make sure empty dataset doesn't crash the program - clusters, labels, new_data = cp.cluster([], 10, False) + clusters, labels, new_data = cp.cluster([], 10) self.assertTrue(len(new_data) == clusters == len(labels) == 0) # Test to make sure clustering with noise works - clusters, labels, new_data = cp.cluster(data, 10, False) + clusters, labels, new_data = cp.cluster(data, 10) self.assertEqual(len(labels), len(new_data)) self.assertEqual(cmp(new_data, data), 0) # Test to make sure clustering without noise works - data, bins = cp.remove_noise(data, self.RADIUS, False) - clusters, labels, new_data = cp.cluster(data, len(bins), False) + data, bins = cp.remove_noise(data, self.RADIUS) + clusters, labels, new_data = cp.cluster(data, len(bins)) self.assertTrue(clusters == 0 or len(bins) <= clusters <= len(bins) + 10) def testClusterToTourModel(self): # Test to make sure it doesn't crash on a empty dataset - data = cp.cluster_to_tour_model(None, None, False) + data = cp.cluster_to_tour_model(None, None) self.assertFalse(data) # Test with the real dataset - data = cp.read_data(uuid=self.testUUID, old=False) - data, bins = cp.remove_noise(data, self.RADIUS, False) - n, labels, data = cp.cluster(data, len(bins), False) - tour_dict = cp.main(uuid=self.testUUID, old=False) + data = cp.read_data(uuid=self.testUUID) + data, bins = cp.remove_noise(data, self.RADIUS) + n, labels, data = cp.cluster(data, len(bins)) + tour_dict = cp.main(uuid=self.testUUID) self.assertTrue(len(tour_dict) <= n) diff --git a/emission/tests/analysisTests/TestFeaturization.py b/emission/tests/analysisTests/TestFeaturization.py index b6ff53240..bc14ce654 100644 --- a/emission/tests/analysisTests/TestFeaturization.py +++ b/emission/tests/analysisTests/TestFeaturization.py @@ 
-1,35 +1,36 @@ import unittest +import time +import uuid +import logging + import emission.core.get_database as edb -import sys import emission.analysis.modelling.tour_model.featurization as featurization -from emission.core.wrapper.trip_old import Trip, Coordinate import emission.analysis.modelling.tour_model.cluster_pipeline as cp -import emission.simulation.trip_gen as tg -import datetime -import os, os.path +import emission.storage.timeseries.abstract_timeseries as esta + +import emission.tests.analysisTests.tourModelTests.common as etatc class FeaturizationTests(unittest.TestCase): def __init__(self, *args, **kwargs): super(FeaturizationTests, self).__init__(*args, **kwargs) - self.data = cp.read_data(size=100) - print 'there are ' + str(len(self.data)) - #if len(self.data) == 0: - # tg.create_fake_trips() - # self.data = cp.read_data(size=100) def setUp(self): - pass + self.data = cp.read_data() + self.testUUID = uuid.uuid4() + self.ts = esta.TimeSeries.get_time_series(self.testUUID) + print 'there are ' + str(len(self.data)) def tearDown(self): - pass + edb.get_timeseries_db().remove({'user_id': self.testUUID}) + edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID}) def testCalculatePoints(self): feat = featurization.featurization([]) self.assertTrue(not feat.data) feat = featurization.featurization(None) self.assertTrue(not feat.data) - trip = Trip(None, None, None, None, None, None, None, None) + trip = etatc._createTripEntry(self, None, None, None, None) data = [trip] try: feat = featurization.featurization(data) @@ -64,17 +65,16 @@ def testCluster(self): self.assertTrue(False) data = [] - start = Coordinate(47,-122) - end = Coordinate(47,-123) + start = [-122, 47] + end = [-123,47] + now = time.time() for i in range(10): - now = datetime.datetime.now() - a = Trip(None, None, None, None, now, now, start, end) + a = etatc._createTripEntry(self, now, now, start, end) data.append(a) - start = Coordinate(41,-74) - end = Coordinate(42, -74) + 
start = [-74, 41] + end = [-74, 42] for i in range(10): - now = datetime.datetime.now() - a = Trip(None, None, None, None, now, now, start, end) + a = etatc._createTripEntry(self, now, now, start, end) data.append(a) feat = featurization.featurization(data) feat.cluster() @@ -87,8 +87,10 @@ def testCheckClusters(self): feat.cluster(min_clusters=2, max_clusters=10) try: feat.check_clusters() - except Exception: + except Exception, e: + logging.exception(e.message) self.assertTrue(False) if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/emission/tests/analysisTests/TestRepresentatives.py b/emission/tests/analysisTests/TestRepresentatives.py index 905b9b028..0c0e463e4 100644 --- a/emission/tests/analysisTests/TestRepresentatives.py +++ b/emission/tests/analysisTests/TestRepresentatives.py @@ -1,25 +1,24 @@ import unittest -import emission.core.get_database as edb +import time +import logging + import emission.analysis.modelling.tour_model.representatives as rep -import emission.simulation.trip_gen as tg import emission.analysis.modelling.tour_model.featurization as feat -import emission.analysis.modelling.tour_model.cluster_pipeline as cp -from emission.core.wrapper.trip_old import Trip, Coordinate + +import emission.tests.analysisTests.tourModelTests.common as etatc class RepresentativesTests(unittest.TestCase): def __init__(self, *args, **kwargs): super(RepresentativesTests, self).__init__(*args, **kwargs) - self.data = cp.read_data(size=100) - #if len(self.data) == 0: - # tg.create_fake_trips() - # self.data = cp.read_data(size=100) - print 'there are ' + str(len(self.data)) + + def setUp(self): + etatc._setup(self) n = len(self.data)/5 - self.labels = feat.featurization(self.data).cluster(min_clusters=n, max_clusters=n) + self.labels = feat.featurization(self.data).cluster(min_clusters=n, max_clusters=n) def tearDown(self): - pass + etatc._tearDown(self) def testInit(self): repy = rep.representatives(None, None) @@ 
-55,18 +54,20 @@ def testReps(self): repy.get_reps() self.assertTrue(len(repy.reps) == len(repy.clusters)) clusters = [0] - tripa = Trip(None, None, None, None, None, None, Coordinate(1,2), Coordinate(3,4)) - tripb = Trip(None, None, None, None, None, None, Coordinate(9,10), Coordinate(5,8)) - tripc = Trip(None, None, None, None, None, None, Coordinate(5,6), Coordinate(4,6)) + now = time.time() + tripa = etatc._createTripEntry(self, now, now, [1,2], [3,4]) + tripb = etatc._createTripEntry(self, now, now, [9,10], [5,8]) + tripc = etatc._createTripEntry(self, now, now, [5,6], [4,6]) data = [tripa, tripb, tripc] labels = [0,0,0] repy = rep.representatives(data, labels) repy.list_clusters() repy.get_reps() - self.assertTrue(repy.reps[0].trip_start_location.lat == 5) - self.assertTrue(repy.reps[0].trip_start_location.lon == 6) - self.assertTrue(repy.reps[0].trip_end_location.lat == 4) - self.assertTrue(repy.reps[0].trip_end_location.lon == 6) + logging.debug("repy.reps[0].data.start_loc = %s" % repy.reps[0].data.start_loc) + self.assertEqual(repy.reps[0].data.start_loc.coordinates[0], 5) + self.assertEqual(repy.reps[0].data.start_loc.coordinates[1], 6) + self.assertEqual(repy.reps[0].data.end_loc.coordinates[0], 4) + self.assertEqual(repy.reps[0].data.end_loc.coordinates[1], 6) def testLocations(self): repy = rep.representatives(self.data, self.labels) @@ -78,16 +79,17 @@ def testLocations(self): for i in range(len(bin)): b = bin[i] if b[0] == 'start': - a = repy.reps[b[1]].trip_start_location + a = repy.reps[b[1]].data.start_loc if b[0] == 'end': - a = repy.reps[b[1]].trip_end_location + a = repy.reps[b[1]].data.end_loc for j in range(i): c = bin[j] if c[0] == 'start': - d = repy.reps[c[1]].trip_start_location + d = repy.reps[c[1]].data.start_loc if c[0] == 'end': - d = repy.reps[c[1]].trip_end_location - self.assertTrue(repy.distance(a.lat, a.lon, d.lat, d.lon) < 300) + d = repy.reps[c[1]].data.end_loc + self.assertTrue(repy.distance(a.coordinates[1], 
a.coordinates[0], + d.coordinates[1], d.coordinates[0]) < 300) total += len(bin) self.assertTrue(total == 2 * repy.num_clusters) for i in range(repy.num_clusters): @@ -95,24 +97,25 @@ def testLocations(self): self.assertTrue(sum(bin.count(('end',i)) for bin in repy.bins) == 1) self.assertTrue(len(repy.locs) == len(repy.bins)) - tripa = Trip(None, None, None, None, None, None, Coordinate(1,2), Coordinate(30,40)) - tripb = Trip(None, None, None, None, None, None, Coordinate(1.0000002,2.0000002), Coordinate(55.0000002,85.0000002)) - tripc = Trip(None, None, None, None, None, None, Coordinate(30.0000002,40.0000002), Coordinate(55,85)) + now = time.time() + tripa = etatc._createTripEntry(self, now, now, [1,2], [30,40]) + tripb = etatc._createTripEntry(self, now, now, [1.0000002,2.0000002], [55.0000002,85.0000002]) + tripc = etatc._createTripEntry(self, now, now, [30.0000002,40.0000002], [55,85]) data = [tripa, tripb, tripc] labels = [0,1,2] repy = rep.representatives(data, labels) repy.list_clusters() repy.get_reps() repy.locations() - self.assertTrue(repy.bins[0] == [('start', 0), ('start', 1)]) - self.assertTrue(repy.bins[1] == [('end', 0), ('start', 2)]) - self.assertTrue(repy.bins[2] == [('end', 1), ('end', 2)]) - self.assertTrue(round(repy.locs[0].lat,7) == 1.0000001) - self.assertTrue(round(repy.locs[0].lon,7) == 2.0000001) - self.assertTrue(round(repy.locs[1].lat,7) == 30.0000001) - self.assertTrue(round(repy.locs[1].lon,7) == 40.0000001) - self.assertTrue(round(repy.locs[2].lat,7) == 55.0000001) - self.assertTrue(round(repy.locs[2].lon,7) == 85.0000001) + self.assertEqual(repy.bins[0], [('start', 0), ('start', 1)]) + self.assertEqual(repy.bins[1], [('end', 0), ('start', 2)]) + self.assertEqual(repy.bins[2], [('end', 1), ('end', 2)]) + self.assertAlmostEqual(repy.locs[0][0], 1.0000001, places=7) + self.assertAlmostEqual(repy.locs[0][1], 2.0000001, places=7) + self.assertAlmostEqual(repy.locs[1][0], 30.0000001, places=7) + self.assertAlmostEqual(repy.locs[1][1], 
40.0000001, places=7) + self.assertAlmostEqual(repy.locs[2][0], 55.0000001, places=7) + self.assertAlmostEqual(repy.locs[2][1], 85.0000001, places=7) def testClusterDict(self): repy = rep.representatives(self.data, self.labels) @@ -127,19 +130,20 @@ def testClusterDict(self): self.assertTrue(('start', i) in repy.bins[cluster['start']]) self.assertTrue(('end', i) in repy.bins[cluster['end']]) for d in repy.clusters[i]: - tripid = d.trip_id - tripy = next((x for x in cluster['sections'] if x.trip_id == tripid), None) + tripid = d.get_id() + tripy = next((x for x in cluster['sections'] if x.get_id() == tripid), None) self.assertTrue(tripy) - self.assertTrue(sum(sum(t.trip_id == tripid for t in cluster['sections']) for cluster in repy.self_loops_tour_dict) == 1) + self.assertTrue(sum(sum(t.get_id() == tripid for t in cluster['sections']) for cluster in repy.self_loops_tour_dict) == 1) for c in repy.tour_dict: self.assertTrue(c['start'] != c['end']) def testMatch(self): - tripa = Trip(None, None, None, None, None, None, Coordinate(1,2), Coordinate(3,4)) - tripb = Trip(None, None, None, None, None, None, Coordinate(3,4), Coordinate(1,2)) - tripc = Trip(None, None, None, None, None, None, Coordinate(1,2), Coordinate(9,10)) + now = time.time() + tripa = etatc._createTripEntry(self, now, now, [1,2], [3,4]) + tripb = etatc._createTripEntry(self, now, now, [3,4], [1,2]) + tripc = etatc._createTripEntry(self, now, now, [1,2], [9,10]) data = [tripa, tripb, tripc] labels = [0,1,2] @@ -155,4 +159,5 @@ def testMatch(self): self.assertTrue(not repy.match('end', 2, bin)) if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/emission/tests/analysisTests/TestSimilarity.py b/emission/tests/analysisTests/TestSimilarity.py index 4c7400a29..5a1002029 100644 --- a/emission/tests/analysisTests/TestSimilarity.py +++ b/emission/tests/analysisTests/TestSimilarity.py @@ -1,29 +1,36 @@ +import logging import unittest +import uuid +import time +import 
datetime +import os, os.path + +import emission.tests.analysisTests.tourModelTests.common as etatc + import emission.core.get_database as edb + import emission.analysis.modelling.tour_model.similarity as similarity -import emission.simulation.trip_gen as tg -import math -from emission.core.wrapper.trip_old import Trip, Coordinate import emission.analysis.modelling.tour_model.cluster_pipeline as cp -import datetime -import os, os.path + +import emission.storage.timeseries.abstract_timeseries as esta class SimilarityTests(unittest.TestCase): - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs): super(SimilarityTests, self).__init__(*args, **kwargs) - self.data = cp.read_data(size=100) + + def setUp(self): + self.testUUID = uuid.uuid4() + self.data = cp.read_data() #if len(self.data) == 0: # tg.create_fake_trips() # self.data = cp.read_data(size=100) - print 'there are ' + str(len(self.data)) - - def setUp(self): - pass + logging.info("Found %s trips" % len(self.data)) + self.ts = esta.TimeSeries.get_time_series(self.testUUID) def tearDown(self): - return - + edb.get_timeseries_db().remove({'user_id': self.testUUID}) + edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID}) def testInit(self): try: @@ -33,15 +40,18 @@ def testInit(self): except Exception: self.assertTrue(False) + logging.debug("STARTING init test") sim = similarity.similarity([], 100) self.assertTrue(len(sim.data) == 0) - now = datetime.datetime.now() - start = Coordinate(47,-122) - end = Coordinate(47,-123) - t1 = Trip(None, None, None, None, now, now, start, start) - t2 = Trip(None, None, None, None, now, now, start, end) + now = time.time() + start = [-122,47] + end = [-123,47] + t1 = etatc._createTripEntry(self, now, now, start, start) + t2 = etatc._createTripEntry(self, now, now, start, end) sim = similarity.similarity([t1, t2], 100) + logging.debug("sim.data = %s" % sim.data) simmy = similarity.similarity([t2], 100) + logging.debug("simmy.data = %s" % 
simmy.data) self.assertTrue(sim.data == simmy.data) def testBinData(self): @@ -62,17 +72,15 @@ def testBinData(self): self.assertTrue(len(sim.bins[i]) >= len(sim.bins[i+1])) data = [] - now = datetime.datetime.now() - start = Coordinate(47,-122) - end = Coordinate(47,-123) + now = time.time() + start = [-122, 47] + end = [-123, 47] for i in range(10): - a = Trip(None, None, None, None, now, now, start, end) - data.append(a) - start = Coordinate(41,-74) - end = Coordinate(42, -74) + data.append(etatc._createTripEntry(self, now, now, start, end)) + start = [-74, 41] + end = [-74, 42] for i in range(10): - a = Trip(None, None, None, None, now, now, start, end) - data.append(a) + data.append(etatc._createTripEntry(self, now, now, start, end)) sim = similarity.similarity(data, 300) sim.bin_data() self.assertTrue(len(sim.bins) == 2) @@ -86,10 +94,10 @@ def testDeleteBins(self): self.assertTrue(b == sim.num) def testElbowDistance(self): - start = Coordinate(47,-122) - end = Coordinate(47,-123) - now = datetime.datetime.now() - t = Trip(None, None, None, None, now, now, start, end) + start = [-122,47] + end = [-133,47] + now = time.time() + t = etatc._createTripEntry(self, now, now, start, end) data = [t] * 11 bins = [[1,2,3,4], [5,6,10], [7], [8], [9], [0]] sim = similarity.similarity(data, 300) @@ -106,15 +114,15 @@ def testMatch(self): self.assertTrue(sim.distance_helper(b,c)) def testDistance(self): - start = Coordinate(-122.259447, 37.875174) - end1 = Coordinate(-122.259279, 37.875479) - end2 = Coordinate(-122.252287, 37.869569) - now = datetime.datetime.now() - t1 = Trip(None, None, None, None, now, now, start, end1) - t2 = Trip(None, None, None, None, now, now, start, end2) + start = [-122.259447, 37.875174] + end1 = [-122.259279, 37.875479] + end2 = [-122.252287, 37.869569] + now = time.time() + t1 = etatc._createTripEntry(self, now, now, start, end1) + t2 = etatc._createTripEntry(self, now, now, start, end2) sim = similarity.similarity(self.data, 300) - 
self.assertTrue(sim.distance(start.lat, start.lon, end1.lat, end1.lon)) - self.assertTrue(not sim.distance(start.lat, start.lon, end2.lat, end2.lon)) + self.assertTrue(sim.distance(start[1], start[0], end1[1], end1[0])) + self.assertTrue(not sim.distance(start[1], start[0], end2[1], end2[0])) def testGraph(self): if os.path.isfile('./histogram.png'): @@ -136,12 +144,11 @@ def testEvaluateBins(self): a = sim.evaluate_bins() self.assertTrue(not a) sim = similarity.similarity(self.data, 300) - b = sim.evaluate_bins() - self.assertTrue(not b) sim.bin_data() c = sim.evaluate_bins() if sim.data: self.assertTrue(c) if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/emission/tests/analysisTests/tourModelTests/__init__.py b/emission/tests/analysisTests/tourModelTests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/emission/tests/analysisTests/tourModelTests/common.py b/emission/tests/analysisTests/tourModelTests/common.py new file mode 100644 index 000000000..47aabfae8 --- /dev/null +++ b/emission/tests/analysisTests/tourModelTests/common.py @@ -0,0 +1,46 @@ +import logging +import geojson as gj +import uuid + +import emission.core.wrapper.cleanedtrip as ecwct +import emission.core.wrapper.entry as ecwe +import emission.core.wrapper.cleanedplace as ecwcp +import emission.core.get_database as edb + +import emission.analysis.modelling.tour_model.cluster_pipeline as cp +import emission.storage.timeseries.abstract_timeseries as esta + +def _createTripEntry(self, start_ts, end_ts, start_loc, end_loc): + t = ecwct.Cleanedtrip() + t.start_ts = start_ts + t.end_ts = end_ts + t.start_loc = gj.Point(start_loc) + t.end_loc = gj.Point(end_loc) + sp = ecwcp.Cleanedplace() + sp.location = t.start_loc + sp.exit_ts = start_ts + ep = ecwcp.Cleanedplace() + ep.location = t.end_loc + ep.enter_ts = end_ts + spe = ecwe.Entry.create_entry(self.testUUID, "analysis/cleaned_place", sp, create_id=True) + epe = 
ecwe.Entry.create_entry(self.testUUID, "analysis/cleaned_place", ep, create_id=True) + t.start_place = spe.get_id() + t.end_place = epe.get_id() + te = ecwe.Entry.create_entry(self.testUUID, "analysis/cleaned_trip", t, create_id=True) + self.ts.insert(spe) + self.ts.insert(epe) + self.ts.insert(te) + return te + +def _setup(self): + self.data = cp.read_data() + #if len(self.data) == 0: + # tg.create_fake_trips() + # self.data = cp.read_data(size=100) + print 'there are ' + str(len(self.data)) + self.testUUID = uuid.uuid4() + self.ts = esta.TimeSeries.get_time_series(self.testUUID) + +def _tearDown(self): + edb.get_timeseries_db().remove({'user_id': self.testUUID}) + edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID}) From ff0c3bc4221b157a124711d144f01397bc098fc2 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 01:11:37 -0700 Subject: [PATCH 6/8] More fallout from the old -> new change The coordinates stored are now a direct array and do not need to be converted from a coordinate list --- .../storage/decorations/common_place_queries.py | 16 +++------------- .../storage/decorations/common_trip_queries.py | 4 ++-- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/emission/storage/decorations/common_place_queries.py b/emission/storage/decorations/common_place_queries.py index b0798633c..95736e109 100644 --- a/emission/storage/decorations/common_place_queries.py +++ b/emission/storage/decorations/common_place_queries.py @@ -42,17 +42,6 @@ def clear_existing_places(user_id): db = edb.get_common_place_db() db.remove({'user_id': user_id}) -def get_all_place_objs(common_place): - trip.trips = [unc_trip.get_id() for unc_trip in dct["sections"]] - place_db = edb.get_place_db() - start_places = [] - end_places = [] - for t in trip.trips: - start = place_db.find_one({"_id" : t.start_place}) - end = place_db.find_one({"_id" : t.end_place}) - start_places.append(start) - end_places.append(end) - 
################################################################################ def create_places(list_of_cluster_data, user_id): @@ -60,10 +49,11 @@ def create_places(list_of_cluster_data, user_id): places_dct = {} logging.debug("About to create places for %d clusters" % len(list_of_cluster_data)) for dct in list_of_cluster_data: + logging.debug("Current coords = %s" % dct) start_name = dct['start'] end_name = dct['end'] - start_loc = gj.Point(dct['start_coords'].coordinate_list()) - end_loc = gj.Point(dct['end_coords'].coordinate_list()) + start_loc = gj.Point(dct['start_coords']) + end_loc = gj.Point(dct['end_coords']) start_loc_str = gj.dumps(start_loc, sort_keys=True) end_loc_str = gj.dumps(end_loc, sort_keys=True) if start_loc_str not in places_to_successors: diff --git a/emission/storage/decorations/common_trip_queries.py b/emission/storage/decorations/common_trip_queries.py index 32eb3f56a..6b5d08ec0 100644 --- a/emission/storage/decorations/common_trip_queries.py +++ b/emission/storage/decorations/common_trip_queries.py @@ -103,8 +103,8 @@ def set_up_trips(list_of_cluster_data, user_id): for dct in list_of_cluster_data: start_times = [] durations = [] - start_loc = gj.Point(dct['start_coords'].coordinate_list()) - end_loc = gj.Point(dct['end_coords'].coordinate_list()) + start_loc = gj.Point(dct['start_coords']) + end_loc = gj.Point(dct['end_coords']) start_place_id = esdcpq.get_common_place_at_location(start_loc).get_id() end_place_id = esdcpq.get_common_place_at_location(end_loc).get_id() #print 'dct["sections"].trip_id %s is' % dct["sections"][0] From b1eed9e62c7df1f28c9a8c397a366041d46cd1fa Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 13:59:49 -0700 Subject: [PATCH 7/8] Replace other references to googlemaps with gmaps Thanks to @sunil07t for the catch! 
--- emission/user_model_josh/utility_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/emission/user_model_josh/utility_model.py b/emission/user_model_josh/utility_model.py index 7c09fd95c..685c7ffc1 100644 --- a/emission/user_model_josh/utility_model.py +++ b/emission/user_model_josh/utility_model.py @@ -504,9 +504,9 @@ def get_elevation_change(trip, testing=False): down = random.randint(1, 100) return (up, down) time.sleep(1) # so we dont run out calls - c = googlemaps.client.Client(GOOGLE_MAPS_KEY) + c = gmaps.client.Client(GOOGLE_MAPS_KEY) print get_route(trip) - jsn = googlemaps.elevation.elevation_along_path(c, get_route(trip), 200) + jsn = gmaps.elevation.elevation_along_path(c, get_route(trip), 200) up, down = 0, 0 prev = None for item in jsn: From 9ba9511b5a7f1b53c842b468f95409081a619a6c Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Wed, 29 Jun 2016 23:18:19 -0700 Subject: [PATCH 8/8] Fixed typo introduced while modifying tests Thanks to @sunil07t for the catch! --- emission/tests/analysisTests/TestSimilarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emission/tests/analysisTests/TestSimilarity.py b/emission/tests/analysisTests/TestSimilarity.py index 5a1002029..37c42bbac 100644 --- a/emission/tests/analysisTests/TestSimilarity.py +++ b/emission/tests/analysisTests/TestSimilarity.py @@ -95,7 +95,7 @@ def testDeleteBins(self): def testElbowDistance(self): start = [-122,47] - end = [-133,47] + end = [-123,47] now = time.time() t = etatc._createTripEntry(self, now, now, start, end) data = [t] * 11