From 2216c2330c817f7da0154f69c158a324d3be8bbb Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 00:13:20 -0700 Subject: [PATCH 1/8] Remove old, obsolete code to improve code clarity Before this, we had a different trip format, in which the points for the trips were stored directly in the sections. We have since switched to a format in which the points are stored in a timeseries and the trips and sections are generated and stored as separate objects that simply point to the original data. The tour modelling code had functions that would consume both kinds of data, choosing between them by using an "old" flag. This made the code much harder to read, since the old code was not pulled out into a separate module but was interspersed into several functions. This removes all the obsolete code. In particular, it removes the `old` flag, all conditional checks on the `old` flag, and all references to the old trip format. Now, there are no usages of the old trip format in the tour model. 
``` bash-3.2$ grep -r trip_old emission/analysis/modelling/tour_model/ | wc -l 0 ``` --- .../modelling/tour_model/cluster_pipeline.py | 61 ++++--------- .../modelling/tour_model/featurization.py | 33 +++---- .../modelling/tour_model/representatives.py | 85 ++++++++++-------- .../modelling/tour_model/similarity.py | 90 +++++++------------ 4 files changed, 107 insertions(+), 162 deletions(-) diff --git a/emission/analysis/modelling/tour_model/cluster_pipeline.py b/emission/analysis/modelling/tour_model/cluster_pipeline.py index 2489ca8cb..a5bfd8a6a 100644 --- a/emission/analysis/modelling/tour_model/cluster_pipeline.py +++ b/emission/analysis/modelling/tour_model/cluster_pipeline.py @@ -1,6 +1,5 @@ # Standard imports import math -import datetime import uuid as uu import sys import logging @@ -10,13 +9,6 @@ import emission.analysis.modelling.tour_model.similarity as similarity import emission.analysis.modelling.tour_model.featurization as featurization import emission.analysis.modelling.tour_model.representatives as representatives - -from emission.core.wrapper.trip_old import Trip, Section, Fake_Trip - -import emission.core.wrapper.trip as ecwt -import emission.core.wrapper.section as ecws -import emission.storage.decorations.trip_queries as ecsdtq -import emission.storage.decorations.section_queries as ecsdsq import emission.storage.decorations.analysis_timeseries_queries as esda """ @@ -41,39 +33,18 @@ """ #read the data from the database. 
-def read_data(uuid=None, size=None, old=True): +def read_data(uuid=None): db = edb.get_trip_db() - if not old: - logging.debug("not old") - trips = esda.get_entries(esda.CLEANED_TRIP_KEY, uuid, - time_query=None, geo_query=None) - return trips - - if old: - data = [] - trip_db = db - if uuid: - trips = trip_db.find({'user_id' : uuid, 'type' : 'move'}) - else: - trips = trip_db.find({'type' : 'move'}) - for t in trips: - try: - trip = Trip.trip_from_json(t) - except: - continue - if not (trip.trip_start_location and trip.trip_end_location and trip.start_time): - continue - data.append(trip) - if size: - if len(data) == size: - break - return data + trips = esda.get_entries(esda.CLEANED_TRIP_KEY, uuid, + time_query=None, geo_query=None) + logging.info("After reading data, returning %s trips" % len(trips)) + return trips #put the data into bins and cut off the lower portion of the bins -def remove_noise(data, radius, old=True): +def remove_noise(data, radius): if not data: return [], [] - sim = similarity.similarity(data, radius, old) + sim = similarity.similarity(data, radius) sim.bin_data() logging.debug('number of bins before filtering: %d' % len(sim.bins)) sim.delete_bins() @@ -81,10 +52,10 @@ def remove_noise(data, radius, old=True): return sim.newdata, sim.bins #cluster the data using k-means -def cluster(data, bins, old=True): +def cluster(data, bins): if not data: return 0, [], [] - feat = featurization.featurization(data, old=old) + feat = featurization.featurization(data) min = bins max = int(math.ceil(1.5 * bins)) feat.cluster(min_clusters=min, max_clusters=max) @@ -92,10 +63,10 @@ def cluster(data, bins, old=True): return feat.clusters, feat.labels, feat.data #prepare the data for the tour model -def cluster_to_tour_model(data, labels, old=True): +def cluster_to_tour_model(data, labels): if not data: return [] - repy = representatives.representatives(data, labels, old=old) + repy = representatives.representatives(data, labels) repy.list_clusters() 
repy.get_reps() repy.locations() @@ -103,12 +74,12 @@ def cluster_to_tour_model(data, labels, old=True): repy.cluster_dict() return repy.tour_dict -def main(uuid=None, old=True): - data = read_data(uuid, old=old) +def main(uuid=None): + data = read_data(uuid) logging.debug("len(data) is %d" % len(data)) - data, bins = remove_noise(data, 300, old=old) - n, labels, data = cluster(data, len(bins), old=old) - tour_dict = cluster_to_tour_model(data, labels, old=old) + data, bins = remove_noise(data, 300) + n, labels, data = cluster(data, len(bins)) + tour_dict = cluster_to_tour_model(data, labels) return tour_dict if __name__=='__main__': diff --git a/emission/analysis/modelling/tour_model/featurization.py b/emission/analysis/modelling/tour_model/featurization.py index ba5ae3167..d4ad6783b 100644 --- a/emission/analysis/modelling/tour_model/featurization.py +++ b/emission/analysis/modelling/tour_model/featurization.py @@ -1,18 +1,13 @@ # Standard imports import logging -import matplotlib -# matplotlib.use('Agg') import matplotlib.pyplot as plt -import math import numpy from sklearn.cluster import KMeans from sklearn import metrics import sys # our imports -from emission.core.wrapper.trip_old import Trip, Coordinate from kmedoid import kmedoids -import emission.storage.decorations.trip_queries as esdtq """ @@ -25,9 +20,8 @@ """ class featurization: - def __init__(self, data, old=True): + def __init__(self, data): self.data = data - self.is_old = old if not self.data: self.data = [] self.calculate_points() @@ -41,21 +35,14 @@ def calculate_points(self): if not self.data: return for trip in self.data: - if self.is_old: - start = trip.trip_start_location - end = trip.trip_end_location - else: - try: - start = trip.data.start_loc["coordinates"] - end = trip.data.end_loc["coordinates"] - except: - continue + try: + start = trip.data.start_loc["coordinates"] + end = trip.data.end_loc["coordinates"] + except: + continue if not (start and end): raise AttributeError('each trip 
must have valid start and end locations') - if self.is_old: - self.points.append([start.lon, start.lat, end.lon, end.lat]) - else: - self.points.append([start[0], start[1], end[0], end[1]]) + self.points.append([start[0], start[1], end[0], end[1]]) #cluster the data. input options: # - name (optional): the clustering algorithm to use. Options are 'kmeans' or 'kmedoids'. Default is kmeans. @@ -68,7 +55,7 @@ def cluster(self, name='kmeans', min_clusters=2, max_clusters=None): logging.debug("min_clusters < 2, setting min_clusters = 2") min_clusters = 2 if min_clusters > len(self.points): - sys.stderr.write('Maximum number of clusters is the number of data points.\n') + sys.stderr.write('Minimum number of clusters %d is greater than the number of data points %d.\n' % (min_clusters, len(self.points))) min_clusters = len(self.points)-1 if max_clusters == None: logging.debug("max_clusters is None, setting max_clusters = %d" % (len(self.points) - 1)) @@ -138,8 +125,8 @@ def check_clusters(self): if not self.labels: logging.debug('Please cluster before analyzing clusters.') return - logging.debug('number of clusters is %d' % str(self.clusters)) - logging.debug('silhouette score is %d' % str(self.sil)) + logging.debug('number of clusters is %d' % self.clusters) + logging.debug('silhouette score is %s' % self.sil) #map the clusters #TODO - move this to a file in emission.analysis.plotting to map clusters from the database diff --git a/emission/analysis/modelling/tour_model/representatives.py b/emission/analysis/modelling/tour_model/representatives.py index 88e71dcf1..31a16c5e3 100755 --- a/emission/analysis/modelling/tour_model/representatives.py +++ b/emission/analysis/modelling/tour_model/representatives.py @@ -3,9 +3,11 @@ import numpy import math import copy +import geojson as gj # our imports -from emission.core.wrapper.trip_old import Trip, Coordinate +import emission.core.wrapper.trip as ecwt +import emission.core.wrapper.entry as ecwe import 
emission.storage.decorations.analysis_timeseries_queries as esda @@ -26,9 +28,8 @@ class representatives: - def __init__(self, data, labels, old=True): + def __init__(self, data, labels): self.data = data - self.is_old = old if not self.data: self.data = [] self.labels = labels @@ -52,37 +53,46 @@ def list_clusters(self): self.clusters[a].append(self.data[i]) #get the representatives for each cluster + #I don't understand wtf this does + # Why are we getting the mean of the start and end points in the cluster and + # creating a fake trip from it? Why not just pick a real representative of + # of the trips? Alternatively, why not create a new data structure to represent + # that this is a reconstructed trip that has no bearing in reality? What does + # it even mean that we have a trip with only a start and end point and no + # actual start or end times? def get_reps(self): self.reps = [] if not self.data: return - for cluster in self.clusters: + for i, cluster in enumerate(self.clusters): + logging.debug("Considering cluster %d = %s" % (i, cluster)) points = [[], [], [], []] - for c in cluster: - if self.is_old: - points[0].append(c.trip_start_location.lat) - points[1].append(c.trip_start_location.lon) - points[2].append(c.trip_end_location.lat) - points[3].append(c.trip_end_location.lon) - else: - # We want (lat, lon) to be consistent with old above. - # But in the new, our data is in geojson so it is (lon, lat). - # Fix it by flipping the order of the indices - # Note also that we want to use the locations of the start - # and end places, not of the start point of the trip, which - # may be some distance away due to geofencing. 
- start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, - c.data.start_place) - end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, - c.data.end_place) - points[0].append(start_place.data.location["coordinates"][1]) - points[1].append(start_place.data.location["coordinates"][0]) - points[2].append(end_place.data.location["coordinates"][1]) - points[3].append(end_place.data.location["coordinates"][0]) - logging.debug("in representatives, endpoints have len = %s" % - len(points)) + + # If this cluster has no points, we skip it + if len(cluster) == 0: + logging.info("Cluster %d = %s, has length %d, skipping" % + (i, cluster, len(cluster))) + continue + + for j, c in enumerate(cluster): + logging.debug("Consider point %d = %s" % (j, c)) + start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + c.data.start_place) + end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + c.data.end_place) + points[0].append(start_place.data.location["coordinates"][1]) # lat + points[1].append(start_place.data.location["coordinates"][0]) # lng + points[2].append(end_place.data.location["coordinates"][1]) # lat + points[3].append(end_place.data.location["coordinates"][0]) # lng + logging.debug("in representatives, endpoints have len = %s" % + len(points)) centers = numpy.mean(points, axis=1) - a = Trip(None, None, None, None, None, None, Coordinate(centers[0], centers[1]), Coordinate(centers[2], centers[3])) + logging.debug("For cluster %d, centers are %s" % (i, centers)) + t = ecwt.Trip({ + "start_loc": gj.Point([centers[1], centers[0]]), + "end_loc": gj.Point([centers[3], centers[2]]) + }) + a = ecwe.Entry.create_entry(c.user_id, "analysis/cleaned_trip", t) self.reps.append(a) #map the representatives @@ -134,12 +144,12 @@ def locations(self): locs = [] for b in bin: if b[0] == 'start': - point = self.reps[b[1]].trip_start_location + point = self.reps[b[1]].data.start_loc if b[0] == 'end': - point = self.reps[b[1]].trip_end_location - locs.append([point.lat, point.lon]) + point = 
self.reps[b[1]].data.end_loc + locs.append(point.coordinates) locs = numpy.mean(locs, axis=0) - coord = Coordinate(locs[0], locs[1]) + coord = [locs[0], locs[1]] self.locs.append(coord) #create the input to the tour graph @@ -198,15 +208,16 @@ def cluster_dict(self): #check whether a point is close to all points in a bin def match(self, label, a, bin): if label == 'start': - pointa = self.reps[a].trip_start_location + pointa = self.reps[a].data.start_loc elif label == 'end': - pointa = self.reps[a].trip_end_location + pointa = self.reps[a].data.end_loc for b in bin: if b[0] == 'start': - pointb = self.reps[b[1]].trip_start_location + pointb = self.reps[b[1]].data.start_loc elif b[0] == 'end': - pointb = self.reps[b[1]].trip_end_location - if self.distance(pointa.lat, pointa.lon, pointb.lat, pointb.lon) > 300: + pointb = self.reps[b[1]].data.end_loc + if self.distance(pointa.coordinates[1], pointa.coordinates[0], + pointb.coordinates[1], pointb.coordinates[0]) > 300: return False return True diff --git a/emission/analysis/modelling/tour_model/similarity.py b/emission/analysis/modelling/tour_model/similarity.py index 77d1a83ba..c806d512d 100644 --- a/emission/analysis/modelling/tour_model/similarity.py +++ b/emission/analysis/modelling/tour_model/similarity.py @@ -6,11 +6,7 @@ import matplotlib.pyplot as plt import numpy from sklearn import metrics -import sys -from numpy import cross from numpy.linalg import norm -import emission.storage.decorations.trip_queries as esdtq -import emission.storage.decorations.section_queries as esdsq import emission.storage.decorations.analysis_timeseries_queries as esda """ @@ -30,41 +26,30 @@ """ class similarity: - def __init__(self, data, radius, old=True): + def __init__(self, data, radius): self.data = data if not data: self.data = [] self.bins = [] self.radius = float(radius) - self.old = old - if not old: - for a in self.data: - # print "a is %s" % a - t = a - try: - start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, - 
t.data.start_place) - end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, - t.data.end_place) - start_lon = start_place.data.location["coordinates"][0] - start_lat = start_place.data.location["coordinates"][1] - end_lon = end_place.data.location["coordinates"][0] - end_lat = end_place.data.location["coordinates"][1] - logging.debug("endpoints are = (%s, %s) and (%s, %s)" % - (start_lon, start_lat, end_lon, end_lat)) - if self.distance(start_lat, start_lon, end_lat, end_lon): - self.data.remove(a) - except: - logging.exception("exception while getting start and end places for %s" % t) - self.data.remove(a) - else: - for a in range(len(self.data)-1, -1, -1): - start_lat = self.data[a].trip_start_location.lat - start_lon = self.data[a].trip_start_location.lon - end_lat = self.data[a].trip_end_location.lat - end_lon = self.data[a].trip_end_location.lon + for t in self.data: + logging.debug("Considering trip %s" % t) + try: + start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + t.data.start_place) + end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + t.data.end_place) + start_lon = start_place.data.location["coordinates"][0] + start_lat = start_place.data.location["coordinates"][1] + end_lon = end_place.data.location["coordinates"][0] + end_lat = end_place.data.location["coordinates"][1] + logging.debug("endpoints are = (%s, %s) and (%s, %s)" % + (start_lon, start_lat, end_lon, end_lat)) if self.distance(start_lat, start_lon, end_lat, end_lon): - self.data.pop(a) + self.data.remove(t) + except: + logging.exception("exception while getting start and end places for %s" % t) + self.data.remove(t) logging.debug('After removing trips that are points, there are %s data points' % len(self.data)) self.size = len(self.data) @@ -146,12 +131,8 @@ def elbow_distance(self): #check if two trips match def match(self,a,bin): for b in bin: - if not self.old: - if not self.distance_helper_new(a,b): - return False - else: - if not self.distance_helper(a,b): - return False + if not 
self.distance_helper(a, b): + return False return True #create the histogram @@ -187,12 +168,19 @@ def evaluate_bins(self): points = [] for bin in self.bins: for b in bin: - start_lat = self.data[b].trip_start_location.lat - start_lon = self.data[b].trip_start_location.lon - end_lat = self.data[b].trip_end_location.lat - end_lon = self.data[b].trip_end_location.lon + tb = self.data[b] + start_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + tb.data.start_place) + end_place = esda.get_entry(esda.CLEANED_PLACE_KEY, + tb.data.end_place) + start_lon = start_place.data.location["coordinates"][0] + start_lat = start_place.data.location["coordinates"][1] + end_lon = end_place.data.location["coordinates"][0] + end_lat = end_place.data.location["coordinates"][1] path = [start_lat, start_lon, end_lat, end_lon] points.append(path) + logging.debug("number of labels are %d, number of points are = %d" % + (len(labels), len(points))) a = metrics.silhouette_score(numpy.array(points), labels) logging.debug('number of bins is %d' % len(self.bins)) logging.debug('silhouette score is %d' % a) @@ -200,20 +188,8 @@ def evaluate_bins(self): #calculate the distance between two trips def distance_helper(self, a, b): - starta = self.data[a].trip_start_location - startb = self.data[b].trip_start_location - enda = self.data[a].trip_end_location - endb = self.data[b].trip_end_location - - start = self.distance(starta.lat, starta.lon, startb.lat, startb.lon) - end = self.distance(enda.lat, enda.lon, endb.lat, endb.lon) - if start and end: - return True - return False - - def distance_helper_new(self, a, b): - tripa = self.data[a] - tripb = self.data[b] + tripa = self.data[a].data + tripb = self.data[b].data starta = tripa.start_loc["coordinates"] startb = tripb.start_loc["coordinates"] From 28e6ff22665c4c4ddd82e1f074a3cf83d9d79de8 Mon Sep 17 00:00:00 2001 From: "K. 
Shankari" Date: Tue, 28 Jun 2016 00:25:55 -0700 Subject: [PATCH 2/8] Clean up unused imports in other tour model files While we are in here, we might as well clean up unused imports and ensure that nobody else is tempted to re-add the old trip wrappers --- emission/analysis/modelling/tour_model/K_medoid.py | 8 +------- .../modelling/tour_model/create_tour_model_matrix.py | 4 +--- .../analysis/modelling/tour_model/tour_model_matrix.py | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/emission/analysis/modelling/tour_model/K_medoid.py b/emission/analysis/modelling/tour_model/K_medoid.py index 7435a2224..602761c71 100644 --- a/emission/analysis/modelling/tour_model/K_medoid.py +++ b/emission/analysis/modelling/tour_model/K_medoid.py @@ -1,16 +1,10 @@ # Standard imports from __future__ import division -import logging -import numpy as np -import math import random -import time # Our imports -from emission.core.get_database import get_routeDistanceMatrix_db,get_routeCluster_db,get_section_db -from emission.core.common import calDistance, getDisplayModes +from emission.core.get_database import get_routeDistanceMatrix_db,get_section_db from emission.analysis.modelling.tour_model.trajectory_matching.route_matching import fullMatchDistance,getRoute -from emission.analysis.modelling.tour_model.trajectory_matching.LCS import lcsScore Sections=get_section_db() diff --git a/emission/analysis/modelling/tour_model/create_tour_model_matrix.py b/emission/analysis/modelling/tour_model/create_tour_model_matrix.py index 64bacf6ec..4af6c47eb 100644 --- a/emission/analysis/modelling/tour_model/create_tour_model_matrix.py +++ b/emission/analysis/modelling/tour_model/create_tour_model_matrix.py @@ -1,8 +1,6 @@ import logging -import emission.analysis.modelling.tour_model.tour_model_matrix as tm ##here -import emission.core.get_database as edb -import emission.core.wrapper.trip_old as trip +import emission.analysis.modelling.tour_model.tour_model_matrix as tm import 
emission.analysis.modelling.tour_model.cluster_pipeline as eamtcp from uuid import UUID import random, datetime, sys diff --git a/emission/analysis/modelling/tour_model/tour_model_matrix.py b/emission/analysis/modelling/tour_model/tour_model_matrix.py index 3b5f6af13..59b173b1f 100644 --- a/emission/analysis/modelling/tour_model/tour_model_matrix.py +++ b/emission/analysis/modelling/tour_model/tour_model_matrix.py @@ -5,7 +5,7 @@ # Standard imports import numpy as np -import math, datetime, heapq +import datetime, heapq import networkx as nx import matplotlib.pyplot as plt From 3840d795d02c364c7fba13fbf837e069175d3417 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 00:29:08 -0700 Subject: [PATCH 3/8] Change the invocations of the tour model code to match the new signature This is fairly straightforward, and was automatically done by PyCharm --- emission/storage/decorations/tour_model_queries.py | 2 +- emission/tests/analysisTests/TestUserModel.py | 4 ---- emission/tests/storageTests/TestCommonPlaceQueries.py | 2 +- emission/tests/storageTests/TestCommonTripQueries.py | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/emission/storage/decorations/tour_model_queries.py b/emission/storage/decorations/tour_model_queries.py index 9890b0d58..6f169c6b0 100644 --- a/emission/storage/decorations/tour_model_queries.py +++ b/emission/storage/decorations/tour_model_queries.py @@ -47,7 +47,7 @@ def get_common_trips(user_id): def make_tour_model_from_raw_user_data(user_id): try: - list_of_cluster_data = eamtmcp.main(user_id, False) + list_of_cluster_data = eamtmcp.main(user_id) esdcpq.create_places(list_of_cluster_data, user_id) esdctq.set_up_trips(list_of_cluster_data, user_id) except ValueError as e: diff --git a/emission/tests/analysisTests/TestUserModel.py b/emission/tests/analysisTests/TestUserModel.py index 154d71d3c..30c135d61 100644 --- a/emission/tests/analysisTests/TestUserModel.py +++ 
b/emission/tests/analysisTests/TestUserModel.py @@ -1,10 +1,6 @@ import unittest import emission.user_model_josh.utility_model as eum -import googlemaps import emission.net.ext_service.otp.otp as otp -import emission.net.ext_service.gmaps.googlemaps as gmaps -import emission.net.ext_service.gmaps.common as gmcommon -import emission.net.api.utility_model_api as umapi import datetime diff --git a/emission/tests/storageTests/TestCommonPlaceQueries.py b/emission/tests/storageTests/TestCommonPlaceQueries.py index e330bd184..c0563ae94 100644 --- a/emission/tests/storageTests/TestCommonPlaceQueries.py +++ b/emission/tests/storageTests/TestCommonPlaceQueries.py @@ -52,7 +52,7 @@ def testCreatePlace(self): estfm.move_all_filters_to_data() eaist.segment_current_trips(self.testUUID) eaiss.segment_current_sections(self.testUUID) - data = eamtcp.main(self.testUUID, False) + data = eamtcp.main(self.testUUID) esdcpq.create_places(data, self.testUUID) places = esdcpq.get_all_common_places_for_user(self.testUUID) places_list = [] diff --git a/emission/tests/storageTests/TestCommonTripQueries.py b/emission/tests/storageTests/TestCommonTripQueries.py index de7c37267..273321f5c 100644 --- a/emission/tests/storageTests/TestCommonTripQueries.py +++ b/emission/tests/storageTests/TestCommonTripQueries.py @@ -106,7 +106,7 @@ def testCreateFromData(self): def get_fake_data(user_name): # Call with a username unique to your database tg.create_fake_trips(user_name, True) - return eamtcp.main(user_name, old=False) + return eamtcp.main(user_name) if __name__ == "__main__": From 9af3d4e518bd2b2b883f36c1ee6ae39564d4581d Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 00:39:37 -0700 Subject: [PATCH 4/8] Fix import in the utility model There is no googlemaps in the standard path but we have checked in a version of the google maps with some minor modifications, and are already importing it. So we can just use it instead of the failed import. 
I am not quite sure how this test was working earlier. --- emission/user_model_josh/utility_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/emission/user_model_josh/utility_model.py b/emission/user_model_josh/utility_model.py index a8184f6f9..7c09fd95c 100644 --- a/emission/user_model_josh/utility_model.py +++ b/emission/user_model_josh/utility_model.py @@ -13,7 +13,6 @@ import json import heapq import time -import googlemaps import requests import random @@ -127,7 +126,7 @@ def get_top_choice_places(self, start_place, end_place): return self.get_top_choices_lat_lng(start, end) def get_all_trips(self, start, end, curr_time=None): - c = googlemaps.client.Client(GOOGLE_MAPS_KEY) + c = gmaps.client.Client(GOOGLE_MAPS_KEY) if curr_time is None: curr_time = datetime.datetime.now() curr_month = curr_time.month From 17736b9270ebd149af0eb0fc249448cfa8155fc5 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 00:57:36 -0700 Subject: [PATCH 5/8] Convert all tests to the new format as well It turns out that the tests were not ported over when the code was, so they were not actually testing the current code. This was exposed when the crutch for the old code was knocked away, and the tests all started failing. Ported over the tests to use the new data structures. Also did some minor refactoring/cleaning to move commonly used code into a common module so that it could be reused. The common code included setup, cleanup and sample trip creation functions. 
--- .../analysisTests/TestClusterPipeline.py | 32 +++---- .../tests/analysisTests/TestFeaturization.py | 46 +++++----- .../analysisTests/TestRepresentatives.py | 87 +++++++++--------- .../tests/analysisTests/TestSimilarity.py | 89 ++++++++++--------- .../analysisTests/tourModelTests/__init__.py | 0 .../analysisTests/tourModelTests/common.py | 46 ++++++++++ 6 files changed, 180 insertions(+), 120 deletions(-) create mode 100644 emission/tests/analysisTests/tourModelTests/__init__.py create mode 100644 emission/tests/analysisTests/tourModelTests/common.py diff --git a/emission/tests/analysisTests/TestClusterPipeline.py b/emission/tests/analysisTests/TestClusterPipeline.py index 6bd00146f..fecdd9cb3 100644 --- a/emission/tests/analysisTests/TestClusterPipeline.py +++ b/emission/tests/analysisTests/TestClusterPipeline.py @@ -32,59 +32,59 @@ def import_test_info(self): eaicr.clean_and_resample(self.testUUID) def testSanity(self): - cp.main(self.testUUID, False) + cp.main(self.testUUID) def testReadData(self): - data = cp.read_data(uuid=self.testUUID, old=False) + data = cp.read_data(uuid=self.testUUID) # Test to make sure something is happening self.assertTrue(len(data) > 5) # Test to make sure that the trips are mapped to the correct uuid - bad_data = cp.read_data(uuid="FakeUUID", old=False) + bad_data = cp.read_data(uuid="FakeUUID") self.assertEqual(len(bad_data), 0) def testRemoveNoise(self): - data = cp.read_data(uuid=self.testUUID, old=False) + data = cp.read_data(uuid=self.testUUID) # Test to make sure the code doesn't break on an empty dataset - new_data, bins = cp.remove_noise(None, self.RADIUS, False) + new_data, bins = cp.remove_noise(None, self.RADIUS) self.assertTrue(len(new_data) == len(bins) == 0) #Test to make sure some or no data was filtered out, but that nothing was added after filtering - new_data, bins = cp.remove_noise(None, self.RADIUS, False) + new_data, bins = cp.remove_noise(None, self.RADIUS) self.assertTrue(len(new_data) <= len(data)) # Make 
sure there are not more bins than data; that wouldnt make sense self.assertTrue(len(bins) <= len(data)) def testCluster(self): - data = cp.read_data(uuid=self.testUUID, old=False) + data = cp.read_data(uuid=self.testUUID) # Test to make sure empty dataset doesn't crash the program - clusters, labels, new_data = cp.cluster([], 10, False) + clusters, labels, new_data = cp.cluster([], 10) self.assertTrue(len(new_data) == clusters == len(labels) == 0) # Test to make sure clustering with noise works - clusters, labels, new_data = cp.cluster(data, 10, False) + clusters, labels, new_data = cp.cluster(data, 10) self.assertEqual(len(labels), len(new_data)) self.assertEqual(cmp(new_data, data), 0) # Test to make sure clustering without noise works - data, bins = cp.remove_noise(data, self.RADIUS, False) - clusters, labels, new_data = cp.cluster(data, len(bins), False) + data, bins = cp.remove_noise(data, self.RADIUS) + clusters, labels, new_data = cp.cluster(data, len(bins)) self.assertTrue(clusters == 0 or len(bins) <= clusters <= len(bins) + 10) def testClusterToTourModel(self): # Test to make sure it doesn't crash on a empty dataset - data = cp.cluster_to_tour_model(None, None, False) + data = cp.cluster_to_tour_model(None, None) self.assertFalse(data) # Test with the real dataset - data = cp.read_data(uuid=self.testUUID, old=False) - data, bins = cp.remove_noise(data, self.RADIUS, False) - n, labels, data = cp.cluster(data, len(bins), False) - tour_dict = cp.main(uuid=self.testUUID, old=False) + data = cp.read_data(uuid=self.testUUID) + data, bins = cp.remove_noise(data, self.RADIUS) + n, labels, data = cp.cluster(data, len(bins)) + tour_dict = cp.main(uuid=self.testUUID) self.assertTrue(len(tour_dict) <= n) diff --git a/emission/tests/analysisTests/TestFeaturization.py b/emission/tests/analysisTests/TestFeaturization.py index b6ff53240..bc14ce654 100644 --- a/emission/tests/analysisTests/TestFeaturization.py +++ b/emission/tests/analysisTests/TestFeaturization.py @@ 
-1,35 +1,36 @@ import unittest +import time +import uuid +import logging + import emission.core.get_database as edb -import sys import emission.analysis.modelling.tour_model.featurization as featurization -from emission.core.wrapper.trip_old import Trip, Coordinate import emission.analysis.modelling.tour_model.cluster_pipeline as cp -import emission.simulation.trip_gen as tg -import datetime -import os, os.path +import emission.storage.timeseries.abstract_timeseries as esta + +import emission.tests.analysisTests.tourModelTests.common as etatc class FeaturizationTests(unittest.TestCase): def __init__(self, *args, **kwargs): super(FeaturizationTests, self).__init__(*args, **kwargs) - self.data = cp.read_data(size=100) - print 'there are ' + str(len(self.data)) - #if len(self.data) == 0: - # tg.create_fake_trips() - # self.data = cp.read_data(size=100) def setUp(self): - pass + self.data = cp.read_data() + self.testUUID = uuid.uuid4() + self.ts = esta.TimeSeries.get_time_series(self.testUUID) + print 'there are ' + str(len(self.data)) def tearDown(self): - pass + edb.get_timeseries_db().remove({'user_id': self.testUUID}) + edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID}) def testCalculatePoints(self): feat = featurization.featurization([]) self.assertTrue(not feat.data) feat = featurization.featurization(None) self.assertTrue(not feat.data) - trip = Trip(None, None, None, None, None, None, None, None) + trip = etatc._createTripEntry(self, None, None, None, None) data = [trip] try: feat = featurization.featurization(data) @@ -64,17 +65,16 @@ def testCluster(self): self.assertTrue(False) data = [] - start = Coordinate(47,-122) - end = Coordinate(47,-123) + start = [-122, 47] + end = [-123,47] + now = time.time() for i in range(10): - now = datetime.datetime.now() - a = Trip(None, None, None, None, now, now, start, end) + a = etatc._createTripEntry(self, now, now, start, end) data.append(a) - start = Coordinate(41,-74) - end = Coordinate(42, -74) + 
start = [-74, 41] + end = [-74, 42] for i in range(10): - now = datetime.datetime.now() - a = Trip(None, None, None, None, now, now, start, end) + a = etatc._createTripEntry(self, now, now, start, end) data.append(a) feat = featurization.featurization(data) feat.cluster() @@ -87,8 +87,10 @@ def testCheckClusters(self): feat.cluster(min_clusters=2, max_clusters=10) try: feat.check_clusters() - except Exception: + except Exception, e: + logging.exception(e.message) self.assertTrue(False) if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/emission/tests/analysisTests/TestRepresentatives.py b/emission/tests/analysisTests/TestRepresentatives.py index 905b9b028..0c0e463e4 100644 --- a/emission/tests/analysisTests/TestRepresentatives.py +++ b/emission/tests/analysisTests/TestRepresentatives.py @@ -1,25 +1,24 @@ import unittest -import emission.core.get_database as edb +import time +import logging + import emission.analysis.modelling.tour_model.representatives as rep -import emission.simulation.trip_gen as tg import emission.analysis.modelling.tour_model.featurization as feat -import emission.analysis.modelling.tour_model.cluster_pipeline as cp -from emission.core.wrapper.trip_old import Trip, Coordinate + +import emission.tests.analysisTests.tourModelTests.common as etatc class RepresentativesTests(unittest.TestCase): def __init__(self, *args, **kwargs): super(RepresentativesTests, self).__init__(*args, **kwargs) - self.data = cp.read_data(size=100) - #if len(self.data) == 0: - # tg.create_fake_trips() - # self.data = cp.read_data(size=100) - print 'there are ' + str(len(self.data)) + + def setUp(self): + etatc._setup(self) n = len(self.data)/5 - self.labels = feat.featurization(self.data).cluster(min_clusters=n, max_clusters=n) + self.labels = feat.featurization(self.data).cluster(min_clusters=n, max_clusters=n) def tearDown(self): - pass + etatc._tearDown(self) def testInit(self): repy = rep.representatives(None, None) @@ 
-55,18 +54,20 @@ def testReps(self): repy.get_reps() self.assertTrue(len(repy.reps) == len(repy.clusters)) clusters = [0] - tripa = Trip(None, None, None, None, None, None, Coordinate(1,2), Coordinate(3,4)) - tripb = Trip(None, None, None, None, None, None, Coordinate(9,10), Coordinate(5,8)) - tripc = Trip(None, None, None, None, None, None, Coordinate(5,6), Coordinate(4,6)) + now = time.time() + tripa = etatc._createTripEntry(self, now, now, [1,2], [3,4]) + tripb = etatc._createTripEntry(self, now, now, [9,10], [5,8]) + tripc = etatc._createTripEntry(self, now, now, [5,6], [4,6]) data = [tripa, tripb, tripc] labels = [0,0,0] repy = rep.representatives(data, labels) repy.list_clusters() repy.get_reps() - self.assertTrue(repy.reps[0].trip_start_location.lat == 5) - self.assertTrue(repy.reps[0].trip_start_location.lon == 6) - self.assertTrue(repy.reps[0].trip_end_location.lat == 4) - self.assertTrue(repy.reps[0].trip_end_location.lon == 6) + logging.debug("repy.reps[0].data.start_loc = %s" % repy.reps[0].data.start_loc) + self.assertEqual(repy.reps[0].data.start_loc.coordinates[0], 5) + self.assertEqual(repy.reps[0].data.start_loc.coordinates[1], 6) + self.assertEqual(repy.reps[0].data.end_loc.coordinates[0], 4) + self.assertEqual(repy.reps[0].data.end_loc.coordinates[1], 6) def testLocations(self): repy = rep.representatives(self.data, self.labels) @@ -78,16 +79,17 @@ def testLocations(self): for i in range(len(bin)): b = bin[i] if b[0] == 'start': - a = repy.reps[b[1]].trip_start_location + a = repy.reps[b[1]].data.start_loc if b[0] == 'end': - a = repy.reps[b[1]].trip_end_location + a = repy.reps[b[1]].data.end_loc for j in range(i): c = bin[j] if c[0] == 'start': - d = repy.reps[c[1]].trip_start_location + d = repy.reps[c[1]].data.start_loc if c[0] == 'end': - d = repy.reps[c[1]].trip_end_location - self.assertTrue(repy.distance(a.lat, a.lon, d.lat, d.lon) < 300) + d = repy.reps[c[1]].data.end_loc + self.assertTrue(repy.distance(a.coordinates[1], 
a.coordinates[0], + d.coordinates[1], d.coordinates[0]) < 300) total += len(bin) self.assertTrue(total == 2 * repy.num_clusters) for i in range(repy.num_clusters): @@ -95,24 +97,25 @@ def testLocations(self): self.assertTrue(sum(bin.count(('end',i)) for bin in repy.bins) == 1) self.assertTrue(len(repy.locs) == len(repy.bins)) - tripa = Trip(None, None, None, None, None, None, Coordinate(1,2), Coordinate(30,40)) - tripb = Trip(None, None, None, None, None, None, Coordinate(1.0000002,2.0000002), Coordinate(55.0000002,85.0000002)) - tripc = Trip(None, None, None, None, None, None, Coordinate(30.0000002,40.0000002), Coordinate(55,85)) + now = time.time() + tripa = etatc._createTripEntry(self, now, now, [1,2], [30,40]) + tripb = etatc._createTripEntry(self, now, now, [1.0000002,2.0000002], [55.0000002,85.0000002]) + tripc = etatc._createTripEntry(self, now, now, [30.0000002,40.0000002], [55,85]) data = [tripa, tripb, tripc] labels = [0,1,2] repy = rep.representatives(data, labels) repy.list_clusters() repy.get_reps() repy.locations() - self.assertTrue(repy.bins[0] == [('start', 0), ('start', 1)]) - self.assertTrue(repy.bins[1] == [('end', 0), ('start', 2)]) - self.assertTrue(repy.bins[2] == [('end', 1), ('end', 2)]) - self.assertTrue(round(repy.locs[0].lat,7) == 1.0000001) - self.assertTrue(round(repy.locs[0].lon,7) == 2.0000001) - self.assertTrue(round(repy.locs[1].lat,7) == 30.0000001) - self.assertTrue(round(repy.locs[1].lon,7) == 40.0000001) - self.assertTrue(round(repy.locs[2].lat,7) == 55.0000001) - self.assertTrue(round(repy.locs[2].lon,7) == 85.0000001) + self.assertEqual(repy.bins[0], [('start', 0), ('start', 1)]) + self.assertEqual(repy.bins[1], [('end', 0), ('start', 2)]) + self.assertEqual(repy.bins[2], [('end', 1), ('end', 2)]) + self.assertAlmostEqual(repy.locs[0][0], 1.0000001, places=7) + self.assertAlmostEqual(repy.locs[0][1], 2.0000001, places=7) + self.assertAlmostEqual(repy.locs[1][0], 30.0000001, places=7) + self.assertAlmostEqual(repy.locs[1][1], 
40.0000001, places=7) + self.assertAlmostEqual(repy.locs[2][0], 55.0000001, places=7) + self.assertAlmostEqual(repy.locs[2][1], 85.0000001, places=7) def testClusterDict(self): repy = rep.representatives(self.data, self.labels) @@ -127,19 +130,20 @@ def testClusterDict(self): self.assertTrue(('start', i) in repy.bins[cluster['start']]) self.assertTrue(('end', i) in repy.bins[cluster['end']]) for d in repy.clusters[i]: - tripid = d.trip_id - tripy = next((x for x in cluster['sections'] if x.trip_id == tripid), None) + tripid = d.get_id() + tripy = next((x for x in cluster['sections'] if x.get_id() == tripid), None) self.assertTrue(tripy) - self.assertTrue(sum(sum(t.trip_id == tripid for t in cluster['sections']) for cluster in repy.self_loops_tour_dict) == 1) + self.assertTrue(sum(sum(t.get_id() == tripid for t in cluster['sections']) for cluster in repy.self_loops_tour_dict) == 1) for c in repy.tour_dict: self.assertTrue(c['start'] != c['end']) def testMatch(self): - tripa = Trip(None, None, None, None, None, None, Coordinate(1,2), Coordinate(3,4)) - tripb = Trip(None, None, None, None, None, None, Coordinate(3,4), Coordinate(1,2)) - tripc = Trip(None, None, None, None, None, None, Coordinate(1,2), Coordinate(9,10)) + now = time.time() + tripa = etatc._createTripEntry(self, now, now, [1,2], [3,4]) + tripb = etatc._createTripEntry(self, now, now, [3,4], [1,2]) + tripc = etatc._createTripEntry(self, now, now, [1,2], [9,10]) data = [tripa, tripb, tripc] labels = [0,1,2] @@ -155,4 +159,5 @@ def testMatch(self): self.assertTrue(not repy.match('end', 2, bin)) if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/emission/tests/analysisTests/TestSimilarity.py b/emission/tests/analysisTests/TestSimilarity.py index 4c7400a29..5a1002029 100644 --- a/emission/tests/analysisTests/TestSimilarity.py +++ b/emission/tests/analysisTests/TestSimilarity.py @@ -1,29 +1,36 @@ +import logging import unittest +import uuid +import time +import 
datetime +import os, os.path + +import emission.tests.analysisTests.tourModelTests.common as etatc + import emission.core.get_database as edb + import emission.analysis.modelling.tour_model.similarity as similarity -import emission.simulation.trip_gen as tg -import math -from emission.core.wrapper.trip_old import Trip, Coordinate import emission.analysis.modelling.tour_model.cluster_pipeline as cp -import datetime -import os, os.path + +import emission.storage.timeseries.abstract_timeseries as esta class SimilarityTests(unittest.TestCase): - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs): super(SimilarityTests, self).__init__(*args, **kwargs) - self.data = cp.read_data(size=100) + + def setUp(self): + self.testUUID = uuid.uuid4() + self.data = cp.read_data() #if len(self.data) == 0: # tg.create_fake_trips() # self.data = cp.read_data(size=100) - print 'there are ' + str(len(self.data)) - - def setUp(self): - pass + logging.info("Found %s trips" % len(self.data)) + self.ts = esta.TimeSeries.get_time_series(self.testUUID) def tearDown(self): - return - + edb.get_timeseries_db().remove({'user_id': self.testUUID}) + edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID}) def testInit(self): try: @@ -33,15 +40,18 @@ def testInit(self): except Exception: self.assertTrue(False) + logging.debug("STARTING init test") sim = similarity.similarity([], 100) self.assertTrue(len(sim.data) == 0) - now = datetime.datetime.now() - start = Coordinate(47,-122) - end = Coordinate(47,-123) - t1 = Trip(None, None, None, None, now, now, start, start) - t2 = Trip(None, None, None, None, now, now, start, end) + now = time.time() + start = [-122,47] + end = [-123,47] + t1 = etatc._createTripEntry(self, now, now, start, start) + t2 = etatc._createTripEntry(self, now, now, start, end) sim = similarity.similarity([t1, t2], 100) + logging.debug("sim.data = %s" % sim.data) simmy = similarity.similarity([t2], 100) + logging.debug("simmy.data = %s" % 
simmy.data) self.assertTrue(sim.data == simmy.data) def testBinData(self): @@ -62,17 +72,15 @@ def testBinData(self): self.assertTrue(len(sim.bins[i]) >= len(sim.bins[i+1])) data = [] - now = datetime.datetime.now() - start = Coordinate(47,-122) - end = Coordinate(47,-123) + now = time.time() + start = [-122, 47] + end = [-123, 47] for i in range(10): - a = Trip(None, None, None, None, now, now, start, end) - data.append(a) - start = Coordinate(41,-74) - end = Coordinate(42, -74) + data.append(etatc._createTripEntry(self, now, now, start, end)) + start = [-74, 41] + end = [-74, 42] for i in range(10): - a = Trip(None, None, None, None, now, now, start, end) - data.append(a) + data.append(etatc._createTripEntry(self, now, now, start, end)) sim = similarity.similarity(data, 300) sim.bin_data() self.assertTrue(len(sim.bins) == 2) @@ -86,10 +94,10 @@ def testDeleteBins(self): self.assertTrue(b == sim.num) def testElbowDistance(self): - start = Coordinate(47,-122) - end = Coordinate(47,-123) - now = datetime.datetime.now() - t = Trip(None, None, None, None, now, now, start, end) + start = [-122,47] + end = [-133,47] + now = time.time() + t = etatc._createTripEntry(self, now, now, start, end) data = [t] * 11 bins = [[1,2,3,4], [5,6,10], [7], [8], [9], [0]] sim = similarity.similarity(data, 300) @@ -106,15 +114,15 @@ def testMatch(self): self.assertTrue(sim.distance_helper(b,c)) def testDistance(self): - start = Coordinate(-122.259447, 37.875174) - end1 = Coordinate(-122.259279, 37.875479) - end2 = Coordinate(-122.252287, 37.869569) - now = datetime.datetime.now() - t1 = Trip(None, None, None, None, now, now, start, end1) - t2 = Trip(None, None, None, None, now, now, start, end2) + start = [-122.259447, 37.875174] + end1 = [-122.259279, 37.875479] + end2 = [-122.252287, 37.869569] + now = time.time() + t1 = etatc._createTripEntry(self, now, now, start, end1) + t2 = etatc._createTripEntry(self, now, now, start, end2) sim = similarity.similarity(self.data, 300) - 
self.assertTrue(sim.distance(start.lat, start.lon, end1.lat, end1.lon)) - self.assertTrue(not sim.distance(start.lat, start.lon, end2.lat, end2.lon)) + self.assertTrue(sim.distance(start[1], start[0], end1[1], end1[0])) + self.assertTrue(not sim.distance(start[1], start[0], end2[1], end2[0])) def testGraph(self): if os.path.isfile('./histogram.png'): @@ -136,12 +144,11 @@ def testEvaluateBins(self): a = sim.evaluate_bins() self.assertTrue(not a) sim = similarity.similarity(self.data, 300) - b = sim.evaluate_bins() - self.assertTrue(not b) sim.bin_data() c = sim.evaluate_bins() if sim.data: self.assertTrue(c) if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/emission/tests/analysisTests/tourModelTests/__init__.py b/emission/tests/analysisTests/tourModelTests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/emission/tests/analysisTests/tourModelTests/common.py b/emission/tests/analysisTests/tourModelTests/common.py new file mode 100644 index 000000000..47aabfae8 --- /dev/null +++ b/emission/tests/analysisTests/tourModelTests/common.py @@ -0,0 +1,46 @@ +import logging +import geojson as gj +import uuid + +import emission.core.wrapper.cleanedtrip as ecwct +import emission.core.wrapper.entry as ecwe +import emission.core.wrapper.cleanedplace as ecwcp +import emission.core.get_database as edb + +import emission.analysis.modelling.tour_model.cluster_pipeline as cp +import emission.storage.timeseries.abstract_timeseries as esta + +def _createTripEntry(self, start_ts, end_ts, start_loc, end_loc): + t = ecwct.Cleanedtrip() + t.start_ts = start_ts + t.end_ts = end_ts + t.start_loc = gj.Point(start_loc) + t.end_loc = gj.Point(end_loc) + sp = ecwcp.Cleanedplace() + sp.location = t.start_loc + sp.exit_ts = start_ts + ep = ecwcp.Cleanedplace() + ep.location = t.end_loc + ep.enter_ts = end_ts + spe = ecwe.Entry.create_entry(self.testUUID, "analysis/cleaned_place", sp, create_id=True) + epe = 
ecwe.Entry.create_entry(self.testUUID, "analysis/cleaned_place", ep, create_id=True) + t.start_place = spe.get_id() + t.end_place = epe.get_id() + te = ecwe.Entry.create_entry(self.testUUID, "analysis/cleaned_trip", t, create_id=True) + self.ts.insert(spe) + self.ts.insert(epe) + self.ts.insert(te) + return te + +def _setup(self): + self.data = cp.read_data() + #if len(self.data) == 0: + # tg.create_fake_trips() + # self.data = cp.read_data(size=100) + print 'there are ' + str(len(self.data)) + self.testUUID = uuid.uuid4() + self.ts = esta.TimeSeries.get_time_series(self.testUUID) + +def _tearDown(self): + edb.get_timeseries_db().remove({'user_id': self.testUUID}) + edb.get_analysis_timeseries_db().remove({'user_id': self.testUUID}) From ff0c3bc4221b157a124711d144f01397bc098fc2 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 01:11:37 -0700 Subject: [PATCH 6/8] More fallout from the old -> new change The coordinates stored are now a direct array and do not need to be converted from a coordinate list --- .../storage/decorations/common_place_queries.py | 16 +++------------- .../storage/decorations/common_trip_queries.py | 4 ++-- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/emission/storage/decorations/common_place_queries.py b/emission/storage/decorations/common_place_queries.py index b0798633c..95736e109 100644 --- a/emission/storage/decorations/common_place_queries.py +++ b/emission/storage/decorations/common_place_queries.py @@ -42,17 +42,6 @@ def clear_existing_places(user_id): db = edb.get_common_place_db() db.remove({'user_id': user_id}) -def get_all_place_objs(common_place): - trip.trips = [unc_trip.get_id() for unc_trip in dct["sections"]] - place_db = edb.get_place_db() - start_places = [] - end_places = [] - for t in trip.trips: - start = place_db.find_one({"_id" : t.start_place}) - end = place_db.find_one({"_id" : t.end_place}) - start_places.append(start) - end_places.append(end) - 
################################################################################ def create_places(list_of_cluster_data, user_id): @@ -60,10 +49,11 @@ def create_places(list_of_cluster_data, user_id): places_dct = {} logging.debug("About to create places for %d clusters" % len(list_of_cluster_data)) for dct in list_of_cluster_data: + logging.debug("Current coords = %s" % dct) start_name = dct['start'] end_name = dct['end'] - start_loc = gj.Point(dct['start_coords'].coordinate_list()) - end_loc = gj.Point(dct['end_coords'].coordinate_list()) + start_loc = gj.Point(dct['start_coords']) + end_loc = gj.Point(dct['end_coords']) start_loc_str = gj.dumps(start_loc, sort_keys=True) end_loc_str = gj.dumps(end_loc, sort_keys=True) if start_loc_str not in places_to_successors: diff --git a/emission/storage/decorations/common_trip_queries.py b/emission/storage/decorations/common_trip_queries.py index 32eb3f56a..6b5d08ec0 100644 --- a/emission/storage/decorations/common_trip_queries.py +++ b/emission/storage/decorations/common_trip_queries.py @@ -103,8 +103,8 @@ def set_up_trips(list_of_cluster_data, user_id): for dct in list_of_cluster_data: start_times = [] durations = [] - start_loc = gj.Point(dct['start_coords'].coordinate_list()) - end_loc = gj.Point(dct['end_coords'].coordinate_list()) + start_loc = gj.Point(dct['start_coords']) + end_loc = gj.Point(dct['end_coords']) start_place_id = esdcpq.get_common_place_at_location(start_loc).get_id() end_place_id = esdcpq.get_common_place_at_location(end_loc).get_id() #print 'dct["sections"].trip_id %s is' % dct["sections"][0] From b1eed9e62c7df1f28c9a8c397a366041d46cd1fa Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Tue, 28 Jun 2016 13:59:49 -0700 Subject: [PATCH 7/8] Replace other references to googlemaps with gmaps Thanks to @sunil07t for the catch! 
--- emission/user_model_josh/utility_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/emission/user_model_josh/utility_model.py b/emission/user_model_josh/utility_model.py index 7c09fd95c..685c7ffc1 100644 --- a/emission/user_model_josh/utility_model.py +++ b/emission/user_model_josh/utility_model.py @@ -504,9 +504,9 @@ def get_elevation_change(trip, testing=False): down = random.randint(1, 100) return (up, down) time.sleep(1) # so we dont run out calls - c = googlemaps.client.Client(GOOGLE_MAPS_KEY) + c = gmaps.client.Client(GOOGLE_MAPS_KEY) print get_route(trip) - jsn = googlemaps.elevation.elevation_along_path(c, get_route(trip), 200) + jsn = gmaps.elevation.elevation_along_path(c, get_route(trip), 200) up, down = 0, 0 prev = None for item in jsn: From 9ba9511b5a7f1b53c842b468f95409081a619a6c Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Wed, 29 Jun 2016 23:18:19 -0700 Subject: [PATCH 8/8] Fixed typo introduced while modifying tests Thanks to @sunil07t for the catch! --- emission/tests/analysisTests/TestSimilarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emission/tests/analysisTests/TestSimilarity.py b/emission/tests/analysisTests/TestSimilarity.py index 5a1002029..37c42bbac 100644 --- a/emission/tests/analysisTests/TestSimilarity.py +++ b/emission/tests/analysisTests/TestSimilarity.py @@ -95,7 +95,7 @@ def testDeleteBins(self): def testElbowDistance(self): start = [-122,47] - end = [-133,47] + end = [-123,47] now = time.time() t = etatc._createTripEntry(self, now, now, start, end) data = [t] * 11