Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix indexes #849

Merged
merged 10 commits into from
May 27, 2022
Merged
38 changes: 25 additions & 13 deletions emission/core/get_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,20 @@
result_limit = config_data["timeseries"]["result_limit"]
config_file.close()

# Derive the target DB name from the configured connection URL.
# If the URL cannot be parsed, or does not name a database, fall back to
# the historical default "Stage_database" so existing deployments keep
# working unchanged.
try:
    parsed = pymongo.uri_parser.parse_uri(url)
except Exception:
    # NOTE(review): was a bare `except:`; narrowed to Exception so that
    # KeyboardInterrupt/SystemExit are not silently swallowed.
    print("URL not formatted, defaulting to \"Stage_database\"")
    db_name = "Stage_database"
else:
    if parsed['database']:
        db_name = parsed['database']
    else:
        print("URL does not specify a DB name, defaulting to \"Stage_database\"")
        db_name = "Stage_database"

print("Connecting to database URL "+url)
# Index by name (bracket access) instead of attribute access so the
# database name can be chosen dynamically from the URL.
_current_db = MongoClient(url)[db_name]
#config_file.close()

def _get_current_db():
Expand Down Expand Up @@ -158,12 +170,12 @@ def get_usercache_db():
def get_timeseries_db():
    """Return the raw timeseries collection, ensuring its query indices exist.

    create_index is idempotent, so repeated calls are safe.
    """
    #current_db = MongoClient().Stage_database
    TimeSeries = _get_current_db().Stage_timeseries
    # ASCENDING (not HASHED) so the indices can also serve sort/range
    # queries, not just equality lookups.
    TimeSeries.create_index([("user_id", pymongo.ASCENDING)])
    TimeSeries.create_index([("metadata.key", pymongo.ASCENDING)])
    TimeSeries.create_index([("metadata.write_ts", pymongo.DESCENDING)])
    TimeSeries.create_index([("data.ts", pymongo.DESCENDING)], sparse=True)

    # NOTE(review): the geospatial index is intentionally non-sparse here,
    # unlike the timestamp indices above — confirm this matches the
    # deployment's 2dsphere requirements.
    TimeSeries.create_index([("data.loc", pymongo.GEOSPHERE)])

    return TimeSeries

Expand All @@ -178,7 +190,7 @@ def get_analysis_timeseries_db():
"""
#current_db = MongoClient().Stage_database
AnalysisTimeSeries = _get_current_db().Stage_analysis_timeseries
AnalysisTimeSeries.create_index([("user_id", pymongo.HASHED)])
AnalysisTimeSeries.create_index([("user_id", pymongo.ASCENDING)])
_create_analysis_result_indices(AnalysisTimeSeries)
return AnalysisTimeSeries

def get_non_user_timeseries_db():
    """ Stores the data that is not associated with a particular user
    """
    # NOTE(review): this reuses the Stage_analysis_timeseries collection
    # rather than a dedicated one — confirm that is intentional.
    NonUserTimeSeries = _get_current_db().Stage_analysis_timeseries
    # ASCENDING (not HASHED) so the index can also serve sort/range queries.
    NonUserTimeSeries.create_index([("user_id", pymongo.ASCENDING)])
    _create_analysis_result_indices(NonUserTimeSeries)
    return NonUserTimeSeries

def _create_analysis_result_indices(tscoll):
tscoll.create_index([("metadata.key", pymongo.HASHED)])
tscoll.create_index([("metadata.key", pymongo.ASCENDING)])

# trips and sections
tscoll.create_index([("data.start_ts", pymongo.DESCENDING)], sparse=True)
tscoll.create_index([("data.end_ts", pymongo.DESCENDING)], sparse=True)
tscoll.create_index([("data.start_loc", pymongo.GEOSPHERE)], sparse=True)
tscoll.create_index([("data.end_loc", pymongo.GEOSPHERE)], sparse=True)
tscoll.create_index([("data.start_loc", pymongo.GEOSPHERE)])
tscoll.create_index([("data.end_loc", pymongo.GEOSPHERE)])
_create_local_dt_indices(tscoll, "data.start_local_dt")
_create_local_dt_indices(tscoll, "data.end_local_dt")

Expand All @@ -207,14 +219,14 @@ def _create_analysis_result_indices(tscoll):
tscoll.create_index([("data.exit_ts", pymongo.DESCENDING)], sparse=True)
_create_local_dt_indices(tscoll, "data.enter_local_dt")
_create_local_dt_indices(tscoll, "data.exit_local_dt")
tscoll.create_index([("data.location", pymongo.GEOSPHERE)], sparse=True)
tscoll.create_index([("data.location", pymongo.GEOSPHERE)])
tscoll.create_index([("data.duration", pymongo.DESCENDING)], sparse=True)
tscoll.create_index([("data.mode", pymongo.HASHED)], sparse=True)
tscoll.create_index([("data.section", pymongo.HASHED)], sparse=True)
tscoll.create_index([("data.mode", pymongo.ASCENDING)], sparse=True)
tscoll.create_index([("data.section", pymongo.ASCENDING)], sparse=True)

# recreated location
tscoll.create_index([("data.ts", pymongo.DESCENDING)], sparse=True)
tscoll.create_index([("data.loc", pymongo.GEOSPHERE)], sparse=True)
tscoll.create_index([("data.loc", pymongo.GEOSPHERE)])
_create_local_dt_indices(tscoll, "data.local_dt") # recreated location
return tscoll

Expand Down
33 changes: 1 addition & 32 deletions emission/net/usercache/formatters/android/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,38 +25,7 @@
def format(entry):
    """Convert an android location entry to the common internal format.

    :param entry: an AttrDict-like usercache entry whose metadata.key is
        "background/location" or "background/filtered_location"
    :return: the formatted entry produced by format_location_simple
    :raises AssertionError: if the entry has an unexpected metadata.key
    """
    assert(entry.metadata.key == "background/location" or
           entry.metadata.key == "background/filtered_location")
    # The old "raw" format (mLatitude/mTime-in-ms fields) is no longer
    # sent by any client, so only the simple format is handled now.
    return format_location_simple(entry)

def format_location_simple(entry):
formatted_entry = ad.AttrDict()
Expand Down
2 changes: 1 addition & 1 deletion emission/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def setupRealExampleWithEntries(testObj):
def setupIncomingEntries():
with open("emission/tests/data/netTests/android.activity.txt") as aaef:
activity_entry = json.load(aaef)
with open("emission/tests/data/netTests/android.location.raw.txt") as alef:
with open("emission/tests/data/netTests/android.location.txt") as alef:
location_entry = json.load(alef)
with open("emission/tests/data/netTests/android.transition.txt") as atef:
transition_entry = json.load(atef)
Expand Down
37 changes: 0 additions & 37 deletions emission/tests/data/netTests/android.location.raw.txt

This file was deleted.

28 changes: 28 additions & 0 deletions emission/tests/data/netTests/android.location.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"_id": {
"$oid": "607f5e4d54cdb2d8bf073d6e"
},
"metadata": {
"key": "background/location",
"platform": "android",
"read_ts": 0,
"time_zone": "America/Denver",
"type": "sensor-data",
"write_ts": 1617919770.084
},
"user_id": {
"$uuid": "73c7bf7e25524a6db21162190d2322e2"
},
"data": {
"accuracy": 28.944,
"altitude": 1649.0229115940374,
"bearing": 98,
"elapsedRealtimeNanos": 9978889615940,
"filter": "time",
"fmt_time": "Apr 8, 2021 2:31:05 PM",
"latitude": 39.5974003,
"longitude": -104.9823262,
"sensed_speed": 6.62,
"ts": 1617913865
}
}
20 changes: 11 additions & 9 deletions emission/tests/netTests/TestFormatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,19 @@ def testConvertMotionActivity(self):
self.assertTrue(formatted_entry.data.fmt_time.startswith("2015-07-13T15:26:00.493"))

def testConvertLocation(self):
    """Check that a captured android location entry converts correctly."""
    # Fixture captured from a real android client on 2021-04-08
    # (America/Denver): see emission/tests/data/netTests/android.location.txt
    with open("emission/tests/data/netTests/android.location.txt") as fp:
        entry = json.load(fp)
    formatted_entry = enuf.convert_to_common_format(ad.AttrDict(entry))
    self.assertEqual(formatted_entry.data.accuracy, 28.944)
    self.assertEqual(formatted_entry.data.latitude, 39.5974003)
    self.assertEqual(formatted_entry.data.longitude, -104.9823262)
    # GeoJSON points are (longitude, latitude)
    self.assertEqual(formatted_entry.data.loc, geojson.Point((-104.9823262, 39.5974003)))
    self.assertEqual(formatted_entry.data.ts, 1617913865)
    self.assertTrue(formatted_entry.data.fmt_time.startswith("2021-04-08T14:31:05"),
        "Found formatted time %s" % formatted_entry.data.fmt_time)
    self.assertEqual(formatted_entry.metadata.write_ts, 1617919770.084)
    self.assertTrue(formatted_entry.metadata.write_fmt_time.startswith("2021-04-08T16:09:30.084"),
        "Found formatted metadata time %s" % formatted_entry.metadata.write_fmt_time)

def testConvertTransition(self):
with open("emission/tests/data/netTests/android.transition.txt") as fp:
Expand Down