Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds timezone support #113

Merged
merged 10 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
needs: qa

runs-on: ubuntu-latest

services:
postgres:
image: timescale/timescaledb-ha:pg14-latest
Expand All @@ -30,10 +30,10 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.11
- name: Install dependencies
run: |
python -m pip install --upgrade pip wheel
Expand Down
6 changes: 5 additions & 1 deletion djangomain/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"drf_yasg",
"management",
"crispy_forms",
"crispy_bootstrap4",
]

MIDDLEWARE = [
Expand Down Expand Up @@ -157,7 +158,7 @@
TIME_ZONE = "UTC"
USE_I18N = True
USE_L10N = True
USE_TZ = False
USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/3.0/howto/static-files/
Expand Down Expand Up @@ -242,3 +243,6 @@
}

DEFAULT_AUTO_FIELD = "django.db.models.AutoField"

CRISPY_ALLOWED_TEMPLATE_PACKS = "bootstrap4"
CRISPY_TEMPLATE_PACK = "bootstrap4"
59 changes: 39 additions & 20 deletions importing/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os
import shutil
import time
import zoneinfo
from datetime import datetime
from logging import getLogger
from numbers import Number
Expand Down Expand Up @@ -93,19 +94,21 @@ def get_last_uploaded_date(station_id, var_code):
return datetime


def preformat_matrix(source_file, file_format):
def preformat_matrix(source_file, file_format, timezone: str):
"""
First step for importing data. Works out what sort of file is being read and adds
standardised columns for date and datetime (str). This is used in construct_matrix.
Args:
source_file: path to raw data file.
file_format: formatting.models.Format object.
timezone: Timezone name, eg. 'America/Chicago'.
Returns:
Pandas.DataFrame with raw data read and extra column(s) for date and datetime
(Str), which should be parsed correctly here.
"""
firstline = file_format.first_row if file_format.first_row else 0
skipfooter = file_format.footer_rows if file_format.footer_rows else 0
tz = zoneinfo.ZoneInfo(timezone)

if file_format.extension.value in ["xlsx", "xlx"]:
# If in Excel format
Expand Down Expand Up @@ -152,7 +155,6 @@ def preformat_matrix(source_file, file_format):
skipfooter=skipfooter,
engine=engine,
encoding="ISO-8859-1",
error_bad_lines=False,
)
else:
file = pd.read_csv(
Expand All @@ -164,14 +166,13 @@ def preformat_matrix(source_file, file_format):
skipfooter=skipfooter,
engine=engine,
encoding="ISO-8859-1",
error_bad_lines=False,
)

datetime_format = file_format.date.code + " " + file_format.time.code
if file_format.date_column == file_format.time_column:
file["date"] = pd.Series(
[
standardise_datetime(row, datetime_format)
standardise_datetime(row, datetime_format).replace(tzinfo=tz)
for row in file[file_format.date_column - 1].values
],
index=file.index,
Expand All @@ -198,7 +199,7 @@ def preformat_matrix(source_file, file_format):
)
file["date"] = pd.Series(
[
standardise_datetime(row, datetime_format)
standardise_datetime(row, datetime_format).replace(tzinfo=tz)
for row in file["datetime_str"].values
],
index=file.index,
Expand All @@ -208,7 +209,7 @@ def preformat_matrix(source_file, file_format):
return file.reset_index(drop=True)


def standardise_datetime(date_time, datetime_format):
def standardise_datetime(date_time, datetime_format) -> datetime:
"""
Returns a datetime object in the case that date_time is not already in that form.
Args:
Expand All @@ -220,7 +221,9 @@ def standardise_datetime(date_time, datetime_format):
if isinstance(date_time, datetime):
return date_time
elif isinstance(date_time, np.datetime64):
date_time = datetime.utcfromtimestamp((date_time - unix_epoch) / one_second)
date_time = datetime.utcfromtimestamp(
float((date_time - unix_epoch) / one_second)
)
return date_time
elif isinstance(date_time, str):
pass
Expand Down Expand Up @@ -271,36 +274,52 @@ def save_temp_data_to_permanent(data_import_temp):
station_id=station.station_id,
).delete()

# The following is a hack to account for the different possible name of the
# fields that the models might have. Will be made "nicer" at some point.
# This should always work as a measurement model should always have one and only
# one of "value", "average", "sum" fields.
value_field = (
set([field.name for field in Model._meta.fields])
.intersection(["value", "average", "sum"])
.pop()
)

# Bulk add new data
# TODO improve this logic to cope with variables that might have max/min
# AND depth.
if "maximum" in table.columns:
model_instances = [
Model(
time=record["date"],
value=record["value"],
station_id=record["station_id"],
maximum=record["maximum"],
minimum=record["minimum"],
{
"time": record["date"],
value_field: record["value"],
"station_id": record["station_id"],
"maximum": record["maximum"],
"minimum": record["minimum"],
},
)
for record in records
]
elif "depth" in [f.name for f in Model._meta.fields]:
model_instances = [
Model(
time=record["date"],
value=record["value"],
depth=record["depth"],
station_id=record["station_id"],
{
"time": record["date"],
value_field: record["value"],
"depth": record["depth"],
"station_id": record["station_id"],
},
)
for record in records
]
else:
model_instances = [
Model(
time=record["date"],
value=record["value"],
station_id=record["station_id"],
{
"time": record["date"],
value_field: record["value"],
"station_id": record["station_id"],
},
)
for record in records
]
Expand All @@ -321,7 +340,7 @@ def construct_matrix(matrix_source, file_format, station):
"""

# Get the "preformatted matrix" sorted by date col
matrix = preformat_matrix(matrix_source, file_format)
matrix = preformat_matrix(matrix_source, file_format, station.timezone)
# Find start and end dates from top and bottom row
start_date = matrix.loc[0, "date"]
end_date = matrix.loc[matrix.shape[0] - 1, "date"]
Expand Down
6 changes: 5 additions & 1 deletion importing/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
import os
import shutil
import urllib
from logging import getLogger

import pandas as pd
from django.contrib.auth.decorators import permission_required
from django.http import HttpResponse, JsonResponse
from rest_framework import generics
Expand Down Expand Up @@ -58,7 +60,9 @@ class DataImportTempCreate(generics.CreateAPIView):

def perform_create(self, serializer):
file = copy.deepcopy(self.request.FILES["file"])
matrix = preformat_matrix(file, serializer.validated_data["format"])
timezone = serializer.validated_data["station"].timezone
getLogger().warning(timezone)
matrix = preformat_matrix(file, serializer.validated_data["format"], timezone)
del file
# Set start and end date based on cleaned data from the file
serializer.validated_data["start_date"] = matrix.loc[0, "date"]
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ psycopg2==2.9.9
pytz==2023.3.post1
PyYAML==6.0.1
uritemplate==4.1.1
crispy-bootstrap4==2023.1

## Legacy dependency versions
# asgiref==3.3.4
Expand Down
31 changes: 31 additions & 0 deletions station/migrations/0002_station_timezone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Generated by Django 4.2.7 on 2023-11-16 11:45

import datetime

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``timezone`` character column to the ``Station`` model.

    The field is added with a one-off default (``preserve_default=False``), so
    the default is only used to populate rows that already exist when the
    migration runs; it is not kept on the field afterwards.
    """

    dependencies = [
        ("station", "0001_initial"),
    ]

    operations = [
        migrations.AddField(
            model_name="station",
            name="timezone",
            field=models.CharField(
                # NOTE(review): these pairs are (stored value, label); the
                # stored values here are city names rather than zone names --
                # confirm this matches the choices declared on the model.
                choices=[
                    ("London", "Europe/London"),
                    ("Paris", "Europe/Paris"),
                    ("New York", "America/New_York"),
                ],
                # Back-fill existing stations with a real timezone name. The
                # previous default was a datetime object, which would have been
                # stored as a timestamp string and is not a valid zone key.
                default="UTC",
                max_length=100,
                verbose_name="Timezone",
            ),
            preserve_default=False,
        ),
    ]
5 changes: 5 additions & 0 deletions station/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@
# creadoras, ya sea en uso total o parcial del código.
########################################################################################

import zoneinfo

from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.urls import reverse

# Choices for the Station timezone field: each available zone name, sorted
# alphabetically, used as both the stored value and the displayed label.
TIMEZONES = tuple(
    (zone_name, zone_name) for zone_name in sorted(zoneinfo.available_timezones())
)

# Global variables used in Basin model
BASIN_IMAGE_PATH = "station/basin_image/"
BASIN_FILE_PATH = "station/basin_file/"
Expand Down Expand Up @@ -265,6 +269,7 @@ class Station(models.Model):
influence_km = models.DecimalField(
"Área of input (km)", max_digits=12, decimal_places=4, null=True, blank=True
)
timezone = models.CharField("Timezone", max_length=100, choices=TIMEZONES)

def __str__(self):
return str(self.station_code)
Expand Down
16 changes: 10 additions & 6 deletions tests/importing/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,19 @@ class TestMatrixFunctions(TestCase):

def setUp(self):
from formatting.models import Format
from station.models import Station
from station.models import TIMEZONES, Station

self.file_format = Format.objects.get(format_id=45)
self.data_file = str(
Path(__file__).parent.parent / "test_data/iMHEA_HMT_01_HI_01_raw.csv"
)
self.station = Station.objects.get(station_id=8)
self.station.timezone = TIMEZONES[0][0]

def test_preformat_matrix(self):
from importing.functions import preformat_matrix

df = preformat_matrix(self.data_file, self.file_format)
df = preformat_matrix(self.data_file, self.file_format, self.station.timezone)
self.assertEqual(df.shape, (263371, 5))

def test_construct_matrix(self):
Expand Down Expand Up @@ -85,15 +86,18 @@ def setUp(self):
from importing.functions import preformat_matrix
from importing.models import DataImportTemp
from measurement.models import Flow
from station.models import Station
from station.models import TIMEZONES, Station

self.file_format = Format.objects.get(format_id=45)
self.data_file = str(
Path(__file__).parent.parent / "test_data" / "iMHEA_HMT_01_HI_01_raw.csv"
)
self.station = Station.objects.get(station_id=8)
self.station.timezone = TIMEZONES[0][0]

matrix = preformat_matrix(self.data_file, self.file_format)
matrix = preformat_matrix(
self.data_file, self.file_format, self.station.timezone
)
start_date = matrix.loc[0, "date"]
end_date = matrix.loc[matrix.shape[0] - 1, "date"]

Expand All @@ -114,12 +118,12 @@ def setUp(self):
flow1 = Flow.objects.create(
station_id=8,
time=datetime(2014, 6, 28, 0, 35, 0, tzinfo=pytz.UTC),
value=3.4,
average=3.4,
)
flow2 = Flow.objects.create(
station_id=8,
time=datetime(2016, 3, 7, 18, 5, 0, tzinfo=pytz.UTC),
value=5.7,
average=5.7,
)

def test_get_last_uploaded_date(self):
Expand Down
7 changes: 5 additions & 2 deletions tests/importing/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,24 @@ class TestSaveImportModels(TestCase):

def setUp(self):
from formatting.models import Format
from station.models import Station
from station.models import TIMEZONES, Station

self.file_format = Format.objects.get(format_id=45)
self.data_file = str(
Path(__file__).parent.parent / "test_data/iMHEA_HMT_01_HI_01_raw.csv"
)
self.station = Station.objects.get(station_id=8)
self.station.timezone = TIMEZONES[0][0]

def test_save_import_temp(self):
from django.core.files.uploadedfile import SimpleUploadedFile

from importing.functions import preformat_matrix
from importing.models import DataImportTemp

matrix = preformat_matrix(self.data_file, self.file_format)
matrix = preformat_matrix(
self.data_file, self.file_format, self.station.timezone
)
start_date = matrix.loc[0, "date"]
end_date = matrix.loc[matrix.shape[0] - 1, "date"]

Expand Down
8 changes: 4 additions & 4 deletions tests/measurement/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ def setUp(self):
flow1 = Flow.objects.create(
station_id=1,
time=datetime(2015, 10, 9, 23, 55, 59, tzinfo=pytz.UTC),
value=10.2,
average=10.2,
)
flow2 = Flow.objects.create(
station_id=1,
time=datetime(2016, 11, 9, 23, 55, 59, tzinfo=pytz.UTC),
value=5.7,
average=5.7,
)
precip1 = Precipitation.objects.create(
station_id=2,
time=datetime(2017, 12, 9, 23, 55, 59, tzinfo=pytz.UTC),
value=11.1,
sum=11.1,
)
precip2 = Precipitation.objects.create(
station_id=2,
time=datetime(2018, 1, 9, 23, 55, 59, tzinfo=pytz.UTC),
value=0.3,
sum=0.3,
)

def test_flow(self):
Expand Down
Loading
Loading