Skip to content

Commit

Permalink
🐛 Export dates correctly for datawrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
jh0ker committed Nov 8, 2023
1 parent fc929eb commit cf6a688
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
16 changes: 10 additions & 6 deletions ddj_cloud/scrapers/talsperren/talsperren.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,11 @@ def run():
df_base = _get_base_dataset()

## For testing
#
# bio = to_parquet_bio(df_base, compression="gzip", index=False)
# bio.seek(0)
# upload_file(bio.read(), "talsperren/base.parquet.gzip")
#

bio = to_parquet_bio(df_base, compression="gzip", index=False)
bio.seek(0)
upload_file(bio.read(), "talsperren/base.parquet.gzip")

# df_base = pd.read_parquet("local_storage/talsperren/base.parquet.gzip", engine="fastparquet")

# Filter out reservoirs in ignore list
Expand All @@ -129,7 +129,11 @@ def run():
for exporter in exporters:
try:
df_export = exporter.run(df_base.copy())
upload_dataframe(df_export, f"talsperren/{exporter.filename}.csv")
upload_dataframe(
df_export,
f"talsperren/{exporter.filename}.csv",
datawrapper_datetimes=True,
)
except Exception as e:
print("Skipping exporter due to error:")
print(e)
Expand Down
16 changes: 16 additions & 0 deletions ddj_cloud/utils/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ def upload_dataframe(
compare_fn: Callable[[bytes, bytes], bool] = simple_compare,
acl: Optional[str] = "public-read",
create_cloudfront_invalidation: bool = False,
datawrapper_datetimes: bool = False,
):
"""Upload a dataframe to storage.
Expand All @@ -313,6 +314,21 @@ def upload_dataframe(
acl (str, optional): ACL to use when uploading. Defaults to ``"public-read"``.
create_cloudfront_invalidation (bool, optional): Whether to create a CloudFront invalidation. Defaults to False.
"""

# Convert datetimes for Datawrapper (which has no timezone support)
if datawrapper_datetimes:
for col in df.columns:
# There are several different kinds of datetime columns,
# such as datetime64[ns, Europe/Berlin] and datetime64[ns, UTC]
if not str(df[col].dtype).startswith("datetime64"):
continue

# Convert to Berlin timezone
df[col] = df[col].dt.tz_convert("Europe/Berlin")

# Convert to string
df[col] = df[col].dt.strftime("%Y-%m-%d %H:%M:%S")

# Convert to csv and encode to get bytes
write = df.to_csv(index=False).encode("utf-8")

Expand Down

0 comments on commit cf6a688

Please sign in to comment.