From 4fd1b0e28a298b229faaef6e2be24dddecd75755 Mon Sep 17 00:00:00 2001 From: James B Date: Wed, 19 Jul 2023 09:08:10 +0100 Subject: [PATCH] utils.py: get_file_type_for_flatten_tool: consider content type too Needed for https://github.com/openownership/cove-bods/issues/101 --- CHANGELOG.md | 7 +++++++ libcoveweb2/settings.py | 21 +++++++++++++++------ libcoveweb2/utils.py | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 633d75d..baadf98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +When upgrading to this version, `ALLOWED_UNKNOWN_CONTENT_TYPES` must be set in the Django settings file, ideally from the settings file included with this library. + +## Added + +- utils.py: get_file_type_for_flatten_tool: consider content type too +- settings.ALLOWED_UNKNOWN_CONTENT_TYPES. + ## Fixed - utils.py: get_file_type_for_flatten_tool: include an error message in raise at end https://github.com/OpenDataServices/lib-cove-web-2/issues/3 diff --git a/libcoveweb2/settings.py b/libcoveweb2/settings.py index 2145264..8938158 100644 --- a/libcoveweb2/settings.py +++ b/libcoveweb2/settings.py @@ -173,21 +173,29 @@ }, } -ALLOWED_JSON_CONTENT_TYPES = ["application/json", "application/octet-stream"] +# Sometimes uploads happen with a generic content type. +# In this case, we can't rely on content type to detect type. +# But the type is still allowed, so it's added to +# ALLOWED_*_CONTENT_TYPES when they are defined. +ALLOWED_UNKNOWN_CONTENT_TYPES = ["application/octet-stream"] + +# JSON details +ALLOWED_JSON_CONTENT_TYPES = ["application/json"] + ALLOWED_UNKNOWN_CONTENT_TYPES ALLOWED_JSON_EXTENSIONS = [".json"] +# Excel details ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES = [ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "application/octet-stream", -] +] + ALLOWED_UNKNOWN_CONTENT_TYPES ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS = [".xlsx"] +# Open Document details ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES = [ "application/vnd.oasis.opendocument.spreadsheet", - "application/octet-stream", -] +] + ALLOWED_UNKNOWN_CONTENT_TYPES ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS = [".ods"] +# Spreadsheet details (sum of details above) ALLOWED_SPREADSHEET_CONTENT_TYPES = ( ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES + ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES @@ -196,5 +204,6 @@ ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS + ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS ) -ALLOWED_CSV_CONTENT_TYPES = ["text/csv", "application/octet-stream"] +# CSV Details +ALLOWED_CSV_CONTENT_TYPES = ["text/csv"] + ALLOWED_UNKNOWN_CONTENT_TYPES ALLOWED_CSV_EXTENSIONS = [".csv"] diff --git a/libcoveweb2/utils.py b/libcoveweb2/utils.py index 1379b02..efe6e53 100644 --- a/libcoveweb2/utils.py +++ b/libcoveweb2/utils.py @@ -29,6 +29,26 @@ def get_file_type_for_flatten_tool(supplied_data_file: SuppliedDataFile): for extension in settings.ALLOWED_CSV_EXTENSIONS: if supplied_data_file.filename.lower().endswith(extension): return "csv" + # Check the content type + if ( + supplied_data_file.content_type + and supplied_data_file.content_type + not in settings.ALLOWED_UNKNOWN_CONTENT_TYPES + ): + if supplied_data_file.content_type in settings.ALLOWED_JSON_CONTENT_TYPES: + return "json" + if ( + supplied_data_file.content_type + in settings.ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES + ): + return "xlsx" + if ( + supplied_data_file.content_type + in settings.ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES + ): + return "ods" + if supplied_data_file.content_type in settings.ALLOWED_CSV_CONTENT_TYPES: + return "csv" # Try and load the first bit of the file to see if it's JSON? try: with open(supplied_data_file.upload_dir_and_filename(), "rb") as fp: