diff --git a/backend/gn_module_import/conf_schema_toml.py b/backend/gn_module_import/conf_schema_toml.py
index 37e3988c..d95e4d99 100644
--- a/backend/gn_module_import/conf_schema_toml.py
+++ b/backend/gn_module_import/conf_schema_toml.py
@@ -156,3 +156,4 @@ class GnModuleSchemaConf(Schema):
     ID_LIST_TAXA_RESTRICTION = fields.Integer(load_default=None)
     MODULE_URL = fields.String(load_default="/import")
     DATAFRAME_BATCH_SIZE = fields.Integer(load_default=10000)
+    INSERT_BATCH_SIZE = fields.Integer(load_default=1000)
diff --git a/backend/gn_module_import/tasks.py b/backend/gn_module_import/tasks.py
index b4ef6d40..fa4c5789 100644
--- a/backend/gn_module_import/tasks.py
+++ b/backend/gn_module_import/tasks.py
@@ -168,7 +168,8 @@ def do_import_in_synthese(self, import_id):
         entity_source_pk_field=entity_source_pk_field.synthese_field,
         module=TModules.query.filter_by(module_code="IMPORT").one(),
     )
-    import_data_to_synthese(imprt)
+    for progress in import_data_to_synthese(imprt):
+        self.update_state(state="PROGRESS", meta={"progress": progress})
     ImportSyntheseData.query.filter_by(imprt=imprt).delete()
     imprt = TImports.query.with_for_update(of=TImports).get(import_id)
     if imprt is None or imprt.task_id != self.request.id:
diff --git a/backend/gn_module_import/utils.py b/backend/gn_module_import/utils.py
index 0cd76a5a..82694f65 100644
--- a/backend/gn_module_import/utils.py
+++ b/backend/gn_module_import/utils.py
@@ -5,6 +5,7 @@
 import json
 from enum import IntEnum
 from datetime import datetime, timedelta
+from math import ceil
 
 from flask import current_app, render_template
 from sqlalchemy import func
@@ -268,6 +269,10 @@ def update_import_data_from_dataframe(imprt, fields, df):
 
 
 def import_data_to_synthese(imprt):
+    """
+    Import prepared data into synthese.
+    Operates in batches and yields progress after each batch.
+    """
     generated_fields = {
         "datetime_min",
         "datetime_max",
@@ -302,11 +307,20 @@ def import_data_to_synthese(imprt):
         "id_dataset",
         "last_action",
     ]
-    insert_stmt = insert(Synthese).from_select(
-        names=names,
-        select=select_stmt,
-    )
-    db.session.execute(insert_stmt)
+    batch_size = current_app.config["IMPORT"]["INSERT_BATCH_SIZE"]
+    batch_count = ceil(imprt.source_count / batch_size)
+    for batch in range(batch_count):
+        min_line_no = batch * batch_size
+        max_line_no = (batch + 1) * batch_size
+        insert_stmt = insert(Synthese).from_select(
+            names=names,
+            select=select_stmt.filter(
+                ImportSyntheseData.line_no >= min_line_no,
+                ImportSyntheseData.line_no < max_line_no,
+            ),
+        )
+        db.session.execute(insert_stmt)
+        yield (batch + 1) / batch_count
 
 
 def generate_pdf_from_template(template, data):
diff --git a/frontend/app/components/import_process/import-step/import-step.component.html b/frontend/app/components/import_process/import-step/import-step.component.html
index d6eba4a8..cb0418ab 100644
--- a/frontend/app/components/import_process/import-step/import-step.component.html
+++ b/frontend/app/components/import_process/import-step/import-step.component.html
@@ -117,7 +117,7 @@
Import des données en cours
-
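
Note on consuming the progress reported above: a minimal sketch, not part of the patch, showing how the meta dict passed to self.update_state() in tasks.py surfaces to a poller through Celery's AsyncResult. The celery_app object and the get_import_progress helper are assumptions introduced here for illustration only.

# Hypothetical helper (not part of the patch): return a completion ratio in
# [0, 1] for the import task, or None while the task has not started yet.
from celery.result import AsyncResult

def get_import_progress(task_id, celery_app):
    result = AsyncResult(task_id, app=celery_app)
    if result.state == "PROGRESS":
        # The meta dict given to update_state() is exposed as `result.info`.
        return result.info.get("progress", 0.0)
    if result.state == "SUCCESS":
        return 1.0
    return None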