Merge pull request #1045 from debrief/develop
New release
IanMayo authored Sep 29, 2021
2 parents 630df0d + 4ee66a0 commit 7b294aa
Showing 62 changed files with 1,363 additions and 326 deletions.
2 changes: 1 addition & 1 deletion .isort.cfg
@@ -4,4 +4,4 @@ include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
line_length = 100
known_third_party =alembic,dateutil,flask,flask_cachebuster,flask_cors,freezegun,geoalchemy2,geopy,halo,iterfzf,loguru,pg8000,pint,prompt_toolkit,psycopg2,pyfiglet,pygments,pytest,setuptools,shapely,sqlalchemy,sqlalchemy_utils,tabulate,testing,tqdm,twisted,waitress
known_third_party =alembic,dateutil,flask,flask_cachebuster,flask_cors,freezegun,geoalchemy2,geopy,halo,iterfzf,loguru,pexpect,pg8000,pint,prompt_toolkit,psycopg2,pyfiglet,pygments,pytest,setuptools,shapely,sqlalchemy,sqlalchemy_utils,tabulate,testing,tqdm,twisted,waitress
7 changes: 6 additions & 1 deletion HISTORY.rst
@@ -2,14 +2,19 @@
History
=======

0.0.29 (2021-09-29)
-------------------

* Record extractions to database for future data audit
* Performance improvement in generating table summaries
* Performance improvement in opening some tables in Maintenance GUI

0.0.28 (2021-09-24)
-------------------

* Minor fixes for timeline (-100%, include_in_timeline, default_interval)
* Fix error for viewing Platform Entry in GUI
* Improve speed of generating table summaries (affects import performance)

0.0.27 (2021-09-16)
-------------------
2 changes: 1 addition & 1 deletion alembic.ini
@@ -91,4 +91,4 @@ format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

[alembic:exclude]
tables = alembic_version,ElementaryGeometries,spatial_ref_sys,spatial_ref_sys_aux,spatialite_history,sql_statements_log,sqlite_sequence,SpatialIndex
tables = alembic_version,ElementaryGeometries,spatial_ref_sys,spatial_ref_sys_aux,spatialite_history,sql_statements_log,sqlite_sequence,SpatialIndex,KNN,data_licenses
21 changes: 15 additions & 6 deletions docs/importer_dev_guide.rst
@@ -159,12 +159,14 @@ files showing which parts of the file have been used to extract each individual
part field in the created measurements, and tracks the extraction in the
database to help understand data provenance.

But this token highlighting does come with a performance cost. For high volume
file types that are tightly structured, with little room for misinterpretation,
the overhead may not be justified. In these circumstances, a call to
:code:`self.disable_recording()` in the :code:`__init__` method will turn off
the extraction highlighting for this importer, and significantly speed up the processing
of large files.
But this token highlighting and database recording does come with
a performance cost. For high-volume file types that are tightly
structured, with little room for misinterpretation, the overhead
may not be justified. You can configure the level of extraction that will
take place by calling :code:`self.set_highlighting_level()` in the :code:`__init__` method.
Three different values can be passed to this function: :code:`HighlightLevel.NONE` turns off all extraction
and highlighting, :code:`HighlightLevel.HTML` records extractions to an HTML file but not to the database, and
:code:`HighlightLevel.DATABASE` records to both an HTML file and the database. The default is :code:`HighlightLevel.HTML`.

Similarly, it may be justified to capture extraction data in the early stages of
developing/maintaining the parser for a new file format, with level of
@@ -220,6 +222,13 @@ one extraction can be recorded from disparate data in the file. For example:
combine_tokens(long_degrees_token, lat_degrees_token).record(
self.name, "location", state.location, "decimal degrees")
Once token extractions have been recorded using the :code:`record` method, the recorded information
must be linked to the appropriate measurement object (State/Contact/Comment, etc.) and prepared for saving
to the database. This is done with the :code:`Datafile.flush_extracted_tokens` method, which should be called once
all the data has been loaded for a specific measurement object. Usually this will be at the end of the
:meth:`~pepys_import.file.importer._load_this_line` method, or at the end of a loop inside the
:meth:`~pepys_import.file.importer._load_this_file` method, but for complex importers it may be elsewhere.
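
As a concrete sketch (abridged from a typical :code:`_load_this_line`
implementation, with illustrative token and field names):

    # ... parse the line and record each extracted token ...
    state.course = course
    course_token.record(self.name, "course", course)

    # All data for this State has now been loaded, so link the recorded
    # extractions to it and queue them for saving to the database
    datafile.flush_extracted_tokens()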


Creating measurement objects
############################
2 changes: 2 additions & 0 deletions importers/aircraft_csv_format_importer.py
@@ -149,6 +149,8 @@ def _load_this_line(self, data_store, line_number, line, datafile, change_id):
state.course = course
course_token.record(self.name, "course", course)

datafile.flush_extracted_tokens()

@staticmethod
def parse_timestamp(date, time):
format_str = "%d/%m/%Y "
2 changes: 2 additions & 0 deletions importers/e_trac_importer.py
@@ -138,6 +138,8 @@ def _load_this_line(self, data_store, line_number, line, datafile, change_id):
state.speed = speed
speed_token.record(self.name, "speed", speed)

datafile.flush_extracted_tokens()

@staticmethod
def name_for(token):
# split into two
2 changes: 2 additions & 0 deletions importers/eag_importer.py
@@ -175,6 +175,8 @@ def _load_this_file(self, data_store, path, file_object, datafile, change_id):
state.heading = heading
heading_token.record(self.name, "heading", heading)

datafile.flush_extracted_tokens()

def get_previous_sunday(self, date_of_recording_str):
format_str = "%Y%m%d"
date_of_recording = datetime.datetime.strptime(date_of_recording_str, format_str)
2 changes: 2 additions & 0 deletions importers/gpx_importer.py
@@ -148,6 +148,8 @@ def _load_this_file(self, data_store, path, file_object, datafile, change_id):
if elevation_valid:
state.elevation = elevation

datafile.flush_extracted_tokens()

def get_child_and_text_if_exists(self, element, search_string):
child = element.find(search_string)
if child is not None:
5 changes: 5 additions & 0 deletions importers/nisida_importer.py
@@ -7,6 +7,7 @@
from pepys_import.core.formats import unit_registry
from pepys_import.core.formats.location import Location
from pepys_import.core.validators import constants
from pepys_import.file.highlighter.level import HighlightLevel
from pepys_import.file.highlighter.support.combine import combine_tokens
from pepys_import.file.importer import CANCEL_IMPORT, Importer
from pepys_import.utils.sqlalchemy_utils import get_lowest_privacy
@@ -48,6 +49,8 @@ def __init__(self):
self.year = None
self.platform = None

self.set_highlighting_level(HighlightLevel.DATABASE)

def can_load_this_type(self, suffix):
return suffix.upper() == ".TXT"

@@ -129,6 +132,7 @@ def _load_this_line(self, data_store, line_number, line, datafile, change_id):
else:
self.last_entry_with_text.remarks = self.last_entry_with_text.remarks + text_to_add
line.record(self.name, "comment text", text_to_add)
datafile.flush_extracted_tokens()
elif len(line.text) > 7 and line.text[7] == "/" and line.text[0:5].isdigit():
# Check whether line starts with something like "311206Z/" (a timestamp and a slash)
# Checking like this is faster than using regular expressions on each line
@@ -183,6 +187,7 @@ def _load_this_line(self, data_store, line_number, line, datafile, change_id):
}
)
return
datafile.flush_extracted_tokens()
else:
# Not a line we recognise, so just skip to next one
return
2 changes: 2 additions & 0 deletions importers/nmea_importer.py
@@ -192,6 +192,8 @@ def _load_this_line(self, data_store, line_number, line, datafile, change_id):
self.location = None
self.depth = None

datafile.flush_extracted_tokens()

@staticmethod
def parse_timestamp(date, time):
if len(date) == 6:
2 changes: 2 additions & 0 deletions importers/replay_comment_importer.py
@@ -96,3 +96,5 @@ def _load_this_line(self, data_store, line_number, line, datafile, change_id):
comment_type=comment_type,
parser_name=self.short_name,
)

datafile.flush_extracted_tokens()
2 changes: 2 additions & 0 deletions importers/replay_contact_importer.py
@@ -260,3 +260,5 @@ def _load_this_line(self, data_store, line_number, line, datafile, change_id):
if ambig_bearing_valid:
ambig_bearing_token.record(self.name, "ambig bearing", ambig_bearing)
contact.ambig_bearing = ambig_bearing

datafile.flush_extracted_tokens()
9 changes: 7 additions & 2 deletions importers/replay_importer.py
@@ -15,8 +15,11 @@ def __init__(self):
self.depth = 0.0

# Example: Uncomment this line to turn off recording of extractions
# for this importer
# self.disable_recording()
# for this importer:
# self.set_highlighting_level(HighlightLevel.NONE)
# or to turn on database recording:
# self.set_highlighting_level(HighlightLevel.DATABASE)
# (default is HTML recording)

def can_load_this_type(self, suffix):
return suffix.upper() == ".REP" or suffix.upper() == ".DSF"
@@ -66,6 +69,8 @@ def _load_this_line(self, data_store, line_number, line, datafile, change_id):
state.speed = rep_line.speed
state.location = rep_line.get_location()

datafile.flush_extracted_tokens()

@staticmethod
def degrees_for(degs, mins, secs, hemi: str):
if hemi.upper() == "S" or hemi.upper() == "W":
8 changes: 2 additions & 6 deletions migrations/env.py
@@ -194,8 +194,6 @@ def process_revision_directives(context_, revision, directives):
context.run_migrations()
else:
# Turn off the enforcement of foreign key constraints before running the migration
connection.execute(text("PRAGMA foreign_keys=OFF;"))
connection.commit()
context.configure(
connection=connection,
target_metadata=target_metadata,
@@ -205,11 +203,9 @@ def process_revision_directives(context_, revision, directives):
compare_type=special_compare_type,
)
with context.begin_transaction():
connection.execute(text("PRAGMA foreign_keys=OFF;"))
context.run_migrations()

# Turn on the enforcement of foreign key constraints after the migration is done
connection.execute(text("PRAGMA foreign_keys=ON;"))
connection.commit()
connection.execute(text("PRAGMA foreign_keys=ON;"))


if context.is_offline_mode():
4 changes: 2 additions & 2 deletions migrations/latest_revisions.json
@@ -1,4 +1,4 @@
{
"LATEST_POSTGRES_VERSION": "c16bbfed85dc",
"LATEST_SQLITE_VERSION": "a7f75ead6204"
"LATEST_SQLITE_VERSION": "feb548c7c6c0",
"LATEST_POSTGRES_VERSION": "4899e94653f1"
}
@@ -0,0 +1,89 @@
"""Alter Extractions table
Revision ID: 4899e94653f1
Revises: bfb29dfcef94
Create Date: 2021-09-24 12:40:23.320197+00:00
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "4899e94653f1"
down_revision = "c16bbfed85dc"
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"Extractions",
sa.Column("destination_table", sa.String(length=150), nullable=True),
schema="pepys",
)
op.add_column(
"Extractions",
sa.Column("entry_id", postgresql.UUID(as_uuid=True), nullable=True),
schema="pepys",
)
op.add_column(
"Extractions",
sa.Column("datafile_id", postgresql.UUID(as_uuid=True), nullable=False),
schema="pepys",
)
op.add_column("Extractions", sa.Column("text", sa.Text(), nullable=False), schema="pepys")
op.add_column(
"Extractions",
sa.Column("text_location", sa.String(length=200), nullable=False),
schema="pepys",
)
op.add_column(
"Extractions", sa.Column("importer", sa.String(length=150), nullable=False), schema="pepys"
)
op.add_column(
"Extractions", sa.Column("interpreted_value", sa.Text(), nullable=False), schema="pepys"
)
op.create_foreign_key(
op.f("fk_Extractions_datafile_id_Datafiles"),
"Extractions",
"Datafiles",
["datafile_id"],
["datafile_id"],
source_schema="pepys",
referent_schema="pepys",
onupdate="cascade",
ondelete="cascade",
)
op.drop_column("Extractions", "chars", schema="pepys")
op.drop_column("Extractions", "table", schema="pepys")
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"Extractions",
sa.Column("table", sa.VARCHAR(length=150), autoincrement=False, nullable=False),
schema="pepys",
)
op.add_column(
"Extractions",
sa.Column("chars", sa.VARCHAR(length=150), autoincrement=False, nullable=False),
schema="pepys",
)
op.drop_constraint(
op.f("fk_Extractions_datafile_id_Datafiles"),
"Extractions",
schema="pepys",
type_="foreignkey",
)
op.drop_column("Extractions", "interpreted_value", schema="pepys")
op.drop_column("Extractions", "importer", schema="pepys")
op.drop_column("Extractions", "text_location", schema="pepys")
op.drop_column("Extractions", "text", schema="pepys")
op.drop_column("Extractions", "datafile_id", schema="pepys")
op.drop_column("Extractions", "entry_id", schema="pepys")
op.drop_column("Extractions", "destination_table", schema="pepys")
# ### end Alembic commands ###
@@ -0,0 +1,70 @@
"""Alter Extractions table
Revision ID: feb548c7c6c0
Revises: a7f75ead6204
Create Date: 2021-09-20 12:38:20.179908+00:00
"""
import sqlalchemy as sa
from alembic import op

import pepys_import

# revision identifiers, used by Alembic.
revision = "feb548c7c6c0"
down_revision = "a7f75ead6204"
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("Extractions", schema=None) as batch_op:
batch_op.add_column(sa.Column("destination_table", sa.String(length=150), nullable=True))
batch_op.add_column(
sa.Column(
"entry_id", pepys_import.utils.sqlalchemy_utils.UUIDType(length=16), nullable=True
)
)
batch_op.add_column(
sa.Column(
"datafile_id",
pepys_import.utils.sqlalchemy_utils.UUIDType(length=16),
nullable=False,
)
)
batch_op.add_column(sa.Column("text", sa.Text(), nullable=False))
batch_op.add_column(sa.Column("text_location", sa.String(length=200), nullable=False))
batch_op.add_column(sa.Column("importer", sa.String(length=150), nullable=False))
batch_op.add_column(sa.Column("interpreted_value", sa.Text(), nullable=False))
batch_op.create_foreign_key(
batch_op.f("fk_Extractions_datafile_id_Datafiles"),
"Datafiles",
["datafile_id"],
["datafile_id"],
onupdate="cascade",
ondelete="cascade",
)
batch_op.drop_column("table")
batch_op.drop_column("chars")

# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("Extractions", schema=None) as batch_op:
batch_op.add_column(sa.Column("chars", sa.VARCHAR(length=150), nullable=False))
batch_op.add_column(sa.Column("table", sa.VARCHAR(length=150), nullable=False))
batch_op.drop_constraint(
batch_op.f("fk_Extractions_datafile_id_Datafiles"), type_="foreignkey"
)
batch_op.drop_column("interpreted_value")
batch_op.drop_column("importer")
batch_op.drop_column("text_location")
batch_op.drop_column("text")
batch_op.drop_column("datafile_id")
batch_op.drop_column("entry_id")
batch_op.drop_column("destination_table")

# ### end Alembic commands ###