Skip to content

Commit

Permalink
Move Metadata to --internal database
Browse files Browse the repository at this point in the history
Refs:
- #2343
- #2341
  • Loading branch information
asg017 authored Jun 11, 2024
1 parent 8f9509f commit e1bfab3
Show file tree
Hide file tree
Showing 22 changed files with 286 additions and 214 deletions.
210 changes: 137 additions & 73 deletions datasette/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,37 @@ def __init__(
self._root_token = secrets.token_hex(32)
self.client = DatasetteClient(self)

async def apply_metadata_json(self):
# Apply any metadata entries from metadata.json to the internal tables
# step 1: top-level metadata
for key in self._metadata_local or {}:
if key == "databases":
continue
await self.set_instance_metadata(key, self._metadata_local[key])

# step 2: database-level metadata
for dbname, db in self._metadata_local.get("databases", {}).items():
for key, value in db.items():
if key == "tables":
continue
await self.set_database_metadata(dbname, key, value)

# step 3: table-level metadata
for tablename, table in db.get("tables", {}).items():
for key, value in table.items():
if key == "columns":
continue
await self.set_resource_metadata(dbname, tablename, key, value)

# step 4: column-level metadata (only descriptions in metadata.json)
for columnname, column_description in table.get("columns", {}).items():
await self.set_column_metadata(
dbname, tablename, columnname, "description", column_description
)

# TODO(alex) is metadata.json was loaded in, and --internal is not memory, then log
# a warning to user that they should delete their metadata.json file

def get_jinja_environment(self, request: Request = None) -> Environment:
environment = self._jinja_env
if request:
Expand Down Expand Up @@ -476,6 +507,7 @@ async def _refresh_schemas(self):
internal_db = self.get_internal_database()
if not self.internal_db_created:
await init_internal_db(internal_db)
await self.apply_metadata_json()
self.internal_db_created = True
current_schema_versions = {
row["database_name"]: row["schema_version"]
Expand Down Expand Up @@ -646,57 +678,113 @@ def _metadata_recursive_update(self, orig, updated):
orig[key] = upd_value
return orig

def metadata(self, key=None, database=None, table=None, fallback=True):
"""
Looks up metadata, cascading backwards from specified level.
Returns None if metadata value is not found.
"""
assert not (
database is None and table is not None
), "Cannot call metadata() with table= specified but not database="
metadata = {}
async def get_instance_metadata(self):
rows = await self.get_internal_database().execute(
"""
SELECT
key,
value
FROM datasette_metadata_instance_entries
"""
)
return dict(rows)

async def get_database_metadata(self, database_name: str):
rows = await self.get_internal_database().execute(
"""
SELECT
key,
value
FROM datasette_metadata_database_entries
WHERE database_name = ?
""",
[database_name],
)
return dict(rows)

async def get_resource_metadata(self, database_name: str, resource_name: str):
rows = await self.get_internal_database().execute(
"""
SELECT
key,
value
FROM datasette_metadata_resource_entries
WHERE database_name = ?
AND resource_name = ?
""",
[database_name, resource_name],
)
return dict(rows)

for hook_dbs in pm.hook.get_metadata(
datasette=self, key=key, database=database, table=table
):
metadata = self._metadata_recursive_update(metadata, hook_dbs)

# security precaution!! don't allow anything in the local config
# to be overwritten. this is a temporary measure, not sure if this
# is a good idea long term or maybe if it should just be a concern
# of the plugin's implemtnation
metadata = self._metadata_recursive_update(metadata, self._metadata_local)

databases = metadata.get("databases") or {}

search_list = []
if database is not None:
search_list.append(databases.get(database) or {})
if table is not None:
table_metadata = ((databases.get(database) or {}).get("tables") or {}).get(
table
) or {}
search_list.insert(0, table_metadata)

search_list.append(metadata)
if not fallback:
# No fallback allowed, so just use the first one in the list
search_list = search_list[:1]
if key is not None:
for item in search_list:
if key in item:
return item[key]
return None
else:
# Return the merged list
m = {}
for item in search_list:
m.update(item)
return m
async def get_column_metadata(
self, database_name: str, resource_name: str, column_name: str
):
rows = await self.get_internal_database().execute(
"""
SELECT
key,
value
FROM datasette_metadata_column_entries
WHERE database_name = ?
AND resource_name = ?
AND column_name = ?
""",
[database_name, resource_name, column_name],
)
return dict(rows)

async def set_instance_metadata(self, key: str, value: str):
# TODO upsert only supported on SQLite 3.24.0 (2018-06-04)
await self.get_internal_database().execute_write(
"""
INSERT INTO datasette_metadata_instance_entries(key, value)
VALUES(?, ?)
ON CONFLICT(key) DO UPDATE SET value = excluded.value;
""",
[key, value],
)

    @property
    def _metadata(self):
        # Convenience property: the fully merged metadata dictionary,
        # exactly as returned by self.metadata() with no arguments.
        return self.metadata()
async def set_database_metadata(self, database_name: str, key: str, value: str):
# TODO upsert only supported on SQLite 3.24.0 (2018-06-04)
await self.get_internal_database().execute_write(
"""
INSERT INTO datasette_metadata_database_entries(database_name, key, value)
VALUES(?, ?, ?)
ON CONFLICT(database_name, key) DO UPDATE SET value = excluded.value;
""",
[database_name, key, value],
)

async def set_resource_metadata(
self, database_name: str, resource_name: str, key: str, value: str
):
# TODO upsert only supported on SQLite 3.24.0 (2018-06-04)
await self.get_internal_database().execute_write(
"""
INSERT INTO datasette_metadata_resource_entries(database_name, resource_name, key, value)
VALUES(?, ?, ?, ?)
ON CONFLICT(database_name, resource_name, key) DO UPDATE SET value = excluded.value;
""",
[database_name, resource_name, key, value],
)

async def set_column_metadata(
self,
database_name: str,
resource_name: str,
column_name: str,
key: str,
value: str,
):
# TODO upsert only supported on SQLite 3.24.0 (2018-06-04)
await self.get_internal_database().execute_write(
"""
INSERT INTO datasette_metadata_column_entries(database_name, resource_name, column_name, key, value)
VALUES(?, ?, ?, ?, ?)
ON CONFLICT(database_name, resource_name, column_name, key) DO UPDATE SET value = excluded.value;
""",
[database_name, resource_name, column_name, key, value],
)

    def get_internal_database(self):
        # Accessor for the internal database object; self._internal_database
        # is assigned elsewhere (not visible in this chunk).
        return self._internal_database
Expand Down Expand Up @@ -774,20 +862,6 @@ async def get_canned_query(self, database_name, query_name, actor):
if query:
return query

def update_with_inherited_metadata(self, metadata):
# Fills in source/license with defaults, if available
metadata.update(
{
"source": metadata.get("source") or self.metadata("source"),
"source_url": metadata.get("source_url") or self.metadata("source_url"),
"license": metadata.get("license") or self.metadata("license"),
"license_url": metadata.get("license_url")
or self.metadata("license_url"),
"about": metadata.get("about") or self.metadata("about"),
"about_url": metadata.get("about_url") or self.metadata("about_url"),
}
)

def _prepare_connection(self, conn, database):
conn.row_factory = sqlite3.Row
conn.text_factory = lambda x: str(x, "utf-8", "replace")
Expand Down Expand Up @@ -1079,11 +1153,6 @@ def absolute_url(self, request, path):
url = "https://" + url[len("http://") :]
return url

def _register_custom_units(self):
"""Register any custom units defined in the metadata.json with Pint"""
for unit in self.metadata("custom_units") or []:
ureg.define(unit)

def _connected_databases(self):
return [
{
Expand Down Expand Up @@ -1436,10 +1505,6 @@ def add_route(view, regex):
),
r"/:memory:(?P<rest>.*)$",
)
add_route(
JsonDataView.as_view(self, "metadata.json", lambda: self.metadata()),
r"/-/metadata(\.(?P<format>json))?$",
)
add_route(
JsonDataView.as_view(self, "versions.json", self._versions),
r"/-/versions(\.(?P<format>json))?$",
Expand Down Expand Up @@ -1585,7 +1650,6 @@ async def resolve_row(self, request):
def app(self):
"""Returns an ASGI app function that serves the whole of Datasette"""
routes = self._routes()
self._register_custom_units()

async def setup_db():
# First time server starts up, calculate table counts for immutable databases
Expand Down
4 changes: 0 additions & 4 deletions datasette/default_menu_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ async def inner():
"href": datasette.urls.path("/-/versions"),
"label": "Version info",
},
{
"href": datasette.urls.path("/-/metadata"),
"label": "Metadata",
},
{
"href": datasette.urls.path("/-/settings"),
"label": "Settings",
Expand Down
13 changes: 9 additions & 4 deletions datasette/facets.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,15 @@ def get_facet_size(self):
max_returned_rows = self.ds.setting("max_returned_rows")
table_facet_size = None
if self.table:
tables_metadata = self.ds.metadata("tables", database=self.database) or {}
table_metadata = tables_metadata.get(self.table) or {}
if table_metadata:
table_facet_size = table_metadata.get("facet_size")
config_facet_size = (
self.ds.config.get("databases", {})
.get(self.database, {})
.get("tables", {})
.get(self.table, {})
.get("facet_size")
)
if config_facet_size:
table_facet_size = config_facet_size
custom_facet_size = self.request.args.get("_facet_size")
if custom_facet_size:
if custom_facet_size == "max":
Expand Down
5 changes: 0 additions & 5 deletions datasette/hookspecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@ def startup(datasette):
"""Fires directly after Datasette first starts running"""


@hookspec
def get_metadata(datasette, key, database, table):
    """Return metadata to be merged into Datasette's metadata dictionary.

    Plugin hook specification: implementations receive the Datasette
    instance plus the key/database/table being looked up (presumably any
    of these may be None for instance-wide lookups -- confirm with callers).
    """


@hookspec
def asgi_wrapper(datasette):
    """Returns an ASGI middleware callable to wrap our ASGI application with.

    Plugin hook specification: the implementation is given the Datasette
    instance and must return a callable that wraps an ASGI app.
    """
Expand Down
1 change: 0 additions & 1 deletion datasette/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def json_renderer(request, args, data, error, truncated=None):

if truncated is not None:
data["truncated"] = truncated

if shape == "arrayfirst":
if not data["rows"]:
data = []
Expand Down
37 changes: 37 additions & 0 deletions datasette/utils/internal_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,43 @@ async def init_internal_db(db):
"""
).strip()
await db.execute_write_script(create_tables_sql)
await initialize_metadata_tables(db)


async def initialize_metadata_tables(db):
    """Create the internal metadata tables if they do not already exist.

    One table per metadata level -- instance, database, resource and column.
    Each stores key/value pairs, and each carries a UNIQUE constraint over
    the level's natural key so that upsert writes can target it.
    """
    await db.execute_write_script(
        """
        CREATE TABLE IF NOT EXISTS datasette_metadata_instance_entries(
            key text,
            value text,
            unique(key)
        );
        CREATE TABLE IF NOT EXISTS datasette_metadata_database_entries(
            database_name text,
            key text,
            value text,
            unique(database_name, key)
        );
        CREATE TABLE IF NOT EXISTS datasette_metadata_resource_entries(
            database_name text,
            resource_name text,
            key text,
            value text,
            unique(database_name, resource_name, key)
        );
        CREATE TABLE IF NOT EXISTS datasette_metadata_column_entries(
            database_name text,
            resource_name text,
            column_name text,
            key text,
            value text,
            unique(database_name, resource_name, column_name, key)
        );
        """
    )


async def populate_schema_tables(internal_db, db):
Expand Down
6 changes: 1 addition & 5 deletions datasette/views/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,6 @@ async def get(self, request):

end = time.perf_counter()
data["query_ms"] = (end - start) * 1000
for key in ("source", "source_url", "license", "license_url"):
value = self.ds.metadata(key)
if value:
data[key] = value

# Special case for .jsono extension - redirect to _shape=objects
if _format == "jsono":
Expand Down Expand Up @@ -385,7 +381,7 @@ async def get(self, request):
},
}
if "metadata" not in context:
context["metadata"] = self.ds.metadata()
context["metadata"] = await self.ds.get_instance_metadata()
r = await self.render(templates, request=request, context=context)
if status_code is not None:
r.status = status_code
Expand Down
Loading

0 comments on commit e1bfab3

Please sign in to comment.