From 2b0a61ee192c8c0b85a4ddbf9c76b1b6e61e9fc0 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 5 Aug 2024 13:53:55 -0700 Subject: [PATCH] Rename metadata tables and add schema to docs, refs #2382 --- datasette/app.py | 16 ++--- datasette/utils/internal_db.py | 62 +++++++++--------- datasette/views/table.py | 4 +- docs/internals.rst | 116 ++++++++++++++++++++++++++++++--- docs/metadata_doc.py | 23 +++++++ 5 files changed, 173 insertions(+), 48 deletions(-) diff --git a/datasette/app.py b/datasette/app.py index 5b8f910ce3..1f9e9d3009 100644 --- a/datasette/app.py +++ b/datasette/app.py @@ -684,7 +684,7 @@ async def get_instance_metadata(self): SELECT key, value - FROM datasette_metadata_instance_entries + FROM metadata_instance """ ) return dict(rows) @@ -695,7 +695,7 @@ async def get_database_metadata(self, database_name: str): SELECT key, value - FROM datasette_metadata_database_entries + FROM metadata_databases WHERE database_name = ? """, [database_name], @@ -708,7 +708,7 @@ async def get_resource_metadata(self, database_name: str, resource_name: str): SELECT key, value - FROM datasette_metadata_resource_entries + FROM metadata_resources WHERE database_name = ? AND resource_name = ? """, @@ -724,7 +724,7 @@ async def get_column_metadata( SELECT key, value - FROM datasette_metadata_column_entries + FROM metadata_columns WHERE database_name = ? AND resource_name = ? AND column_name = ? @@ -737,7 +737,7 @@ async def set_instance_metadata(self, key: str, value: str): # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) await self.get_internal_database().execute_write( """ - INSERT INTO datasette_metadata_instance_entries(key, value) + INSERT INTO metadata_instance(key, value) VALUES(?, ?) 
ON CONFLICT(key) DO UPDATE SET value = excluded.value; """, @@ -748,7 +748,7 @@ async def set_database_metadata(self, database_name: str, key: str, value: str): # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) await self.get_internal_database().execute_write( """ - INSERT INTO datasette_metadata_database_entries(database_name, key, value) + INSERT INTO metadata_databases(database_name, key, value) VALUES(?, ?, ?) ON CONFLICT(database_name, key) DO UPDATE SET value = excluded.value; """, @@ -761,7 +761,7 @@ async def set_resource_metadata( # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) await self.get_internal_database().execute_write( """ - INSERT INTO datasette_metadata_resource_entries(database_name, resource_name, key, value) + INSERT INTO metadata_resources(database_name, resource_name, key, value) VALUES(?, ?, ?, ?) ON CONFLICT(database_name, resource_name, key) DO UPDATE SET value = excluded.value; """, @@ -779,7 +779,7 @@ async def set_column_metadata( # TODO upsert only supported on SQLite 3.24.0 (2018-06-04) await self.get_internal_database().execute_write( """ - INSERT INTO datasette_metadata_column_entries(database_name, resource_name, column_name, key, value) + INSERT INTO metadata_columns(database_name, resource_name, column_name, key, value) VALUES(?, ?, ?, ?, ?) 
ON CONFLICT(database_name, resource_name, column_name, key) DO UPDATE SET value = excluded.value; """, diff --git a/datasette/utils/internal_db.py b/datasette/utils/internal_db.py index 6a5e08cba5..626dd1377f 100644 --- a/datasette/utils/internal_db.py +++ b/datasette/utils/internal_db.py @@ -68,37 +68,39 @@ async def init_internal_db(db): async def initialize_metadata_tables(db): await db.execute_write_script( - """ - CREATE TABLE IF NOT EXISTS datasette_metadata_instance_entries( - key text, - value text, - unique(key) - ); - - CREATE TABLE IF NOT EXISTS datasette_metadata_database_entries( - database_name text, - key text, - value text, - unique(database_name, key) - ); - - CREATE TABLE IF NOT EXISTS datasette_metadata_resource_entries( - database_name text, - resource_name text, - key text, - value text, - unique(database_name, resource_name, key) - ); - - CREATE TABLE IF NOT EXISTS datasette_metadata_column_entries( - database_name text, - resource_name text, - column_name text, - key text, - value text, - unique(database_name, resource_name, column_name, key) - ); + textwrap.dedent( + """ + CREATE TABLE IF NOT EXISTS metadata_instance ( + key text, + value text, + unique(key) + ); + + CREATE TABLE IF NOT EXISTS metadata_databases ( + database_name text, + key text, + value text, + unique(database_name, key) + ); + + CREATE TABLE IF NOT EXISTS metadata_resources ( + database_name text, + resource_name text, + key text, + value text, + unique(database_name, resource_name, key) + ); + + CREATE TABLE IF NOT EXISTS metadata_columns ( + database_name text, + resource_name text, + column_name text, + key text, + value text, + unique(database_name, resource_name, column_name, key) + ); """ + ) ) diff --git a/datasette/views/table.py b/datasette/views/table.py index e3bfb26060..fa2c80deae 100644 --- a/datasette/views/table.py +++ b/datasette/views/table.py @@ -153,7 +153,7 @@ async def display_columns_and_rows( SELECT column_name, value - FROM 
datasette_metadata_column_entries + FROM metadata_columns WHERE database_name = ? AND resource_name = ? AND key = 'description' @@ -1499,7 +1499,7 @@ async def extra_metadata(): SELECT column_name, value - FROM datasette_metadata_column_entries + FROM metadata_columns WHERE database_name = ? AND resource_name = ? AND key = 'description' diff --git a/docs/internals.rst b/docs/internals.rst index 0eb95e8938..8ecf432669 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -523,7 +523,7 @@ await .get_instance_metadata(self) ---------------------------------- Returns metadata keys and values for the entire Datasette instance as a dictionary. -Internally queries the ``datasette_metadata_instance_entries`` table inside the :ref:`internal database `. +Internally queries the ``metadata_instance`` table inside the :ref:`internal database `. .. _datasette_get_database_metadata: @@ -534,7 +534,7 @@ await .get_database_metadata(self, database_name) The name of the database to query. Returns metadata keys and values for the specified database as a dictionary. -Internally queries the ``datasette_metadata_database_entries`` table inside the :ref:`internal database `. +Internally queries the ``metadata_databases`` table inside the :ref:`internal database `. .. _datasette_get_resource_metadata: @@ -548,7 +548,7 @@ await .get_resource_metadata(self, database_name, resource_name) Returns metadata keys and values for the specified "resource" as a dictionary. A "resource" in this context can be a table, view, or canned query. -Internally queries the ``datasette_metadata_resource_entries`` table inside the :ref:`internal database `. +Internally queries the ``metadata_resources`` table inside the :ref:`internal database `. .. _datasette_get_column_metadata: @@ -564,7 +564,7 @@ await .get_column_metadata(self, database_name, resource_name, column_name) Returns metadata keys and values for the specified column, resource, and table as a dictionary. 
-Internally queries the ``datasette_metadata_column_entries`` table inside the :ref:`internal database `. +Internally queries the ``metadata_columns`` table inside the :ref:`internal database `. .. _datasette_set_instance_metadata: @@ -578,7 +578,7 @@ await .set_instance_metadata(self, key, value) Adds a new metadata entry for the entire Datasette instance. Any previous instance-level metadata entry with the same ``key`` will be overwritten. -Internally upserts the value into the the ``datasette_metadata_instance_entries`` table inside the :ref:`internal database `. +Internally upserts the value into the ``metadata_instance`` table inside the :ref:`internal database `. .. _datasette_set_database_metadata: @@ -594,7 +594,7 @@ await .set_database_metadata(self, database_name, key, value) Adds a new metadata entry for the specified database. Any previous database-level metadata entry with the same ``key`` will be overwritten. -Internally upserts the value into the the ``datasette_metadata_database_entries`` table inside the :ref:`internal database `. +Internally upserts the value into the ``metadata_databases`` table inside the :ref:`internal database `. .. _datasette_set_resource_metadata: @@ -613,7 +613,7 @@ await .set_resource_metadata(self, database_name, resource_name, key, value) Adds a new metadata entry for the specified "resource". Any previous resource-level metadata entry with the same ``key`` will be overwritten. -Internally upserts the value into the the ``datasette_metadata_resource_entries`` table inside the :ref:`internal database `. +Internally upserts the value into the ``metadata_resources`` table inside the :ref:`internal database `. .. _datasette_set_column_metadata: @@ -634,7 +634,7 @@ await .set_column_metadata(self, database_name, resource_name, column_name, key, Adds a new metadata entry for the specified column. Any previous column-level metadata entry with the same ``key`` will be overwritten. 
-Internally upserts the value into the the ``datasette_metadata_column_entries`` table inside the :ref:`internal database `. +Internally upserts the value into the ``metadata_columns`` table inside the :ref:`internal database `. @@ -1338,6 +1338,8 @@ Datasette maintains an "internal" SQLite database used for configuration, cachin Datasette maintains tables called ``catalog_databases``, ``catalog_tables``, ``catalog_columns``, ``catalog_indexes``, ``catalog_foreign_keys`` with details of the attached databases and their schemas. These tables should not be considered a stable API - they may change between Datasette releases. +Metadata is stored in tables ``metadata_instance``, ``metadata_databases``, ``metadata_resources`` and ``metadata_columns``. Plugins can interact with these tables via the ``get_*_metadata`` and ``set_*_metadata`` methods. + The internal database is not exposed in the Datasette application by default, which means private data can safely be stored without worry of accidentally leaking information through the default Datasette interface and API. However, other plugins do have full read and write access to the internal database. Plugins can access this database by calling ``internal_db = datasette.get_internal_database()`` and then executing queries using the :ref:`Database API `. @@ -1349,6 +1351,104 @@ Plugin authors are asked to practice good etiquette when using the internal data 3. Use temporary tables or shared in-memory attached databases when possible. 4. Avoid implementing features that could expose private data stored in the internal database by other plugins. +.. _internals_internal_schema: + +Internal database schema +------------------------ + +The internal database schema is as follows: + +.. [[[cog + from metadata_doc import internal_schema + internal_schema(cog) +.. ]]] + +.. 
code-block:: sql + + CREATE TABLE catalog_databases ( + database_name TEXT PRIMARY KEY, + path TEXT, + is_memory INTEGER, + schema_version INTEGER + ); + CREATE TABLE catalog_tables ( + database_name TEXT, + table_name TEXT, + rootpage INTEGER, + sql TEXT, + PRIMARY KEY (database_name, table_name), + FOREIGN KEY (database_name) REFERENCES databases(database_name) + ); + CREATE TABLE catalog_columns ( + database_name TEXT, + table_name TEXT, + cid INTEGER, + name TEXT, + type TEXT, + "notnull" INTEGER, + default_value TEXT, -- renamed from dflt_value + is_pk INTEGER, -- renamed from pk + hidden INTEGER, + PRIMARY KEY (database_name, table_name, name), + FOREIGN KEY (database_name) REFERENCES databases(database_name), + FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name) + ); + CREATE TABLE catalog_indexes ( + database_name TEXT, + table_name TEXT, + seq INTEGER, + name TEXT, + "unique" INTEGER, + origin TEXT, + partial INTEGER, + PRIMARY KEY (database_name, table_name, name), + FOREIGN KEY (database_name) REFERENCES databases(database_name), + FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name) + ); + CREATE TABLE catalog_foreign_keys ( + database_name TEXT, + table_name TEXT, + id INTEGER, + seq INTEGER, + "table" TEXT, + "from" TEXT, + "to" TEXT, + on_update TEXT, + on_delete TEXT, + match TEXT, + PRIMARY KEY (database_name, table_name, id, seq), + FOREIGN KEY (database_name) REFERENCES databases(database_name), + FOREIGN KEY (database_name, table_name) REFERENCES tables(database_name, table_name) + ); + CREATE TABLE metadata_instance ( + key text, + value text, + unique(key) + ); + CREATE TABLE metadata_databases ( + database_name text, + key text, + value text, + unique(database_name, key) + ); + CREATE TABLE metadata_resources ( + database_name text, + resource_name text, + key text, + value text, + unique(database_name, resource_name, key) + ); + CREATE TABLE metadata_columns ( + database_name 
text, + resource_name text, + column_name text, + key text, + value text, + unique(database_name, resource_name, column_name, key) + ); + +.. [[[end]]] + .. _internals_utils: The datasette.utils module diff --git a/docs/metadata_doc.py b/docs/metadata_doc.py index ad85bf5295..031b3ddd21 100644 --- a/docs/metadata_doc.py +++ b/docs/metadata_doc.py @@ -40,3 +40,26 @@ def config_example( cog.out(" .. code-block:: json\n\n") cog.out(textwrap.indent(json.dumps(data, indent=2), " ")) cog.out("\n") + + +def internal_schema(cog): + import asyncio + from datasette.app import Datasette + from sqlite_utils import Database + + ds = Datasette() + db = ds.get_internal_database() + + def get_schema(conn): + return Database(conn).schema + + async def inner(): + await ds.invoke_startup() + await ds._refresh_schemas() + return await db.execute_fn(get_schema) + + schema = asyncio.run(inner()) + cog.out("\n.. code-block:: sql") + cog.out("\n\n") + cog.out(textwrap.indent(schema, " ")) + cog.out("\n\n")