From dcd55b96611c08d75ee9d6ad83789b46035d7207 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Mon, 23 Jan 2023 09:47:23 +0000 Subject: [PATCH 01/21] feat: schema migration - schema migration as norm namespace in datahike - Closes #13 --- src/datahike/norm/norm.cljc | 83 ++++++++++++ test/datahike/norm/norm_test.cljc | 122 ++++++++++++++++++ .../norm/resources/001-a1-example.edn | 6 + .../norm/resources/002-a2-example.edn | 5 + 4 files changed, 216 insertions(+) create mode 100644 src/datahike/norm/norm.cljc create mode 100644 test/datahike/norm/norm_test.cljc create mode 100644 test/datahike/norm/resources/001-a1-example.edn create mode 100644 test/datahike/norm/resources/002-a2-example.edn diff --git a/src/datahike/norm/norm.cljc b/src/datahike/norm/norm.cljc new file mode 100644 index 000000000..301bd8f3c --- /dev/null +++ b/src/datahike/norm/norm.cljc @@ -0,0 +1,83 @@ +(ns datahike.norm.norm + (:require + [clojure.java.io :as io] + [clojure.string :as string] + [taoensso.timbre :as log] + [datahike.api :as d])) + +(defn attribute-installed? [conn attr] + (some? (d/entity @conn [:db/ident attr]))) + +(defn ensure-norm-attribute! [conn] + (if-not (attribute-installed? conn :tx/norm) + (:db-after (d/transact conn {:tx-data [{:db/ident :tx/norm + :db/valueType :db.type/keyword + :db/cardinality :db.cardinality/one}]})) + @conn)) + +(defn norm-installed? [db norm] + (->> {:query '[:find (count ?t) + :in $ ?tn + :where + [_ :tx/norm ?tn ?t]] + :args [db norm]} + d/q + first + some?)) + +(defn read-norm-files! [norms-folder] + (let [folder (io/file norms-folder)] + (if (.exists folder) + (let [migration-files (file-seq folder) + xf (comp + (filter #(re-find #".edn" (.getPath %))) + (map (fn [migration-file] + (-> (.getPath migration-file) + slurp + read-string + (update :norm (fn [norm] (or norm + (-> (.getName migration-file) + (string/replace #"\.edn" "") + keyword))))))))] + (sort-by :norm (into [] xf migration-files))) + (throw + (ex-info + (format "Norms folder %s does not exist." norms-folder) + {:folder norms-folder}))))) + +(defn neutral-fn [_] []) + +(defn ensure-norms! + ([conn] + (ensure-norms! conn (io/resource "migrations"))) + ([conn migrations] + (let [db (ensure-norm-attribute! conn) + norm-list (cond + (string? migrations) (read-norm-files! migrations) + (vector? migrations) migrations)] + (log/info "Checking migrations ...") + (doseq [{:keys [norm tx-data tx-fn] + :or {tx-data [] + tx-fn #'neutral-fn}} + norm-list] + (log/info "Checking migration" norm) + (when-not (norm-installed? db norm) + (log/info "Run migration" norm) + (->> (d/transact conn {:tx-data (vec (concat [{:tx/norm norm}] + tx-data + (tx-fn conn)))}) + (log/info "Done"))))))) + +(comment + (d/delete-database {:store {:backend :file + :path "/tmp/file-example"}}) + (d/create-database {:store {:backend :file + :path "/tmp/file-example"}}) + (def conn (d/connect {:store {:backend :file + :path "/tmp/file-example"}})) + (ensure-norms! conn "test/resources") + (def norm-list (read-norm-files! "test/resources")) + (norm-installed? 
(d/db conn) (:norm (first norm-list))) + (d/transact conn {:tx-data [{:foo "foo"}]})) + + diff --git a/test/datahike/norm/norm_test.cljc b/test/datahike/norm/norm_test.cljc new file mode 100644 index 000000000..cb9c82123 --- /dev/null +++ b/test/datahike/norm/norm_test.cljc @@ -0,0 +1,122 @@ +(ns datahike.norm.norm-test + (:require [clojure.test :refer [deftest is]] + [clojure.string :as s] + [datahike.api :as d] + [datahike.norm :as sut])) + +(defn create-test-db [] + (let [id (apply str + (for [_i (range 8)] + (char (+ (rand 26) 65))))] + (d/create-database {:store {:backend :mem + :id id}}) + (d/connect {:store {:backend :mem + :id id}}))) + +(deftest simple-test + (let [conn (create-test-db) + _ (sut/ensure-norms! conn "test/datahike/norm/resources")] + (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "foo", :ident :foo} + (-> (d/schema (d/db conn)) + :foo + (dissoc :db/id)))) + (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "Simpsons character name", :ident :character/name} + (-> (d/schema (d/db conn)) + :character/name + (dissoc :db/id)))) + (is (= #:db{:ident :tx/norm, :valueType :db.type/keyword, :cardinality :db.cardinality/one} + (-> (d/schema (d/db conn)) + :tx/norm + (dissoc :db/id)))))) + +(deftest tx-fn-test + (let [conn (create-test-db) + _ (sut/ensure-norms! conn "test/datahike/norm/resources") + _ (d/transact conn {:tx-data [{:foo "upper-case"} + {:foo "Grossbuchstaben"}]}) + test-fn (fn [conn] + (-> (for [[eid value] (d/q '[:find ?e ?v + :where + [?e :foo ?v]] + (d/db conn))] + [:db/add eid + :foo (s/upper-case value)]) + vec)) + test-norm [{:norm :test-norm-1, + :tx-fn test-fn}] + _ (sut/ensure-norms! conn test-norm)] + (is (= #{["GROSSBUCHSTABEN"] ["UPPER-CASE"]} + (d/q '[:find ?v + :where + [_ :foo ?v]] + (d/db conn)))))) + +(deftest tx-and-fn-test + (let [conn (create-test-db) + _ (sut/ensure-norms! conn "test/datahike/norm/resources") + _ (d/transact conn {:tx-data [{:character/name "Homer Simpson"} + {:character/name "Marge Simpson"}]}) + margehomer (d/q '[:find [?e ...] + :where + [?e :character/name]] + (d/db conn)) + tx-data [{:db/doc "Simpsons children reference" + :db/ident :character/child + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/many}] + tx-fn (fn [conn] + (-> (for [[eid] (d/q '[:find ?e + :where + [?e :character/name] + (or-join [?e] + [?e :character/name "Homer Simpson"] + [?e :character/name "Marge Simpson"])] + (d/db conn))] + {:db/id eid + :character/child [{:character/name "Bart Simpson"} + {:character/name "Lisa Simpson"} + {:character/name "Maggie Simpson"}]}) + vec)) + test-norm [{:norm :test-norm-2 + :tx-data tx-data + :tx-fn tx-fn}]] + (sut/ensure-norms! conn test-norm) + (is (= [#:character{:name "Marge Simpson", + :child + [#:character{:name "Bart Simpson"} + #:character{:name "Lisa Simpson"} + #:character{:name "Maggie Simpson"}]} + #:character{:name "Homer Simpson", + :child + [#:character{:name "Bart Simpson"} + #:character{:name "Lisa Simpson"} + #:character{:name "Maggie Simpson"}]}] + (d/pull-many (d/db conn) '[:character/name {:character/child [:character/name]}] margehomer))))) + +(comment + (def conn (create-test-db)) + (sut/ensure-norms! conn "test/resources") + (d/transact conn {:tx-data [{:character/name "Homer Simpson"} + {:character/name "Marge Simpson"}]}) + (def margehomer (-> (d/q '[:find [?e ...] 
+ :where + [?e :character/name]] + (d/db conn)))) + (d/transact conn {:tx-data [{:db/doc "Simpsons children reference" + :db/ident :character/child + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/many}]}) + (d/transact conn (-> (for [[eid] (d/q '[:find ?e + :where + [?e :character/name] + (or-join [?e] + [?e :character/name "Homer Simpson"] + [?e :character/name "Marge Simpson"])] + (d/db conn))] + {:db/id eid + :character/child [{:character/name "Bart Simpson"} + {:character/name "Lisa Simpson"} + {:character/name "Maggie Simpson"}]}) + vec)) + + (d/pull-many (d/db conn) '[:character/name {:character/child [:character/name]}] margehomer)) diff --git a/test/datahike/norm/resources/001-a1-example.edn b/test/datahike/norm/resources/001-a1-example.edn new file mode 100644 index 000000000..306c3193f --- /dev/null +++ b/test/datahike/norm/resources/001-a1-example.edn @@ -0,0 +1,6 @@ +{:norm :a1-example + :tx-data [{:db/doc "foo" + :db/ident :foo + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}] + :tx-fn io.replikativ.garantie/neutral-fn} diff --git a/test/datahike/norm/resources/002-a2-example.edn b/test/datahike/norm/resources/002-a2-example.edn new file mode 100644 index 000000000..97801f778 --- /dev/null +++ b/test/datahike/norm/resources/002-a2-example.edn @@ -0,0 +1,5 @@ +{:norm :a2-example + :tx-data [{:db/doc "Simpsons character name" + :db/ident :character/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]} From 5c86e8cfd5496687c22c376f60f0056ebbf8ecfa Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Wed, 1 Feb 2023 12:11:29 +0100 Subject: [PATCH 02/21] fixup! feat: schema migration --- bb/resources/native-image-tests/run-normtests | 6 + src/datahike/norm/norm.cljc | 5 +- .../norm/{norm_test.cljc => norm_test.clj} | 112 ++++++++++-------- .../norm/resources/0001 a3 example.edn | 4 + .../norm/resources/0002-a4-example.edn | 5 + .../norm/resources/001-a1-example.edn | 2 +- tests.edn | 2 + 7 files changed, 85 insertions(+), 51 deletions(-) create mode 100755 bb/resources/native-image-tests/run-normtests rename test/datahike/norm/{norm_test.cljc => norm_test.clj} (53%) create mode 100644 test/datahike/norm/resources/0001 a3 example.edn create mode 100644 test/datahike/norm/resources/0002-a4-example.edn diff --git a/bb/resources/native-image-tests/run-normtests b/bb/resources/native-image-tests/run-normtests new file mode 100755 index 000000000..58ffaabe0 --- /dev/null +++ b/bb/resources/native-image-tests/run-normtests @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail + +TIMBRE_LEVEL=':fatal' bin/kaocha --focus :norm "$@" diff --git a/src/datahike/norm/norm.cljc b/src/datahike/norm/norm.cljc index 301bd8f3c..1276784a6 100644 --- a/src/datahike/norm/norm.cljc +++ b/src/datahike/norm/norm.cljc @@ -37,6 +37,7 @@ read-string (update :norm (fn [norm] (or norm (-> (.getName migration-file) + (string/replace #" " "_") (string/replace #"\.edn" "") keyword))))))))] (sort-by :norm (into [] xf migration-files))) @@ -65,7 +66,7 @@ (log/info "Run migration" norm) (->> (d/transact conn {:tx-data (vec (concat [{:tx/norm norm}] tx-data - (tx-fn conn)))}) + ((eval tx-fn) conn)))}) (log/info "Done"))))))) (comment @@ -76,7 +77,7 @@ (def conn (d/connect {:store {:backend :file :path "/tmp/file-example"}})) (ensure-norms! conn "test/resources") - (def norm-list (read-norm-files! "test/resources")) + (def norm-list (read-norm-files! "test/datahike/norm/resources")) (norm-installed? 
(d/db conn) (:norm (first norm-list))) (d/transact conn {:tx-data [{:foo "foo"}]})) diff --git a/test/datahike/norm/norm_test.cljc b/test/datahike/norm/norm_test.clj similarity index 53% rename from test/datahike/norm/norm_test.cljc rename to test/datahike/norm/norm_test.clj index cb9c82123..390872758 100644 --- a/test/datahike/norm/norm_test.cljc +++ b/test/datahike/norm/norm_test.clj @@ -1,8 +1,8 @@ (ns datahike.norm.norm-test - (:require [clojure.test :refer [deftest is]] - [clojure.string :as s] + (:require [clojure.test :refer [deftest is testing]] + [clojure.string :as string] [datahike.api :as d] - [datahike.norm :as sut])) + [datahike.norm.norm :as sut])) (defn create-test-db [] (let [id (apply str @@ -34,14 +34,14 @@ _ (sut/ensure-norms! conn "test/datahike/norm/resources") _ (d/transact conn {:tx-data [{:foo "upper-case"} {:foo "Grossbuchstaben"}]}) - test-fn (fn [conn] - (-> (for [[eid value] (d/q '[:find ?e ?v - :where - [?e :foo ?v]] - (d/db conn))] - [:db/add eid - :foo (s/upper-case value)]) - vec)) + test-fn '(fn [conn] + (-> (for [[eid value] (d/q '[:find ?e ?v + :where + [?e :foo ?v]] + (d/db conn))] + [:db/add eid + :foo (string/upper-case value)]) + vec)) test-norm [{:norm :test-norm-1, :tx-fn test-fn}] _ (sut/ensure-norms! conn test-norm)] @@ -64,19 +64,19 @@ :db/ident :character/child :db/valueType :db.type/ref :db/cardinality :db.cardinality/many}] - tx-fn (fn [conn] - (-> (for [[eid] (d/q '[:find ?e - :where - [?e :character/name] - (or-join [?e] - [?e :character/name "Homer Simpson"] - [?e :character/name "Marge Simpson"])] - (d/db conn))] - {:db/id eid - :character/child [{:character/name "Bart Simpson"} - {:character/name "Lisa Simpson"} - {:character/name "Maggie Simpson"}]}) - vec)) + tx-fn '(fn [conn] + (-> (for [[eid] (d/q '[:find ?e + :where + [?e :character/name] + (or-join [?e] + [?e :character/name "Homer Simpson"] + [?e :character/name "Marge Simpson"])] + (d/db conn))] + {:db/id eid + :character/child [{:character/name "Bart Simpson"} + {:character/name "Lisa Simpson"} + {:character/name "Maggie Simpson"}]}) + vec)) test-norm [{:norm :test-norm-2 :tx-data tx-data :tx-fn tx-fn}]] @@ -93,30 +93,46 @@ #:character{:name "Maggie Simpson"}]}] (d/pull-many (d/db conn) '[:character/name {:character/child [:character/name]}] margehomer))))) +(defn baz-test-fn-1 [_conn] + [{:baz "baz"}]) + +(defn baz-test-fn-2 [conn] + (-> (for [[eid value] (d/q '[:find ?e ?v + :where + [?e :baz ?v]] + (d/db conn))] + [:db/add eid + :baz (-> (string/replace value #" " "_") + keyword)]) + vec)) + +(deftest naming-and-sorting-test + (let [conn (create-test-db) + _ (sut/ensure-norms! conn "test/datahike/norm/resources")] + (testing "updated schema with docstring" + (is (= "baz" + (-> (d/schema (d/db conn)) + :baz + :db/doc)))) + (testing "all bazes keywordized" + (is (= :baz + (d/q '[:find ?v + :where + [_ :baz ?v]] + (d/db conn))))))) + (comment (def conn (create-test-db)) - (sut/ensure-norms! conn "test/resources") - (d/transact conn {:tx-data [{:character/name "Homer Simpson"} - {:character/name "Marge Simpson"}]}) - (def margehomer (-> (d/q '[:find [?e ...] 
- :where - [?e :character/name]] - (d/db conn)))) - (d/transact conn {:tx-data [{:db/doc "Simpsons children reference" - :db/ident :character/child - :db/valueType :db.type/ref - :db/cardinality :db.cardinality/many}]}) - (d/transact conn (-> (for [[eid] (d/q '[:find ?e - :where - [?e :character/name] - (or-join [?e] - [?e :character/name "Homer Simpson"] - [?e :character/name "Marge Simpson"])] - (d/db conn))] - {:db/id eid - :character/child [{:character/name "Bart Simpson"} - {:character/name "Lisa Simpson"} - {:character/name "Maggie Simpson"}]}) - vec)) + (def norm-list (sut/read-norm-files! "test/datahike/norm/resources/0001 a3 example.edn")) + (sut/ensure-norms! conn "test/datahike/norm/resources/0002-a4-example.edn") + (d/schema (d/db conn)) + (d/datoms (d/db conn) :eavt) + (d/transact conn [{:baz "baz"}]) + (d/q '[:find ?e ?a ?v + :where + [?e ?a ?v]] + (d/db conn)) + (d/transact conn {:tx-data (vec (concat [{:tx/norm :bazbaz}] + [{:baz "baz"}]))}) - (d/pull-many (d/db conn) '[:character/name {:character/child [:character/name]}] margehomer)) + ((eval 'datahike.norm.norm-test/baz-test-fn-1) conn)) diff --git a/test/datahike/norm/resources/0001 a3 example.edn b/test/datahike/norm/resources/0001 a3 example.edn new file mode 100644 index 000000000..296e9928a --- /dev/null +++ b/test/datahike/norm/resources/0001 a3 example.edn @@ -0,0 +1,4 @@ +{:tx-data [{:db/ident :baz + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}] + :tx-fn datahike.norm.norm-test/baz-test-fn-1} diff --git a/test/datahike/norm/resources/0002-a4-example.edn b/test/datahike/norm/resources/0002-a4-example.edn new file mode 100644 index 000000000..af09a7542 --- /dev/null +++ b/test/datahike/norm/resources/0002-a4-example.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "baz" + :db/ident :baz + :db/valueType :db.type/keyword + :db/cardinality :db.cardinality/one}] + :tx-fn datahike.norm.norm-test/baz-test-fn-2} diff --git a/test/datahike/norm/resources/001-a1-example.edn b/test/datahike/norm/resources/001-a1-example.edn index 306c3193f..33a7a6b9a 100644 --- a/test/datahike/norm/resources/001-a1-example.edn +++ b/test/datahike/norm/resources/001-a1-example.edn @@ -3,4 +3,4 @@ :db/ident :foo :db/valueType :db.type/string :db/cardinality :db.cardinality/one}] - :tx-fn io.replikativ.garantie/neutral-fn} + :tx-fn datahike.norm.norm/neutral-fn} diff --git a/tests.edn b/tests.edn index 9af0d90b0..c2e1bd1e9 100644 --- a/tests.edn +++ b/tests.edn @@ -11,6 +11,8 @@ #_{:id :cljs :type :kaocha.type/cljs :ns-patterns ["datahike.test."]} + {:id :norm + :test-paths ["test/datahike/norm"]} {:id :integration :test-paths ["test/datahike/integration_test"]}] ;; More verbose than the default reporter, and with prettier errors From b25174d8bf0ab0b291e1f6e5479a38b9a29d12c4 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Wed, 1 Feb 2023 18:05:03 +0100 Subject: [PATCH 03/21] fixup! 
feat: schema migration --- src/datahike/norm/{norm.cljc => norm.clj} | 4 +- test/datahike/norm/norm_test.clj | 177 ++++++++---------- .../norm/resources/0001 a3 example.edn | 4 - .../norm/resources/0002-a4-example.edn | 5 - .../norm/resources/001-a1-example.edn | 5 +- .../norm/resources/002-a2-example.edn | 4 +- .../norm/resources/003-tx-fn-test.edn | 1 + .../resources/004-tx-data-and-tx-fn-test.edn | 5 + .../01-transact-basic-characters.edn | 2 + .../norm/resources/02 add occupation.edn | 5 + 10 files changed, 99 insertions(+), 113 deletions(-) rename src/datahike/norm/{norm.cljc => norm.clj} (95%) delete mode 100644 test/datahike/norm/resources/0001 a3 example.edn delete mode 100644 test/datahike/norm/resources/0002-a4-example.edn create mode 100644 test/datahike/norm/resources/003-tx-fn-test.edn create mode 100644 test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn create mode 100644 test/datahike/norm/resources/01-transact-basic-characters.edn create mode 100644 test/datahike/norm/resources/02 add occupation.edn diff --git a/src/datahike/norm/norm.cljc b/src/datahike/norm/norm.clj similarity index 95% rename from src/datahike/norm/norm.cljc rename to src/datahike/norm/norm.clj index 1276784a6..ee2943722 100644 --- a/src/datahike/norm/norm.cljc +++ b/src/datahike/norm/norm.clj @@ -53,9 +53,7 @@ (ensure-norms! conn (io/resource "migrations"))) ([conn migrations] (let [db (ensure-norm-attribute! conn) - norm-list (cond - (string? migrations) (read-norm-files! migrations) - (vector? migrations) migrations)] + norm-list (read-norm-files! migrations)] (log/info "Checking migrations ...") (doseq [{:keys [norm tx-data tx-fn] :or {tx-data [] diff --git a/test/datahike/norm/norm_test.clj b/test/datahike/norm/norm_test.clj index 390872758..8c4c00d68 100644 --- a/test/datahike/norm/norm_test.clj +++ b/test/datahike/norm/norm_test.clj @@ -1,5 +1,5 @@ (ns datahike.norm.norm-test - (:require [clojure.test :refer [deftest is testing]] + (:require [clojure.test :refer [deftest is]] [clojure.string :as string] [datahike.api :as d] [datahike.norm.norm :as sut])) @@ -15,124 +15,109 @@ (deftest simple-test (let [conn (create-test-db) - _ (sut/ensure-norms! conn "test/datahike/norm/resources")] - (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "foo", :ident :foo} - (-> (d/schema (d/db conn)) - :foo + _ (sut/ensure-norms! conn "test/datahike/norm/resources/001-a1-example.edn") + _ (sut/ensure-norms! 
conn "test/datahike/norm/resources/002-a2-example.edn") + schema (d/schema (d/db conn))] + (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "Place of occupation", :ident :character/place-of-occupation} + (-> (schema :character/place-of-occupation) (dissoc :db/id)))) - (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "Simpsons character name", :ident :character/name} - (-> (d/schema (d/db conn)) - :character/name + (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "Simpsons character name", :ident :character/name, :db/unique :db.unique/identity} + (-> (schema :character/name) (dissoc :db/id)))) (is (= #:db{:ident :tx/norm, :valueType :db.type/keyword, :cardinality :db.cardinality/one} - (-> (d/schema (d/db conn)) - :tx/norm + (-> (schema :tx/norm) (dissoc :db/id)))))) +(defn tx-fn-test-fn [conn] + (-> (for [[eid value] (d/q '[:find ?e ?v + :where + [?e :character/place-of-occupation ?v]] + (d/db conn))] + [:db/add eid + :character/place-of-occupation (string/lower-case value)]) + vec)) + (deftest tx-fn-test (let [conn (create-test-db) - _ (sut/ensure-norms! conn "test/datahike/norm/resources") - _ (d/transact conn {:tx-data [{:foo "upper-case"} - {:foo "Grossbuchstaben"}]}) - test-fn '(fn [conn] - (-> (for [[eid value] (d/q '[:find ?e ?v - :where - [?e :foo ?v]] - (d/db conn))] - [:db/add eid - :foo (string/upper-case value)]) - vec)) - test-norm [{:norm :test-norm-1, - :tx-fn test-fn}] - _ (sut/ensure-norms! conn test-norm)] - (is (= #{["GROSSBUCHSTABEN"] ["UPPER-CASE"]} + _ (sut/ensure-norms! conn "test/datahike/norm/resources/001-a1-example.edn") + _ (sut/ensure-norms! conn "test/datahike/norm/resources/002-a2-example.edn") + _ (d/transact conn {:tx-data [{:character/place-of-occupation "SPRINGFIELD ELEMENTARY SCHOOL"} + {:character/place-of-occupation "SPRINGFIELD NUCLEAR POWER PLANT"}]}) + _ (sut/ensure-norms! conn "test/datahike/norm/resources/003-tx-fn-test.edn")] + (is (= #{["springfield elementary school"] ["springfield nuclear power plant"]} (d/q '[:find ?v :where - [_ :foo ?v]] + [_ :character/place-of-occupation ?v]] (d/db conn)))))) -(deftest tx-and-fn-test +(defn tx-data-and-tx-fn-test-fn [conn] + (-> (for [[eid] + (d/q '[:find ?e + :where + [?e :character/name] + (or-join [?e] + [?e :character/name "Homer Simpson"] + [?e :character/name "Marge Simpson"])] + (d/db conn))] + {:db/id eid + :character/children [[:character/name "Bart Simpson"] + [:character/name "Lisa Simpson"] + [:character/name "Maggie Simpson"]]}) + vec)) + +(deftest tx-data-and-tx-fn-test (let [conn (create-test-db) - _ (sut/ensure-norms! conn "test/datahike/norm/resources") + _ (sut/ensure-norms! conn "test/datahike/norm/resources/001-a1-example.edn") + _ (sut/ensure-norms! conn "test/datahike/norm/resources/002-a2-example.edn") + _ (sut/ensure-norms! conn "test/datahike/norm/resources/003-tx-fn-test.edn") _ (d/transact conn {:tx-data [{:character/name "Homer Simpson"} - {:character/name "Marge Simpson"}]}) + {:character/name "Marge Simpson"} + {:character/name "Bart Simpson"} + {:character/name "Lisa Simpson"} + {:character/name "Maggie Simpson"}]}) + _ (sut/ensure-norms! conn "test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn") margehomer (d/q '[:find [?e ...] 
:where - [?e :character/name]] - (d/db conn)) - tx-data [{:db/doc "Simpsons children reference" - :db/ident :character/child - :db/valueType :db.type/ref - :db/cardinality :db.cardinality/many}] - tx-fn '(fn [conn] - (-> (for [[eid] (d/q '[:find ?e - :where - [?e :character/name] - (or-join [?e] - [?e :character/name "Homer Simpson"] - [?e :character/name "Marge Simpson"])] - (d/db conn))] - {:db/id eid - :character/child [{:character/name "Bart Simpson"} - {:character/name "Lisa Simpson"} - {:character/name "Maggie Simpson"}]}) - vec)) - test-norm [{:norm :test-norm-2 - :tx-data tx-data - :tx-fn tx-fn}]] - (sut/ensure-norms! conn test-norm) - (is (= [#:character{:name "Marge Simpson", - :child + [?e :character/name] + (or-join [?e] + [?e :character/name "Homer Simpson"] + [?e :character/name "Marge Simpson"])] + (d/db conn))] + (is (= [#:character{:name "Homer Simpson", + :children [#:character{:name "Bart Simpson"} #:character{:name "Lisa Simpson"} #:character{:name "Maggie Simpson"}]} - #:character{:name "Homer Simpson", - :child + #:character{:name "Marge Simpson", + :children [#:character{:name "Bart Simpson"} #:character{:name "Lisa Simpson"} #:character{:name "Maggie Simpson"}]}] - (d/pull-many (d/db conn) '[:character/name {:character/child [:character/name]}] margehomer))))) - -(defn baz-test-fn-1 [_conn] - [{:baz "baz"}]) + (d/pull-many (d/db conn) '[:character/name {:character/children [:character/name]}] margehomer))))) -(defn baz-test-fn-2 [conn] - (-> (for [[eid value] (d/q '[:find ?e ?v - :where - [?e :baz ?v]] - (d/db conn))] - [:db/add eid - :baz (-> (string/replace value #" " "_") - keyword)]) +(defn naming-and-sorting-test-fn [conn] + (-> (for [[eid] (d/q '[:find ?e + :where + [?e :character/name] + (or-join [?e] + [?e :character/name "Bart Simpson"] + [?e :character/name "Lisa Simpson"])] + (d/db conn))] + {:db/id eid + :character/occupation :student}) vec)) (deftest naming-and-sorting-test (let [conn (create-test-db) - _ (sut/ensure-norms! conn "test/datahike/norm/resources")] - (testing "updated schema with docstring" - (is (= "baz" - (-> (d/schema (d/db conn)) - :baz - :db/doc)))) - (testing "all bazes keywordized" - (is (= :baz - (d/q '[:find ?v - :where - [_ :baz ?v]] - (d/db conn))))))) - -(comment - (def conn (create-test-db)) - (def norm-list (sut/read-norm-files! "test/datahike/norm/resources/0001 a3 example.edn")) - (sut/ensure-norms! conn "test/datahike/norm/resources/0002-a4-example.edn") - (d/schema (d/db conn)) - (d/datoms (d/db conn) :eavt) - (d/transact conn [{:baz "baz"}]) - (d/q '[:find ?e ?a ?v - :where - [?e ?a ?v]] - (d/db conn)) - (d/transact conn {:tx-data (vec (concat [{:tx/norm :bazbaz}] - [{:baz "baz"}]))}) - - ((eval 'datahike.norm.norm-test/baz-test-fn-1) conn)) + _ (sut/ensure-norms! conn "test/datahike/norm/resources") + lisabart (d/q '[:find [?e ...] 
+ :where + [?e :character/occupation :student]] + (d/db conn))] + (is (= [{:db/id 10, + :character/name "Bart Simpson", + :character/occupation :student} + {:db/id 11, + :character/name "Lisa Simpson", + :character/occupation :student}] + (d/pull-many (d/db conn) '[*] lisabart))))) diff --git a/test/datahike/norm/resources/0001 a3 example.edn b/test/datahike/norm/resources/0001 a3 example.edn deleted file mode 100644 index 296e9928a..000000000 --- a/test/datahike/norm/resources/0001 a3 example.edn +++ /dev/null @@ -1,4 +0,0 @@ -{:tx-data [{:db/ident :baz - :db/valueType :db.type/string - :db/cardinality :db.cardinality/one}] - :tx-fn datahike.norm.norm-test/baz-test-fn-1} diff --git a/test/datahike/norm/resources/0002-a4-example.edn b/test/datahike/norm/resources/0002-a4-example.edn deleted file mode 100644 index af09a7542..000000000 --- a/test/datahike/norm/resources/0002-a4-example.edn +++ /dev/null @@ -1,5 +0,0 @@ -{:tx-data [{:db/doc "baz" - :db/ident :baz - :db/valueType :db.type/keyword - :db/cardinality :db.cardinality/one}] - :tx-fn datahike.norm.norm-test/baz-test-fn-2} diff --git a/test/datahike/norm/resources/001-a1-example.edn b/test/datahike/norm/resources/001-a1-example.edn index 33a7a6b9a..53b124a52 100644 --- a/test/datahike/norm/resources/001-a1-example.edn +++ b/test/datahike/norm/resources/001-a1-example.edn @@ -1,6 +1,5 @@ -{:norm :a1-example - :tx-data [{:db/doc "foo" - :db/ident :foo +{:tx-data [{:db/doc "Place of occupation" + :db/ident :character/place-of-occupation :db/valueType :db.type/string :db/cardinality :db.cardinality/one}] :tx-fn datahike.norm.norm/neutral-fn} diff --git a/test/datahike/norm/resources/002-a2-example.edn b/test/datahike/norm/resources/002-a2-example.edn index 97801f778..3c5597a60 100644 --- a/test/datahike/norm/resources/002-a2-example.edn +++ b/test/datahike/norm/resources/002-a2-example.edn @@ -1,5 +1,5 @@ -{:norm :a2-example - :tx-data [{:db/doc "Simpsons character name" +{:tx-data [{:db/doc "Simpsons character name" :db/ident :character/name + :db/unique :db.unique/identity :db/valueType :db.type/string :db/cardinality :db.cardinality/one}]} diff --git a/test/datahike/norm/resources/003-tx-fn-test.edn b/test/datahike/norm/resources/003-tx-fn-test.edn new file mode 100644 index 000000000..5f99e0470 --- /dev/null +++ b/test/datahike/norm/resources/003-tx-fn-test.edn @@ -0,0 +1 @@ +{:tx-fn datahike.norm.norm-test/tx-fn-test-fn} diff --git a/test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn b/test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn new file mode 100644 index 000000000..0e96ef2b9 --- /dev/null +++ b/test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "Simpsons children reference" + :db/ident :character/children + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/many}] + :tx-fn datahike.norm.norm-test/tx-data-and-tx-fn-test-fn} diff --git a/test/datahike/norm/resources/01-transact-basic-characters.edn b/test/datahike/norm/resources/01-transact-basic-characters.edn new file mode 100644 index 000000000..365c66926 --- /dev/null +++ b/test/datahike/norm/resources/01-transact-basic-characters.edn @@ -0,0 +1,2 @@ +{:tx-data [{:character/name "Bart Simpson"} + {:character/name "Lisa Simpson"}]} diff --git a/test/datahike/norm/resources/02 add occupation.edn b/test/datahike/norm/resources/02 add occupation.edn new file mode 100644 index 000000000..342768b85 --- /dev/null +++ b/test/datahike/norm/resources/02 add occupation.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc 
"Occupation" + :db/ident :character/occupation + :db/valueType :db.type/keyword + :db/cardinality :db.cardinality/one}] + :tx-fn datahike.norm.norm-test/naming-and-sorting-test-fn} From a4fd404b65678ed984b40dea706898acc566340d Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Thu, 2 Feb 2023 16:34:50 +0100 Subject: [PATCH 04/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 60 ++++++++++++++-------- test/datahike/norm/norm_test.clj | 5 ++ test/datahike/norm/resources/checksums.edn | 1 + 3 files changed, 46 insertions(+), 20 deletions(-) create mode 100644 test/datahike/norm/resources/checksums.edn diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index ee2943722..b622ff422 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -3,7 +3,9 @@ [clojure.java.io :as io] [clojure.string :as string] [taoensso.timbre :as log] - [datahike.api :as d])) + [datahike.api :as d]) + (:import + [java.security MessageDigest DigestInputStream])) (defn attribute-installed? [conn attr] (some? (d/entity @conn [:db/ident attr]))) @@ -16,13 +18,12 @@ @conn)) (defn norm-installed? [db norm] - (->> {:query '[:find (count ?t) + (->> {:query '[:find (count ?t) . :in $ ?tn :where [_ :tx/norm ?tn ?t]] :args [db norm]} d/q - first some?)) (defn read-norm-files! [norms-folder] @@ -35,11 +36,10 @@ (-> (.getPath migration-file) slurp read-string - (update :norm (fn [norm] (or norm - (-> (.getName migration-file) - (string/replace #" " "_") - (string/replace #"\.edn" "") - keyword))))))))] + (assoc :norm (-> (.getName migration-file) + (string/replace #" " "_") + (string/replace #"\.edn" "") + keyword))))))] (sort-by :norm (into [] xf migration-files))) (throw (ex-info @@ -51,9 +51,9 @@ (defn ensure-norms! ([conn] (ensure-norms! conn (io/resource "migrations"))) - ([conn migrations] + ([conn norms-folder] (let [db (ensure-norm-attribute! conn) - norm-list (read-norm-files! migrations)] + norm-list (read-norm-files! norms-folder)] (log/info "Checking migrations ...") (doseq [{:keys [norm tx-data tx-fn] :or {tx-data [] @@ -67,16 +67,36 @@ ((eval tx-fn) conn)))}) (log/info "Done"))))))) +(defn update-checksums! + ([^String filename] + (update-checksums! filename (io/resource "migrations"))) + ([^String filename ^String norms-folder] + (let [folder (io/file norms-folder)]))) + (comment - (d/delete-database {:store {:backend :file - :path "/tmp/file-example"}}) - (d/create-database {:store {:backend :file - :path "/tmp/file-example"}}) - (def conn (d/connect {:store {:backend :file - :path "/tmp/file-example"}})) - (ensure-norms! conn "test/resources") - (def norm-list (read-norm-files! "test/datahike/norm/resources")) - (norm-installed? (d/db conn) (:norm (first norm-list))) - (d/transact conn {:tx-data [{:foo "foo"}]})) + (def norms-folder "test/datahike/norm/resources") + (let [folder (io/file norms-folder)] + (if (.exists folder) + (->> (file-seq folder) + (filter #(re-find #".edn" (.getPath %))) + (map #(.getName %)) + sort) + (throw + (ex-info + (format "Norms folder %s does not exist." "foo") + {:folder "foo"})))) + + + (not (.exists (io/file (str norms-folder "checksums.edn")))) + (spit (str norms-folder "/" "checksums.edn") {:foo "foo"}) + (def md (MessageDigest/getInstance "SHA-256")) + (.checksum "test/datahike/norm/resources/checksums.edn" md) + (import '[java.util Base64]) + (-> (io/input-stream (str norms-folder "/" "checksums.edn")) + (DigestInputStream. 
md) + .getMessageDigest + .digest + (#(map (partial format "%02x") %)) + (#(string/join "" %)))) diff --git a/test/datahike/norm/norm_test.clj b/test/datahike/norm/norm_test.clj index 8c4c00d68..36ff4fa34 100644 --- a/test/datahike/norm/norm_test.clj +++ b/test/datahike/norm/norm_test.clj @@ -121,3 +121,8 @@ :character/name "Lisa Simpson", :character/occupation :student}] (d/pull-many (d/db conn) '[*] lisabart))))) + +(comment + (def conn (create-test-db)) + (sut/ensure-norms! conn "test/datahike/norm/resources") + (sut/norm-installed? (d/db conn) :003-tx-fn-test)) diff --git a/test/datahike/norm/resources/checksums.edn b/test/datahike/norm/resources/checksums.edn new file mode 100644 index 000000000..13398f93e --- /dev/null +++ b/test/datahike/norm/resources/checksums.edn @@ -0,0 +1 @@ +{:foo "foo"} \ No newline at end of file From 99e33d6618ae9d34f5aed0b109d2b3068844755a Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 3 Feb 2023 16:19:57 +0100 Subject: [PATCH 05/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 149 ++++++++++++--------- test/datahike/norm/norm_test.clj | 32 +++-- test/datahike/norm/resources/checksums.edn | 13 +- 3 files changed, 122 insertions(+), 72 deletions(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index b622ff422..0aaea3153 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -2,22 +2,27 @@ (:require [clojure.java.io :as io] [clojure.string :as string] + [clojure.pprint :as pp] + [clojure.data :as data] + [clojure.edn :as edn] [taoensso.timbre :as log] [datahike.api :as d]) (:import - [java.security MessageDigest DigestInputStream])) + [java.security MessageDigest DigestInputStream] + [java.io File] + [java.nio.file Files])) -(defn attribute-installed? [conn attr] +(defn- attribute-installed? [conn attr] (some? (d/entity @conn [:db/ident attr]))) -(defn ensure-norm-attribute! [conn] +(defn- ensure-norm-attribute! [conn] (if-not (attribute-installed? conn :tx/norm) (:db-after (d/transact conn {:tx-data [{:db/ident :tx/norm :db/valueType :db.type/keyword :db/cardinality :db.cardinality/one}]})) @conn)) -(defn norm-installed? [db norm] +(defn- norm-installed? [db norm] (->> {:query '[:find (count ?t) . :in $ ?tn :where @@ -26,77 +31,101 @@ d/q some?)) -(defn read-norm-files! [norms-folder] +(defn- read-norm-files! [norms-folder] (let [folder (io/file norms-folder)] (if (.exists folder) (let [migration-files (file-seq folder) xf (comp - (filter #(re-find #".edn" (.getPath %))) - (map (fn [migration-file] - (-> (.getPath migration-file) - slurp - read-string - (assoc :norm (-> (.getName migration-file) - (string/replace #" " "_") - (string/replace #"\.edn" "") - keyword))))))] + (filter #(not (.isDirectory %))) + (filter #(re-find #".edn" (.getPath %))) + (filter #(not= "checksums.edn" (.getName %))) + (map (fn [migration-file] + (-> (.getPath migration-file) + slurp + read-string + (assoc :norm (-> (.getName migration-file) + (string/replace #" " "-") + (string/replace #"\.edn" "") + keyword))))))] (sort-by :norm (into [] xf migration-files))) (throw (ex-info (format "Norms folder %s does not exist." norms-folder) {:folder norms-folder}))))) -(defn neutral-fn [_] []) - -(defn ensure-norms! - ([conn] - (ensure-norms! conn (io/resource "migrations"))) - ([conn norms-folder] - (let [db (ensure-norm-attribute! conn) - norm-list (read-norm-files! 
norms-folder)] - (log/info "Checking migrations ...") - (doseq [{:keys [norm tx-data tx-fn] - :or {tx-data [] - tx-fn #'neutral-fn}} - norm-list] - (log/info "Checking migration" norm) - (when-not (norm-installed? db norm) - (log/info "Run migration" norm) - (->> (d/transact conn {:tx-data (vec (concat [{:tx/norm norm}] - tx-data - ((eval tx-fn) conn)))}) - (log/info "Done"))))))) - -(defn update-checksums! - ([^String filename] - (update-checksums! filename (io/resource "migrations"))) - ([^String filename ^String norms-folder] - (let [folder (io/file norms-folder)]))) - -(comment - (def norms-folder "test/datahike/norm/resources") +(defn- compute-checksums [norms-folder] (let [folder (io/file norms-folder)] (if (.exists folder) - (->> (file-seq folder) - (filter #(re-find #".edn" (.getPath %))) - (map #(.getName %)) - sort) + (let [md (MessageDigest/getInstance "SHA-256") + migration-files (.listFiles folder) + xf (-> (comp + (filter #(not (.isDirectory %))) + (filter #(re-find #".edn" (.getPath %))) + (filter #(not= "checksums.edn" (.getName %))))) + filenames (sort (into [] xf migration-files))] + (->> (for [f filenames] + {(-> (.getName f) + (string/replace #" " "-") + (keyword)) + (->> (Files/readAllBytes (.toPath f)) + (.digest md) + (BigInteger. 1) + (format "%064x"))}) + (into {}))) (throw (ex-info - (format "Norms folder %s does not exist." "foo") - {:folder "foo"})))) + (format "Norms folder %s does not exist." norms-folder) + {:folder norms-folder}))))) + +(defn- check-correctness [norms-folder] + (let [diff (data/diff (compute-checksums norms-folder) + (edn/read-string (slurp (str norms-folder "/checksums.edn"))))] + (when-not (every? nil? (butlast diff)) + diff))) + +(defn neutral-fn [_] []) +(defn- ensure-norms [conn norms-folder] + (let [db (ensure-norm-attribute! conn) + norm-list (read-norm-files! norms-folder)] + (log/info "Checking migrations ...") + (doseq [{:keys [norm tx-data tx-fn] + :or {tx-data [] + tx-fn #'neutral-fn}} + norm-list] + (log/info "Checking migration" norm) + (when-not (norm-installed? db norm) + (log/info "Run migration" norm) + (->> (d/transact conn {:tx-data (vec (concat [{:tx/norm norm}] + tx-data + ((eval tx-fn) conn)))}) + (log/info "Done")))))) - (not (.exists (io/file (str norms-folder "checksums.edn")))) +(defn ensure-norms! + ([conn] + (ensure-norms! conn (io/resource "migrations"))) + ([conn norms-folder] + (if-let [diff (check-correctness norms-folder)] + (throw + (ex-info "Deviation of the checksums found. Migration aborted." + {:diff diff})) + (ensure-norms conn norms-folder)))) - (spit (str norms-folder "/" "checksums.edn") {:foo "foo"}) +(defn update-checksums! + ([] + (update-checksums! (io/resource "migrations"))) + ([^String norms-folder] + (-> (compute-checksums norms-folder) + (#(spit (io/file (str norms-folder "/" "checksums.edn")) + (with-out-str (pp/pprint %))))))) - (def md (MessageDigest/getInstance "SHA-256")) - (.checksum "test/datahike/norm/resources/checksums.edn" md) - (import '[java.util Base64]) - (-> (io/input-stream (str norms-folder "/" "checksums.edn")) - (DigestInputStream. md) - .getMessageDigest - .digest - (#(map (partial format "%02x") %)) - (#(string/join "" %)))) +(comment + (require '[datahike.norm.norm-test :refer [create-test-db]]) + (def conn (create-test-db)) + (def norms-folder "test/datahike/norm/resources") + (update-checksums! norms-folder) + (ensure-norms! 
conn norms-folder) + (compute-checksums norms-folder) + (check-correctness norms-folder) + (.listFiles (io/file norms-folder)) + (file-seq (io/file norms-folder))) diff --git a/test/datahike/norm/norm_test.clj b/test/datahike/norm/norm_test.clj index 36ff4fa34..23f634a8a 100644 --- a/test/datahike/norm/norm_test.clj +++ b/test/datahike/norm/norm_test.clj @@ -13,10 +13,12 @@ (d/connect {:store {:backend :mem :id id}}))) +(def ensure-norms #'sut/ensure-norms) + (deftest simple-test (let [conn (create-test-db) - _ (sut/ensure-norms! conn "test/datahike/norm/resources/001-a1-example.edn") - _ (sut/ensure-norms! conn "test/datahike/norm/resources/002-a2-example.edn") + _ (ensure-norms conn "test/datahike/norm/resources/001-a1-example.edn") + _ (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") schema (d/schema (d/db conn))] (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "Place of occupation", :ident :character/place-of-occupation} (-> (schema :character/place-of-occupation) @@ -28,6 +30,13 @@ (-> (schema :tx/norm) (dissoc :db/id)))))) +(comment + (def conn (create-test-db)) + (ensure-norms conn "test/datahike/norm/resources/001-a1-example.edn") + (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") + (def schema (d/schema (d/db conn)))) + + (defn tx-fn-test-fn [conn] (-> (for [[eid value] (d/q '[:find ?e ?v :where @@ -39,11 +48,11 @@ (deftest tx-fn-test (let [conn (create-test-db) - _ (sut/ensure-norms! conn "test/datahike/norm/resources/001-a1-example.edn") - _ (sut/ensure-norms! conn "test/datahike/norm/resources/002-a2-example.edn") + _ (ensure-norms conn "test/datahike/norm/resources/001-a1-example.edn") + _ (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") _ (d/transact conn {:tx-data [{:character/place-of-occupation "SPRINGFIELD ELEMENTARY SCHOOL"} {:character/place-of-occupation "SPRINGFIELD NUCLEAR POWER PLANT"}]}) - _ (sut/ensure-norms! conn "test/datahike/norm/resources/003-tx-fn-test.edn")] + _ (ensure-norms conn "test/datahike/norm/resources/003-tx-fn-test.edn")] (is (= #{["springfield elementary school"] ["springfield nuclear power plant"]} (d/q '[:find ?v :where @@ -67,15 +76,15 @@ (deftest tx-data-and-tx-fn-test (let [conn (create-test-db) - _ (sut/ensure-norms! conn "test/datahike/norm/resources/001-a1-example.edn") - _ (sut/ensure-norms! conn "test/datahike/norm/resources/002-a2-example.edn") - _ (sut/ensure-norms! conn "test/datahike/norm/resources/003-tx-fn-test.edn") + _ (ensure-norms conn "test/datahike/norm/resources/001-a1-example.edn") + _ (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") + _ (ensure-norms conn "test/datahike/norm/resources/003-tx-fn-test.edn") _ (d/transact conn {:tx-data [{:character/name "Homer Simpson"} {:character/name "Marge Simpson"} {:character/name "Bart Simpson"} {:character/name "Lisa Simpson"} {:character/name "Maggie Simpson"}]}) - _ (sut/ensure-norms! conn "test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn") + _ (ensure-norms conn "test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn") margehomer (d/q '[:find [?e ...] :where [?e :character/name] @@ -123,6 +132,7 @@ (d/pull-many (d/db conn) '[*] lisabart))))) (comment + (def norms-folder "test/datahike/norm/resources") (def conn (create-test-db)) - (sut/ensure-norms! conn "test/datahike/norm/resources") - (sut/norm-installed? (d/db conn) :003-tx-fn-test)) + (sut/ensure-norms! conn) + (#'(sut/read-norm-files!) 
norms-folder)) diff --git a/test/datahike/norm/resources/checksums.edn b/test/datahike/norm/resources/checksums.edn index 13398f93e..a6d7fffa0 100644 --- a/test/datahike/norm/resources/checksums.edn +++ b/test/datahike/norm/resources/checksums.edn @@ -1 +1,12 @@ -{:foo "foo"} \ No newline at end of file +{:001-a1-example.edn + "a7407fffb9c1ea423164add02c0c6b951d810068fa196b062f64f986a54d7933", + :002-a2-example.edn + "9454f765f71069519a2514636873f562266d1e0569f8f91cffe02ae6ddf38763", + :003-tx-fn-test.edn + "ac377554bbc6cd2ee0b0d8a91b29debf1c84554dc1713fb8fc793bca0f881bed", + :004-tx-data-and-tx-fn-test.edn + "7c6cb7f9de66ca1368ddc6e7cf4e95f87d2561b872b1d5582d0e594859bee25f", + :01-transact-basic-characters.edn + "f234b20fd3fe4744676be80d655f2c066142de3ee925cae6680a175481ab24be", + :02-add-occupation.edn + "d499fc826f36a9820814eb9836f3e00d633159500083cdd161742646f2789194"} From 19200fd5a7641be7e4d017a8b46267e69a2ef550 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 3 Feb 2023 16:51:12 +0100 Subject: [PATCH 06/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 17 +++-------------- test/datahike/norm/norm_test.clj | 6 ------ 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index 0aaea3153..2c3c8f117 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -57,7 +57,7 @@ (let [folder (io/file norms-folder)] (if (.exists folder) (let [md (MessageDigest/getInstance "SHA-256") - migration-files (.listFiles folder) + migration-files (file-seq folder) xf (-> (comp (filter #(not (.isDirectory %))) (filter #(re-find #".edn" (.getPath %))) @@ -77,7 +77,7 @@ (format "Norms folder %s does not exist." norms-folder) {:folder norms-folder}))))) -(defn- check-correctness [norms-folder] +(defn- verify-checksums [norms-folder] (let [diff (data/diff (compute-checksums norms-folder) (edn/read-string (slurp (str norms-folder "/checksums.edn"))))] (when-not (every? nil? (butlast diff)) @@ -105,7 +105,7 @@ ([conn] (ensure-norms! conn (io/resource "migrations"))) ([conn norms-folder] - (if-let [diff (check-correctness norms-folder)] + (if-let [diff (verify-checksums norms-folder)] (throw (ex-info "Deviation of the checksums found. Migration aborted." {:diff diff})) @@ -118,14 +118,3 @@ (-> (compute-checksums norms-folder) (#(spit (io/file (str norms-folder "/" "checksums.edn")) (with-out-str (pp/pprint %))))))) - -(comment - (require '[datahike.norm.norm-test :refer [create-test-db]]) - (def conn (create-test-db)) - (def norms-folder "test/datahike/norm/resources") - (update-checksums! norms-folder) - (ensure-norms! conn norms-folder) - (compute-checksums norms-folder) - (check-correctness norms-folder) - (.listFiles (io/file norms-folder)) - (file-seq (io/file norms-folder))) diff --git a/test/datahike/norm/norm_test.clj b/test/datahike/norm/norm_test.clj index 23f634a8a..9100252f6 100644 --- a/test/datahike/norm/norm_test.clj +++ b/test/datahike/norm/norm_test.clj @@ -130,9 +130,3 @@ :character/name "Lisa Simpson", :character/occupation :student}] (d/pull-many (d/db conn) '[*] lisabart))))) - -(comment - (def norms-folder "test/datahike/norm/resources") - (def conn (create-test-db)) - (sut/ensure-norms! conn) - (#'(sut/read-norm-files!) norms-folder)) From ffe684166b7b2d4ecf087799ddb321012fd44b0f Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 3 Feb 2023 17:42:33 +0100 Subject: [PATCH 07/21] fixup! 
feat: schema migration

---
 doc/schema-migration.md                      | 24 +++++++++++++++++++
 .../resources/004-tx-data-and-tx-fn-test.edn |  6 ++---
 2 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 doc/schema-migration.md

diff --git a/doc/schema-migration.md b/doc/schema-migration.md
new file mode 100644
index 000000000..2c1663b4a
--- /dev/null
+++ b/doc/schema-migration.md
@@ -0,0 +1,24 @@
+# Schema Migration
+
+Schema migration with Datahike is the evolution of your current schema into a future schema. When we are speaking of changes to your schema, these should always add new definitions and never change existing definitions. In case you want to change existing data to a new format you will have to create a new schema and transact your old data transformed again.
+
+## How to migrate
+
+First, create a folder of your choice; for now, let's call it `migrations`. In this folder, create a new file with an `.edn` extension, for example `001-my-migration.edn`. Preferably name the file beginning with a number, and be aware that the file name becomes the id of your migration. Since you will probably add more migrations in the future, left-pad the number with zeros to keep a proper sort order. Keep in mind that your migrations are transacted one after another, sorted by their ids.
+
+Second, write the transactions themselves into your newly created file. The content of the file needs to be an edn map with one or both of the keys `:tx-data` and `:tx-fn`. `:tx-data` is plain transaction data in the form of a vector of datoms; `:tx-fn` is a function that is run during the migration, for example to migrate existing data. This function needs to live in a loadable namespace, is evaluated during preparation, and needs to return transaction data. Its result is transacted together with `:tx-data` in one transaction.
+
+Example of a migration:
+```clojure
+{:tx-data [{:db/doc "Place of occupation"
+            :db/ident :character/place-of-occupation
+            :db/valueType :db.type/string
+            :db/cardinality :db.cardinality/one}]
+ :tx-fn datahike.norm.norm/neutral-fn}
+```
+
+Third, when you are sufficiently confident that your migration will work, you will usually want to store it in some kind of version control system. To avoid conflicts with colleagues we implemented a safety net: run the function `update-checksums!` from the `datahike.norm.norm` namespace to create or update a `checksums.edn` file. This file contains the names and checksums of your migration files. In case a colleague has checked in a migration that you were not aware of, your VCS should refuse to silently merge the conflicting `checksums.edn` files.
+
+Last, run the `datahike.norm.norm/ensure-norms!` function to apply your migrations. Each migration that has already run is marked with a `:tx/norm` attribute carrying its name, so it will not be transacted twice.
+
+Be aware that your chosen migration folder is scanned recursively, including all subfolders. Don't store anything other than your migrations in it!
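To make the workflow above concrete, here is a minimal REPL sketch. The in-memory store configuration and the `migrations` path are illustrative assumptions and not part of this patch; only `update-checksums!` and `ensure-norms!` come from the `datahike.norm.norm` namespace introduced here.

```clojure
(require '[datahike.api :as d]
         '[datahike.norm.norm :as norm])

;; Illustrative in-memory store; any Datahike backend works the same way.
(def cfg {:store {:backend :mem :id "norm-example"}})
(d/create-database cfg)
(def conn (d/connect cfg))

;; Record checksums of the migration files so that conflicting edits by
;; colleagues surface as a merge conflict on checksums.edn.
(norm/update-checksums! "migrations")

;; Transact every migration that has not been applied yet; applied
;; migrations carry the :tx/norm marker and are skipped on later runs.
(norm/ensure-norms! conn "migrations")
```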
diff --git a/test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn b/test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn index 0e96ef2b9..bd9f4d98e 100644 --- a/test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn +++ b/test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn @@ -1,5 +1,5 @@ -{:tx-data [{:db/doc "Simpsons children reference" - :db/ident :character/children - :db/valueType :db.type/ref +{:tx-data [{:db/doc "Simpsons children reference" + :db/ident :character/children + :db/valueType :db.type/ref :db/cardinality :db.cardinality/many}] :tx-fn datahike.norm.norm-test/tx-data-and-tx-fn-test-fn} From d10a3c1301b3d822431d4718b154f2a0c46f9464 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 3 Feb 2023 17:43:38 +0100 Subject: [PATCH 08/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 52 ++++++++++++++++---------------- test/datahike/norm/norm_test.clj | 1 - 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index 2c3c8f117..d11ac958a 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -8,9 +8,9 @@ [taoensso.timbre :as log] [datahike.api :as d]) (:import - [java.security MessageDigest DigestInputStream] - [java.io File] - [java.nio.file Files])) + [java.security MessageDigest DigestInputStream] + [java.io File] + [java.nio.file Files])) (defn- attribute-installed? [conn attr] (some? (d/entity @conn [:db/ident attr]))) @@ -36,17 +36,17 @@ (if (.exists folder) (let [migration-files (file-seq folder) xf (comp - (filter #(not (.isDirectory %))) - (filter #(re-find #".edn" (.getPath %))) - (filter #(not= "checksums.edn" (.getName %))) - (map (fn [migration-file] - (-> (.getPath migration-file) - slurp - read-string - (assoc :norm (-> (.getName migration-file) - (string/replace #" " "-") - (string/replace #"\.edn" "") - keyword))))))] + (filter #(not (.isDirectory %))) + (filter #(re-find #".edn" (.getPath %))) + (filter #(not= "checksums.edn" (.getName %))) + (map (fn [migration-file] + (-> (.getPath migration-file) + slurp + read-string + (assoc :norm (-> (.getName migration-file) + (string/replace #" " "-") + (string/replace #"\.edn" "") + keyword))))))] (sort-by :norm (into [] xf migration-files))) (throw (ex-info @@ -102,19 +102,19 @@ (log/info "Done")))))) (defn ensure-norms! - ([conn] - (ensure-norms! conn (io/resource "migrations"))) - ([conn norms-folder] - (if-let [diff (verify-checksums norms-folder)] - (throw + ([conn] + (ensure-norms! conn (io/resource "migrations"))) + ([conn norms-folder] + (if-let [diff (verify-checksums norms-folder)] + (throw (ex-info "Deviation of the checksums found. Migration aborted." {:diff diff})) - (ensure-norms conn norms-folder)))) + (ensure-norms conn norms-folder)))) (defn update-checksums! - ([] - (update-checksums! (io/resource "migrations"))) - ([^String norms-folder] - (-> (compute-checksums norms-folder) - (#(spit (io/file (str norms-folder "/" "checksums.edn")) - (with-out-str (pp/pprint %))))))) + ([] + (update-checksums! 
(io/resource "migrations"))) + ([^String norms-folder] + (-> (compute-checksums norms-folder) + (#(spit (io/file (str norms-folder "/" "checksums.edn")) + (with-out-str (pp/pprint %))))))) diff --git a/test/datahike/norm/norm_test.clj b/test/datahike/norm/norm_test.clj index 9100252f6..b128a437d 100644 --- a/test/datahike/norm/norm_test.clj +++ b/test/datahike/norm/norm_test.clj @@ -36,7 +36,6 @@ (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") (def schema (d/schema (d/db conn)))) - (defn tx-fn-test-fn [conn] (-> (for [[eid value] (d/q '[:find ?e ?v :where From 4b23ab110d7e95952b806366c0c3bc7806c11ad1 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 3 Feb 2023 18:30:08 +0100 Subject: [PATCH 09/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index d11ac958a..752f63b73 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -5,6 +5,7 @@ [clojure.pprint :as pp] [clojure.data :as data] [clojure.edn :as edn] + [clojure.spec.alpha :as s] [taoensso.timbre :as log] [datahike.api :as d]) (:import @@ -63,7 +64,7 @@ (filter #(re-find #".edn" (.getPath %))) (filter #(not= "checksums.edn" (.getName %))))) filenames (sort (into [] xf migration-files))] - (->> (for [f filenames] + (->> (doseq [f filenames] {(-> (.getName f) (string/replace #" " "-") (keyword)) @@ -83,6 +84,16 @@ (when-not (every? nil? (butlast diff)) diff))) +(s/def ::tx-data vector?) +(s/def ::tx-fn symbol?) +(s/def ::norm-map (s/keys :opt-un [::tx-data ::tx-fn])) +(defn- validate-norm [norm] + (if (s/valid? ::norm-map norm) + (log/debug "Norm validated" {:norm-map norm}) + (let [res (s/explain-data ::norm-map norm)] + (throw + (ex-info "Invalid norm" {:validation-error res}))))) + (defn neutral-fn [_] []) (defn- ensure-norms [conn norms-folder] @@ -90,10 +101,12 @@ norm-list (read-norm-files! norms-folder)] (log/info "Checking migrations ...") (doseq [{:keys [norm tx-data tx-fn] + :as norm-map :or {tx-data [] tx-fn #'neutral-fn}} norm-list] (log/info "Checking migration" norm) + (validate-norm norm-map) (when-not (norm-installed? db norm) (log/info "Run migration" norm) (->> (d/transact conn {:tx-data (vec (concat [{:tx/norm norm}] From dad5fa3d310c07d5a60b6805484f4f8eb0b4ef39 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Tue, 7 Feb 2023 17:15:03 +0100 Subject: [PATCH 10/21] fixup! feat: schema migration --- doc/schema-migration.md | 2 +- src/datahike/norm/norm.clj | 131 ++++++++++++++++++++++--------- test/datahike/norm/norm_test.clj | 3 + 3 files changed, 98 insertions(+), 38 deletions(-) diff --git a/doc/schema-migration.md b/doc/schema-migration.md index 2c1663b4a..89e5af21c 100644 --- a/doc/schema-migration.md +++ b/doc/schema-migration.md @@ -1,6 +1,6 @@ # Schema Migration -Schema migration with Datahike is the evolution of your current schema into a future schema. When we are speaking of changes to your schema, these should always add new definitions and never change existing definitions. In case you want to change existing data to a new format you will have to create a new schema and transact your old data transformed again. +Schema migration with Datahike is the evolution of your current schema into a future schema. When we are speaking of changes to your schema, these should always add new definitions and never change existing definitions. 
In case you want to change existing data to a new format you will have to create a new schema and transact your existing data transformed again. ## How to migrate diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index 752f63b73..ce0fc242b 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -7,11 +7,15 @@ [clojure.edn :as edn] [clojure.spec.alpha :as s] [taoensso.timbre :as log] - [datahike.api :as d]) + [datahike.api :as d] + [hasch.core :as h] + [hasch.platform :as hp]) (:import [java.security MessageDigest DigestInputStream] [java.io File] - [java.nio.file Files])) + [java.nio.file Files] + [java.util.jar JarFile] + [java.net URL])) (defn- attribute-installed? [conn attr] (some? (d/entity @conn [:db/ident attr]))) @@ -32,51 +36,81 @@ d/q some?)) -(defn- read-norm-files! [norms-folder] +(defmulti ^:private retrieve-file-list + (fn [file-or-resource] + (println "FILEORRESOURCE: " file-or-resource) + (cond + (instance? File file-or-resource) :file + (instance? URL file-or-resource) :resource))) + +(defmethod ^:private retrieve-file-list :file [norms-folder] (let [folder (io/file norms-folder)] (if (.exists folder) (let [migration-files (file-seq folder) xf (comp - (filter #(not (.isDirectory %))) + (filter #(.isFile %)) (filter #(re-find #".edn" (.getPath %))) - (filter #(not= "checksums.edn" (.getName %))) - (map (fn [migration-file] - (-> (.getPath migration-file) - slurp - read-string - (assoc :norm (-> (.getName migration-file) - (string/replace #" " "-") - (string/replace #"\.edn" "") - keyword))))))] - (sort-by :norm (into [] xf migration-files))) + (filter #(not= "checksums.edn" (.getName %))))] + (into [] xf migration-files)) (throw (ex-info (format "Norms folder %s does not exist." norms-folder) {:folder norms-folder}))))) +(defmethod ^:private retrieve-file-list :resource [resource] + (if resource + (let [abs-path (.getPath resource) + migration-files (if (string/starts-with? abs-path "file:") + (let [jar-path (-> abs-path (string/split #"!" 2) first (subs 5)) + jar-file (JarFile. jar-path)] + (->> (enumeration-seq (.entries jar-file)) + (filter #(and (string/starts-with? % abs-path) + (not (string/ends-with? % "/")))))) + (file-seq (io/file abs-path))) + xf (comp + (filter #(.isFile (io/file %))) + (filter #(re-find #".edn" (.getPath %))) + (filter #(not= "checksums.edn" (.getName %))))] + (into [] xf migration-files)) + (throw + (ex-info + "Resource does not exist." + {:resource (str resource)})))) + +(comment + (def norms-folder "test/datahike/norm/resources") + (retrieve-file-list (io/file norms-folder)) + (retrieve-file-list (io/resource "migrations")) + *e + (-> (.getPath (io/resource "migrations")) (string/split #"!" 2) first (subs 5)) + (require '[datahike.norm.norm-test :as t]) + (def conn (t/create-test-db)) + (str (io/resource "migrations"))) + +(defn- read-norm-files! 
[norms-folder] + (let [file-list (retrieve-file-list norms-folder)] + (->> (map (fn [migration-file] + (-> (.getPath migration-file) + slurp + read-string + (assoc :norm (-> (.getName migration-file) + (string/replace #" " "-") + (string/replace #"\.edn" "") + keyword)))) + file-list) + (sort-by :norm)))) + (defn- compute-checksums [norms-folder] - (let [folder (io/file norms-folder)] - (if (.exists folder) - (let [md (MessageDigest/getInstance "SHA-256") - migration-files (file-seq folder) - xf (-> (comp - (filter #(not (.isDirectory %))) - (filter #(re-find #".edn" (.getPath %))) - (filter #(not= "checksums.edn" (.getName %))))) - filenames (sort (into [] xf migration-files))] - (->> (doseq [f filenames] - {(-> (.getName f) - (string/replace #" " "-") - (keyword)) - (->> (Files/readAllBytes (.toPath f)) - (.digest md) - (BigInteger. 1) - (format "%064x"))}) - (into {}))) - (throw - (ex-info - (format "Norms folder %s does not exist." norms-folder) - {:folder norms-folder}))))) + (->> (retrieve-file-list norms-folder) + sort + (reduce (fn [m f] + (assoc m + (-> (.getName f) + (string/replace #" " "-") + (keyword)) + (-> (h/edn-hash (slurp f)) + (hp/hash->str)))) + {}))) (defn- verify-checksums [norms-folder] (let [diff (data/diff (compute-checksums norms-folder) @@ -115,6 +149,16 @@ (log/info "Done")))))) (defn ensure-norms! + "Takes Datahike-connection and optional a folder as string. + Returns nil when successful and throws exception when not. + + Ensure your norms are present on your Datahike database. + All the edn-files in the folder and its subfolders are + considered as migration-files aka norms and will be sorted + and transacted into your database. + All norms that are successfully transacted will have an + attribute that marks them as migrated and they will not + be transacted twice." ([conn] (ensure-norms! conn (io/resource "migrations"))) ([conn norms-folder] @@ -125,8 +169,21 @@ (ensure-norms conn norms-folder)))) (defn update-checksums! + "Optionally takes a folder as string. Defaults to the + folder `resources/migrations`. + + All the edn-files in the folder and its subfolders are + considered as migrations-files aka norms. For each of + these norms a checksum will be computed and written to + the file `checksums.edn`. Each time this fn is run, + the `checksums.edn` will be overwritten with the current + values. + This prevents inadvertent migrations of your database + when used in conjunction with a VCS. A merge-conflict + should be raised when trying to merge a checksums.edn + with stale checksums." ([] - (update-checksums! (io/resource "migrations"))) + (update-checksums! "resources/migrations")) ([^String norms-folder] (-> (compute-checksums norms-folder) (#(spit (io/file (str norms-folder "/" "checksums.edn")) diff --git a/test/datahike/norm/norm_test.clj b/test/datahike/norm/norm_test.clj index b128a437d..fda28d586 100644 --- a/test/datahike/norm/norm_test.clj +++ b/test/datahike/norm/norm_test.clj @@ -129,3 +129,6 @@ :character/name "Lisa Simpson", :character/occupation :student}] (d/pull-many (d/db conn) '[*] lisabart))))) + +(comment + (sut/update-checksums! "test/datahike/norm/resources")) From 4f93213ff8aeae6e19082df4fd8c07bedcf6ee5e Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Thu, 9 Feb 2023 14:48:57 +0100 Subject: [PATCH 11/21] fixup! 
feat: schema migration --- src/datahike/norm/norm.clj | 194 +++++++++++++++++++++++-------------- 1 file changed, 119 insertions(+), 75 deletions(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index ce0fc242b..a20543244 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -14,7 +14,7 @@ [java.security MessageDigest DigestInputStream] [java.io File] [java.nio.file Files] - [java.util.jar JarFile] + [java.util.jar JarFile JarEntry] [java.net URL])) (defn- attribute-installed? [conn attr] @@ -37,86 +37,112 @@ some?)) (defmulti ^:private retrieve-file-list - (fn [file-or-resource] - (println "FILEORRESOURCE: " file-or-resource) - (cond - (instance? File file-or-resource) :file - (instance? URL file-or-resource) :resource))) - -(defmethod ^:private retrieve-file-list :file [norms-folder] - (let [folder (io/file norms-folder)] - (if (.exists folder) - (let [migration-files (file-seq folder) - xf (comp - (filter #(.isFile %)) - (filter #(re-find #".edn" (.getPath %))) - (filter #(not= "checksums.edn" (.getName %))))] - (into [] xf migration-files)) - (throw - (ex-info - (format "Norms folder %s does not exist." norms-folder) - {:folder norms-folder}))))) + (fn [file-or-resource] (type file-or-resource))) -(defmethod ^:private retrieve-file-list :resource [resource] - (if resource - (let [abs-path (.getPath resource) - migration-files (if (string/starts-with? abs-path "file:") - (let [jar-path (-> abs-path (string/split #"!" 2) first (subs 5)) - jar-file (JarFile. jar-path)] - (->> (enumeration-seq (.entries jar-file)) - (filter #(and (string/starts-with? % abs-path) - (not (string/ends-with? % "/")))))) - (file-seq (io/file abs-path))) +(defmethod ^:private retrieve-file-list File [file] + (if (.exists file) + (let [migration-files (file-seq file) xf (comp - (filter #(.isFile (io/file %))) - (filter #(re-find #".edn" (.getPath %))) - (filter #(not= "checksums.edn" (.getName %))))] + (filter #(.isFile %)) + (filter #(string/ends-with? (.getPath %) ".edn")) + (filter #(not= "checksums.edn" (.getName %))))] (into [] xf migration-files)) + (throw + (ex-info + (format "Norms folder %s does not exist." file) + {:folder file})))) + +(defmethod ^:private retrieve-file-list URL [resource] + (if resource + (let [abs-path (.getPath resource)] + (if (string/starts-with? abs-path "file:") + (let [jar-path (-> abs-path (string/split #"!" 2) first (subs 5)) + jar-file (JarFile. jar-path)] + (->> (enumeration-seq (.entries jar-file)) + (filter #(and #_(string/starts-with? % abs-path) + #_(not (string/ends-with? % "/")) + (not (.isDirectory %)) + (string/ends-with? % ".edn") + (not= "checksums.edn" (.getName %)))))) + (->> (file-seq (io/file abs-path)) + (filter #(and (.isFile (io/file %)) + (not (.isDirectory %)) + (string/ends-with? % ".edn") + (not= "checksums.edn" (.getName %))))))) (throw (ex-info "Resource does not exist." {:resource (str resource)})))) +(defmethod ^:private retrieve-file-list :default [arg] + (throw (ex-info "Can only read a File or a URL (resource)" {:arg arg + :type (type arg)}))) + (comment - (def norms-folder "test/datahike/norm/resources") (retrieve-file-list (io/file norms-folder)) - (retrieve-file-list (io/resource "migrations")) - *e - (-> (.getPath (io/resource "migrations")) (string/split #"!" 2) first (subs 5)) - (require '[datahike.norm.norm-test :as t]) - (def conn (t/create-test-db)) - (str (io/resource "migrations"))) - -(defn- read-norm-files! 
[norms-folder] - (let [file-list (retrieve-file-list norms-folder)] - (->> (map (fn [migration-file] - (-> (.getPath migration-file) - slurp - read-string - (assoc :norm (-> (.getName migration-file) - (string/replace #" " "-") - (string/replace #"\.edn" "") - keyword)))) - file-list) - (sort-by :norm)))) - -(defn- compute-checksums [norms-folder] - (->> (retrieve-file-list norms-folder) - sort - (reduce (fn [m f] + (def jar-path (-> (io/resource "migrations") (string/split #"!" 2) first (subs 5))) + (def jar-file (JarFile. jar-path))) + +(defn filename->keyword [filename] + (-> filename + (string/replace #" " "-") + (keyword))) + +(defmulti ^:private read-edn-file + (fn [f & _args] (type f))) + +(defmethod read-edn-file File [f] + [(-> (slurp f) + edn/read-string) + {:name (.getName f) + :norm (filename->keyword (.getName f))}]) + +(defmethod read-edn-file JarEntry [entry jar-file] + [(-> (.getInputStream jar-file entry) + slurp + edn/read-string) + {:name (.getName entry) + :norm (filename->keyword (.getName entry))}]) + +(defn- read-norm-files-with-meta [norm-files] + (->> norm-files + (map (fn [f] + (let [[content metadata] (read-edn-file f)] + (merge content metadata)))) + (sort-by :norm))) + +(defn- read-norm-files [norm-files] + (->> norm-files + (map #(first (read-edn-file %))) + (sort-by :norm))) + +(defn- compute-checksums [norm-files] + (->> norm-files + (reduce (fn [m {:keys [norm] :as content}] (assoc m - (-> (.getName f) - (string/replace #" " "-") - (keyword)) - (-> (h/edn-hash (slurp f)) + norm + (-> (h/edn-hash content) (hp/hash->str)))) {}))) -(defn- verify-checksums [norms-folder] - (let [diff (data/diff (compute-checksums norms-folder) - (edn/read-string (slurp (str norms-folder "/checksums.edn"))))] +(defn- verify-checksums [checksums checksums-edn] + (let [diff (data/diff checksums + (read-edn-file checksums-edn))] (when-not (every? nil? (butlast diff)) - diff))) + (throw + (ex-info "Deviation of the checksums found. Migration aborted." + {:diff diff}))))) + +(comment + (def norms-folder "test/datahike/norm/resources") + (read-norm-files! (retrieve-file-list (io/file norms-folder))) + (verify-checksums (-> (io/file norms-folder) + retrieve-file-list + read-norm-files-with-meta + compute-checksums) + (-> (io/file (io/file norms-folder) "checksums.edn") + read-edn-file + first))) (s/def ::tx-data vector?) (s/def ::tx-fn symbol?) @@ -130,9 +156,15 @@ (defn neutral-fn [_] []) -(defn- ensure-norms [conn norms-folder] +(defmulti ensure-norms + (fn [_conn file-or-resource] (type file-or-resource))) + +(defmethod ensure-norms File [conn file] + (verify-checksums file + (io/file (io/file file) "checksums.edn")) (let [db (ensure-norm-attribute! conn) - norm-list (read-norm-files! norms-folder)] + norm-list (-> (retrieve-file-list (io/file file)) + read-norm-files!)] (log/info "Checking migrations ...") (doseq [{:keys [norm tx-data tx-fn] :as norm-map @@ -161,12 +193,8 @@ be transacted twice." ([conn] (ensure-norms! conn (io/resource "migrations"))) - ([conn norms-folder] - (if-let [diff (verify-checksums norms-folder)] - (throw - (ex-info "Deviation of the checksums found. Migration aborted." - {:diff diff})) - (ensure-norms conn norms-folder)))) + ([conn file-or-resource] + (ensure-norms conn file-or-resource))) (defn update-checksums! "Optionally takes a folder as string. Defaults to the @@ -185,6 +213,22 @@ ([] (update-checksums! "resources/migrations")) ([^String norms-folder] - (-> (compute-checksums norms-folder) + (-> (io/file norms-folder) + read-norm-files! 
+ compute-checksums (#(spit (io/file (str norms-folder "/" "checksums.edn")) (with-out-str (pp/pprint %))))))) + +(comment + (def norms-folder "test/datahike/norm/resources") + (read-norm-files! (io/file norms-folder)) + (read-norm-files! (io/resource "migrations")) + (read-norm-files! (io/resource "foo")) + (-> (io/resource "migrations") + (read-norm-files!) + (compute-checksums)) + (-> (io/file norms-folder) + (read-norm-files!) + (compute-checksums)) + (verify-checksums norms-folder) + (update-checksums! norms-folder)) From 70ea52534d7b6344fad59b6c9c71e35843540f6f Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 10 Feb 2023 14:36:06 +0100 Subject: [PATCH 12/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 153 ++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 70 deletions(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index a20543244..076584cf8 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -11,12 +11,12 @@ [hasch.core :as h] [hasch.platform :as hp]) (:import - [java.security MessageDigest DigestInputStream] [java.io File] - [java.nio.file Files] [java.util.jar JarFile JarEntry] [java.net URL])) +(def checksums-file "checksums.edn") + (defn- attribute-installed? [conn attr] (some? (d/entity @conn [:db/ident attr]))) @@ -36,6 +36,13 @@ d/q some?)) +(defn- get-jar [resource] + (-> (.getPath resource) + (string/split #"!" 2) + first + (subs 5) + JarFile.)) + (defmulti ^:private retrieve-file-list (fn [file-or-resource] (type file-or-resource))) @@ -44,8 +51,7 @@ (let [migration-files (file-seq file) xf (comp (filter #(.isFile %)) - (filter #(string/ends-with? (.getPath %) ".edn")) - (filter #(not= "checksums.edn" (.getName %))))] + (filter #(string/ends-with? (.getPath %) ".edn")))] (into [] xf migration-files)) (throw (ex-info @@ -54,21 +60,17 @@ (defmethod ^:private retrieve-file-list URL [resource] (if resource - (let [abs-path (.getPath resource)] + (let [abs-path (.getPath resource) + last-path-segment (-> abs-path (string/split #"/") peek)] (if (string/starts-with? abs-path "file:") - (let [jar-path (-> abs-path (string/split #"!" 2) first (subs 5)) - jar-file (JarFile. jar-path)] - (->> (enumeration-seq (.entries jar-file)) - (filter #(and #_(string/starts-with? % abs-path) - #_(not (string/ends-with? % "/")) - (not (.isDirectory %)) - (string/ends-with? % ".edn") - (not= "checksums.edn" (.getName %)))))) - (->> (file-seq (io/file abs-path)) - (filter #(and (.isFile (io/file %)) + (->> (get-jar resource) + .entries + enumeration-seq + (filter #(and (string/starts-with? (.getName %) last-path-segment) (not (.isDirectory %)) - (string/ends-with? % ".edn") - (not= "checksums.edn" (.getName %))))))) + (string/ends-with? % ".edn")))) + (->> (file-seq (io/file abs-path)) + (filter #(not (.isDirectory %)))))) (throw (ex-info "Resource does not exist." @@ -78,10 +80,10 @@ (throw (ex-info "Can only read a File or a URL (resource)" {:arg arg :type (type arg)}))) -(comment - (retrieve-file-list (io/file norms-folder)) - (def jar-path (-> (io/resource "migrations") (string/split #"!" 2) first (subs 5))) - (def jar-file (JarFile. jar-path))) +(defn- filter-file-list [file-list] + (filter #(and (string/ends-with? % ".edn") + (not (string/ends-with? 
(.getName %) checksums-file))) + file-list)) (defn filename->keyword [filename] (-> filename @@ -89,61 +91,54 @@ (keyword))) (defmulti ^:private read-edn-file - (fn [f & _args] (type f))) + (fn [file-or-entry _file-or-resource] (type file-or-entry))) -(defmethod read-edn-file File [f] +(defmethod read-edn-file File [f _file-or-resource] [(-> (slurp f) edn/read-string) {:name (.getName f) :norm (filename->keyword (.getName f))}]) -(defmethod read-edn-file JarEntry [entry jar-file] - [(-> (.getInputStream jar-file entry) - slurp - edn/read-string) - {:name (.getName entry) - :norm (filename->keyword (.getName entry))}]) - -(defn- read-norm-files-with-meta [norm-files] - (->> norm-files +(defmethod read-edn-file JarEntry [entry file-or-resource] + (let [file-name (-> (.getName entry) + (string/split #"/") + peek)] + [(-> (get-jar file-or-resource) + (.getInputStream entry) + slurp + edn/read-string) + {:name file-name + :norm (filename->keyword file-name)}])) + +(defmethod read-edn-file :default [t _] + (throw (ex-info "Can not handle argument" {:type (type t) + :arg t}))) + +(defn- read-norm-files [norm-list file-or-resource] + (->> norm-list (map (fn [f] - (let [[content metadata] (read-edn-file f)] + (let [[content metadata] (read-edn-file f file-or-resource)] (merge content metadata)))) (sort-by :norm))) -(defn- read-norm-files [norm-files] - (->> norm-files - (map #(first (read-edn-file %))) - (sort-by :norm))) - (defn- compute-checksums [norm-files] (->> norm-files (reduce (fn [m {:keys [norm] :as content}] - (assoc m - norm - (-> (h/edn-hash content) - (hp/hash->str)))) + (assoc m + norm + (-> (select-keys content [:tx-data :tx-fn]) + h/edn-hash + hp/hash->str))) {}))) -(defn- verify-checksums [checksums checksums-edn] - (let [diff (data/diff checksums - (read-edn-file checksums-edn))] +(defn- verify-checksums [checksums checksums-edn file-or-resource] + (let [edn-content (-> (read-edn-file checksums-edn file-or-resource) first) + diff (data/diff checksums edn-content)] (when-not (every? nil? (butlast diff)) (throw (ex-info "Deviation of the checksums found. Migration aborted." {:diff diff}))))) -(comment - (def norms-folder "test/datahike/norm/resources") - (read-norm-files! (retrieve-file-list (io/file norms-folder))) - (verify-checksums (-> (io/file norms-folder) - retrieve-file-list - read-norm-files-with-meta - compute-checksums) - (-> (io/file (io/file norms-folder) "checksums.edn") - read-edn-file - first))) - (s/def ::tx-data vector?) (s/def ::tx-fn symbol?) (s/def ::norm-map (s/keys :opt-un [::tx-data ::tx-fn])) @@ -156,15 +151,8 @@ (defn neutral-fn [_] []) -(defmulti ensure-norms - (fn [_conn file-or-resource] (type file-or-resource))) - -(defmethod ensure-norms File [conn file] - (verify-checksums file - (io/file (io/file file) "checksums.edn")) - (let [db (ensure-norm-attribute! conn) - norm-list (-> (retrieve-file-list (io/file file)) - read-norm-files!)] +(defn- transact-norms [conn norm-list] + (let [db (ensure-norm-attribute! 
conn)] (log/info "Checking migrations ...") (doseq [{:keys [norm tx-data tx-fn] :as norm-map @@ -180,6 +168,29 @@ ((eval tx-fn) conn)))}) (log/info "Done")))))) +(defmulti ensure-norms + (fn [_conn file-or-resource] (type file-or-resource))) + +(defmethod ensure-norms File [conn file] + (let [norm-list (-> (retrieve-file-list file) + filter-file-list + (read-norm-files file))] + (verify-checksums (compute-checksums norm-list) + (io/file (io/file file) checksums-file) + file) + (transact-norms conn norm-list))) + +(defmethod ensure-norms URL [conn resource] + (let [file-list (retrieve-file-list resource) + norm-list (-> (filter-file-list file-list) + (read-norm-files resource))] + (verify-checksums (compute-checksums norm-list) + (->> file-list + (filter #(-> (.getName %) (string/ends-with? checksums-file))) + first) + resource) + (transact-norms conn norm-list))) + (defn ensure-norms! "Takes Datahike-connection and optional a folder as string. Returns nil when successful and throws exception when not. @@ -213,11 +224,13 @@ ([] (update-checksums! "resources/migrations")) ([^String norms-folder] - (-> (io/file norms-folder) - read-norm-files! - compute-checksums - (#(spit (io/file (str norms-folder "/" "checksums.edn")) - (with-out-str (pp/pprint %))))))) + (let [file (io/file norms-folder)] + (-> (retrieve-file-list file) + filter-file-list + (read-norm-files file) + compute-checksums + (#(spit (io/file (io/file norms-folder checksums-file)) + (with-out-str (pp/pprint %)))))))) (comment (def norms-folder "test/datahike/norm/resources") From b94f984d15da67da2a8277492f3b5de41feda0b2 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 10 Feb 2023 17:14:16 +0100 Subject: [PATCH 13/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 73 +++++++++++-------- test/datahike/norm/norm_test.clj | 46 ++++-------- test/datahike/norm/resources/checksums.edn | 12 --- .../001-a1-example.edn | 0 .../002-a2-example.edn | 0 .../003-tx-fn-test.edn | 0 .../004-tx-data-and-tx-fn-test.edn | 0 .../01-transact-basic-characters.edn | 0 .../02 add occupation.edn | 0 .../naming-and-sorting-test/checksums.edn | 12 +++ .../resources/simple-test/001-a1-example.edn | 5 ++ .../resources/simple-test/002-a2-example.edn | 5 ++ .../norm/resources/simple-test/checksums.edn | 4 + .../first/001-a1-example.edn | 5 ++ .../first/002-a2-example.edn | 5 ++ .../first/003-tx-fn-test.edn | 1 + .../first/checksums.edn | 6 ++ .../second/004-tx-data-and-tx-fn-test.edn | 5 ++ .../second/checksums.edn | 2 + .../tx-fn-test/first/001-a1-example.edn | 5 ++ .../tx-fn-test/first/002-a2-example.edn | 5 ++ .../resources/tx-fn-test/first/checksums.edn | 4 + .../tx-fn-test/second/003-tx-fn-test.edn | 1 + .../resources/tx-fn-test/second/checksums.edn | 2 + 24 files changed, 122 insertions(+), 76 deletions(-) delete mode 100644 test/datahike/norm/resources/checksums.edn rename test/datahike/norm/resources/{ => naming-and-sorting-test}/001-a1-example.edn (100%) rename test/datahike/norm/resources/{ => naming-and-sorting-test}/002-a2-example.edn (100%) rename test/datahike/norm/resources/{ => naming-and-sorting-test}/003-tx-fn-test.edn (100%) rename test/datahike/norm/resources/{ => naming-and-sorting-test}/004-tx-data-and-tx-fn-test.edn (100%) rename test/datahike/norm/resources/{ => naming-and-sorting-test}/01-transact-basic-characters.edn (100%) rename test/datahike/norm/resources/{ => naming-and-sorting-test}/02 add occupation.edn (100%) create mode 100644 test/datahike/norm/resources/naming-and-sorting-test/checksums.edn create mode 
100644 test/datahike/norm/resources/simple-test/001-a1-example.edn create mode 100644 test/datahike/norm/resources/simple-test/002-a2-example.edn create mode 100644 test/datahike/norm/resources/simple-test/checksums.edn create mode 100644 test/datahike/norm/resources/tx-data-and-tx-fn-test/first/001-a1-example.edn create mode 100644 test/datahike/norm/resources/tx-data-and-tx-fn-test/first/002-a2-example.edn create mode 100644 test/datahike/norm/resources/tx-data-and-tx-fn-test/first/003-tx-fn-test.edn create mode 100644 test/datahike/norm/resources/tx-data-and-tx-fn-test/first/checksums.edn create mode 100644 test/datahike/norm/resources/tx-data-and-tx-fn-test/second/004-tx-data-and-tx-fn-test.edn create mode 100644 test/datahike/norm/resources/tx-data-and-tx-fn-test/second/checksums.edn create mode 100644 test/datahike/norm/resources/tx-fn-test/first/001-a1-example.edn create mode 100644 test/datahike/norm/resources/tx-fn-test/first/002-a2-example.edn create mode 100644 test/datahike/norm/resources/tx-fn-test/first/checksums.edn create mode 100644 test/datahike/norm/resources/tx-fn-test/second/003-tx-fn-test.edn create mode 100644 test/datahike/norm/resources/tx-fn-test/second/checksums.edn diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index 076584cf8..495e441cc 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -93,24 +93,28 @@ (defmulti ^:private read-edn-file (fn [file-or-entry _file-or-resource] (type file-or-entry))) -(defmethod read-edn-file File [f _file-or-resource] +(defmethod ^:private read-edn-file File [f _file] + (when (not (.exists f)) + (throw (ex-info "Failed reading file because it does not exist" {:filename (str f)}))) [(-> (slurp f) edn/read-string) {:name (.getName f) :norm (filename->keyword (.getName f))}]) -(defmethod read-edn-file JarEntry [entry file-or-resource] +(defmethod ^:private read-edn-file JarEntry [entry resource] + (when (nil? resource) + (throw (ex-info "Failed reading resource because it does not exist" {:resource (str resource)}))) (let [file-name (-> (.getName entry) (string/split #"/") peek)] - [(-> (get-jar file-or-resource) + [(-> (get-jar resource) (.getInputStream entry) slurp edn/read-string) {:name file-name :norm (filename->keyword file-name)}])) -(defmethod read-edn-file :default [t _] +(defmethod ^:private read-edn-file :default [t _] (throw (ex-info "Can not handle argument" {:type (type t) :arg t}))) @@ -149,7 +153,7 @@ (throw (ex-info "Invalid norm" {:validation-error res}))))) -(defn neutral-fn [_] []) +(defn- neutral-fn [_] []) (defn- transact-norms [conn norm-list] (let [db (ensure-norm-attribute! conn)] @@ -157,21 +161,23 @@ (doseq [{:keys [norm tx-data tx-fn] :as norm-map :or {tx-data [] - tx-fn #'neutral-fn}} + tx-fn 'datahike.norm.norm/neutral-fn}} norm-list] (log/info "Checking migration" norm) (validate-norm norm-map) (when-not (norm-installed? 
db norm) - (log/info "Run migration" norm) + (log/info "Run migration" {:tx-data (vec (concat [{:tx/norm norm}] + tx-data + ((var-get (requiring-resolve (symbol tx-fn))) conn)))}) (->> (d/transact conn {:tx-data (vec (concat [{:tx/norm norm}] tx-data - ((eval tx-fn) conn)))}) + ((var-get (requiring-resolve tx-fn)) conn)))}) (log/info "Done")))))) -(defmulti ensure-norms +(defmulti ^:private ensure-norms (fn [_conn file-or-resource] (type file-or-resource))) -(defmethod ensure-norms File [conn file] +(defmethod ^:private ensure-norms File [conn file] (let [norm-list (-> (retrieve-file-list file) filter-file-list (read-norm-files file))] @@ -180,7 +186,7 @@ file) (transact-norms conn norm-list))) -(defmethod ensure-norms URL [conn resource] +(defmethod ^:private ensure-norms URL [conn resource] (let [file-list (retrieve-file-list resource) norm-list (-> (filter-file-list file-list) (read-norm-files resource))] @@ -192,16 +198,17 @@ (transact-norms conn norm-list))) (defn ensure-norms! - "Takes Datahike-connection and optional a folder as string. + "Takes Datahike-connection and optional a java.io.File object + or java.net.URL to specify the location of your norms. + Defaults to the resource `migrations`. Returns nil when successful and throws exception when not. - Ensure your norms are present on your Datahike database. - All the edn-files in the folder and its subfolders are - considered as migration-files aka norms and will be sorted - and transacted into your database. - All norms that are successfully transacted will have an - attribute that marks them as migrated and they will not - be transacted twice." + Ensures your norms are present on your Datahike database. + All the edn-files in this folder and its subfolders are + considered migration-files aka norms and will be transacted + ordered by their names into your database. All norms that + are successfully transacted will have an attribute that + marks them as migrated and they will not be applied twice." ([conn] (ensure-norms! conn (io/resource "migrations"))) ([conn file-or-resource] @@ -210,9 +217,10 @@ (defn update-checksums! "Optionally takes a folder as string. Defaults to the folder `resources/migrations`. + Returns nil when successful and throws exception when not. All the edn-files in the folder and its subfolders are - considered as migrations-files aka norms. For each of + considered migration-files aka norms. For each of these norms a checksum will be computed and written to the file `checksums.edn`. Each time this fn is run, the `checksums.edn` will be overwritten with the current @@ -234,14 +242,17 @@ (comment (def norms-folder "test/datahike/norm/resources") - (read-norm-files! (io/file norms-folder)) - (read-norm-files! (io/resource "migrations")) - (read-norm-files! (io/resource "foo")) - (-> (io/resource "migrations") - (read-norm-files!) - (compute-checksums)) - (-> (io/file norms-folder) - (read-norm-files!) - (compute-checksums)) - (verify-checksums norms-folder) - (update-checksums! norms-folder)) + (def config {:store {:backend :mem + :id "bar"}}) + (def conn (do + (d/delete-database config) + (d/create-database config) + (d/connect config))) + (ensure-norms! conn (io/file norms-folder)) + (ensure-norms! conn (io/resource "migrations")) + (update-checksums! norms-folder) + (update-checksums! "resources/migrations") + (def my-fn 'datahike.norm.norm-test/tx-fn-test-fn) + (#'datahike.norm.norm-test/tx-fn-test-fn conn) + (def myfn (var-get (requiring-resolve my-fn))) + (qualified-symbol? 
(symbol conn))) diff --git a/test/datahike/norm/norm_test.clj b/test/datahike/norm/norm_test.clj index fda28d586..15fb55517 100644 --- a/test/datahike/norm/norm_test.clj +++ b/test/datahike/norm/norm_test.clj @@ -1,24 +1,16 @@ (ns datahike.norm.norm-test (:require [clojure.test :refer [deftest is]] [clojure.string :as string] + [clojure.java.io :as io] [datahike.api :as d] - [datahike.norm.norm :as sut])) - -(defn create-test-db [] - (let [id (apply str - (for [_i (range 8)] - (char (+ (rand 26) 65))))] - (d/create-database {:store {:backend :mem - :id id}}) - (d/connect {:store {:backend :mem - :id id}}))) + [datahike.norm.norm :as sut] + [datahike.test.utils :as tu])) (def ensure-norms #'sut/ensure-norms) (deftest simple-test - (let [conn (create-test-db) - _ (ensure-norms conn "test/datahike/norm/resources/001-a1-example.edn") - _ (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") + (let [conn (tu/setup-db {} true) + _ (ensure-norms conn (io/file "test/datahike/norm/resources/simple-test")) schema (d/schema (d/db conn))] (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "Place of occupation", :ident :character/place-of-occupation} (-> (schema :character/place-of-occupation) @@ -30,12 +22,6 @@ (-> (schema :tx/norm) (dissoc :db/id)))))) -(comment - (def conn (create-test-db)) - (ensure-norms conn "test/datahike/norm/resources/001-a1-example.edn") - (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") - (def schema (d/schema (d/db conn)))) - (defn tx-fn-test-fn [conn] (-> (for [[eid value] (d/q '[:find ?e ?v :where @@ -46,12 +32,11 @@ vec)) (deftest tx-fn-test - (let [conn (create-test-db) - _ (ensure-norms conn "test/datahike/norm/resources/001-a1-example.edn") - _ (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") + (let [conn (tu/setup-db {} true) + _ (ensure-norms conn (io/file "test/datahike/norm/resources/tx-fn-test/first")) _ (d/transact conn {:tx-data [{:character/place-of-occupation "SPRINGFIELD ELEMENTARY SCHOOL"} {:character/place-of-occupation "SPRINGFIELD NUCLEAR POWER PLANT"}]}) - _ (ensure-norms conn "test/datahike/norm/resources/003-tx-fn-test.edn")] + _ (ensure-norms conn (io/file "test/datahike/norm/resources/tx-fn-test/second"))] (is (= #{["springfield elementary school"] ["springfield nuclear power plant"]} (d/q '[:find ?v :where @@ -74,16 +59,14 @@ vec)) (deftest tx-data-and-tx-fn-test - (let [conn (create-test-db) - _ (ensure-norms conn "test/datahike/norm/resources/001-a1-example.edn") - _ (ensure-norms conn "test/datahike/norm/resources/002-a2-example.edn") - _ (ensure-norms conn "test/datahike/norm/resources/003-tx-fn-test.edn") + (let [conn (tu/setup-db {} true) + _ (ensure-norms conn (io/file "test/datahike/norm/resources/tx-data-and-tx-fn-test/first")) _ (d/transact conn {:tx-data [{:character/name "Homer Simpson"} {:character/name "Marge Simpson"} {:character/name "Bart Simpson"} {:character/name "Lisa Simpson"} {:character/name "Maggie Simpson"}]}) - _ (ensure-norms conn "test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn") + _ (ensure-norms conn (io/file "test/datahike/norm/resources/tx-data-and-tx-fn-test/second")) margehomer (d/q '[:find [?e ...] :where [?e :character/name] @@ -116,8 +99,8 @@ vec)) (deftest naming-and-sorting-test - (let [conn (create-test-db) - _ (sut/ensure-norms! conn "test/datahike/norm/resources") + (let [conn (tu/setup-db {} true) + _ (sut/ensure-norms! 
conn (io/file "test/datahike/norm/resources/naming-and-sorting-test")) lisabart (d/q '[:find [?e ...] :where [?e :character/occupation :student]] @@ -129,6 +112,3 @@ :character/name "Lisa Simpson", :character/occupation :student}] (d/pull-many (d/db conn) '[*] lisabart))))) - -(comment - (sut/update-checksums! "test/datahike/norm/resources")) diff --git a/test/datahike/norm/resources/checksums.edn b/test/datahike/norm/resources/checksums.edn deleted file mode 100644 index a6d7fffa0..000000000 --- a/test/datahike/norm/resources/checksums.edn +++ /dev/null @@ -1,12 +0,0 @@ -{:001-a1-example.edn - "a7407fffb9c1ea423164add02c0c6b951d810068fa196b062f64f986a54d7933", - :002-a2-example.edn - "9454f765f71069519a2514636873f562266d1e0569f8f91cffe02ae6ddf38763", - :003-tx-fn-test.edn - "ac377554bbc6cd2ee0b0d8a91b29debf1c84554dc1713fb8fc793bca0f881bed", - :004-tx-data-and-tx-fn-test.edn - "7c6cb7f9de66ca1368ddc6e7cf4e95f87d2561b872b1d5582d0e594859bee25f", - :01-transact-basic-characters.edn - "f234b20fd3fe4744676be80d655f2c066142de3ee925cae6680a175481ab24be", - :02-add-occupation.edn - "d499fc826f36a9820814eb9836f3e00d633159500083cdd161742646f2789194"} diff --git a/test/datahike/norm/resources/001-a1-example.edn b/test/datahike/norm/resources/naming-and-sorting-test/001-a1-example.edn similarity index 100% rename from test/datahike/norm/resources/001-a1-example.edn rename to test/datahike/norm/resources/naming-and-sorting-test/001-a1-example.edn diff --git a/test/datahike/norm/resources/002-a2-example.edn b/test/datahike/norm/resources/naming-and-sorting-test/002-a2-example.edn similarity index 100% rename from test/datahike/norm/resources/002-a2-example.edn rename to test/datahike/norm/resources/naming-and-sorting-test/002-a2-example.edn diff --git a/test/datahike/norm/resources/003-tx-fn-test.edn b/test/datahike/norm/resources/naming-and-sorting-test/003-tx-fn-test.edn similarity index 100% rename from test/datahike/norm/resources/003-tx-fn-test.edn rename to test/datahike/norm/resources/naming-and-sorting-test/003-tx-fn-test.edn diff --git a/test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn b/test/datahike/norm/resources/naming-and-sorting-test/004-tx-data-and-tx-fn-test.edn similarity index 100% rename from test/datahike/norm/resources/004-tx-data-and-tx-fn-test.edn rename to test/datahike/norm/resources/naming-and-sorting-test/004-tx-data-and-tx-fn-test.edn diff --git a/test/datahike/norm/resources/01-transact-basic-characters.edn b/test/datahike/norm/resources/naming-and-sorting-test/01-transact-basic-characters.edn similarity index 100% rename from test/datahike/norm/resources/01-transact-basic-characters.edn rename to test/datahike/norm/resources/naming-and-sorting-test/01-transact-basic-characters.edn diff --git a/test/datahike/norm/resources/02 add occupation.edn b/test/datahike/norm/resources/naming-and-sorting-test/02 add occupation.edn similarity index 100% rename from test/datahike/norm/resources/02 add occupation.edn rename to test/datahike/norm/resources/naming-and-sorting-test/02 add occupation.edn diff --git a/test/datahike/norm/resources/naming-and-sorting-test/checksums.edn b/test/datahike/norm/resources/naming-and-sorting-test/checksums.edn new file mode 100644 index 000000000..9d9b35061 --- /dev/null +++ b/test/datahike/norm/resources/naming-and-sorting-test/checksums.edn @@ -0,0 +1,12 @@ +{:001-a1-example.edn + "b28aa2247b363cb556b4abba3593774b6c960d5958458bcf3ed46d86e355d563ea77c66b1f4a21cb35f232227a8d4747dd7aec519bab036c706670eb5cf4a05c", + :002-a2-example.edn + 
"0a143795833f67d022ec26283fc57dd951fb0aac3e830336f56606d893352c274cbc72682097e60372c5724af619451fac05a0c8dad45f8f5358d734981dfcdc", + :003-tx-fn-test.edn + "221bca617e681443437ddeccee0aaafc9c63aa72f9c84fd84458d9f8ad0be6aa6e3095a716aa2b255374bd8992c0638ec4b038b5f862745cee859022ddb0880d", + :004-tx-data-and-tx-fn-test.edn + "d26413ec992aaf5c7f0703855212906df0311b27be170cc9012242f47a36c72d52e662977dc9f66fd39c11979d09880323ce7e69a3de10fbd760b59bb985b3b4", + :01-transact-basic-characters.edn + "7e826c77fdabbca47a697dab41510418d11a82ece9735b61c61c551d1d4a336f1becc15527b920bc677171e5b9335acf8c4ab35136fa0b66f33e49cfb90f9ff6", + :02-add-occupation.edn + "9d11ec4f9de67561da29a337cee59d68dac01fba3edac0de6241a7671a5a26f976a1302c59b2b0b8743104a859c6e019810fc9742d30829883aabd7e28ec5e3b"} diff --git a/test/datahike/norm/resources/simple-test/001-a1-example.edn b/test/datahike/norm/resources/simple-test/001-a1-example.edn new file mode 100644 index 000000000..53b124a52 --- /dev/null +++ b/test/datahike/norm/resources/simple-test/001-a1-example.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "Place of occupation" + :db/ident :character/place-of-occupation + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}] + :tx-fn datahike.norm.norm/neutral-fn} diff --git a/test/datahike/norm/resources/simple-test/002-a2-example.edn b/test/datahike/norm/resources/simple-test/002-a2-example.edn new file mode 100644 index 000000000..3c5597a60 --- /dev/null +++ b/test/datahike/norm/resources/simple-test/002-a2-example.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "Simpsons character name" + :db/ident :character/name + :db/unique :db.unique/identity + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]} diff --git a/test/datahike/norm/resources/simple-test/checksums.edn b/test/datahike/norm/resources/simple-test/checksums.edn new file mode 100644 index 000000000..e7a067eda --- /dev/null +++ b/test/datahike/norm/resources/simple-test/checksums.edn @@ -0,0 +1,4 @@ +{:001-a1-example.edn + "b28aa2247b363cb556b4abba3593774b6c960d5958458bcf3ed46d86e355d563ea77c66b1f4a21cb35f232227a8d4747dd7aec519bab036c706670eb5cf4a05c", + :002-a2-example.edn + "0a143795833f67d022ec26283fc57dd951fb0aac3e830336f56606d893352c274cbc72682097e60372c5724af619451fac05a0c8dad45f8f5358d734981dfcdc"} diff --git a/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/001-a1-example.edn b/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/001-a1-example.edn new file mode 100644 index 000000000..53b124a52 --- /dev/null +++ b/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/001-a1-example.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "Place of occupation" + :db/ident :character/place-of-occupation + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}] + :tx-fn datahike.norm.norm/neutral-fn} diff --git a/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/002-a2-example.edn b/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/002-a2-example.edn new file mode 100644 index 000000000..3c5597a60 --- /dev/null +++ b/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/002-a2-example.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "Simpsons character name" + :db/ident :character/name + :db/unique :db.unique/identity + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]} diff --git a/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/003-tx-fn-test.edn b/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/003-tx-fn-test.edn new file mode 100644 index 000000000..5f99e0470 
--- /dev/null +++ b/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/003-tx-fn-test.edn @@ -0,0 +1 @@ +{:tx-fn datahike.norm.norm-test/tx-fn-test-fn} diff --git a/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/checksums.edn b/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/checksums.edn new file mode 100644 index 000000000..ec7ef1644 --- /dev/null +++ b/test/datahike/norm/resources/tx-data-and-tx-fn-test/first/checksums.edn @@ -0,0 +1,6 @@ +{:001-a1-example.edn + "b28aa2247b363cb556b4abba3593774b6c960d5958458bcf3ed46d86e355d563ea77c66b1f4a21cb35f232227a8d4747dd7aec519bab036c706670eb5cf4a05c", + :002-a2-example.edn + "0a143795833f67d022ec26283fc57dd951fb0aac3e830336f56606d893352c274cbc72682097e60372c5724af619451fac05a0c8dad45f8f5358d734981dfcdc", + :003-tx-fn-test.edn + "221bca617e681443437ddeccee0aaafc9c63aa72f9c84fd84458d9f8ad0be6aa6e3095a716aa2b255374bd8992c0638ec4b038b5f862745cee859022ddb0880d"} diff --git a/test/datahike/norm/resources/tx-data-and-tx-fn-test/second/004-tx-data-and-tx-fn-test.edn b/test/datahike/norm/resources/tx-data-and-tx-fn-test/second/004-tx-data-and-tx-fn-test.edn new file mode 100644 index 000000000..bd9f4d98e --- /dev/null +++ b/test/datahike/norm/resources/tx-data-and-tx-fn-test/second/004-tx-data-and-tx-fn-test.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "Simpsons children reference" + :db/ident :character/children + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/many}] + :tx-fn datahike.norm.norm-test/tx-data-and-tx-fn-test-fn} diff --git a/test/datahike/norm/resources/tx-data-and-tx-fn-test/second/checksums.edn b/test/datahike/norm/resources/tx-data-and-tx-fn-test/second/checksums.edn new file mode 100644 index 000000000..c7edc0ef2 --- /dev/null +++ b/test/datahike/norm/resources/tx-data-and-tx-fn-test/second/checksums.edn @@ -0,0 +1,2 @@ +{:004-tx-data-and-tx-fn-test.edn + "d26413ec992aaf5c7f0703855212906df0311b27be170cc9012242f47a36c72d52e662977dc9f66fd39c11979d09880323ce7e69a3de10fbd760b59bb985b3b4"} diff --git a/test/datahike/norm/resources/tx-fn-test/first/001-a1-example.edn b/test/datahike/norm/resources/tx-fn-test/first/001-a1-example.edn new file mode 100644 index 000000000..53b124a52 --- /dev/null +++ b/test/datahike/norm/resources/tx-fn-test/first/001-a1-example.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "Place of occupation" + :db/ident :character/place-of-occupation + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}] + :tx-fn datahike.norm.norm/neutral-fn} diff --git a/test/datahike/norm/resources/tx-fn-test/first/002-a2-example.edn b/test/datahike/norm/resources/tx-fn-test/first/002-a2-example.edn new file mode 100644 index 000000000..3c5597a60 --- /dev/null +++ b/test/datahike/norm/resources/tx-fn-test/first/002-a2-example.edn @@ -0,0 +1,5 @@ +{:tx-data [{:db/doc "Simpsons character name" + :db/ident :character/name + :db/unique :db.unique/identity + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]} diff --git a/test/datahike/norm/resources/tx-fn-test/first/checksums.edn b/test/datahike/norm/resources/tx-fn-test/first/checksums.edn new file mode 100644 index 000000000..e7a067eda --- /dev/null +++ b/test/datahike/norm/resources/tx-fn-test/first/checksums.edn @@ -0,0 +1,4 @@ +{:001-a1-example.edn + "b28aa2247b363cb556b4abba3593774b6c960d5958458bcf3ed46d86e355d563ea77c66b1f4a21cb35f232227a8d4747dd7aec519bab036c706670eb5cf4a05c", + :002-a2-example.edn + 
"0a143795833f67d022ec26283fc57dd951fb0aac3e830336f56606d893352c274cbc72682097e60372c5724af619451fac05a0c8dad45f8f5358d734981dfcdc"} diff --git a/test/datahike/norm/resources/tx-fn-test/second/003-tx-fn-test.edn b/test/datahike/norm/resources/tx-fn-test/second/003-tx-fn-test.edn new file mode 100644 index 000000000..5f99e0470 --- /dev/null +++ b/test/datahike/norm/resources/tx-fn-test/second/003-tx-fn-test.edn @@ -0,0 +1 @@ +{:tx-fn datahike.norm.norm-test/tx-fn-test-fn} diff --git a/test/datahike/norm/resources/tx-fn-test/second/checksums.edn b/test/datahike/norm/resources/tx-fn-test/second/checksums.edn new file mode 100644 index 000000000..1ca9a4da1 --- /dev/null +++ b/test/datahike/norm/resources/tx-fn-test/second/checksums.edn @@ -0,0 +1,2 @@ +{:003-tx-fn-test.edn + "221bca617e681443437ddeccee0aaafc9c63aa72f9c84fd84458d9f8ad0be6aa6e3095a716aa2b255374bd8992c0638ec4b038b5f862745cee859022ddb0880d"} From cb794773d031ab21e75978b17558ddd3c4814823 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 10 Feb 2023 17:36:21 +0100 Subject: [PATCH 14/21] fixup! feat: schema migration --- doc/schema-migration.md | 46 ++++++++++++++++++++++++++++++-------- src/datahike/norm/norm.clj | 2 +- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/doc/schema-migration.md b/doc/schema-migration.md index 89e5af21c..87cbc9a86 100644 --- a/doc/schema-migration.md +++ b/doc/schema-migration.md @@ -1,12 +1,31 @@ -# Schema Migration +# Schema Migration or Norms -Schema migration with Datahike is the evolution of your current schema into a future schema. When we are speaking of changes to your schema, these should always add new definitions and never change existing definitions. In case you want to change existing data to a new format you will have to create a new schema and transact your existing data transformed again. +Schema migration with Datahike is the evolution of your current schema into a future schema. +We are calling the schema migrations 'norms' to avoid misunderstandings with a migration +from an older version of Datahike to a newer version. -## How to migrate +When we are speaking of changes to your schema, these should always add new definitions and +never change existing definitions. In case you want to change existing data to a new format +you will have to create a new schema and transact your existing data transformed again. A +good intro to this topic [can be found here](https://docs.datomic.com/cloud/schema/schema-change.html). -First create a folder of your choice, for now let's call it `migrations`. In this folder you create a new file with an edn-extension like `001-my-migration.edn`. Preferably you name the file beginning with a number. Please be aware that the name of your file will be the id of your migration. Taking into account that you might create some more migrations in the future you might want to left-pad the name with zeros to keep a proper sorting. Keep in mind that your migrations are transacted sorted after your chosen ids one after another. +## How to migrate -Second write the transactions itself into your newly created file. The content of the file needs to be an edn-map with one or both of the keys `:tx-data` and `tx-fn`. `:tx-data` is just transaction data in the form of a vector of datoms, `:tx-fn` is a funtion that you can run during the migration to migrate data for example. This function needs to be from a loadable namespace and will be evaled during preparation and needs to return transactions. 
These will be transacted with `:tx-data` together in one transaction. +1. Create a folder of your choice, for now let's call it `migrations`. In this folder you +create a new file with an edn-extension like `001-my-first-norm.edn`. Preferably you name the +file beginning with a number. Please be aware that the name of your file will be the id of +your norm. Taking into account that you might create some more norms in the future +you should left-pad the names with zeros to keep a proper sorting. Keep in mind that your +norms are transacted sorted after your chosen ids one after another. Spaces will be replaced +with dashes to compose the id. + +2. Write the transactions itself into your newly created file. The content of the file needs +to be an edn-map with one or both of the keys `:tx-data` and `tx-fn`. `:tx-data` is just +transaction data in the form of a vector, `:tx-fn` is a funtion that you can run during the +execution to migrate data from one attribute to another for example. This function needs to +be qualified and callable from the classpath. It will be evaluated during the migration and +needs to return transactions. These transactions will be transacted with `:tx-data` together +in one transaction. Example of a migration: ```clojure @@ -14,11 +33,20 @@ Example of a migration: :db/ident :character/place-of-occupation :db/valueType :db.type/string :db/cardinality :db.cardinality/one}] - :tx-fn datahike.norm.norm/neutral-fn} + :tx-fn my-transactions.my-project/my-first-tx-fn} ``` -Third, when you are sufficiently confident that your migration will work you usually will want to store it into some kind of version control system. To avoid conflicts with fellow colleagues we implemented a security net. Run the function `update-checksums!` from the `datahike.norm.norm` namespace to create or update a `checksums.edn` file. This file contains the names and checksums of your migration-files. In case a colleague of yours checked in a migration that you have not been aware of, your VCS should avoid merging the conflicting `checksums.edn` files. +3. When you are sufficiently confident that your norm will work you usually want to store +it into some kind of version control system. To avoid conflicts with fellow colleagues we +implemented a security net. Run the function `update-checksums!` from the `datahike.norm.norm` +namespace to create or update a `checksums.edn` file inside your norms-folder. This file +contains the names and checksums of your migration-files. In case a colleague of yours +checked in a migration that you have not been aware of, your VCS should avoid merging the +conflicting `checksums.edn` file. -Last, run the `datahike.norm.norm/ensure-norms!` function to run your migrations. For each migration that already ran there will be a `:tx/norm` attribute stored with the name of your migration so it doesn't have to run twice. +4. Run the `datahike.norm.norm/ensure-norms!` function to apply your norms. For each norm +that is already applied there will be a `:tx/norm` attribute stored with the id of your +norm so it will not be applied twice. -Be aware that your chosen migration-folder will include all subfolders for migrations. Don't store other things in your migration-folder than your migrations! +Be aware that your chosen norm-folder will include all subfolders for reading the norms. +Don't store other files in your norm-folder besides your norms! 
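For illustration, a `:tx-fn` such as the `my-transactions.my-project/my-first-tx-fn` referenced in the example above could look like the following sketch. The namespace, the query and the attribute names (`:character/place-of-occupation`, `:character/city`) are assumptions for illustration only and are not part of this patch; the only requirements are that the function is resolvable on the classpath, takes the connection as its single argument and returns a vector of transactions.

```clojure
(ns my-transactions.my-project
  (:require [datahike.api :as d]))

(defn my-first-tx-fn
  "Hypothetical tx-fn sketch: copies every :character/place-of-occupation value
  into an assumed new attribute :character/city. Assumes :character/city is
  declared in the same norm's :tx-data."
  [conn]
  (vec
   (for [[eid place] (d/q '[:find ?e ?v
                            :where [?e :character/place-of-occupation ?v]]
                          (d/db conn))]
     [:db/add eid :character/city place])))
```

Returning a plain vector keeps the result directly concatenable with the norm's `:tx-data`, so both end up in a single transaction as described above.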
diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index 495e441cc..39d971e7c 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -206,7 +206,7 @@ Ensures your norms are present on your Datahike database. All the edn-files in this folder and its subfolders are considered migration-files aka norms and will be transacted - ordered by their names into your database. All norms that + sorted by their names into your database. All norms that are successfully transacted will have an attribute that marks them as migrated and they will not be applied twice." ([conn] From adcee26d4eeb99455b0636978756e7bb6df60afd Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 10 Feb 2023 17:46:00 +0100 Subject: [PATCH 15/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index 39d971e7c..99dac6777 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -151,7 +151,7 @@ (log/debug "Norm validated" {:norm-map norm}) (let [res (s/explain-data ::norm-map norm)] (throw - (ex-info "Invalid norm" {:validation-error res}))))) + (ex-info "Invalid norm" {:validation-error res}))))) (defn- neutral-fn [_] []) @@ -233,21 +233,21 @@ (update-checksums! "resources/migrations")) ([^String norms-folder] (let [file (io/file norms-folder)] - (-> (retrieve-file-list file) - filter-file-list - (read-norm-files file) - compute-checksums - (#(spit (io/file (io/file norms-folder checksums-file)) - (with-out-str (pp/pprint %)))))))) + (-> (retrieve-file-list file) + filter-file-list + (read-norm-files file) + compute-checksums + (#(spit (io/file (io/file norms-folder checksums-file)) + (with-out-str (pp/pprint %)))))))) (comment (def norms-folder "test/datahike/norm/resources") (def config {:store {:backend :mem :id "bar"}}) (def conn (do - (d/delete-database config) - (d/create-database config) - (d/connect config))) + (d/delete-database config) + (d/create-database config) + (d/connect config))) (ensure-norms! conn (io/file norms-folder)) (ensure-norms! conn (io/resource "migrations")) (update-checksums! norms-folder) From d7af8988511d6a3acffb7c729bb4f654d160b939 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Tue, 14 Feb 2023 11:21:25 +0100 Subject: [PATCH 16/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index 99dac6777..b083f95c5 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -166,9 +166,7 @@ (log/info "Checking migration" norm) (validate-norm norm-map) (when-not (norm-installed? db norm) - (log/info "Run migration" {:tx-data (vec (concat [{:tx/norm norm}] - tx-data - ((var-get (requiring-resolve (symbol tx-fn))) conn)))}) + (log/info "Running migration") (->> (d/transact conn {:tx-data (vec (concat [{:tx/norm norm}] tx-data ((var-get (requiring-resolve tx-fn)) conn)))}) @@ -206,7 +204,7 @@ Ensures your norms are present on your Datahike database. All the edn-files in this folder and its subfolders are considered migration-files aka norms and will be transacted - sorted by their names into your database. All norms that + ordered by their names into your database. All norms that are successfully transacted will have an attribute that marks them as migrated and they will not be applied twice." 
([conn] @@ -228,7 +226,7 @@ This prevents inadvertent migrations of your database when used in conjunction with a VCS. A merge-conflict should be raised when trying to merge a checksums.edn - with stale checksums." + with stale data." ([] (update-checksums! "resources/migrations")) ([^String norms-folder] @@ -237,22 +235,5 @@ filter-file-list (read-norm-files file) compute-checksums - (#(spit (io/file (io/file norms-folder checksums-file)) + (#(spit (io/file norms-folder checksums-file) (with-out-str (pp/pprint %)))))))) - -(comment - (def norms-folder "test/datahike/norm/resources") - (def config {:store {:backend :mem - :id "bar"}}) - (def conn (do - (d/delete-database config) - (d/create-database config) - (d/connect config))) - (ensure-norms! conn (io/file norms-folder)) - (ensure-norms! conn (io/resource "migrations")) - (update-checksums! norms-folder) - (update-checksums! "resources/migrations") - (def my-fn 'datahike.norm.norm-test/tx-fn-test-fn) - (#'datahike.norm.norm-test/tx-fn-test-fn conn) - (def myfn (var-get (requiring-resolve my-fn))) - (qualified-symbol? (symbol conn))) From 784f46c441ff7e5011a9e08809d864b1b5471d37 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Tue, 14 Feb 2023 11:44:38 +0100 Subject: [PATCH 17/21] fixup! feat: schema migration --- resources/.touch | 0 resources/DATAHIKE_VERSION | 1 + 2 files changed, 1 insertion(+) create mode 100644 resources/.touch create mode 100644 resources/DATAHIKE_VERSION diff --git a/resources/.touch b/resources/.touch new file mode 100644 index 000000000..e69de29bb diff --git a/resources/DATAHIKE_VERSION b/resources/DATAHIKE_VERSION new file mode 100644 index 000000000..b457ec6ad --- /dev/null +++ b/resources/DATAHIKE_VERSION @@ -0,0 +1 @@ +0.6.1547 \ No newline at end of file From 96472a1984c36f0e92c9446a3310ebc07f63036f Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Tue, 14 Feb 2023 15:51:11 +0100 Subject: [PATCH 18/21] fixup! feat: schema migration --- src/datahike/norm/norm.clj | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index b083f95c5..801b64d82 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -7,9 +7,10 @@ [clojure.edn :as edn] [clojure.spec.alpha :as s] [taoensso.timbre :as log] - [datahike.api :as d] [hasch.core :as h] - [hasch.platform :as hp]) + [hasch.platform :as hp] + [datahike.api :as d] + [datahike.tools :as dt]) (:import [java.io File] [java.util.jar JarFile JarEntry] @@ -53,10 +54,7 @@ (filter #(.isFile %)) (filter #(string/ends-with? (.getPath %) ".edn")))] (into [] xf migration-files)) - (throw - (ex-info - (format "Norms folder %s does not exist." file) - {:folder file})))) + (dt/raise (format "Norms folder %s does not exist." (str file)) {:folder file}))) (defmethod ^:private retrieve-file-list URL [resource] (if resource @@ -71,14 +69,10 @@ (string/ends-with? % ".edn")))) (->> (file-seq (io/file abs-path)) (filter #(not (.isDirectory %)))))) - (throw - (ex-info - "Resource does not exist." - {:resource (str resource)})))) + (dt/raise "Resource does not exist." {:resource (str resource)}))) (defmethod ^:private retrieve-file-list :default [arg] - (throw (ex-info "Can only read a File or a URL (resource)" {:arg arg - :type (type arg)}))) + (dt/raise "Can only read a File or a URL (resource)" {:arg arg :type (type arg)})) (defn- filter-file-list [file-list] (filter #(and (string/ends-with? 
% ".edn") @@ -95,7 +89,7 @@ (defmethod ^:private read-edn-file File [f _file] (when (not (.exists f)) - (throw (ex-info "Failed reading file because it does not exist" {:filename (str f)}))) + (dt/raise "Failed reading file because it does not exist" {:filename (str f)})) [(-> (slurp f) edn/read-string) {:name (.getName f) @@ -103,7 +97,7 @@ (defmethod ^:private read-edn-file JarEntry [entry resource] (when (nil? resource) - (throw (ex-info "Failed reading resource because it does not exist" {:resource (str resource)}))) + (dt/raise "Failed reading resource because it does not exist" {:resource (str resource)})) (let [file-name (-> (.getName entry) (string/split #"/") peek)] @@ -115,8 +109,7 @@ :norm (filename->keyword file-name)}])) (defmethod ^:private read-edn-file :default [t _] - (throw (ex-info "Can not handle argument" {:type (type t) - :arg t}))) + (dt/raise "Can not handle argument" {:type (type t) :arg t})) (defn- read-norm-files [norm-list file-or-resource] (->> norm-list @@ -139,9 +132,7 @@ (let [edn-content (-> (read-edn-file checksums-edn file-or-resource) first) diff (data/diff checksums edn-content)] (when-not (every? nil? (butlast diff)) - (throw - (ex-info "Deviation of the checksums found. Migration aborted." - {:diff diff}))))) + (dt/raise "Deviation of the checksums found. Migration aborted." {:diff diff})))) (s/def ::tx-data vector?) (s/def ::tx-fn symbol?) @@ -150,8 +141,7 @@ (if (s/valid? ::norm-map norm) (log/debug "Norm validated" {:norm-map norm}) (let [res (s/explain-data ::norm-map norm)] - (throw - (ex-info "Invalid norm" {:validation-error res}))))) + (dt/raise "Invalid norm" {:validation-error res})))) (defn- neutral-fn [_] []) From e6cc53642ed3d41644a7dd148bdedd94f115116d Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Wed, 15 Feb 2023 12:17:05 +0100 Subject: [PATCH 19/21] fixup! feat: schema migration --- doc/schema-migration.md | 2 +- resources/DATAHIKE_VERSION | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 resources/DATAHIKE_VERSION diff --git a/doc/schema-migration.md b/doc/schema-migration.md index 87cbc9a86..0f415a77f 100644 --- a/doc/schema-migration.md +++ b/doc/schema-migration.md @@ -21,7 +21,7 @@ with dashes to compose the id. 2. Write the transactions itself into your newly created file. The content of the file needs to be an edn-map with one or both of the keys `:tx-data` and `tx-fn`. `:tx-data` is just -transaction data in the form of a vector, `:tx-fn` is a funtion that you can run during the +transaction data in the form of a vector, `:tx-fn` is a function that you can run during the execution to migrate data from one attribute to another for example. This function needs to be qualified and callable from the classpath. It will be evaluated during the migration and needs to return transactions. These transactions will be transacted with `:tx-data` together diff --git a/resources/DATAHIKE_VERSION b/resources/DATAHIKE_VERSION deleted file mode 100644 index b457ec6ad..000000000 --- a/resources/DATAHIKE_VERSION +++ /dev/null @@ -1 +0,0 @@ -0.6.1547 \ No newline at end of file From 14bd3b0ebb4da4bc13e867b5416f73c68701f6d5 Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Fri, 10 Mar 2023 13:34:42 +0100 Subject: [PATCH 20/21] fixup! 
feat: schema migration --- doc/schema-migration.md | 63 ++++++++++++++++++++++++-------- src/datahike/norm/norm.clj | 44 +++++++++++++++------- test/datahike/norm/norm_test.clj | 8 +++- 3 files changed, 85 insertions(+), 30 deletions(-) diff --git a/doc/schema-migration.md b/doc/schema-migration.md index 0f415a77f..29c525a77 100644 --- a/doc/schema-migration.md +++ b/doc/schema-migration.md @@ -1,23 +1,56 @@ -# Schema Migration or Norms +# Schema Migration Schema migration with Datahike is the evolution of your current schema into a future schema. -We are calling the schema migrations 'norms' to avoid misunderstandings with a migration -from an older version of Datahike to a newer version. +## Why use the schema-migration tool? +You could use the `transact`-fn of the api-ns to apply your schema, but with our +`norm`-ns you can define your migrations centrally and they will be applied once and only +once to your database. + +This helps when your production database has limited accessibility from your developer +machines and you want to apply the migrations from a server next to your production code. +In case you are setting up your database from scratch, e.g. for development purposes, you can +rely on your schema being up-to-date with your production environment because you are +keeping your original schema along with your migrations in a central repository. + +## How to migrate a database schema When we are speaking of changes to your schema, these should always add new definitions and never change existing definitions. In case you want to change existing data to a new format you will have to create a new schema and transact your existing data transformed again. A good intro to this topic [can be found here](https://docs.datomic.com/cloud/schema/schema-change.html). +## Transaction-functions +Your transaction functions need to be on your classpath to be called and they need to take +one argument, the connection to your database. Each function needs to return a vector of +transactions so that they can be applied during migration. + +Please be aware that with transaction-functions you will create transactions that need to be +held in memory. Very large migrations might exceed your available memory. + +## Norms? +Like [conformity for Datomic](https://github.com/avescodes/conformity), we are using the term +norm for our tool. You can use it to declare expectations about the state of your database +and enforce those idempotently without repeatedly transacting schema. These expectations +can be in the form of your schema, data in a certain format or pre-transacted data for e.g. +a development database. + +## Migration folder +Preferably create a folder in your project resources called `migrations`. You can, however, +use any folder you like, even outside your resources. If you don't want to package the +migrations into a jar you can just run the migration-functions with a path passed as a string. +In your migration-folder you store your migration-files. Be aware that your chosen +migration-folder will include all subfolders for reading the migrations. Don't store +other files in your migration-folder besides your migrations! + ## How to migrate 1. Create a folder of your choice, for now let's call it `migrations`. In this folder you create a new file with an edn-extension like `001-my-first-norm.edn`. Preferably you name the file beginning with a number. Please be aware that the name of your file will be the id of -your norm. Taking into account that you might create some more norms in the future +your migration.
Taking into account that you might create some more migrations in the future you should left-pad the names with zeros to keep a proper sorting. Keep in mind that your -norms are transacted sorted after your chosen ids one after another. Spaces will be replaced -with dashes to compose the id. +migrations are transacted sorted by your chosen ids one after another. Spaces will be +replaced with dashes to compose the id. 2. Write the transactions itself into your newly created file. The content of the file needs to be an edn-map with one or both of the keys `:tx-data` and `tx-fn`. `:tx-data` is just transaction data in the form of a vector, `:tx-fn` is a function that you can run during the execution to migrate data from one attribute to another for example. This function needs to be qualified and callable from the classpath. It will be evaluated during the migration and needs to return transactions. These transactions will be transacted with `:tx-data` together @@ -36,17 +69,17 @@ Example of a migration: :tx-fn my-transactions.my-project/my-first-tx-fn} ``` -3. When you are sufficiently confident that your norm will work you usually want to store -it into some kind of version control system. To avoid conflicts with fellow colleagues we +3. When you are sufficiently confident that your migrations will work you usually want to store +them in some kind of version control system. To avoid conflicts with colleagues we implemented a security net. Run the function `update-checksums!` from the `datahike.norm.norm` -namespace to create or update a `checksums.edn` file inside your norms-folder. This file +namespace to create or update a `checksums.edn` file inside your migrations-folder. This file contains the names and checksums of your migration-files. In case a colleague of yours checked in a migration that you have not been aware of, your VCS should avoid merging the conflicting `checksums.edn` file. -4. Run the `datahike.norm.norm/ensure-norms!` function to apply your norms. For each norm -that is already applied there will be a `:tx/norm` attribute stored with the id of your -norm so it will not be applied twice. - -Be aware that your chosen norm-folder will include all subfolders for reading the norms. -Don't store other files in your norm-folder besides your norms! +4. To apply your migrations you most likely want to package them into a jar, together +with Datahike and a piece of code that actually runs your migrations, and run that jar on a server. +You should check the correctness of the checksums with `datahike.norm.norm/verify-checksums` +and finally run the `datahike.norm.norm/ensure-norms!` function to apply your migrations. For +each migration that is already applied there will be a `:tx/norm` attribute stored with the +id of your migration so it will not be applied twice. diff --git a/src/datahike/norm/norm.clj b/src/datahike/norm/norm.clj index 801b64d82..2992d4390 100644 --- a/src/datahike/norm/norm.clj +++ b/src/datahike/norm/norm.clj @@ -128,12 +128,6 @@ hp/hash->str))) {}))) -(defn- verify-checksums [checksums checksums-edn file-or-resource] - (let [edn-content (-> (read-edn-file checksums-edn file-or-resource) first) - diff (data/diff checksums edn-content)] - (when-not (every? nil? (butlast diff)) - (dt/raise "Deviation of the checksums found. Migration aborted." {:diff diff})))) - (s/def ::tx-data vector?) (s/def ::tx-fn symbol?) (s/def ::norm-map (s/keys :opt-un [::tx-data ::tx-fn])) @@ -162,6 +156,36 @@ ((var-get (requiring-resolve tx-fn)) conn)))}) (log/info "Done")))))) +(defn- diff-checksums [checksums edn-content] + (let [diff (data/diff checksums edn-content)] + (when-not (every? nil? (butlast diff)) + (dt/raise "Deviation of the checksums found. Migration aborted."
{:diff diff})))) + +(defmulti verify-checksums + (fn [file-or-resource] (type file-or-resource))) + +(defmethod verify-checksums File [file] + (let [norm-list (-> (retrieve-file-list file) + filter-file-list + (read-norm-files file)) + edn-content (-> (io/file (io/file file) checksums-file) + (read-edn-file file) + first)] + (diff-checksums (compute-checksums norm-list) + edn-content))) + +(defmethod verify-checksums URL [resource] + (let [file-list (retrieve-file-list resource) + norm-list (-> (filter-file-list file-list) + (read-norm-files resource)) + edn-content (-> (->> file-list + (filter #(-> (.getName %) (string/ends-with? checksums-file))) + first) + (read-edn-file resource) + first)] + (diff-checksums (compute-checksums norm-list) + edn-content))) + (defmulti ^:private ensure-norms (fn [_conn file-or-resource] (type file-or-resource))) @@ -169,20 +193,12 @@ (let [norm-list (-> (retrieve-file-list file) filter-file-list (read-norm-files file))] - (verify-checksums (compute-checksums norm-list) - (io/file (io/file file) checksums-file) - file) (transact-norms conn norm-list))) (defmethod ^:private ensure-norms URL [conn resource] (let [file-list (retrieve-file-list resource) norm-list (-> (filter-file-list file-list) (read-norm-files resource))] - (verify-checksums (compute-checksums norm-list) - (->> file-list - (filter #(-> (.getName %) (string/ends-with? checksums-file))) - first) - resource) (transact-norms conn norm-list))) (defn ensure-norms! diff --git a/test/datahike/norm/norm_test.clj b/test/datahike/norm/norm_test.clj index 15fb55517..474e9fae3 100644 --- a/test/datahike/norm/norm_test.clj +++ b/test/datahike/norm/norm_test.clj @@ -3,13 +3,14 @@ [clojure.string :as string] [clojure.java.io :as io] [datahike.api :as d] - [datahike.norm.norm :as sut] + [datahike.norm.norm :as sut :refer [verify-checksums]] [datahike.test.utils :as tu])) (def ensure-norms #'sut/ensure-norms) (deftest simple-test (let [conn (tu/setup-db {} true) + _ (verify-checksums (io/file "test/datahike/norm/resources/simple-test")) _ (ensure-norms conn (io/file "test/datahike/norm/resources/simple-test")) schema (d/schema (d/db conn))] (is (= #:db{:valueType :db.type/string, :cardinality :db.cardinality/one, :doc "Place of occupation", :ident :character/place-of-occupation} @@ -33,9 +34,11 @@ (deftest tx-fn-test (let [conn (tu/setup-db {} true) + _ (verify-checksums (io/file "test/datahike/norm/resources/tx-fn-test/first")) _ (ensure-norms conn (io/file "test/datahike/norm/resources/tx-fn-test/first")) _ (d/transact conn {:tx-data [{:character/place-of-occupation "SPRINGFIELD ELEMENTARY SCHOOL"} {:character/place-of-occupation "SPRINGFIELD NUCLEAR POWER PLANT"}]}) + _ (verify-checksums (io/file "test/datahike/norm/resources/tx-fn-test/second")) _ (ensure-norms conn (io/file "test/datahike/norm/resources/tx-fn-test/second"))] (is (= #{["springfield elementary school"] ["springfield nuclear power plant"]} (d/q '[:find ?v @@ -60,12 +63,14 @@ (deftest tx-data-and-tx-fn-test (let [conn (tu/setup-db {} true) + _ (verify-checksums (io/file "test/datahike/norm/resources/tx-data-and-tx-fn-test/first")) _ (ensure-norms conn (io/file "test/datahike/norm/resources/tx-data-and-tx-fn-test/first")) _ (d/transact conn {:tx-data [{:character/name "Homer Simpson"} {:character/name "Marge Simpson"} {:character/name "Bart Simpson"} {:character/name "Lisa Simpson"} {:character/name "Maggie Simpson"}]}) + _ (verify-checksums (io/file "test/datahike/norm/resources/tx-data-and-tx-fn-test/second")) _ (ensure-norms conn (io/file 
"test/datahike/norm/resources/tx-data-and-tx-fn-test/second")) margehomer (d/q '[:find [?e ...] :where @@ -100,6 +105,7 @@ (deftest naming-and-sorting-test (let [conn (tu/setup-db {} true) + _ (verify-checksums (io/file "test/datahike/norm/resources/naming-and-sorting-test")) _ (sut/ensure-norms! conn (io/file "test/datahike/norm/resources/naming-and-sorting-test")) lisabart (d/q '[:find [?e ...] :where From 656aa741ade70a01fa12e0c10bebb6a3eced61ff Mon Sep 17 00:00:00 2001 From: Timo Kramer Date: Wed, 15 Mar 2023 11:33:01 +0100 Subject: [PATCH 21/21] fixup! remove orchestra from prod (#612) --- bb/resources/native-image-tests/run-normtests | 6 ------ doc/schema-migration.md | 2 +- resources/.touch | 0 3 files changed, 1 insertion(+), 7 deletions(-) delete mode 100755 bb/resources/native-image-tests/run-normtests delete mode 100644 resources/.touch diff --git a/bb/resources/native-image-tests/run-normtests b/bb/resources/native-image-tests/run-normtests deleted file mode 100755 index 58ffaabe0..000000000 --- a/bb/resources/native-image-tests/run-normtests +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -set -o errexit -set -o pipefail - -TIMBRE_LEVEL=':fatal' bin/kaocha --focus :norm "$@" diff --git a/doc/schema-migration.md b/doc/schema-migration.md index 29c525a77..7c00b5e42 100644 --- a/doc/schema-migration.md +++ b/doc/schema-migration.md @@ -2,7 +2,7 @@ Schema migration with Datahike is the evolution of your current schema into a future schema. -## Why using the schema-migration tool? +## Why using the schema migration tool? You could use the `transact`-fn of the api-ns to apply your schema, but with our `norm`-ns you can define your migrations centrally and they will be applied once and only once to your database. diff --git a/resources/.touch b/resources/.touch deleted file mode 100644 index e69de29bb..000000000