arrival of data version 2021

ETCBC · Aug 24, 2021 · 8737b2b · 8737b2b
1 parent 035e9ce
commit 8737b2b
Show file tree

Hide file tree

Showing 742 changed files with 36,122,961 additions and 18,169 deletions.
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+select = C,E,F,W,B,B950
+ignore = E203, E501, W503
+builtins = C,E,Eall,Es,F,Fall,Fs,L,N,S,T,TF
diff --git a/docs/features/0_home.md b/docs/features/0_home.md
@@ -1,10 +1,7 @@
-This is *continuous* version **c**.
+This is data version **2021**.
 
-To be updated regularly.
-
-Viewable in [SHEBANQ]({{shebanq}}).
-
-**the weekly updates have not started yet**.
+It will be viewable in [SHEBANQ]({{shebanq}}) shortly
+(currently the newest stable version in SHEBANQ is 2017).
 
 ## Introduction
 This is the key to the meaning of the features of the

diff --git a/docs/mql.md b/docs/mql.md
@@ -17,7 +17,7 @@ The good news is the existence of the other tool,
 This is an **offline** tool based on exactly the same data that powers SHEBANQ.
 The programming researcher can use Text-Fabric as a preprocessing tool for transforming the complex BHSA data into the formats that are suitable to
 R, spreadsheets, or any format of choice.
-Text-Fabric is open source, downloadable from [GitHub]({{tf}}),
+Text-Fabric is open source and can be installed with `pip install text-fabric`,
 and the data is downloadable from [bhsa]({{repo}}).
 
 It can be installed on MacOS, Windows and Linux.

diff --git a/docs/news.md b/docs/news.md
@@ -1,5 +1,10 @@
 # Updates
 
+## 2021-08-24
+
+Data version `2021` has arrived from the ETCBC. According to Constantijn Sikkel, it is the most
+consistent version ever.
+
 ## 2019-01-31
 
 Some features only had values for lexeme nodes: `gloss nametype voc_lex voc_lex_utf8`.
@@ -26,6 +31,8 @@ So I added the values of these features for lexemes to all of their occurrences.
 There is a fixed version, 2017 (imported on 2017-10-06), and a nearly identical version `c`,
 imported at the same time. 
 Version `c` is continuous and will be frequently updated.
+(Spoiler as of 2021-08-24: these updates have only happened a few times.
+We are discontinuing continuous versions. All new versions will be stable versions.)
 
 ## 2017-10-05
 

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -13,8 +13,6 @@ extra:
   repo: https://github.com/ETCBC/bhsa
   tfBase: https://github.com/annotation/text-fabric/blob/master
   tut: https://nbviewer.jupyter.org/github/annotation/tutorials/blob/master/bhsa
-  tf: https://github.com/Dans-labs/text-fabric
-  tfw: https://github.com/Dans-labs/text-fabric/wiki
   tfd: https://annotation.github.io/text-fabric/tf
   lf: https://github.com/Dans-labs/laf-fabric
   shebanq: https://shebanq.ancient-data.org

diff --git a/primer/example10a.ipynb b/primer/example10a.ipynb
@@ -48,9 +48,8 @@
    "outputs": [],
    "source": [
     "from tf.app import use\n",
-    "from tf.core.helpers import project\n",
     "\n",
-    "from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS"
+    "from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS\n"
    ]
   },
   {
@@ -628,9 +627,9 @@
     }
    ],
    "source": [
-    "VERSION = '2017'\n",
+    "VERSION = \"2017\"\n",
     "# A = use('bhsa', hoist=globals(), version=VERSION)\n",
-    "A = use('bhsa:clone', checkout=\"clone\", hoist=globals(), version=VERSION)"
+    "A = use(\"bhsa:clone\", checkout=\"clone\", hoist=globals(), version=VERSION)"
    ]
   },
   {
@@ -814,8 +813,7 @@
     "clause\n",
     "  =: phrases1\n",
     "  <: phraseNTNps\n",
-    "```\n",
-    "  "
+    "```\n"
    ]
   },
   {
@@ -973,7 +971,8 @@
     "        consecutivePhrases.append(phrase)\n",
     "        maxPos = end\n",
     "    return tuple(consecutivePhrases)\n",
-    "    \n",
+    "\n",
+    "\n",
     "results = []\n",
     "\n",
     "objlike = {\"Objc\", \"Cmpl\"}\n",
@@ -983,15 +982,15 @@
     "    ntn = r[1]\n",
     "    obj = r[3] if len(r) == 4 else -1\n",
     "    phrases = getPhrases(cl)\n",
-    "    \n",
+    "\n",
     "    predSeen = False\n",
     "    objSeen = False\n",
-    "    \n",
+    "\n",
     "    good = True\n",
-    "    \n",
+    "\n",
     "    for p in getPhrases(cl):\n",
     "        fn = F.function.v(p)\n",
-    "            \n",
+    "\n",
     "        if not predSeen and not objSeen:\n",
     "            if p == ntn:\n",
     "                predSeen = True\n",
@@ -1024,24 +1023,24 @@
     "                break\n",
     "            else:\n",
     "                continue\n",
-    "                \n",
+    "\n",
     "    if not good:\n",
     "        continue\n",
-    "    \n",
+    "\n",
     "    if not predSeen:\n",
     "        continue\n",
-    "        \n",
+    "\n",
     "    pfn = F.function.v(ntn)\n",
-    "    \n",
+    "\n",
     "    if objSeen:\n",
     "        if pfn == \"PreO\":\n",
     "            continue\n",
     "    else:\n",
     "        if pfn != \"PreO\":\n",
     "            continue\n",
-    "            \n",
+    "\n",
     "    results.append((cl, ntn, obj))\n",
-    "    \n",
+    "\n",
     "print(f\"{len(results)} results\")"
    ]
   },

diff --git a/primer/example10b.ipynb b/primer/example10b.ipynb
@@ -48,9 +48,8 @@
    "outputs": [],
    "source": [
     "from tf.app import use\n",
-    "from tf.core.helpers import project\n",
     "\n",
-    "from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS"
+    "from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS\n"
    ]
   },
   {
@@ -628,9 +627,9 @@
     }
    ],
    "source": [
-    "VERSION = '2017'\n",
+    "VERSION = \"2017\"\n",
     "# A = use('bhsa', hoist=globals(), version=VERSION)\n",
-    "A = use('bhsa:clone', checkout=\"clone\", hoist=globals(), version=VERSION)"
+    "A = use(\"bhsa:clone\", checkout=\"clone\", hoist=globals(), version=VERSION)"
    ]
   },
   {
@@ -708,7 +707,7 @@
     "    word vs=qal lex=NTN[\n",
     "  phrase function=Cmpl\n",
     "    =: word lex=L\n",
-    "\"\"\"  "
+    "\"\"\""
    ]
   },
   {
@@ -1058,7 +1057,7 @@
     "    word vs=qal lex=NTN[\n",
     "  phrase function=Cmpl\n",
     "    =: word lex=L\n",
-    "\"\"\"  "
+    "\"\"\""
    ]
   },
   {
@@ -1169,7 +1168,7 @@
     "    word vs=qal lex=NTN[\n",
     "  <: phrase function=Cmpl\n",
     "    =: word lex=L\n",
-    "\"\"\"  "
+    "\"\"\""
    ]
   },
   {
@@ -1284,7 +1283,7 @@
     "    word lex=NTN[\n",
     "  phrase function=Subj\n",
     "    =: word lex=HJ>\n",
-    "\"\"\"  "
+    "\"\"\""
    ]
   },
   {

diff --git a/primer/example2.ipynb b/primer/example2.ipynb
@@ -48,9 +48,8 @@
    "outputs": [],
    "source": [
     "from tf.app import use\n",
-    "from tf.core.helpers import project\n",
     "\n",
-    "from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS"
+    "from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS\n"
    ]
   },
   {
@@ -628,9 +627,9 @@
     }
    ],
    "source": [
-    "VERSION = '2017'\n",
+    "VERSION = \"2017\"\n",
     "# A = use('bhsa', hoist=globals(), version=VERSION)\n",
-    "A = use('bhsa:clone', checkout=\"clone\", hoist=globals(), version=VERSION)"
+    "A = use(\"bhsa:clone\", checkout=\"clone\", hoist=globals(), version=VERSION)"
    ]
   },
   {
@@ -799,7 +798,7 @@
     "\n",
     "is *not* matched by words 116853 and **116854**, because 116854 is not part of the embedding clause.\n",
     "\n",
-    "The Text-Fabric notion of adjacency is more crude. \n",
+    "The Text-Fabric notion of adjacency is more crude.\n",
     "The reason is that in Text-Fabric, the query does not have to be a tree, where each object has a unique\n",
     "immediate embedder object. There could be several embedder objects in the query, and each of the them may have different\n",
     "gaps, and if we had the concept of relative adjacency, our query language would need a way to express relative to which object\n",
@@ -969,19 +968,19 @@
     "Let's make a mental shift: what *is* the intention of the MQL query?\n",
     "Here is a bit of query-exegesis, in that the query itself is the object of the exegesis.\n",
     "\n",
-    "The MQL query mentions three `[word]` objects, but it puts only the first two of them in `FOCUS`. \n",
+    "The MQL query mentions three `[word]` objects, but it puts only the first two of them in `FOCUS`.\n",
     "\n",
     "1. it is not interested in the actual value of the third one;\n",
     "2. the third `[word]` is constrained by a very loose restriction: it can be anything, except two specific values.\n",
     "\n",
     "These two things point to the intended meaning of the query, namely:\n",
     "\n",
-    "> find a clause with the word `FJM[`, and somewhere after that the word `L`, \n",
+    "> find a clause with the word `FJM[`, and somewhere after that the word `L`,\n",
     "which is not followed by either the word `<JN/` or the word `PNH/`.\n",
     "\n",
     "This differs subtly from what the query actually says:\n",
     "\n",
-    "> find a clause with the word `FJM[`, and somewhere after that the word `L`, \n",
+    "> find a clause with the word `FJM[`, and somewhere after that the word `L`,\n",
     "which is followed by another word that is not `<JN/` and not `PNH/`.\n",
     "\n",
     "The difference is one of *quantification*.\n",
@@ -1013,7 +1012,7 @@
     "\n",
     "```\n",
     "\n",
-    "See \n",
+    "See\n",
     "[Dirk Roorda: Example 2: not exist](https://shebanq.ancient-data.org/hebrew/query?version=2017&id=4467)"
    ]
   },
@@ -1319,15 +1318,15 @@
     "\n",
     "We tried to improve the MQL query by using `NOTEXIST`, but that did not work out.\n",
     "\n",
-    "However, the TF query might include (contrived) cases that the MQL query would rightfully skip. \n",
+    "However, the TF query might include (contrived) cases that the MQL query would rightfully skip.\n",
     "We can verify whether those cases actually exist by running a separate TF query, and it turns out they do not exist.\n",
     "\n",
     "**Lesson**\n",
     "\n",
     "Whenever an exegesis hinges on the results of a query, check and double check.\n",
     "You probably will have to run multiple queries in SHEBANQ and combine the results.\n",
     "This will quickly get very cumbersome.\n",
-    "If that happens, it starts to pay off to use Text-Fabric, where you have more complete power over \n",
+    "If that happens, it starts to pay off to use Text-Fabric, where you have more complete power over\n",
     "the computations and their results."
    ]
   }

diff --git a/primer/example7.ipynb b/primer/example7.ipynb
@@ -48,9 +48,8 @@
    "outputs": [],
    "source": [
     "from tf.app import use\n",
-    "from tf.core.helpers import project\n",
     "\n",
-    "from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS"
+    "from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS\n"
    ]
   },
   {
@@ -628,9 +627,9 @@
     }
    ],
    "source": [
-    "VERSION = '2017'\n",
+    "VERSION = \"2017\"\n",
     "# A = use('bhsa', hoist=globals(), version=VERSION)\n",
-    "A = use('bhsa:clone', checkout=\"clone\", hoist=globals(), version=VERSION)"
+    "A = use(\"bhsa:clone\", checkout=\"clone\", hoist=globals(), version=VERSION)"
    ]
   },
   {
@@ -754,7 +753,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "query1= \"\"\"\n",
+    "query1 = \"\"\"\n",
     "clause\n",
     "  phrase function=PreO|PtcO\n",
     "    word sp=verb vs=qal lex=FJM[\n",
@@ -995,6 +994,9 @@
   }
  ],
  "metadata": {
+  "jupytext": {
+   "encoding": "# -*- coding: utf-8 -*-"
+  },
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",