diff --git a/src/scribe_data/language_data_extraction/Arabic/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Arabic/nouns/query_nouns.sparql index 525ce56da..df2c016da 100644 --- a/src/scribe_data/language_data_extraction/Arabic/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Arabic/nouns/query_nouns.sparql @@ -1,17 +1,35 @@ # tool: scribe-data -# All Arabic (Q13955) nouns. +# All Arabic (Q13955) nouns, their plurals and their dual forms. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?singular + ?plural + ?dual WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q13955 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + wikibase:lemma ?singular . + FILTER(?nounType = ?nounTypes) + + # Optional selection of indefinite plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?pluralForm . + ?pluralForm ontolex:representation ?plural ; + wikibase:grammaticalFeature wd:Q53997857 ; + wikibase:grammaticalFeature wd:Q146786 ; + } . + + # Optional selection of indefinite dual form. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?dualForm . + ?dualForm ontolex:representation ?dual ; + wikibase:grammaticalFeature wd:Q53997857 ; + wikibase:grammaticalFeature wd:Q110022 ; + } . 
} diff --git a/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_1.sparql b/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_1.sparql index 09e174860..4d131dac3 100644 --- a/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_1.sparql +++ b/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_1.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb ?presFPS ?presSPSM ?presSPSF ?presTPSM ?presTPSF ?presSPD ?presTPDM ?presTPDF diff --git a/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_2.sparql b/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_2.sparql index 1bd72006f..afc15b916 100644 --- a/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_2.sparql +++ b/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_2.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb ?pastFPS ?pastSPSM ?pastSPSF ?pastTPSM ?pastTPSF ?pastSPD ?pastTPDM ?pastTPDF diff --git a/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_3.sparql b/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_3.sparql index ceb388041..3415b834b 100644 --- a/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_3.sparql +++ b/src/scribe_data/language_data_extraction/Arabic/verbs/query_verbs_3.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb ?impSPSM ?impSPSF ?impSPD ?impSPPM ?impSPPF diff --git a/src/scribe_data/language_data_extraction/Basque/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Basque/nouns/query_nouns.sparql index f922a2be6..880d4a8df 100644 --- a/src/scribe_data/language_data_extraction/Basque/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Basque/nouns/query_nouns.sparql @@ -1,17 +1,35 @@ # tool: scribe-data -# All Basque (Q8752) nouns. +# All Basque (Q8752) nouns and all implemented singular and plural forms. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?absIndefinite + ?absSingular + ?absPlural WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q8752 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + wikibase:lemma ?absIndefinite . + FILTER(?nounType = ?nounTypes) + + # Optional selection of absolutive singular forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?absSingularForm . + ?absSingularForm ontolex:representation ?absSingular ; + wikibase:grammaticalFeature wd:Q332734 ; + wikibase:grammaticalFeature wd:Q110786 ; + } . + + # Optional selection of absolutive plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?absPluralForm . + ?absPluralForm ontolex:representation ?absPlural ; + wikibase:grammaticalFeature wd:Q332734 ; + wikibase:grammaticalFeature wd:Q146786 ; + } . 
} diff --git a/src/scribe_data/language_data_extraction/Basque/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Basque/verbs/query_verbs.sparql index 2ecf5aaff..744e4e2a3 100644 --- a/src/scribe_data/language_data_extraction/Basque/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Basque/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Bengali/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Bengali/adjectives/query_adjectives.sparql index 5998320f1..63eb8d422 100644 --- a/src/scribe_data/language_data_extraction/Bengali/adjectives/query_adjectives.sparql +++ b/src/scribe_data/language_data_extraction/Bengali/adjectives/query_adjectives.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?adjective WHERE { diff --git a/src/scribe_data/language_data_extraction/Bengali/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Bengali/nouns/query_nouns.sparql index 8de5cca07..34c0ec6bf 100644 --- a/src/scribe_data/language_data_extraction/Bengali/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Bengali/nouns/query_nouns.sparql @@ -1,17 +1,47 @@ # tool: scribe-data -# All Bengali (Q9610) nouns. +# All Bengali (Q9610) nouns and their forms in the various cases. # Enter this query at https://query.wikidata.org/. 
SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nominative + ?genitive + ?accusative + ?locative WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9610 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + FILTER(?nounType = ?nounTypes) + + # Optional selection of nominative form. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?nomForm . + ?nomForm ontolex:representation ?nominative ; + wikibase:grammaticalFeature wd:Q131105 ; + } . + + # Optional selection of genitive form. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?genForm . + ?genForm ontolex:representation ?genitive ; + wikibase:grammaticalFeature wd:Q146233 ; + } . + + # Optional selection of accusative form. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?accForm . + ?accForm ontolex:representation ?accusative ; + wikibase:grammaticalFeature wd:Q146078 ; + } . + + # Optional selection of locative form. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?locForm . + ?locForm ontolex:representation ?locative ; + wikibase:grammaticalFeature wd:Q202142 ; + } . } diff --git a/src/scribe_data/language_data_extraction/Bengali/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Bengali/prepositions/query_prepositions.sparql index a79725219..f06cf1739 100644 --- a/src/scribe_data/language_data_extraction/Bengali/prepositions/query_prepositions.sparql +++ b/src/scribe_data/language_data_extraction/Bengali/prepositions/query_prepositions.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?preposition ?case diff --git a/src/scribe_data/language_data_extraction/Bengali/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Bengali/verbs/query_verbs.sparql index bb2cf0f34..0c8eadd68 100644 --- a/src/scribe_data/language_data_extraction/Bengali/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Bengali/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git "a/src/scribe_data/language_data_extraction/Bokm\303\245l/nouns/query_nouns.sparql" "b/src/scribe_data/language_data_extraction/Bokm\303\245l/nouns/query_nouns.sparql" index df6e96ad4..84d5b6d53 100644 --- "a/src/scribe_data/language_data_extraction/Bokm\303\245l/nouns/query_nouns.sparql" +++ "b/src/scribe_data/language_data_extraction/Bokm\303\245l/nouns/query_nouns.sparql" @@ -1,41 +1,55 @@ # tool: scribe-data -# All Bokmål (Norwegian) (Q9043) nouns, their plural and their gender. +# All Bokmål Norwegian (Q9043) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. # Note that this query is for Bokmål (Q25167) rather than Nynorsk (Q25164). SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?indefSingular + ?defSingular + ?indefPlural + ?defPlural ?gender WHERE { - - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q25167 ; - wikibase:lexicalCategory ?noun . 
- FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?indefSingular . + + FILTER(?nounType = ?nounTypes) - # Optional selection of singular forms. + # Optional selection of definite singular forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; + ?lexeme ontolex:lexicalForm ?defSingularForm . + ?defSingularForm ontolex:representation ?defSingular ; wikibase:grammaticalFeature wd:Q110786 ; + wikibase:grammaticalFeature wd:Q53997851 ; + } . + + # Optional selection of indefinite plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?indefPluralForm . + ?indefPluralForm ontolex:representation ?indefPlural ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q53997857 ; } . - # Optional selection of plural forms. + # Optional selection of definite plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; + ?lexeme ontolex:lexicalForm ?defPluralForm . + ?defPluralForm ontolex:representation ?defPlural ; wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q53997851 ; } . # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git "a/src/scribe_data/language_data_extraction/Bokm\303\245l/verbs/query_verbs.sparql" "b/src/scribe_data/language_data_extraction/Bokm\303\245l/verbs/query_verbs.sparql" index 6d9029560..dc79f0f0e 100644 --- "a/src/scribe_data/language_data_extraction/Bokm\303\245l/verbs/query_verbs.sparql" +++ "b/src/scribe_data/language_data_extraction/Bokm\303\245l/verbs/query_verbs.sparql" @@ -4,7 +4,7 @@ # Note that this query is for Bokmål (Q25167) rather than Nynorsk (Q25164). 
SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive WHERE { diff --git a/src/scribe_data/language_data_extraction/Czech/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Czech/nouns/query_nouns.sparql index ec92d40f1..e662b7f51 100644 --- a/src/scribe_data/language_data_extraction/Czech/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Czech/nouns/query_nouns.sparql @@ -1,41 +1,43 @@ # tool: scribe-data -# All Czeck (Q9056) nouns, their plural, and their gender. +# All Czech (Q9056) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?nomPlural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9056 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType . - # Optional selection of singular forms. + FILTER(?nounType = ?nounTypes) + + # Optional selection of nominative singular forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; + ?lexeme ontolex:lexicalForm ?nomSingularForm . + ?nomSingularForm ontolex:representation ?nomSingular ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q110786 ; } . - # Optional selection of plural forms. + # Optional selection of nominative plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . 
- ?pluralForm ontolex:representation ?plural ; - wikibase:grammaticalFeature wd:Q146786 ; + ?lexeme ontolex:lexicalForm ?nomPluralForm . + ?nomPluralForm ontolex:representation ?nomPlural ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q146786 ; } . # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Czech/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Czech/prepositions/query_prepositions.sparql index 0a333f3fb..f8283cc34 100644 --- a/src/scribe_data/language_data_extraction/Czech/prepositions/query_prepositions.sparql +++ b/src/scribe_data/language_data_extraction/Czech/prepositions/query_prepositions.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?preposition ?case diff --git a/src/scribe_data/language_data_extraction/Czech/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Czech/verbs/query_verbs.sparql index 6b314740f..c750bc93e 100644 --- a/src/scribe_data/language_data_extraction/Czech/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Czech/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive WHERE { diff --git a/src/scribe_data/language_data_extraction/Danish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Danish/nouns/query_nouns.sparql index bd9407210..0dbed5ca4 100644 --- a/src/scribe_data/language_data_extraction/Danish/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Danish/nouns/query_nouns.sparql @@ -1,27 +1,21 @@ # tool: scribe-data -# All Danish (Q9035) nouns, their plural and their gender. +# All Danish (Q9035) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?singular ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9035 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?singular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - } . + FILTER(?nounType = ?nounTypes) # Optional selection of plural forms. OPTIONAL { @@ -33,7 +27,9 @@ WHERE { # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . 
SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Danish/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Danish/verbs/query_verbs.sparql index 6acb8c096..90b15e77d 100644 --- a/src/scribe_data/language_data_extraction/Danish/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Danish/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive WHERE { diff --git a/src/scribe_data/language_data_extraction/English/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/English/nouns/query_nouns.sparql index 4c3a4aa31..ed0e22620 100644 --- a/src/scribe_data/language_data_extraction/English/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/English/nouns/query_nouns.sparql @@ -3,24 +3,18 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?singular ?plural WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q1860 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?singular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - } . + FILTER(?nounType = ?nounTypes) # Optional selection of plural forms. 
OPTIONAL { diff --git a/src/scribe_data/language_data_extraction/English/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/English/verbs/query_verbs.sparql index 6b232849b..80b5efc0e 100644 --- a/src/scribe_data/language_data_extraction/English/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/English/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?presFPS ?presTPS diff --git a/src/scribe_data/language_data_extraction/Esperanto/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Esperanto/nouns/query_nouns.sparql index 1ee0226d2..bf591a997 100644 --- a/src/scribe_data/language_data_extraction/Esperanto/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Esperanto/nouns/query_nouns.sparql @@ -1,33 +1,44 @@ # tool: scribe-data -# All Esperanto (Q143) nouns. +# All Esperanto (Q143) nouns and their plurals for the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?accSingular + ?nomPlural + ?accPlural WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q143 ; wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?nomSingular . + FILTER(?nounType = ?nounTypes) - # Optional selection of singular forms. + # Optional selection of accusative singular forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; + ?lexeme ontolex:lexicalForm ?accSingularForm . 
+ ?accSingularForm ontolex:representation ?accSingular ; + wikibase:grammaticalFeature wd:Q146078 ; wikibase:grammaticalFeature wd:Q110786 ; + } . + + # Optional selection of nominative plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?nomPluralForm . + ?nomPluralForm ontolex:representation ?nomPlural ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q146786 ; } . - # Optional selection of plural forms. + # Optional selection of accusative plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; + ?lexeme ontolex:lexicalForm ?accPluralForm . + ?accPluralForm ontolex:representation ?accPlural ; + wikibase:grammaticalFeature wd:Q146078 ; wikibase:grammaticalFeature wd:Q146786 ; - wikibase:grammaticalFeature wd:Q131105 ; } . } diff --git a/src/scribe_data/language_data_extraction/Esperanto/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Esperanto/verbs/query_verbs.sparql index 8d67eb59d..78626a928 100644 --- a/src/scribe_data/language_data_extraction/Esperanto/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Esperanto/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Estonian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Estonian/nouns/query_nouns.sparql index bc114b75a..cd3f12b1c 100644 --- a/src/scribe_data/language_data_extraction/Estonian/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Estonian/nouns/query_nouns.sparql @@ -3,31 +3,24 @@ # Enter this query at https://query.wikidata.org/. 
SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?singular ?plural WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9072 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?singular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - wikibase:grammaticalFeature wd:Q131105 ; - } . + FILTER(?nounType = ?nounTypes) # Optional selection of plural forms. OPTIONAL { ?lexeme ontolex:lexicalForm ?pluralForm . ?pluralForm ontolex:representation ?plural ; - wikibase:grammaticalFeature wd:Q146786 ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q146786 ; } . } diff --git a/src/scribe_data/language_data_extraction/Estonian/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Estonian/prepositions/query_prepositions.sparql index a736b7337..a5e1130c7 100644 --- a/src/scribe_data/language_data_extraction/Estonian/prepositions/query_prepositions.sparql +++ b/src/scribe_data/language_data_extraction/Estonian/prepositions/query_prepositions.sparql @@ -4,7 +4,7 @@ # Note that this query includes postpositions that are also used in Estonian. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?preposition ?case diff --git a/src/scribe_data/language_data_extraction/Estonian/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Estonian/verbs/query_verbs.sparql index 6e4be4a5f..a49e9fc5a 100644 --- a/src/scribe_data/language_data_extraction/Estonian/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Estonian/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive WHERE { diff --git a/src/scribe_data/language_data_extraction/Finnish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Finnish/nouns/query_nouns.sparql index 1e1c9f3a4..09a5dc8eb 100644 --- a/src/scribe_data/language_data_extraction/Finnish/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Finnish/nouns/query_nouns.sparql @@ -1,31 +1,26 @@ # tool: scribe-data -# All Finnish (Q1412) nouns and their plural. +# All Finnish (Q1412) nouns and their plural for the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?nomPlural WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q1412 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?nomSingular . - # Optional selection of singular forms. 
- OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - } . + FILTER(?nounType = ?nounTypes) - # Optional selection of plural forms. + # Optional selection of nominative plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; + ?lexeme ontolex:lexicalForm ?nomPluralForm . + ?nomPluralForm ontolex:representation ?nomPlural ; + wikibase:grammaticalFeature wd:Q131105 ; wikibase:grammaticalFeature wd:Q146786 ; } . } diff --git a/src/scribe_data/language_data_extraction/Finnish/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Finnish/verbs/query_verbs.sparql index 1e6c72362..89acb303d 100644 --- a/src/scribe_data/language_data_extraction/Finnish/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Finnish/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/French/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/French/nouns/query_nouns.sparql index 8701c0840..4d4d15531 100644 --- a/src/scribe_data/language_data_extraction/French/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/French/nouns/query_nouns.sparql @@ -1,27 +1,21 @@ # tool: scribe-data -# All French (Q150) nouns, their plural and their gender. +# All French (Q150) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?singular ?plural ?gender WHERE { - # Nouns and pronouns. 
- VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q150 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?singular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - } . + FILTER(?nounType = ?nounTypes) # Optional selection of plural forms. OPTIONAL { @@ -33,7 +27,9 @@ WHERE { # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/French/verbs/query_verbs_1.sparql b/src/scribe_data/language_data_extraction/French/verbs/query_verbs_1.sparql index 7d8c5cb86..c07e02850 100644 --- a/src/scribe_data/language_data_extraction/French/verbs/query_verbs_1.sparql +++ b/src/scribe_data/language_data_extraction/French/verbs/query_verbs_1.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?presFPS ?presSPS ?presTPS ?presFPP ?presSPP ?presTPP diff --git a/src/scribe_data/language_data_extraction/French/verbs/query_verbs_2.sparql b/src/scribe_data/language_data_extraction/French/verbs/query_verbs_2.sparql index 92a9a7ef7..ed9132f92 100644 --- a/src/scribe_data/language_data_extraction/French/verbs/query_verbs_2.sparql +++ b/src/scribe_data/language_data_extraction/French/verbs/query_verbs_2.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?impFPS ?impSPS ?impTPS ?impFPP ?impSPP ?impTPP diff --git a/src/scribe_data/language_data_extraction/German/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/German/nouns/query_nouns.sparql index 65d5b61d2..2d79ef4c8 100644 --- a/src/scribe_data/language_data_extraction/German/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/German/nouns/query_nouns.sparql @@ -1,31 +1,35 @@ # tool: scribe-data -# All German (Q188) nouns, their plural and their gender. +# All German (Q188) nouns, their plurals and their genders in the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?nomPlural ?gender + WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; - dct:language wd:Q188 ; - wikibase:lexicalCategory ?noun . + dct:language wd:Q188 ; + wikibase:lexicalCategory ?nounType ; wikibase:lemma ?nomSingular . + FILTER(?nounType = ?nounTypes) - # Optional selection of nominative plural form. + # Optional selection of nominative plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; - wikibase:grammaticalFeature wd:Q146786 ; - wikibase:grammaticalFeature wd:Q131105 ; + ?lexeme ontolex:lexicalForm ?nomPluralForm . + ?nomPluralForm ontolex:representation ?nomPlural ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q131105 ; } . # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . 
} + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { @@ -33,6 +37,3 @@ WHERE { ?nounGender rdfs:label ?gender . } } - -LIMIT 10000 -OFFSET OFFSET_BY diff --git a/src/scribe_data/language_data_extraction/German/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/German/prepositions/query_prepositions.sparql index 882b76bc7..bfcd4bb7d 100644 --- a/src/scribe_data/language_data_extraction/German/prepositions/query_prepositions.sparql +++ b/src/scribe_data/language_data_extraction/German/prepositions/query_prepositions.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?preposition ?case diff --git a/src/scribe_data/language_data_extraction/German/verbs/query_verbs_1.sparql b/src/scribe_data/language_data_extraction/German/verbs/query_verbs_1.sparql index 23858d98f..ab8d569a3 100644 --- a/src/scribe_data/language_data_extraction/German/verbs/query_verbs_1.sparql +++ b/src/scribe_data/language_data_extraction/German/verbs/query_verbs_1.sparql @@ -4,7 +4,7 @@ # Not SELECT DISTINCT as we want to get verbs with both sein and haben as auxiliaries SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?presFPS ?presSPS ?presTPS ?presFPP ?presSPP ?presTPP diff --git a/src/scribe_data/language_data_extraction/German/verbs/query_verbs_2.sparql b/src/scribe_data/language_data_extraction/German/verbs/query_verbs_2.sparql index d54be574a..54dd9dcd6 100644 --- a/src/scribe_data/language_data_extraction/German/verbs/query_verbs_2.sparql +++ b/src/scribe_data/language_data_extraction/German/verbs/query_verbs_2.sparql @@ -4,7 +4,7 @@ # Not SELECT DISTINCT as we want to get verbs with both sein and haben as auxiliaries 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?pastParticiple ?auxiliaryVerb ?pretFPS ?pretSPS ?pretTPS ?pretFPP ?pretSPP ?pretTPP diff --git a/src/scribe_data/language_data_extraction/Greek/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Greek/nouns/query_nouns.sparql index 06577206e..d60c1d521 100644 --- a/src/scribe_data/language_data_extraction/Greek/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Greek/nouns/query_nouns.sparql @@ -1,28 +1,21 @@ # tool: scribe-data -# All Greek (Q36510) nouns, their plural and their gender. +# All Greek (Q36510) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?singular ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q36510; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?singular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q131105 ; - wikibase:grammaticalFeature wd:Q110786 ; - } . + FILTER(?nounType = ?nounTypes) # Optional selection of plural forms. OPTIONAL { @@ -35,7 +28,9 @@ WHERE { # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q48277} + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 + } } . 
SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Greek/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Greek/verbs/query_verbs.sparql index c4bda3e44..758e55a72 100644 --- a/src/scribe_data/language_data_extraction/Greek/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Greek/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Hausa/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hausa/nouns/query_nouns.sparql index 99325db42..070b5b3a8 100644 --- a/src/scribe_data/language_data_extraction/Hausa/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Hausa/nouns/query_nouns.sparql @@ -3,28 +3,41 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?singular + ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q56475 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + wikibase:lemma ?singular . + FILTER(?nounType = ?nounTypes) + # Optional selection of plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?pluralForm . + ?pluralForm ontolex:representation ?plural ; + wikibase:grammaticalFeature wd:Q146786 ; + } . + # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . 
- BIND(lang(?noun) as ?language) - FILTER(?language = "ha") - # FILTER(?language = "ha-arabic") + BIND(lang(?singular) as ?langSingular) + BIND(lang(?plural) as ?langPlural) + FILTER(?langSingular = "ha") + FILTER(!BOUND(?plural) || ?langPlural = "ha") + # FILTER(?langSingular = "ha-arabic") + # FILTER(!BOUND(?plural) || ?langPlural = "ha-arabic") SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". diff --git a/src/scribe_data/language_data_extraction/Hausa/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Hausa/verbs/query_verbs.sparql index 8dfadea81..230ce6c59 100644 --- a/src/scribe_data/language_data_extraction/Hausa/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Hausa/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Hebrew/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hebrew/nouns/query_nouns.sparql index ed75faa89..0848d51d8 100644 --- a/src/scribe_data/language_data_extraction/Hebrew/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Hebrew/nouns/query_nouns.sparql @@ -1,29 +1,41 @@ # tool: scribe-data -# All Hebrew (Q9288) nouns and their gender. +# All Hebrew (Q9288) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?noun + ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9288 ; wikibase:lexicalCategory ?nounType ; wikibase:lemma ?noun .
+ FILTER(?nounType = ?nounTypes) + # Optional selection of plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?pluralForm . + ?pluralForm ontolex:representation ?plural ; + wikibase:grammaticalFeature wd:Q146786 ; + } . + # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . - BIND(lang(?noun) as ?language) - FILTER(?language = "he") + BIND(lang(?noun) as ?langSingular) + BIND(lang(?plural) as ?langPlural) + FILTER(?langSingular = "he") + FILTER(!BOUND(?plural) || ?langPlural = "he") SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". diff --git a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs.sparql index 6ebfb0f97..5c9d39f0b 100644 --- a/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Hebrew/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Hindi/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Hindi/nouns/query_nouns.sparql index 2dffb22dc..63480cf74 100644 --- a/src/scribe_data/language_data_extraction/Hindi/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Hindi/nouns/query_nouns.sparql @@ -1,30 +1,42 @@ # tool: scribe-data -# All Hindustani (Q11051) nouns and their gender. +# All Hindi (from Hindustani Q11051) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. # Note the necessity to filter for "hi" to remove Urdu (ur) words.
SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?singular + ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q11051 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + wikibase:lemma ?singular . + FILTER(?nounType = ?nounTypes) + # Optional selection of plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?pluralForm . + ?pluralForm ontolex:representation ?plural ; + wikibase:grammaticalFeature wd:Q146786 ; + } . + # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . - BIND(lang(?noun) as ?language) - FILTER(?language = "hi") + BIND(lang(?singular) as ?langSingular) + BIND(lang(?plural) as ?langPlural) + FILTER(?langSingular = "hi") + FILTER(!BOUND(?plural) || ?langPlural = "hi") SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". diff --git a/src/scribe_data/language_data_extraction/Hindi/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Hindi/verbs/query_verbs.sparql index a2b4e7fae..8bacdb033 100644 --- a/src/scribe_data/language_data_extraction/Hindi/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Hindi/verbs/query_verbs.sparql @@ -4,7 +4,7 @@ # Note the necessity to filter for "hi" to remove Urdu (ur) words.
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Indonesian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Indonesian/nouns/query_nouns.sparql index f6ecc9a8a..def214274 100644 --- a/src/scribe_data/language_data_extraction/Indonesian/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Indonesian/nouns/query_nouns.sparql @@ -3,15 +3,15 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?noun WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9240 ; wikibase:lexicalCategory ?nounType ; wikibase:lemma ?noun . + FILTER(?nounType = ?nounTypes) } diff --git a/src/scribe_data/language_data_extraction/Indonesian/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Indonesian/verbs/query_verbs.sparql index ddf0e4748..90697227b 100644 --- a/src/scribe_data/language_data_extraction/Indonesian/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Indonesian/verbs/query_verbs.sparql @@ -4,7 +4,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Italian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Italian/nouns/query_nouns.sparql index d6ae77ecc..3e2e0d5b7 100644 --- a/src/scribe_data/language_data_extraction/Italian/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Italian/nouns/query_nouns.sparql @@ -1,27 +1,21 @@ # tool: scribe-data -# All Italian (Q652) nouns, their plural and their gender. +# All Italian (Q652) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?singular ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q652 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?singular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - } . + FILTER(?nounType = ?nounTypes) # Optional selection of plural forms. OPTIONAL { @@ -33,7 +27,9 @@ WHERE { # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . 
SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Italian/verbs/query_verbs_1.sparql b/src/scribe_data/language_data_extraction/Italian/verbs/query_verbs_1.sparql index b6fde3f10..d999f67bf 100644 --- a/src/scribe_data/language_data_extraction/Italian/verbs/query_verbs_1.sparql +++ b/src/scribe_data/language_data_extraction/Italian/verbs/query_verbs_1.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?presFPS ?presSPS ?presTPS ?presFPP ?presSPP ?presTPP diff --git a/src/scribe_data/language_data_extraction/Italian/verbs/query_verbs_2.sparql b/src/scribe_data/language_data_extraction/Italian/verbs/query_verbs_2.sparql index 75f9facfc..37eaf23e0 100644 --- a/src/scribe_data/language_data_extraction/Italian/verbs/query_verbs_2.sparql +++ b/src/scribe_data/language_data_extraction/Italian/verbs/query_verbs_2.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?impFPS ?impSPS ?impTPS ?impFPP ?impSPP ?impTPP diff --git a/src/scribe_data/language_data_extraction/Japanese/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Japanese/adjectives/query_adjectives.sparql index 5011c0dad..e52913bcd 100644 --- a/src/scribe_data/language_data_extraction/Japanese/adjectives/query_adjectives.sparql +++ b/src/scribe_data/language_data_extraction/Japanese/adjectives/query_adjectives.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?adjective WHERE { diff --git a/src/scribe_data/language_data_extraction/Japanese/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Japanese/nouns/query_nouns.sparql index 1a2b3ddb7..cf75aa356 100644 --- a/src/scribe_data/language_data_extraction/Japanese/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Japanese/nouns/query_nouns.sparql @@ -3,12 +3,11 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?noun WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q5287 ; wikibase:lexicalCategory ?nounType ; diff --git a/src/scribe_data/language_data_extraction/Japanese/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Japanese/verbs/query_verbs.sparql index 0813c6ab4..9015e9d18 100644 --- a/src/scribe_data/language_data_extraction/Japanese/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Japanese/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Kurmanji/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Kurmanji/nouns/query_nouns.sparql index 9e5031930..e38a3b050 100644 --- a/src/scribe_data/language_data_extraction/Kurmanji/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Kurmanji/nouns/query_nouns.sparql @@ -3,23 +3,25 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?directDefSingular ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q36163 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + wikibase:lemma ?directDefSingular . + FILTER(?nounType = ?nounTypes) # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Kurmanji/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Kurmanji/verbs/query_verbs.sparql index b121101b1..0a4cdaae2 100644 --- a/src/scribe_data/language_data_extraction/Kurmanji/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Kurmanji/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Latin/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Latin/nouns/query_nouns.sparql index 9897813e1..32ddb77a2 100644 --- a/src/scribe_data/language_data_extraction/Latin/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Latin/nouns/query_nouns.sparql @@ -3,23 +3,25 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q397 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + wikibase:lemma ?nomSingular . + FILTER(?nounType = ?nounTypes) # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Latin/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Latin/verbs/query_verbs.sparql index 92d9f0aa7..cbcd09f66 100644 --- a/src/scribe_data/language_data_extraction/Latin/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Latin/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Malay/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Malay/nouns/query_nouns.sparql index 65ad15cab..2e4408c0a 100644 --- a/src/scribe_data/language_data_extraction/Malay/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Malay/nouns/query_nouns.sparql @@ -3,16 +3,16 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?noun WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9237 ; wikibase:lexicalCategory ?nounType ; wikibase:lemma ?noun . + FILTER(?nounType = ?nounTypes) BIND(lang(?noun) as ?language) diff --git a/src/scribe_data/language_data_extraction/Malay/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Malay/verbs/query_verbs.sparql index c95d7fb26..dc698971e 100644 --- a/src/scribe_data/language_data_extraction/Malay/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Malay/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Malayalam/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Malayalam/nouns/query_nouns.sparql index 83e544f5b..394b3bd77 100644 --- a/src/scribe_data/language_data_extraction/Malayalam/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Malayalam/nouns/query_nouns.sparql @@ -1,45 +1,31 @@ # tool: scribe-data -# All Malayalam (Q36236) nouns. +# All Malayalam (Q36236) nouns and their genders in the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q36236 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . - FILTER(?nounType = ?nounTypes) - - # Values to be filtered out when selecting nominative cases. - # VALUES ?nonNominativeCases { wd:Q146078 wd:Q145599 wd:Q202142 wd:Q192997 wd:Q185077 } + wikibase:lemma ?nomSingular . - # Optional selection of nominative singular forms. - # OPTIONAL { - # ?lexeme ontolex:lexicalForm ?singularForm . - # ?singularForm ontolex:representation ?singular ; - # wikibase:grammaticalFeature wd:Q110786 ; - # } . - - # Optional selection of nominative plural forms. - # OPTIONAL { - # ?lexeme ontolex:lexicalForm ?pluralForm . - # ?pluralForm ontolex:representation ?plural ; - # wikibase:grammaticalFeature wd:Q146786 ; - # } . + FILTER(?nounType = ?nounTypes) # Optional selection of genders. - # OPTIONAL { - # ?lexeme wdt:P5185 ?nounGender .
- # FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } - # } . + OPTIONAL { + ?lexeme wdt:P5185 ?nounGender . + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } + } . - # SERVICE wikibase:label { - # bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". - # ?nounGender rdfs:label ?gender . - # } + SERVICE wikibase:label { + bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". + ?nounGender rdfs:label ?gender . + } } diff --git a/src/scribe_data/language_data_extraction/Malayalam/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Malayalam/verbs/query_verbs.sparql index 9b53e60fa..d46c0a7db 100644 --- a/src/scribe_data/language_data_extraction/Malayalam/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Malayalam/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Mandarin/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Mandarin/nouns/query_nouns.sparql index 0f84f9a4f..bb40379ba 100644 --- a/src/scribe_data/language_data_extraction/Mandarin/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Mandarin/nouns/query_nouns.sparql @@ -1,17 +1,17 @@ # tool: scribe-data -# All Mandarin Chinese (Q727694) nouns. +# All Standard Mandarin Chinese (Q727694) nouns. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?noun WHERE { - # Nouns and pronouns. 
- VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q727694 ; wikibase:lexicalCategory ?nounType ; wikibase:lemma ?noun . + FILTER(?nounType = ?nounTypes) } diff --git a/src/scribe_data/language_data_extraction/Mandarin/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Mandarin/verbs/query_verbs.sparql index 108a1cb46..9616ebe49 100644 --- a/src/scribe_data/language_data_extraction/Mandarin/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Mandarin/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Nynorsk/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Nynorsk/nouns/query_nouns.sparql index d42707a9d..4f0a0f443 100644 --- a/src/scribe_data/language_data_extraction/Nynorsk/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Nynorsk/nouns/query_nouns.sparql @@ -1,40 +1,55 @@ # tool: scribe-data -# All Nynorsk (Norwegian) (Q25164) nouns, their plural and their gender. +# All Nynorsk Norwegian (Q25164) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. # Note that this query is for Nynorsk (Q25164) rather than Bokmål (Q25167). SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?indefSingular + ?defSingular + ?indefPlural + ?defPlural ?gender WHERE { - # Nouns and pronouns. 
- VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q25164 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?indefSingular . - # Optional selection of singular forms. + FILTER(?nounType = ?nounTypes) + + # Optional selection of definite singular forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; + ?lexeme ontolex:lexicalForm ?defSingularForm . + ?defSingularForm ontolex:representation ?defSingular ; wikibase:grammaticalFeature wd:Q110786 ; + wikibase:grammaticalFeature wd:Q53997851 ; + } . + + # Optional selection of indefinite plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?indefPluralForm . + ?indefPluralForm ontolex:representation ?indefPlural ; + wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q53997857 ; } . - # Optional selection of plural forms. + # Optional selection of definite plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; + ?lexeme ontolex:lexicalForm ?defPluralForm . + ?defPluralForm ontolex:representation ?defPlural ; wikibase:grammaticalFeature wd:Q146786 ; + wikibase:grammaticalFeature wd:Q53997851 ; } . # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } .
SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Nynorsk/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Nynorsk/verbs/query_verbs.sparql index 9e45e8f3a..3589b65e4 100644 --- a/src/scribe_data/language_data_extraction/Nynorsk/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Nynorsk/verbs/query_verbs.sparql @@ -4,7 +4,7 @@ # Note that this query is for Nynorsk (Q25164) rather than Bokmål (Q25167). SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive WHERE { diff --git a/src/scribe_data/language_data_extraction/Polish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Polish/nouns/query_nouns.sparql index fb895b24b..453b4ce3d 100644 --- a/src/scribe_data/language_data_extraction/Polish/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Polish/nouns/query_nouns.sparql @@ -1,41 +1,43 @@ # tool: scribe-data -# All Polish (Q809) nouns, their plural and their gender. +# All Polish (Q809) nouns, their plurals and their genders in the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?nomPlural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q809 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType . - # Optional selection of singular forms. + FILTER(?nounType = ?nounTypes) + + # Optional selection of nominative singular forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . 
- ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; + ?lexeme ontolex:lexicalForm ?nomSingularForm . + ?nomSingularForm ontolex:representation ?nomSingular ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q110786 ; } . - # Optional selection of plural forms. + # Optional selection of nominative plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; - wikibase:grammaticalFeature wd:Q146786 ; + ?lexeme ontolex:lexicalForm ?nomPluralForm . + ?nomPluralForm ontolex:representation ?nomPlural ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q146786 ; } . # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Polish/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Polish/verbs/query_verbs.sparql index fcdc94e49..e418902cc 100644 --- a/src/scribe_data/language_data_extraction/Polish/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Polish/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive WHERE { diff --git a/src/scribe_data/language_data_extraction/Portuguese/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Portuguese/nouns/query_nouns.sparql index 1dfbff9e0..b516f4249 100644 --- a/src/scribe_data/language_data_extraction/Portuguese/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Portuguese/nouns/query_nouns.sparql @@ -1,27 +1,21 @@ # tool: scribe-data -# All Portuguese (Q5146) nouns, their plural and their gender. +# All Portuguese (Q5146) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?singular ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q5146 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?singular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - } . + FILTER(?nounType = ?nounTypes) # Optional selection of plural forms. OPTIONAL { @@ -33,7 +27,9 @@ WHERE { # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . 
SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Portuguese/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Portuguese/verbs/query_verbs.sparql index 64acef6e3..29cbd9145 100644 --- a/src/scribe_data/language_data_extraction/Portuguese/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Portuguese/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?presFPS ?presSPS ?presTPS ?presFPP ?presSPP ?presTPP diff --git a/src/scribe_data/language_data_extraction/Russian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Russian/nouns/query_nouns.sparql index 92849510b..baefb4c61 100644 --- a/src/scribe_data/language_data_extraction/Russian/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Russian/nouns/query_nouns.sparql @@ -1,41 +1,43 @@ # tool: scribe-data -# All Russian (Q7737) nouns, their plural and their gender. +# All Russian (Q7737) nouns, their plurals and their genders in the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?nomPlural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q7737 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType . + + FILTER(?nounType = ?nounTypes) # Optional selection of nominative singular forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . 
- ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; + ?lexeme ontolex:lexicalForm ?nomSingularForm . + ?nomSingularForm ontolex:representation ?nomSingular ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q110786 ; } . # Optional selection of nominative plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; - wikibase:grammaticalFeature wd:Q146786 ; + ?lexeme ontolex:lexicalForm ?nomPluralForm . + ?nomPluralForm ontolex:representation ?nomPlural ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q146786 ; } . # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Russian/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Russian/prepositions/query_prepositions.sparql index f7fc8bac7..2419c5e5b 100644 --- a/src/scribe_data/language_data_extraction/Russian/prepositions/query_prepositions.sparql +++ b/src/scribe_data/language_data_extraction/Russian/prepositions/query_prepositions.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?preposition ?case diff --git a/src/scribe_data/language_data_extraction/Russian/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Russian/verbs/query_verbs.sparql index 7434f8f12..dd2996f15 100644 --- a/src/scribe_data/language_data_extraction/Russian/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Russian/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?presFPS ?presSPS ?presTPS ?presFPP ?presSPP ?presTPP diff --git a/src/scribe_data/language_data_extraction/Slovak/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Slovak/nouns/query_nouns.sparql index 6d3d5fdc6..3479c4810 100644 --- a/src/scribe_data/language_data_extraction/Slovak/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Slovak/nouns/query_nouns.sparql @@ -1,39 +1,36 @@ # tool: scribe-data -# All Slovak (Q9058) nouns, their plural and their gender. +# All Slovak (Q9058) nouns, their plurals and their genders for the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular ?plural ?gender + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?nomPlural + ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9058 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?nomSingular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - wikibase:grammaticalFeature wd:Q131105 ; - } . + FILTER(?nounType = ?nounTypes) - # Optional selection of plural forms. + # Optional selection of nominative plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; - wikibase:grammaticalFeature wd:Q146786 ; + ?lexeme ontolex:lexicalForm ?nomPluralForm . 
+ ?nomPluralForm ontolex:representation ?nomPlural ; wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q146786 ; } . # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Slovak/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Slovak/prepositions/query_prepositions.sparql index 3ef7c1915..3d211c4c5 100644 --- a/src/scribe_data/language_data_extraction/Slovak/prepositions/query_prepositions.sparql +++ b/src/scribe_data/language_data_extraction/Slovak/prepositions/query_prepositions.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?preposition ?case WHERE { diff --git a/src/scribe_data/language_data_extraction/Slovak/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Slovak/verbs/query_verbs.sparql index 6ec7a6489..b1772a5eb 100644 --- a/src/scribe_data/language_data_extraction/Slovak/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Slovak/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive WHERE { diff --git a/src/scribe_data/language_data_extraction/Spanish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Spanish/nouns/query_nouns.sparql index 14f52cbef..f9cd07ad2 100644 --- a/src/scribe_data/language_data_extraction/Spanish/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Spanish/nouns/query_nouns.sparql @@ -1,29 +1,23 @@ # tool: scribe-data -# All Spanish (Q1321) nouns, their plural and their gender. +# All Spanish (Q1321) nouns, their plurals and their genders. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?singular ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q1321 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?singular . - # Optional selection of nominative singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - } . + FILTER(?nounType = ?nounTypes) - # Optional selection of nominative plural forms. + # Optional selection of plural forms. OPTIONAL { ?lexeme ontolex:lexicalForm ?pluralForm . ?pluralForm ontolex:representation ?plural ; @@ -33,7 +27,9 @@ WHERE { # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . 
SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_1.sparql b/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_1.sparql index 3d936623e..76005751f 100644 --- a/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_1.sparql +++ b/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_1.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?presFPS ?presSPS ?presTPS ?presFPP ?presSPP ?presTPP diff --git a/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_2.sparql b/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_2.sparql index d6388a13b..34e1feb35 100644 --- a/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_2.sparql +++ b/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_2.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?pretFPS ?pretSPS ?pretTPS ?pretFPP ?pretSPP ?pretTPP diff --git a/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_3.sparql b/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_3.sparql index 5a2cec687..d83de91de 100644 --- a/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_3.sparql +++ b/src/scribe_data/language_data_extraction/Spanish/verbs/query_verbs_3.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive ?impFPS ?impSPS ?impTPS ?impFPP ?impSPP ?impTPP diff --git a/src/scribe_data/language_data_extraction/Swedish/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Swedish/nouns/query_nouns.sparql index d6a1fe4b3..8ac257f9a 100644 --- a/src/scribe_data/language_data_extraction/Swedish/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Swedish/nouns/query_nouns.sparql @@ -2,47 +2,52 @@ # All Swedish (Q9027) nouns with their plural, gender and genitive forms. # Enter this query at https://query.wikidata.org/. -# Note: does not include pronouns as the query wasn't running. - SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?nominativeSingular ?nominativePlural - ?genitiveSingular ?genitivePlural ?gender + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomIndefSingular + ?nomIndefPlural + ?genIndefSingular + ?genIndefPlural + ?nomDefSingular + ?nomDefPlural + ?genDefSingular + ?genDefPlural + ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9027 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType . + + FILTER(?nounType = ?nounTypes) # Indefinite forms in both nominative and genitive. OPTIONAL { # Nominative Singular - ?lexeme ontolex:lexicalForm ?nominativeSingularForm . - ?nominativeSingularForm ontolex:representation ?nominativeSingular ; + ?lexeme ontolex:lexicalForm ?nomIndefSingularForm . + ?nomIndefSingularForm ontolex:representation ?nomIndefSingular ; wikibase:grammaticalFeature wd:Q53997857 ; wikibase:grammaticalFeature wd:Q131105 ; wikibase:grammaticalFeature wd:Q110786 . 
# Nominative Plural - ?lexeme ontolex:lexicalForm ?nominativePluralForm . - ?nominativePluralForm ontolex:representation ?nominativePlural ; + ?lexeme ontolex:lexicalForm ?nomIndefPluralForm . + ?nomIndefPluralForm ontolex:representation ?nomIndefPlural ; wikibase:grammaticalFeature wd:Q53997857 ; wikibase:grammaticalFeature wd:Q131105 ; wikibase:grammaticalFeature wd:Q146786 . # Genitive Singular - ?lexeme ontolex:lexicalForm ?genitiveSingularForm . - ?genitiveSingularForm ontolex:representation ?genitiveSingular ; + ?lexeme ontolex:lexicalForm ?genIndefSingularForm . + ?genIndefSingularForm ontolex:representation ?genIndefSingular ; wikibase:grammaticalFeature wd:Q53997857 ; wikibase:grammaticalFeature wd:Q146233 ; wikibase:grammaticalFeature wd:Q110786 . # Genitive Plural - ?lexeme ontolex:lexicalForm ?genitivePluralForm . - ?genitivePluralForm ontolex:representation ?genitivePlural ; + ?lexeme ontolex:lexicalForm ?genIndefPluralForm . + ?genIndefPluralForm ontolex:representation ?genIndefPlural ; wikibase:grammaticalFeature wd:Q53997857 ; wikibase:grammaticalFeature wd:Q146233 ; wikibase:grammaticalFeature wd:Q146786 . @@ -51,29 +56,29 @@ WHERE { # Definite forms in both nominative and genitive. OPTIONAL { # Nominative Singular - ?lexeme ontolex:lexicalForm ?nominativeSingularForm . - ?nominativeSingularForm ontolex:representation ?Q53997851 ; + ?lexeme ontolex:lexicalForm ?nomDefSingularForm . + ?nomDefSingularForm ontolex:representation ?nomDefSingular ; wikibase:grammaticalFeature wd:Q53997851 ; wikibase:grammaticalFeature wd:Q131105 ; wikibase:grammaticalFeature wd:Q110786 . # Nominative Plural - ?lexeme ontolex:lexicalForm ?nominativePluralForm . - ?nominativePluralForm ontolex:representation ?nominativePlural ; + ?lexeme ontolex:lexicalForm ?nomDefPluralForm . + ?nomDefPluralForm ontolex:representation ?nomDefPlural ; wikibase:grammaticalFeature wd:Q53997851 ; wikibase:grammaticalFeature wd:Q131105 ; wikibase:grammaticalFeature wd:Q146786 . 
# Genitive Singular - ?lexeme ontolex:lexicalForm ?genitiveSingularForm . - ?genitiveSingularForm ontolex:representation ?genitiveSingular ; + ?lexeme ontolex:lexicalForm ?genDefSingularForm . + ?genDefSingularForm ontolex:representation ?genDefSingular ; wikibase:grammaticalFeature wd:Q53997851 ; wikibase:grammaticalFeature wd:Q146233 ; wikibase:grammaticalFeature wd:Q110786 . # Genitive Plural - ?lexeme ontolex:lexicalForm ?genitivePluralForm . - ?genitivePluralForm ontolex:representation ?genitivePlural ; + ?lexeme ontolex:lexicalForm ?genDefPluralForm . + ?genDefPluralForm ontolex:representation ?genDefPlural ; wikibase:grammaticalFeature wd:Q53997851 ; wikibase:grammaticalFeature wd:Q146233 ; wikibase:grammaticalFeature wd:Q146786 . @@ -82,7 +87,9 @@ WHERE { # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Swedish/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Swedish/verbs/query_verbs.sparql index 56d28b268..7c2c4abdf 100644 --- a/src/scribe_data/language_data_extraction/Swedish/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Swedish/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?activeInfinitive ?imperative ?activeSupine ?activePresent ?activePreterite ?passiveInfinitive ?passiveSupine diff --git a/src/scribe_data/language_data_extraction/Tajik/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Tajik/nouns/query_nouns.sparql index d628b13e4..db374bbc5 100644 --- a/src/scribe_data/language_data_extraction/Tajik/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Tajik/nouns/query_nouns.sparql @@ -3,15 +3,15 @@ # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?noun WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q9260 ; wikibase:lexicalCategory ?nounType ; wikibase:lemma ?noun . + FILTER(?nounType = ?nounTypes) } diff --git a/src/scribe_data/language_data_extraction/Tajik/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Tajik/verbs/query_verbs.sparql index 88ec9450e..9eba94012 100644 --- a/src/scribe_data/language_data_extraction/Tajik/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Tajik/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Tamil/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Tamil/nouns/query_nouns.sparql index fd1f75f3a..404408ead 100644 --- a/src/scribe_data/language_data_extraction/Tamil/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Tamil/nouns/query_nouns.sparql @@ -1,17 +1,33 @@ # tool: scribe-data -# All Tamil (Q5885) nouns. +# All Tamil (Q5885) nouns and their plurals for the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?nomPlural WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q5885 ; - wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + wikibase:lexicalCategory ?nounType . + FILTER(?nounType = ?nounTypes) + + # Optional selection of nominative singular forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?nomSingularForm . + ?nomSingularForm ontolex:representation ?nomSingular ; + wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q110786 ; + } . + + # Optional selection of nominative plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?nomPluralForm . + ?nomPluralForm ontolex:representation ?nomPlural ; + wikibase:grammaticalFeature wd:Q131105 ; + wikibase:grammaticalFeature wd:Q146786 ; + } . 
} diff --git a/src/scribe_data/language_data_extraction/Tamil/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Tamil/verbs/query_verbs.sparql index be85cea3a..99f41782d 100644 --- a/src/scribe_data/language_data_extraction/Tamil/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Tamil/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/language_data_extraction/Ukranian/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Ukranian/nouns/query_nouns.sparql index 630d19fac..ebefab09d 100644 --- a/src/scribe_data/language_data_extraction/Ukranian/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Ukranian/nouns/query_nouns.sparql @@ -1,39 +1,36 @@ # tool: scribe-data -# All Ukrainian (Q8798) nouns, their plural and their gender. +# All Ukrainian (Q8798) nouns, their plurals and their genders for the given cases. # Enter this query at https://query.wikidata.org/. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?singular - ?plural + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?nomSingular + ?nomPlural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q8798 ; - wikibase:lexicalCategory ?noun . - FILTER(?noun = ?nounTypes) + wikibase:lexicalCategory ?nounType ; + wikibase:lemma ?nomSingular . - # Optional selection of singular forms. - OPTIONAL { - ?lexeme ontolex:lexicalForm ?singularForm . - ?singularForm ontolex:representation ?singular ; - wikibase:grammaticalFeature wd:Q110786 ; - } . 
+ FILTER(?nounType = ?nounTypes) - # Optional selection of plural forms. + # Optional selection of nominative plural forms. OPTIONAL { - ?lexeme ontolex:lexicalForm ?pluralForm . - ?pluralForm ontolex:representation ?plural ; + ?lexeme ontolex:lexicalForm ?nomPluralForm . + ?nomPluralForm ontolex:representation ?nomPlural ; + wikibase:grammaticalFeature wd:Q131105 ; wikibase:grammaticalFeature wd:Q146786 ; } . # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . SERVICE wikibase:label { diff --git a/src/scribe_data/language_data_extraction/Ukranian/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Ukranian/prepositions/query_prepositions.sparql index f3c76cdba..865045347 100644 --- a/src/scribe_data/language_data_extraction/Ukranian/prepositions/query_prepositions.sparql +++ b/src/scribe_data/language_data_extraction/Ukranian/prepositions/query_prepositions.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?preposition ?case diff --git a/src/scribe_data/language_data_extraction/Ukranian/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Ukranian/verbs/query_verbs.sparql index 1dd46ac10..5a3cf8b45 100644 --- a/src/scribe_data/language_data_extraction/Ukranian/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Ukranian/verbs/query_verbs.sparql @@ -3,7 +3,7 @@ # Enter this query at https://query.wikidata.org/. 
SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?infinitive WHERE { diff --git a/src/scribe_data/language_data_extraction/Urdu/nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Urdu/nouns/query_nouns.sparql index 90ab8e0b5..9fd410f3b 100644 --- a/src/scribe_data/language_data_extraction/Urdu/nouns/query_nouns.sparql +++ b/src/scribe_data/language_data_extraction/Urdu/nouns/query_nouns.sparql @@ -1,30 +1,42 @@ # tool: scribe-data -# All Hindustani (Q11051) nouns and their gender. +# All Urdu (from Hindustani Q11051) nouns and their gender. # Enter this query at https://query.wikidata.org/. -# Note the necessity to filter for "ur" to remove Hindustani (hi) words. +# Note the necessity to filter for "ur" to remove Hindi (hi) words. SELECT DISTINCT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) - ?noun + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) + ?singular + ?plural ?gender WHERE { - # Nouns and pronouns. - VALUES ?nounTypes { wd:Q1084 wd:Q147276 } + VALUES ?nounTypes {wd:Q1084 wd:Q147276} # Nouns and pronouns ?lexeme a ontolex:LexicalEntry ; dct:language wd:Q11051 ; wikibase:lexicalCategory ?nounType ; - wikibase:lemma ?noun . + wikibase:lemma ?singular . + FILTER(?nounType = ?nounTypes) + # Optional selection of plural forms. + OPTIONAL { + ?lexeme ontolex:lexicalForm ?pluralForm . + ?pluralForm ontolex:representation ?plural ; + wikibase:grammaticalFeature wd:Q146786 ; + } . + # Optional selection of genders. OPTIONAL { ?lexeme wdt:P5185 ?nounGender . - FILTER NOT EXISTS { ?lexeme wdt:P31 wd:Q202444 . } + FILTER NOT EXISTS { + ?lexeme wdt:P31 wd:Q202444 . + } } . 
- BIND(lang(?noun) as ?language) - FILTER(?language = "ur") + BIND(lang(?singular) AS ?langSingular) + BIND(lang(?plural) AS ?langPlural) + FILTER(?langSingular = "ur") + FILTER(!BOUND(?plural) || ?langPlural = "ur") SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". diff --git a/src/scribe_data/language_data_extraction/Urdu/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Urdu/verbs/query_verbs.sparql index dfe01b5f2..1e3ffdf45 100644 --- a/src/scribe_data/language_data_extraction/Urdu/verbs/query_verbs.sparql +++ b/src/scribe_data/language_data_extraction/Urdu/verbs/query_verbs.sparql @@ -4,7 +4,7 @@ # Note the necessity to filter for "ur" to remove Hindustani (hi) words. SELECT - (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") as ?lexemeID) + (REPLACE(STR(?lexeme), "http://www.wikidata.org/entity/", "") AS ?lexemeID) ?verb WHERE { diff --git a/src/scribe_data/wikidata/update_data.py b/src/scribe_data/wikidata/update_data.py index 29b7fe726..d1cbd07b3 100644 --- a/src/scribe_data/wikidata/update_data.py +++ b/src/scribe_data/wikidata/update_data.py @@ -30,12 +30,12 @@ ) from scribe_data.wikidata.query_total_nouns import query_total_nouns -total_nouns = query_total_nouns() -batch_size = 10 -num_iterations = math.ceil(total_nouns / batch_size) +# total_nouns = query_total_nouns() +# batch_size = 10 +# num_iterations = math.ceil(total_nouns / batch_size) SCRIBE_DATA_SRC_PATH = "src/scribe_data" -PATH_TO_LANGUAGE_EXTRACTION_FILES = f"{SCRIBE_DATA_SRC_PATH}/language_data/extraction" +PATH_TO_LANGUAGE_EXTRACTION_FILES = f"{SCRIBE_DATA_SRC_PATH}/language_data_extraction" PATH_TO_UPDATE_FILES = f"{SCRIBE_DATA_SRC_PATH}/load/update_files" # Set SPARQLWrapper query conditions. @@ -62,39 +62,36 @@ word_types_update = [] word_types_update = current_word_types if word_types is None else word_types -# Derive Data directory elements for potential queries. 
-languages_dir_files = [] + +# Derive directory files and language subdirectories for potential queries. +language_data_extraction_files = [] for path, _, files in os.walk(PATH_TO_LANGUAGE_EXTRACTION_FILES): - languages_dir_files.extend(os.path.join(path, name) for name in files) - -language_dir_files = list( - { - f.split(PATH_TO_LANGUAGE_EXTRACTION_FILES + "/")[1].split("/")[0] - for f in languages_dir_files - if f.split(PATH_TO_LANGUAGE_EXTRACTION_FILES + "/")[1][0] != "_" - } -) + language_data_extraction_files.extend(os.path.join(path, name) for name in files) + +language_directories = [ + d + for d in os.listdir(PATH_TO_LANGUAGE_EXTRACTION_FILES) + if os.path.isdir(f"{PATH_TO_LANGUAGE_EXTRACTION_FILES}/{d}") +] -# Data paths to run scripts and format outputs. -# Check to see if the language has all zeroes for its data, meaning it's been added. +# Check to see if the language has all zeroes for its data, meaning it's new. new_language_list = [] for lang in languages_update: - # Prepositions not needed for all languages. check_current_data = [current_data[lang][k] for k in current_data[lang].keys()] if len(set(check_current_data)) == 1 and check_current_data[0] == 0: new_language_list.append(lang) # Derive queries to be ran. possible_queries = [] -for d in language_dir_files: +for d in language_directories: possible_queries.extend( f"{PATH_TO_LANGUAGE_EXTRACTION_FILES}/{d}/{target_type}" for target_type in word_types_update if f"{PATH_TO_LANGUAGE_EXTRACTION_FILES}/{d}/{target_type}" in [ e[: len(f"{PATH_TO_LANGUAGE_EXTRACTION_FILES}/{d}/{target_type}")] - for e in languages_dir_files + for e in language_data_extraction_files ] )