scribe-org · andrewtavis · Jun 22, 2024 · Jun 4, 2024 · Jun 5, 2024 · Jun 5, 2024
diff --git a/.github/workflows/pr_maintainer_checklist.yaml b/.github/workflows/pr_maintainer_checklist.yaml
@@ -32,4 +32,6 @@ jobs:
               - The contributor's name and icon in remote commits should be the same as what appears in the PR
               - If there's a mismatch, the contributor needs to make sure that the [email they use for GitHub](https://github.com/settings/emails) matches what they have for `git config user.email` in their local Scribe-Data repo
 
+            - [ ] The linting and formatting workflow within the [PR checks](https://github.com/scribe-org/Scribe-Data/pull/${{ github.event.pull_request.number }}/checks) does not indicate new errors in the files changed
+
             - [ ] The [CHANGELOG](https://github.com/scribe-org/Scribe-Data/blob/main/CHANGELOG.md) has been updated with a description of the changes for the upcoming release and the corresponding issue (if necessary)
diff --git a/setup.py b/setup.py
@@ -47,6 +47,11 @@
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/scribe-org/Scribe-Data",
+    entry_points={
+        "console_scripts": [
+            "scribe-data=scribe_data.cli.main:main",
+        ],
+    }
 )
 
 if __name__ == "__main__":

diff --git a/src/scribe_data/cli/__init__.py b/src/scribe_data/cli/__init__.py
diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
@@ -0,0 +1,125 @@
+"""
+Utility functions for the Scribe-Data CLI.
+
+.. raw:: html
+    <!--
+    * Copyright (C) 2024 Scribe
+    *
+    * This program is free software: you can redistribute it and/or modify
+    * it under the terms of the GNU General Public License as published by
+    * the Free Software Foundation, either version 3 of the License, or
+    * (at your option) any later version.
+    *
+    * This program is distributed in the hope that it will be useful,
+    * but WITHOUT ANY WARRANTY; without even the implied warranty of
+    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    * GNU General Public License for more details.
+    *
+    * You should have received a copy of the GNU General Public License
+    * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+    -->
+"""
+
+import json
+from pathlib import Path
+from typing import Dict, List, Union
+
+# The metadata files live in the "resources" directory that sits one level above
+# the directory of this file.
+LANGUAGE_METADATA_FILE = (
+    Path(__file__).parent.parent / "resources" / "language_metadata.json"
+)
+WORD_TYPE_METADATA_FILE = (
+    Path(__file__).parent.parent / "resources" / "word_type_metadata.json"
+)
+# Relative path, so it is resolved against the current working directory when used.
+# NOTE(review): presumably the directory of exported JSON data - confirm against callers.
+DATA_DIR = Path("scribe_data_json_export")
+
+# Both metadata files are read once, when this module is imported.
+with LANGUAGE_METADATA_FILE.open("r", encoding="utf-8") as file:
+    language_metadata = json.load(file)
+
+with WORD_TYPE_METADATA_FILE.open("r", encoding="utf-8") as file:
+    word_type_metadata = json.load(file)
+
+# Maps lowercased language names to their metadata entries.
+language_map = {
+    lang["language"].lower(): lang for lang in language_metadata["languages"]
+}
+
+
+def correct_word_type(word_type: str) -> Union[str, None]:
+    """
+    Corrects common versions of word type arguments so users can choose between them.
+
+    The argument is returned unchanged when it is a known word type. Otherwise an
+    "s" is appended to it and that pluralized value is returned if it is known.
+
+    Parameters
+    ----------
+        word_type : str
+            The word type to potentially correct.
+
+    Returns
+    -------
+        The word_type value, a corrected version of it, or None if neither the
+        value nor its pluralized form is a known word type.
+    """
+    all_word_types = word_type_metadata["word-types"]
+
+    if word_type in all_word_types:
+        return word_type
+
+    # Accept a singular argument by checking for its plural among the word types.
+    pluralized_word_type = f"{word_type}s"
+    if pluralized_word_type in all_word_types:
+        return pluralized_word_type
+
+    # The "no match" result is explicit so that callers know to check for it.
+    return None
+
+
+def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
+    """
+    Prints a formatted output from the Scribe-Data CLI.
+
+    Parameters
+    ----------
+        data : Union[Dict, List]
+            The data to print. Dictionaries are printed as "key : value" rows with
+            the keys padded to a common width. Lists are printed one item per line.
+
+        word_type : str
+            The word type of the data, which selects how dictionary values are
+            formatted and is named in the message printed for empty data.
+    """
+    if not data:
+        print(f"No data available for word type '{word_type}'.")
+        return
+
+    # Lists and other non-dictionary values have no keys to align, so they are
+    # handled before anything that relies on the dictionary interface.
+    if isinstance(data, list):
+        for item in data:
+            if isinstance(item, dict):
+                for key, value in item.items():
+                    print(f"{key} : {value}")
+
+            else:
+                print(item)
+
+        return
+
+    if not isinstance(data, dict):
+        print(data)
+        return
+
+    max_key_length = max((len(key) for key in data), default=0)
+
+    if word_type == "autosuggestions":
+        for key, value in data.items():
+            print(f"{key:<{max_key_length}} : {', '.join(value)}")
+
+    elif word_type == "emoji_keywords":
+        for key, value in data.items():
+            emojis = [item["emoji"] for item in value]
+            print(f"{key:<{max_key_length}} : {' '.join(emojis)}")
+
+    elif word_type in {"prepositions", "translations"}:
+        for key, value in data.items():
+            print(f"{key:<{max_key_length}} : {value}")
+
+    else:
+        for key, value in data.items():
+            if isinstance(value, dict):
+                print(f"{key:<{max_key_length}} : ")
+                max_sub_key_length = max(
+                    (len(sub_key) for sub_key in value), default=0
+                )
+                for sub_key, sub_value in value.items():
+                    print(f"  {sub_key:<{max_sub_key_length}} : {sub_value}")
+
+            elif isinstance(value, list):
+                print(f"{key:<{max_key_length}} : ")
+                for item in value:
+                    if isinstance(item, dict):
+                        # Sub keys are padded to the width of the top-level keys.
+                        for sub_key, sub_value in item.items():
+                            print(f"  {sub_key:<{max_key_length}} : {sub_value}")
+
+                    else:
+                        print(f"  {item}")
+
+            else:
+                print(f"{key:<{max_key_length}} : {value}")
diff --git a/src/scribe_data/cli.py → src/scribe_data/cli/convert.py b/src/scribe_data/cli.py → src/scribe_data/cli/convert.py
@@ -1,5 +1,5 @@
 """
-Setup and commands for the Scribe-Data command line interface.
+Functions to convert data returned from the Scribe-Data CLI to other file types.
 
 .. raw:: html
     <!--

diff --git a/src/scribe_data/cli/list.py b/src/scribe_data/cli/list.py
@@ -0,0 +1,187 @@
+"""
+Functions for listing languages and word types for the Scribe-Data CLI.
+
+.. raw:: html
+    <!--
+    * Copyright (C) 2024 Scribe
+    *
+    * This program is free software: you can redistribute it and/or modify
+    * it under the terms of the GNU General Public License as published by
+    * the Free Software Foundation, either version 3 of the License, or
+    * (at your option) any later version.
+    *
+    * This program is distributed in the hope that it will be useful,
+    * but WITHOUT ANY WARRANTY; without even the implied warranty of
+    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    * GNU General Public License for more details.
+    *
+    * You should have received a copy of the GNU General Public License
+    * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+    -->
+"""
+
+import json
+from pathlib import Path
+
+from .cli_utils import correct_word_type
+
+# The metadata file and the language data extraction directory are both located
+# relative to the directory one level above the directory of this file.
+METADATA_FILE = Path(__file__).parent.parent / "resources" / "language_metadata.json"
+LANGUAGE_DATA_EXTRACTION_DIR = Path(__file__).parent.parent / "language_data_extraction"
+
+# Load language metadata from JSON file. This happens once, when the module is imported.
+with METADATA_FILE.open("r", encoding="utf-8") as file:
+    language_metadata = json.load(file)
+
+# Maps lowercased language names to their metadata entries so that languages can
+# be looked up regardless of the capitalization of the user's input.
+language_map = {
+    lang["language"].lower(): lang for lang in language_metadata["languages"]
+}
+
+
+def list_languages() -> None:
+    """
+    Prints a table of languages, their ISO codes and their Wikidata QIDs.
+
+    Rows are sorted by language name, and every column is as wide as its longest
+    value plus two characters of padding.
+    """
+    entries = sorted(
+        language_metadata["languages"], key=lambda entry: entry["language"]
+    )
+
+    name_width, iso_width, qid_width = (
+        max(len(entry[field]) for entry in entries) + 2
+        for field in ("language", "iso", "qid")
+    )
+    divider = "-" * (name_width + iso_width + qid_width)
+
+    def format_row(name: str, iso: str, qid: str) -> str:
+        # Left-align each cell within its column and separate cells by one space.
+        return f"{name:<{name_width}} {iso:<{iso_width}} {qid:<{qid_width}}"
+
+    print()
+    print(format_row("Language", "ISO", "QID"))
+    print(divider)
+
+    for entry in entries:
+        print(format_row(entry["language"].capitalize(), entry["iso"], entry["qid"]))
+
+    print(divider)
+    print()
+
+
+def list_word_types(language: str = None) -> None:
+    """
+    Lists all word types or those available for a given language.
+
+    Word types are the names of the sub directories found within the language
+    directories of the language data extraction directory.
+
+    Parameters
+    ----------
+        language : str
+            The language to potentially list word types for.
+
+    Raises
+    ------
+        ValueError
+            If the language is not recognized or has no word types available.
+    """
+    if language:
+        language_data = language_map.get(language.lower())
+        if not language_data:
+            raise ValueError(f"Language '{language}' is not recognized.")
+
+        language_capitalized = language.capitalize()
+        language_dir = LANGUAGE_DATA_EXTRACTION_DIR / language_capitalized
+
+        # A missing directory is reported like an empty one rather than letting
+        # iterdir() fail with a FileNotFoundError.
+        word_types = (
+            [f.name for f in language_dir.iterdir() if f.is_dir()]
+            if language_dir.is_dir()
+            else []
+        )
+        if not word_types:
+            raise ValueError(
+                f"No word types available for language '{language_capitalized}'."
+            )
+
+        table_header = f"Available word types: {language_capitalized}"
+
+    else:
+        word_types = set()
+        for lang in language_metadata["languages"]:
+            language_dir = LANGUAGE_DATA_EXTRACTION_DIR / lang["language"].capitalize()
+            if language_dir.is_dir():
+                word_types.update(f.name for f in language_dir.iterdir() if f.is_dir())
+
+        table_header = "Available word types: All languages"
+
+    # The default keeps max() from raising if no word type was found at all.
+    table_line_length = max(
+        len(table_header), max((len(wt) for wt in word_types), default=0)
+    )
+
+    print()
+    print(table_header)
+    print("-" * table_line_length)
+
+    for wt in sorted(word_types):
+        print(wt)
+
+    print("-" * table_line_length)
+    print()
+
+
+def list_all() -> None:
+    """
+    Lists all available languages and word types.
+
+    The table of languages is printed first and is followed by the table of word
+    types.
+    """
+    list_languages()
+    # No language is passed, so the word types of all languages are listed.
+    list_word_types()
+
+
+def list_languages_for_word_type(word_type: str) -> None:
+    """
+    Lists the available languages for a given word type.
+
+    A language is available if its directory within the language data extraction
+    directory contains an entry named after the word type.
+
+    Parameters
+    ----------
+        word_type : str
+            The word type to check for.
+
+    Raises
+    ------
+        ValueError
+            If the word type is not recognized or no language is available for it.
+    """
+    corrected_word_type = correct_word_type(word_type)
+    if corrected_word_type is None:
+        # Without this check None would be joined onto a path below, which fails
+        # with a TypeError that doesn't name the invalid word type.
+        raise ValueError(f"Word type '{word_type}' is not recognized.")
+
+    available_languages = []
+    for lang in language_metadata["languages"]:
+        language_dir = LANGUAGE_DATA_EXTRACTION_DIR / lang["language"].capitalize()
+        if language_dir.is_dir() and (language_dir / corrected_word_type).exists():
+            available_languages.append(lang["language"])
+
+    if not available_languages:
+        # max() below would otherwise raise an uninformative ValueError.
+        raise ValueError(
+            f"No languages available for word type '{corrected_word_type}'."
+        )
+
+    available_languages.sort()
+    table_header = f"Available languages: {corrected_word_type}"
+    table_line_length = max(
+        len(table_header), max(len(lang) for lang in available_languages)
+    )
+
+    print()
+    print(table_header)
+    print("-" * table_line_length)
+
+    for lang in available_languages:
+        print(f"{lang.capitalize()}")
+
+    print("-" * table_line_length)
+    print()
+
+
+def list_wrapper(language: str = None, word_type: str = None) -> None:
+    """
+    Conditionally provides the full functionality of the list command.
+
+    Each argument is checked for being unset, for being exactly True and for
+    holding a value, and the combination of the two selects what is listed.
+    NOTE(review): True presumably means that the CLI flag was passed without a
+    value - confirm against the argument parser.
+
+    Parameters
+    ----------
+        language : str
+            The language to potentially list word types for.
+
+        word_type : str
+            The word type to check for.
+    """
+    if not language and not word_type:
+        list_all()
+
+    elif language is True and not word_type:
+        list_languages()
+
+    elif not language and word_type is True:
+        list_word_types()
+
+    elif language is True and word_type is True:
+        print("Please specify either a language or a word type.")
+
+    elif word_type is True:
+        # A specific language was given along with a bare word type argument, so
+        # list that language's word types instead of treating True as the name
+        # of a word type.
+        list_word_types(language)
+
+    elif word_type is not None:
+        list_languages_for_word_type(word_type)
+
+    elif language is not None:
+        list_word_types(language)