From 179184b933f7c2ea8f295146c29b7304febcaff4 Mon Sep 17 00:00:00 2001
From: Mahfuza Humayra Mohona
Date: Tue, 4 Jun 2024 08:19:25 +0600
Subject: [PATCH 01/18] add script for all language list

---
 src/scribe_data/cli.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/src/scribe_data/cli.py b/src/scribe_data/cli.py
index 0b2872598..60385354d 100644
--- a/src/scribe_data/cli.py
+++ b/src/scribe_data/cli.py
@@ -1,3 +1,30 @@
 """
 Setup and commands for the Scribe-Data command line interface.
 """
+
+import os
+import argparse
+
+def list_languages():
+    languages = [lang for lang in os.listdir('language_data_export') if os.path.isdir(f"language_data_export/{lang}")]
+    print("Available languages:")
+    for lang in languages:
+        print(f"- {lang}")
+        word_types = [wt.replace('.json', '') for wt in os.listdir(f"language_data_export/{lang}") if wt.endswith('.json')]
+        max_word_type_length = max(len(wt) for wt in word_types)
+        for wt in word_types:
+            print(f" - {wt:<{max_word_type_length}}")
+    print("")
+
+def main():
+    parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool')
+    parser.add_argument('--list-languages', '-ll', action='store_true', help='List available language codes and word types')
+    args = parser.parse_args()
+
+    if args.list_languages:
+        list_languages()
+    else:
+        parser.print_help()
+
+if __name__ == '__main__':
+    main()
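
A quick way to exercise this first patch without installing anything is to call its main() with a fabricated argv. The snippet below is a hypothetical smoke test, not part of the patch series; it assumes it is run from src/scribe_data/ with a language_data_export/ directory present, containing one subfolder per language:

    import sys

    import cli  # the module added by the patch above

    sys.argv = ["scribe-data", "--list-languages"]  # argv exactly as argparse will see it
    cli.main()  # prints each language, with its word types indented beneath it
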
""" -import os import argparse +import json +from pathlib import Path +from typing import Dict, List, Union -def list_languages(): - languages = [lang for lang in os.listdir('language_data_export') if os.path.isdir(f"language_data_export/{lang}")] +DATA_DIR = Path('language_data_export') + +def list_languages() -> None: + if not DATA_DIR.exists() or not DATA_DIR.is_dir(): + print(f"Directory '{DATA_DIR}' does not exist.") + return + + languages = [lang for lang in DATA_DIR.iterdir() if lang.is_dir()] print("Available languages:") for lang in languages: - print(f"- {lang}") - word_types = [wt.replace('.json', '') for wt in os.listdir(f"language_data_export/{lang}") if wt.endswith('.json')] + print(f"- {lang.name}") + word_types = [wt.stem for wt in lang.glob('*.json')] max_word_type_length = max(len(wt) for wt in word_types) for wt in word_types: print(f" - {wt:<{max_word_type_length}}") - print("") + print("") + +def print_formatted_data(data: Union[Dict, List]) -> None: + if isinstance(data, dict): + max_key_length = max(len(key) for key in data.keys()) + for key, value in data.items(): + if isinstance(value, dict): + print(f"{key:<{max_key_length}} : ") + max_sub_key_length = max(len(sub_key) for sub_key in value.keys()) + for sub_key, sub_value in value.items(): + print(f" {sub_key:<{max_sub_key_length}} : {sub_value}") + else: + print(f"{key:<{max_key_length}} : {value}") + elif isinstance(data, list): + max_key_length = max(len(key) for item in data for key in item.keys()) + for item in data: + if isinstance(item, dict): + for key, value in item.items(): + print(f"{key:<{max_key_length}} : ") + if isinstance(value, dict): + max_sub_key_length = max(len(sub_key) for sub_key in value.keys()) + for sub_key, sub_value in value.items(): + print(f" {sub_key:<{max_sub_key_length}} : {sub_value}") + else: + print(f" {value}") + else: + print(json.dumps(item, indent=2)) + else: + print(data) + +def query_data(language: str, word_type: str) -> None: + data_file = DATA_DIR / language / f"{word_type}.json" + if not data_file.exists(): + print(f"No data found for language '{language}' and word type '{word_type}'.") + return -def main(): + try: + with data_file.open('r') as file: + data = json.load(file) + except (IOError, json.JSONDecodeError) as e: + print(f"Error reading '{data_file}': {e}") + return + + print(f"Data for language '{language}' and word type '{word_type}':") + print_formatted_data(data) + +def main() -> None: parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool') - parser.add_argument('--list-languages', '-ll', action='store_true', help='List available language codes and word types') + subparsers = parser.add_subparsers(dest='command') + + subparsers.add_parser('list-languages', help='List available language codes and word types') + + query_parser = subparsers.add_parser('query', help='Query data for a specific language and word type') + query_parser.add_argument('-l', '--language', required=True, help='Language code') + query_parser.add_argument('-wt', '--word-type', required=True, help='Word type') + args = parser.parse_args() - if args.list_languages: + if args.command == 'list-languages': list_languages() + elif args.command == 'query': + query_data(args.language, args.word_type) else: parser.print_help() From 4731c6da3527a02a8990901641287bef8f33cf46 Mon Sep 17 00:00:00 2001 From: Mahfuza Humayra Mohona Date: Thu, 6 Jun 2024 08:02:01 +0600 Subject: [PATCH 03/18] update the commands --- setup.py | 5 ++ src/scribe_data/cli.py | 122 
++++++++++++++++++++++++++++------------- 2 files changed, 90 insertions(+), 37 deletions(-) diff --git a/setup.py b/setup.py index ba2fbdd76..948e87112 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,11 @@ long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/scribe-org/Scribe-Data", + entry_points={ + "console_scripts": [ + "scribe-data=scribe_data.cli:main", + ], + } ) if __name__ == "__main__": diff --git a/src/scribe_data/cli.py b/src/scribe_data/cli.py index 05e85f756..277cca12e 100644 --- a/src/scribe_data/cli.py +++ b/src/scribe_data/cli.py @@ -20,11 +20,20 @@ --> """ +#!/usr/bin/env python3 + +import sys +import os import argparse import json from pathlib import Path from typing import Dict, List, Union +# Add the parent directory of 'src' to sys.path +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +sys.path.append(parent_dir) + DATA_DIR = Path('language_data_export') def list_languages() -> None: @@ -32,43 +41,69 @@ def list_languages() -> None: print(f"Directory '{DATA_DIR}' does not exist.") return - languages = [lang for lang in DATA_DIR.iterdir() if lang.is_dir()] + languages = [lang.name for lang in DATA_DIR.iterdir() if lang.is_dir()] print("Available languages:") for lang in languages: - print(f"- {lang.name}") - word_types = [wt.stem for wt in lang.glob('*.json')] - max_word_type_length = max(len(wt) for wt in word_types) - for wt in word_types: - print(f" - {wt:<{max_word_type_length}}") - print("") - -def print_formatted_data(data: Union[Dict, List]) -> None: - if isinstance(data, dict): + print(f"- {lang}") + +def list_word_types(language: str) -> None: + language_dir = DATA_DIR / language + if not language_dir.exists() or not language_dir.is_dir(): + print(f"No data found for language '{language}'.") + return + + word_types = [wt.stem for wt in language_dir.glob('*.json')] + if not word_types: + print(f"No word types available for language '{language}'.") + return + + max_word_type_length = max(len(wt) for wt in word_types) + print(f"Word types for language '{language}':") + for wt in word_types: + print(f" - {wt:<{max_word_type_length}}") + +def print_formatted_data(data: Union[Dict, List], word_type: str) -> None: + if word_type == 'autosuggestions': max_key_length = max(len(key) for key in data.keys()) for key, value in data.items(): - if isinstance(value, dict): - print(f"{key:<{max_key_length}} : ") - max_sub_key_length = max(len(sub_key) for sub_key in value.keys()) - for sub_key, sub_value in value.items(): - print(f" {sub_key:<{max_sub_key_length}} : {sub_value}") - else: - print(f"{key:<{max_key_length}} : {value}") - elif isinstance(data, list): - max_key_length = max(len(key) for item in data for key in item.keys()) - for item in data: - if isinstance(item, dict): - for key, value in item.items(): - print(f"{key:<{max_key_length}} : ") - if isinstance(value, dict): - max_sub_key_length = max(len(sub_key) for sub_key in value.keys()) - for sub_key, sub_value in value.items(): - print(f" {sub_key:<{max_sub_key_length}} : {sub_value}") - else: - print(f" {value}") - else: - print(json.dumps(item, indent=2)) + print(f"{key:<{max_key_length}} : {', '.join(value)}") + elif word_type == 'emoji_keywords': + max_key_length = max(len(key) for key in data.keys()) + for key, value in data.items(): + emojis = [item['emoji'] for item in value] + print(f"{key:<{max_key_length}} : {' '.join(emojis)}") + elif word_type == 'prepositions' or word_type == 'translations': 
+ max_key_length = max(len(key) for key in data.keys()) + for key, value in data.items(): + print(f"{key:<{max_key_length}} : {value}") else: - print(data) + if isinstance(data, dict): + max_key_length = max(len(key) for key in data.keys()) + for key, value in data.items(): + if isinstance(value, dict): + print(f"{key:<{max_key_length}} : ") + max_sub_key_length = max(len(sub_key) for sub_key in value.keys()) + for sub_key, sub_value in value.items(): + print(f" {sub_key:<{max_sub_key_length}} : {sub_value}") + elif isinstance(value, list): + print(f"{key:<{max_key_length}} : ") + for item in value: + if isinstance(item, dict): + for sub_key, sub_value in item.items(): + print(f" {sub_key:<{max_key_length}} : {sub_value}") + else: + print(f" {item}") + else: + print(f"{key:<{max_key_length}} : {value}") + elif isinstance(data, list): + for item in data: + if isinstance(item, dict): + for key, value in item.items(): + print(f"{key} : {value}") + else: + print(item) + else: + print(data) def query_data(language: str, word_type: str) -> None: data_file = DATA_DIR / language / f"{word_type}.json" @@ -84,22 +119,35 @@ def query_data(language: str, word_type: str) -> None: return print(f"Data for language '{language}' and word type '{word_type}':") - print_formatted_data(data) + print_formatted_data(data, word_type) + + if word_type.lower() == 'nouns': + print("\nLegend:") + print("PL : Plural") + print("empty : Singular\n") def main() -> None: parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool') - subparsers = parser.add_subparsers(dest='command') - - subparsers.add_parser('list-languages', help='List available language codes and word types') + subparsers = parser.add_subparsers(dest='command', required=True) + # Define the 'list-languages' subcommand + list_languages_parser = subparsers.add_parser('languages-list', aliases=['ll'], help='List available languages') + + # Define the 'list-word-types' subcommand + list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types for a specific language') + list_word_types_parser.add_argument('-l', '--language', required=True, help='Language code') + + # Define the 'query' subcommand query_parser = subparsers.add_parser('query', help='Query data for a specific language and word type') query_parser.add_argument('-l', '--language', required=True, help='Language code') query_parser.add_argument('-wt', '--word-type', required=True, help='Word type') args = parser.parse_args() - if args.command == 'list-languages': + if args.command in ['languages-list', 'll']: list_languages() + elif args.command in ['list-word-types', 'lwt']: + list_word_types(args.language) elif args.command == 'query': query_data(args.language, args.word_type) else: From fcec4e0c3168cf886166b7358e40f7191d4eed5e Mon Sep 17 00:00:00 2001 From: Mahfuza Humayra Mohona Date: Thu, 6 Jun 2024 16:47:05 +0600 Subject: [PATCH 04/18] add language code --- src/scribe_data/cli.py | 44 ++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/src/scribe_data/cli.py b/src/scribe_data/cli.py index 277cca12e..213bd548c 100644 --- a/src/scribe_data/cli.py +++ b/src/scribe_data/cli.py @@ -29,13 +29,24 @@ from pathlib import Path from typing import Dict, List, Union -# Add the parent directory of 'src' to sys.path current_dir = os.path.dirname(os.path.abspath(__file__)) parent_dir = os.path.dirname(current_dir) sys.path.append(parent_dir) DATA_DIR = Path('language_data_export') +# Mapping of 
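
The console_scripts entry added to setup.py is what turns the module into a scribe-data executable on installation; the string "scribe_data.cli:main" names the module path and the callable. The same target can be driven without installing. A minimal sketch, assuming the repository root is the working directory and the package is importable from src/:

    import sys

    sys.path.insert(0, "src")          # assumption: the package lives under src/
    from scribe_data.cli import main   # the exact target named in entry_points

    sys.argv = ["scribe-data", "languages-list"]  # same argv an installed script would pass
    main()
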
From fcec4e0c3168cf886166b7358e40f7191d4eed5e Mon Sep 17 00:00:00 2001
From: Mahfuza Humayra Mohona
Date: Thu, 6 Jun 2024 16:47:05 +0600
Subject: [PATCH 04/18] add language code

---
 src/scribe_data/cli.py | 44 ++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/src/scribe_data/cli.py b/src/scribe_data/cli.py
index 277cca12e..213bd548c 100644
--- a/src/scribe_data/cli.py
+++ b/src/scribe_data/cli.py
@@ -29,13 +29,24 @@
 from pathlib import Path
 from typing import Dict, List, Union
 
-# Add the parent directory of 'src' to sys.path
 current_dir = os.path.dirname(os.path.abspath(__file__))
 parent_dir = os.path.dirname(current_dir)
 sys.path.append(parent_dir)
 
 DATA_DIR = Path('language_data_export')
 
+# Mapping of possible inputs to standardized language names
+LANGUAGE_MAP = {
+    'en': 'English', 'english': 'English',
+    'fr': 'French', 'french': 'French',
+    'de': 'German', 'german': 'German',
+    'it': 'Italian', 'italian': 'Italian',
+    'pt': 'Portuguese', 'portuguese': 'Portuguese',
+    'ru': 'Russian', 'russian': 'Russian',
+    'es': 'Spanish', 'spanish': 'Spanish',
+    'sv': 'Swedish', 'swedish': 'Swedish'
+}
+
 def list_languages() -> None:
@@ -47,18 +58,24 @@ def list_word_types(language: str) -> None:
-    language_dir = DATA_DIR / language
+    # Normalize the input language
+    normalized_language = LANGUAGE_MAP.get(language.lower())
+    if not normalized_language:
+        print(f"Language '{language}' is not recognized.")
+        return
+
+    language_dir = DATA_DIR / normalized_language
     if not language_dir.exists() or not language_dir.is_dir():
-        print(f"No data found for language '{language}'.")
+        print(f"No data found for language '{normalized_language}'.")
         return
 
     word_types = [wt.stem for wt in language_dir.glob('*.json')]
     if not word_types:
-        print(f"No word types available for language '{language}'.")
+        print(f"No word types available for language '{normalized_language}'.")
         return
 
     max_word_type_length = max(len(wt) for wt in word_types)
-    print(f"Word types for language '{language}':")
+    print(f"Word types for language '{normalized_language}':")
     for wt in word_types:
         print(f" - {wt:<{max_word_type_length}}")
@@ -106,9 +123,15 @@ def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
         print(data)
 
 def query_data(language: str, word_type: str) -> None:
-    data_file = DATA_DIR / language / f"{word_type}.json"
+    # Normalize the input language
+    normalized_language = LANGUAGE_MAP.get(language.lower())
+    if not normalized_language:
+        print(f"Language '{language}' is not recognized.")
+        return
+
+    data_file = DATA_DIR / normalized_language / f"{word_type}.json"
     if not data_file.exists():
-        print(f"No data found for language '{language}' and word type '{word_type}'.")
+        print(f"No data found for language '{normalized_language}' and word type '{word_type}'.")
         return
 
     try:
@@ -118,7 +141,7 @@ def query_data(language: str, word_type: str) -> None:
         print(f"Error reading '{data_file}': {e}")
         return
 
-    print(f"Data for language '{language}' and word type '{word_type}':")
+    print(f"Data for language '{normalized_language}' and word type '{word_type}':")
     print_formatted_data(data, word_type)
 
     if word_type.lower() == 'nouns':
@@ -130,14 +153,11 @@ def main() -> None:
     parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool')
     subparsers = parser.add_subparsers(dest='command', required=True)
 
-    # Define the 'list-languages' subcommand
-    list_languages_parser = subparsers.add_parser('languages-list', aliases=['ll'], help='List available languages')
+    subparsers.add_parser('languages-list', aliases=['ll'], help='List available languages')
 
-    # Define the 'list-word-types' subcommand
     list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types for a specific language')
     list_word_types_parser.add_argument('-l', '--language', required=True, help='Language code')
 
-    # Define the 'query' subcommand
     query_parser = subparsers.add_parser('query', help='Query data for a specific language and word type')
     query_parser.add_argument('-l', '--language', required=True, help='Language code')
     query_parser.add_argument('-wt', '--word-type', required=True, help='Word type')
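
The LANGUAGE_MAP lookup introduced above does double duty: it validates the user's input and canonicalizes it to the capitalized directory name used under language_data_export/. A standalone illustration of that behavior (the dict is abbreviated here rather than imported, and 'klingon' is a deliberately unrecognized input):

    LANGUAGE_MAP = {'en': 'English', 'english': 'English'}

    for user_input in ('en', 'English', 'ENGLISH'):
        # .lower() makes the lookup case-insensitive; all three hit the same entry
        assert LANGUAGE_MAP.get(user_input.lower()) == 'English'

    # unrecognized inputs return None, which triggers the "is not recognized" branch
    assert LANGUAGE_MAP.get('klingon') is None
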
From e01670372713b571afc8393219c0be3134e2a44f Mon Sep 17 00:00:00 2001
From: Mahfuza Humayra Mohona
Date: Sat, 8 Jun 2024 07:10:53 +0600
Subject: [PATCH 05/18] update as per requirement in #148

---
 src/scribe_data/cli.py | 71 +++++++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 26 deletions(-)

diff --git a/src/scribe_data/cli.py b/src/scribe_data/cli.py
index 213bd548c..e8cba7783 100644
--- a/src/scribe_data/cli.py
+++ b/src/scribe_data/cli.py
@@ -53,31 +53,47 @@ def list_languages() -> None:
         return
 
     languages = [lang.name for lang in DATA_DIR.iterdir() if lang.is_dir()]
+    languages.sort()
     print("Available languages:")
     for lang in languages:
         print(f"- {lang}")
 
-def list_word_types(language: str) -> None:
-    # Normalize the input language
-    normalized_language = LANGUAGE_MAP.get(language.lower())
-    if not normalized_language:
-        print(f"Language '{language}' is not recognized.")
-        return
-
-    language_dir = DATA_DIR / normalized_language
-    if not language_dir.exists() or not language_dir.is_dir():
-        print(f"No data found for language '{normalized_language}'.")
-        return
+def list_word_types(language: str = None) -> None:
+    if language:
+        # Normalize the input language
+        normalized_language = LANGUAGE_MAP.get(language.lower())
+        if not normalized_language:
+            print(f"Language '{language}' is not recognized.")
+            return
+
+        language_dir = DATA_DIR / normalized_language
+        if not language_dir.exists() or not language_dir.is_dir():
+            print(f"No data found for language '{normalized_language}'.")
+            return
+
+        word_types = [wt.stem for wt in language_dir.glob('*.json')]
+        if not word_types:
+            print(f"No word types available for language '{normalized_language}'.")
+            return
+
+        max_word_type_length = max(len(wt) for wt in word_types)
+        print(f"Word types for language '{normalized_language}':")
+        for wt in word_types:
+            print(f" - {wt:<{max_word_type_length}}")
+    else:
+        word_types = set()
+        for lang_dir in DATA_DIR.iterdir():
+            if lang_dir.is_dir():
+                word_types.update(wt.stem for wt in lang_dir.glob('*.json'))
 
-    word_types = [wt.stem for wt in language_dir.glob('*.json')]
-    if not word_types:
-        print(f"No word types available for language '{normalized_language}'.")
-        return
+        if not word_types:
+            print("No word types available.")
+            return
 
-    max_word_type_length = max(len(wt) for wt in word_types)
-    print(f"Word types for language '{normalized_language}':")
-    for wt in word_types:
-        print(f" - {wt:<{max_word_type_length}}")
+        word_types = sorted(word_types)
+        print("Available word types:")
+        for wt in word_types:
+            print(f" - {wt}")
 
 def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
     if word_type == 'autosuggestions':
@@ -153,21 +169,24 @@ def main() -> None:
     parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool')
     subparsers = parser.add_subparsers(dest='command', required=True)
 
-    subparsers.add_parser('languages-list', aliases=['ll'], help='List available languages')
-
-    list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types for a specific language')
-    list_word_types_parser.add_argument('-l', '--language', required=True, help='Language code')
-
+    subparsers.add_parser('list-languages', aliases=['ll'], help='List available languages')
+
+    list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types')
+    list_word_types_parser.add_argument('-l', '--language', help='Language code')
+
     query_parser = subparsers.add_parser('query', help='Query data for a specific language and word type')
     query_parser.add_argument('-l', '--language', required=True, help='Language code')
     query_parser.add_argument('-wt', '--word-type', required=True, help='Word type')
 
     args = parser.parse_args()
 
-    if args.command in ['languages-list', 'll']:
+    if args.command in ['list-languages', 'll']:
         list_languages()
     elif args.command in ['list-word-types', 'lwt']:
-        list_word_types(args.language)
+        if args.language:
+            list_word_types(args.language)
+        else:
+            list_word_types()
     elif args.command == 'query':
         query_data(args.language, args.word_type)
     else:

From 6e6da98480e808587468c48163588b8773a5322e Mon Sep 17 00:00:00 2001
From: Mahfuza Humayra Mohona
Date: Wed, 12 Jun 2024 08:49:14 +0600
Subject: [PATCH 06/18] update cli file structure

---
 setup.py                         |   6 +-
 src/scribe_data/cli.py           | 196 ------------------------------
 src/scribe_data/cli/__init__.py  |   0
 src/scribe_data/cli/cli_list.py  |  52 ++++++++
 src/scribe_data/cli/cli_main.py  |  58 +++++++++
 src/scribe_data/cli/cli_query.py |  63 ++++++++++
 src/scribe_data/cli/cli_utils.py |  53 +++++++++
 7 files changed, 229 insertions(+), 199 deletions(-)
 delete mode 100644 src/scribe_data/cli.py
 create mode 100644 src/scribe_data/cli/__init__.py
 create mode 100644 src/scribe_data/cli/cli_list.py
 create mode 100644 src/scribe_data/cli/cli_main.py
 create mode 100644 src/scribe_data/cli/cli_query.py
 create mode 100644 src/scribe_data/cli/cli_utils.py

diff --git a/setup.py b/setup.py
index 9c5b1741c..854612ba4 100644
--- a/setup.py
+++ b/setup.py
@@ -49,9 +49,9 @@
     url="https://github.com/scribe-org/Scribe-Data",
     entry_points={
         "console_scripts": [
-            "scribe-data=scribe_data.cli:main",
-        ],
-    }
+            "scribe-data=scribe_data.cli.cli_main:main",
+        ],
+    }
 )
 
 if __name__ == "__main__":

diff --git a/src/scribe_data/cli.py b/src/scribe_data/cli.py
deleted file mode 100644
index e8cba7783..000000000
--- a/src/scribe_data/cli.py
+++ /dev/null
@@ -1,196 +0,0 @@
-"""
-Setup and commands for the Scribe-Data command line interface.
-
-.. raw:: html
-
-"""
-
-#!/usr/bin/env python3
-
-import sys
-import os
-import argparse
-import json
-from pathlib import Path
-from typing import Dict, List, Union
-
-current_dir = os.path.dirname(os.path.abspath(__file__))
-parent_dir = os.path.dirname(current_dir)
-sys.path.append(parent_dir)
-
-DATA_DIR = Path('language_data_export')
-
-# Mapping of possible inputs to standardized language names
-LANGUAGE_MAP = {
-    'en': 'English', 'english': 'English',
-    'fr': 'French', 'french': 'French',
-    'de': 'German', 'german': 'German',
-    'it': 'Italian', 'italian': 'Italian',
-    'pt': 'Portuguese', 'portuguese': 'Portuguese',
-    'ru': 'Russian', 'russian': 'Russian',
-    'es': 'Spanish', 'spanish': 'Spanish',
-    'sv': 'Swedish', 'swedish': 'Swedish'
-}
-
-def list_languages() -> None:
-    if not DATA_DIR.exists() or not DATA_DIR.is_dir():
-        print(f"Directory '{DATA_DIR}' does not exist.")
-        return
-
-    languages = [lang.name for lang in DATA_DIR.iterdir() if lang.is_dir()]
-    languages.sort()
-    print("Available languages:")
-    for lang in languages:
-        print(f"- {lang}")
-
-def list_word_types(language: str = None) -> None:
-    if language:
-        # Normalize the input language
-        normalized_language = LANGUAGE_MAP.get(language.lower())
-        if not normalized_language:
-            print(f"Language '{language}' is not recognized.")
-            return
-
-        language_dir = DATA_DIR / normalized_language
-        if not language_dir.exists() or not language_dir.is_dir():
-            print(f"No data found for language '{normalized_language}'.")
-            return
-
-        word_types = [wt.stem for wt in language_dir.glob('*.json')]
-        if not word_types:
-            print(f"No word types available for language '{normalized_language}'.")
-            return
-
-        max_word_type_length = max(len(wt) for wt in word_types)
-        print(f"Word types for language '{normalized_language}':")
-        for wt in word_types:
-            print(f" - {wt:<{max_word_type_length}}")
-    else:
-        word_types = set()
-        for lang_dir in DATA_DIR.iterdir():
-            if lang_dir.is_dir():
-                word_types.update(wt.stem for wt in lang_dir.glob('*.json'))
-
-        if not word_types:
-            print("No word types available.")
-            return
-
-        word_types = sorted(word_types)
-        print("Available word types:")
-        for wt in word_types:
-            print(f" - {wt}")
-
-def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
-    if word_type == 'autosuggestions':
-        max_key_length = max(len(key) for key in data.keys())
-        for key, value in data.items():
-            print(f"{key:<{max_key_length}} : {', '.join(value)}")
-    elif word_type == 'emoji_keywords':
-        max_key_length = max(len(key) for key in data.keys())
-        for key, value in data.items():
-            emojis = [item['emoji'] for item in value]
-            print(f"{key:<{max_key_length}} : {' '.join(emojis)}")
-    elif word_type == 'prepositions' or word_type == 'translations':
-        max_key_length = max(len(key) for key in data.keys())
-        for key, value in data.items():
-            print(f"{key:<{max_key_length}} : {value}")
-    else:
-        if isinstance(data, dict):
-            max_key_length = max(len(key) for key in data.keys())
-            for key, value in data.items():
-                if isinstance(value, dict):
-                    print(f"{key:<{max_key_length}} : ")
-                    max_sub_key_length = max(len(sub_key) for sub_key in value.keys())
-                    for sub_key, sub_value in value.items():
-                        print(f" {sub_key:<{max_sub_key_length}} : {sub_value}")
-                elif isinstance(value, list):
-                    print(f"{key:<{max_key_length}} : ")
-                    for item in value:
-                        if isinstance(item, dict):
-                            for sub_key, sub_value in item.items():
-                                print(f" {sub_key:<{max_key_length}} : {sub_value}")
-                        else:
-                            print(f" {item}")
-                else:
-                    print(f"{key:<{max_key_length}} : {value}")
-        elif isinstance(data, list):
-            for item in data:
-                if isinstance(item, dict):
-                    for key, value in item.items():
-                        print(f"{key} : {value}")
-                else:
-                    print(item)
-        else:
-            print(data)
-
-def query_data(language: str, word_type: str) -> None:
-    # Normalize the input language
-    normalized_language = LANGUAGE_MAP.get(language.lower())
-    if not normalized_language:
-        print(f"Language '{language}' is not recognized.")
-        return
-
-    data_file = DATA_DIR / normalized_language / f"{word_type}.json"
-    if not data_file.exists():
-        print(f"No data found for language '{normalized_language}' and word type '{word_type}'.")
-        return
-
-    try:
-        with data_file.open('r') as file:
-            data = json.load(file)
-    except (IOError, json.JSONDecodeError) as e:
-        print(f"Error reading '{data_file}': {e}")
-        return
-
-    print(f"Data for language '{normalized_language}' and word type '{word_type}':")
-    print_formatted_data(data, word_type)
-
-    if word_type.lower() == 'nouns':
-        print("\nLegend:")
-        print("PL : Plural")
-        print("empty : Singular\n")
-
-def main() -> None:
-    parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool')
-    subparsers = parser.add_subparsers(dest='command', required=True)
-
-    subparsers.add_parser('list-languages', aliases=['ll'], help='List available languages')
-
-    list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types')
-    list_word_types_parser.add_argument('-l', '--language', help='Language code')
-
-    query_parser = subparsers.add_parser('query', help='Query data for a specific language and word type')
-    query_parser.add_argument('-l', '--language', required=True, help='Language code')
-    query_parser.add_argument('-wt', '--word-type', required=True, help='Word type')
-
-    args = parser.parse_args()
-
-    if args.command in ['list-languages', 'll']:
-        list_languages()
-    elif args.command in ['list-word-types', 'lwt']:
-        if args.language:
-            list_word_types(args.language)
-        else:
-            list_word_types()
-    elif args.command == 'query':
-        query_data(args.language, args.word_type)
-    else:
-        parser.print_help()
-
-if __name__ == '__main__':
-    main()

diff --git a/src/scribe_data/cli/__init__.py b/src/scribe_data/cli/__init__.py
new file mode 100644
index 000000000..e69de29bb

diff --git a/src/scribe_data/cli/cli_list.py b/src/scribe_data/cli/cli_list.py
new file mode 100644
index 000000000..affadf648
--- /dev/null
+++ b/src/scribe_data/cli/cli_list.py
@@ -0,0 +1,52 @@
+# import os
+from pathlib import Path
+from .cli_utils import LANGUAGE_MAP
+
+DATA_DIR = Path('scribe_data_json_export')
+
+def list_languages() -> None:
+    if not DATA_DIR.exists() or not DATA_DIR.is_dir():
+        print(f"Directory '{DATA_DIR}' does not exist.")
+        return
+
+    languages = [lang.name for lang in DATA_DIR.iterdir() if lang.is_dir()]
+    languages.sort()
+    print("Available languages:")
+    for lang in languages:
+        print(f"- {lang}")
+
+def list_word_types(language: str = None) -> None:
+    if language:
+        normalized_language = LANGUAGE_MAP.get(language.lower())
+        if not normalized_language:
+            print(f"Language '{language}' is not recognized.")
+            return
+
+        language_dir = DATA_DIR / normalized_language
+        if not language_dir.exists() or not language_dir.is_dir():
+            print(f"No data found for language '{normalized_language}'.")
+            return
+
+        word_types = [wt.stem for wt in language_dir.glob('*.json')]
+        if not word_types:
+            print(f"No word types available for language '{normalized_language}'.")
+            return
+
+        max_word_type_length = max(len(wt) for wt in word_types)
+        print(f"Word types for language '{normalized_language}':")
+        for wt in word_types:
+            print(f" - {wt:<{max_word_type_length}}")
+    else:
+        word_types = set()
+        for lang_dir in DATA_DIR.iterdir():
+            if lang_dir.is_dir():
+                word_types.update(wt.stem for wt in lang_dir.glob('*.json'))
+
+        if not word_types:
+            print("No word types available.")
+            return
+
+        word_types = sorted(word_types)
+        print("Available word types:")
+        for wt in word_types:
+            print(f" - {wt}")

diff --git a/src/scribe_data/cli/cli_main.py b/src/scribe_data/cli/cli_main.py
new file mode 100644
index 000000000..dd857ca70
--- /dev/null
+++ b/src/scribe_data/cli/cli_main.py
@@ -0,0 +1,58 @@
+"""
+Setup and commands for the Scribe-Data command line interface.
+
+.. raw:: html
+
+"""
+
+#!/usr/bin/env python3
+
+import argparse
+from .cli_list import list_languages, list_word_types
+from .cli_query import query_data
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool')
+    subparsers = parser.add_subparsers(dest='command', required=True)
+
+    subparsers.add_parser('list-languages', aliases=['ll'], help='List available languages')
+
+    list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types')
+    list_word_types_parser.add_argument('-l', '--language', help='Language code')
+
+    query_parser = subparsers.add_parser('query', help='Query data for a specific language and word type')
+    query_parser.add_argument('--all', action='store_true', help='Query all data')
+    query_parser.add_argument('-l', '--language', help='Language code')
+    query_parser.add_argument('-wt', '--word-type', help='Word type')
+
+    args = parser.parse_args()
+
+    if args.command in ['list-languages', 'll']:
+        list_languages()
+    elif args.command in ['list-word-types', 'lwt']:
+        if args.language:
+            list_word_types(args.language)
+        else:
+            list_word_types()
+    elif args.command == 'query':
+        query_data(args.all, args.language, args.word_type)
+    else:
+        parser.print_help()
+
+if __name__ == '__main__':
+    main()

diff --git a/src/scribe_data/cli/cli_query.py b/src/scribe_data/cli/cli_query.py
new file mode 100644
index 000000000..b35ebff3d
--- /dev/null
+++ b/src/scribe_data/cli/cli_query.py
@@ -0,0 +1,63 @@
+import json
+from pathlib import Path
+from .cli_utils import print_formatted_data, LANGUAGE_MAP
+
+DATA_DIR = Path('scribe_data_json_export')
+
+def query_data(all_data: bool, language: str = None, word_type: str = None) -> None:
+    if not (all_data or language or word_type):
+        print("Error: You must provide at least one of --all, --language, or --word-type.")
+        return
+
+    if all_data:
+        for lang_dir in DATA_DIR.iterdir():
+            if lang_dir.is_dir():
+                for wt in lang_dir.glob('*.json'):
+                    query_and_print_data(lang_dir.name, wt.stem)
+    elif language and word_type:
+        query_and_print_data(language, word_type)
+    elif language:
+        normalized_language = LANGUAGE_MAP.get(language.lower())
+        if not normalized_language:
+            print(f"Language '{language}' is not recognized.")
+            return
+
+        language_dir = DATA_DIR / normalized_language
+        if not language_dir.exists() or not language_dir.is_dir():
+            print(f"No data found for language '{normalized_language}'.")
+            return
+
+        for wt in language_dir.glob('*.json'):
+            query_and_print_data(language, wt.stem)
+    elif word_type:
+        for lang_dir in DATA_DIR.iterdir():
+            if lang_dir.is_dir():
+                wt_path = lang_dir / f"{word_type}.json"
+                if wt_path.exists():
+                    query_and_print_data(lang_dir.name, word_type)
+
+def query_and_print_data(language: str, word_type: str) -> None:
+    normalized_language = LANGUAGE_MAP.get(language.lower())
+    if not normalized_language:
+        print(f"Language '{language}' is not recognized.")
+        return
+
+    data_file = DATA_DIR / normalized_language / f"{word_type}.json"
+    if not data_file.exists():
+        print(f"No data found for language '{normalized_language}' and word type '{word_type}'.")
+        return
+
+    try:
+        with data_file.open('r') as file:
+            data = json.load(file)
+    except (IOError, json.JSONDecodeError) as e:
+        print(f"Error reading '{data_file}': {e}")
+        return
+
+    print(f"Data for language '{normalized_language}' and word type '{word_type}':")
+    print_formatted_data(data, word_type)
+
+    if word_type.lower() == 'nouns':
+        print("\nLegend:")
+        print("PL : Plural")
+        print("empty : Singular\n")

diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
new file mode 100644
index 000000000..2531de9a2
--- /dev/null
+++ b/src/scribe_data/cli/cli_utils.py
@@ -0,0 +1,53 @@
+from typing import Dict, List, Union
+
+LANGUAGE_MAP = {
+    'en': 'English', 'english': 'English',
+    'fr': 'French', 'french': 'French',
+    'de': 'German', 'german': 'German',
+    'it': 'Italian', 'italian': 'Italian',
+    'pt': 'Portuguese', 'portuguese': 'Portuguese',
+    'ru': 'Russian', 'russian': 'Russian',
+    'es': 'Spanish', 'spanish': 'Spanish',
+    'sv': 'Swedish', 'swedish': 'Swedish'
+}
+
+def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
+    if not data:
+        print("No data available.")
+        return
+
+    if word_type == 'autosuggestions':
+        max_key_length = max(len(key) for key in data.keys())
+        for key, value in data.items():
+            print(f"{key:<{max_key_length}} : {', '.join(value)}")
+    elif word_type == 'emoji_keywords':
+        max_key_length = max(len(key) for key in data.keys())
+        for key, value in data.items():
+            emojis = [item['emoji'] for item in value]
+            print(f"{key:<{max_key_length}} : {' '.join(emojis)}")
+    elif word_type == 'prepositions' or word_type == 'translations':
+        max_key_length = max(len(key) for key in data.keys())
+        for key, value in data.items():
+            print(f"{key:<{max_key_length}} : {value}")
+    else:
+        if isinstance(data, dict):
+            max_key_length = max(len(key) for key in data.keys())
+            for key, value in data.items():
+                if isinstance(value, dict):
+                    print(f"{key:<{max_key_length}} : ")
+                    max_sub_key_length = max(len(sub_key) for sub_key in value.keys())
+                    for sub_key, sub_value in value.items():
+                        print(f" {sub_key:<{max_sub_key_length}} : {sub_value}")
+                elif isinstance(value, list):
+                    print(f"{key:<{max_key_length}} : ")
+                    for item in value:
+                        if isinstance(item, dict):
+                            for sub_key, sub_value in item.items():
+                                print(f" {sub_key:<{max_key_length}} : {sub_value}")
+                        else:
+                            print(f" {item}")
+                else:
+                    print(f"{key:<{max_key_length}} : {value}")
+        elif isinstance(data, list):
+            for item in data:
+                print(item)

From 2487d6d42a924744057beb1cc534b1ef6d1069a0 Mon Sep 17 00:00:00 2001
From: Mahfuza Humayra Mohona
Date: Wed, 12 Jun 2024 16:11:27 +0600
Subject: [PATCH 07/18] rename files, fix commands for list

---
 setup.py                                     |   6 +-
 src/scribe_data/cli/cli_list.py              |  52 ----------
 src/scribe_data/cli/list.py                  |  97 +++++++++++++++++++
 src/scribe_data/cli/{cli_main.py => main.py} |  23 ++---
 .../cli/{cli_query.py => query.py}           |   9 +-
 .../cli/{cli_utils.py => utils.py}           |  16 +--
 6 files changed, 121 insertions(+), 82 deletions(-)
 delete mode 100644 src/scribe_data/cli/cli_list.py
 create mode 100644 src/scribe_data/cli/list.py
 rename src/scribe_data/cli/{cli_main.py => main.py} (70%)
 rename src/scribe_data/cli/{cli_query.py => query.py} (91%)
 rename src/scribe_data/cli/{cli_utils.py => utils.py} (85%)

diff --git a/setup.py b/setup.py
index 854612ba4..d6006a57b 100644
--- a/setup.py
+++ b/setup.py
@@ -49,9 +49,9 @@
     url="https://github.com/scribe-org/Scribe-Data",
     entry_points={
         "console_scripts": [
-            "scribe-data=scribe_data.cli.cli_main:main",
-        ],
-    }
+            "scribe-data=scribe_data.cli.main:main",
+        ],
+    }
 )
 
 if __name__ == "__main__":

diff --git a/src/scribe_data/cli/cli_list.py b/src/scribe_data/cli/cli_list.py
deleted file mode 100644
index affadf648..000000000
--- a/src/scribe_data/cli/cli_list.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# import os
-from pathlib import Path
-from .cli_utils import LANGUAGE_MAP
-
-DATA_DIR = Path('scribe_data_json_export')
-
-def list_languages() -> None:
-    if not DATA_DIR.exists() or not DATA_DIR.is_dir():
-        print(f"Directory '{DATA_DIR}' does not exist.")
-        return
-
-    languages = [lang.name for lang in DATA_DIR.iterdir() if lang.is_dir()]
-    languages.sort()
-    print("Available languages:")
-    for lang in languages:
-        print(f"- {lang}")
-
-def list_word_types(language: str = None) -> None:
-    if language:
-        normalized_language = LANGUAGE_MAP.get(language.lower())
-        if not normalized_language:
-            print(f"Language '{language}' is not recognized.")
-            return
-
-        language_dir = DATA_DIR / normalized_language
-        if not language_dir.exists() or not language_dir.is_dir():
-            print(f"No data found for language '{normalized_language}'.")
-            return
-
-        word_types = [wt.stem for wt in language_dir.glob('*.json')]
-        if not word_types:
-            print(f"No word types available for language '{normalized_language}'.")
-            return
-
-        max_word_type_length = max(len(wt) for wt in word_types)
-        print(f"Word types for language '{normalized_language}':")
-        for wt in word_types:
-            print(f" - {wt:<{max_word_type_length}}")
-    else:
-        word_types = set()
-        for lang_dir in DATA_DIR.iterdir():
-            if lang_dir.is_dir():
-                word_types.update(wt.stem for wt in lang_dir.glob('*.json'))
-
-        if not word_types:
-            print("No word types available.")
-            return
-
-        word_types = sorted(word_types)
-        print("Available word types:")
-        for wt in word_types:
-            print(f" - {wt}")

diff --git a/src/scribe_data/cli/list.py b/src/scribe_data/cli/list.py
new file mode 100644
index 000000000..5f5abc784
--- /dev/null
+++ b/src/scribe_data/cli/list.py
@@ -0,0 +1,97 @@
+from pathlib import Path
+from.utils import LANGUAGE_MAP
+
+DATA_DIR = Path('scribe_data_json_export')
+
+def list_languages() -> None:
+    if not DATA_DIR.exists() or not DATA_DIR.is_dir():
+        print(f"Directory '{DATA_DIR}' does not exist.")
+        return
+
+    languages = [lang.name for lang in DATA_DIR.iterdir() if lang.is_dir()]
+    languages.sort()
+    print("Available languages:")
+    for lang in languages:
+        print(f"- {lang}")
+
+def list_word_types(language: str = None) -> None:
+    if language:
+        normalized_language = LANGUAGE_MAP.get(language.lower())
+        if not normalized_language:
+            print(f"Language '{language}' is not recognized.")
+            return
+
+        language_dir = DATA_DIR / normalized_language
+        if not language_dir.exists() or not language_dir.is_dir():
+            print(f"No data found for language '{normalized_language}'.")
+            return
+
+        word_types = [wt.stem for wt in language_dir.glob('*.json')]
+        if not word_types:
+            print(f"No word types available for language '{normalized_language}'.")
+            return
+
+        max_word_type_length = max(len(wt) for wt in word_types)
+        print(f"Word types for language '{normalized_language}':")
+        for wt in word_types:
+            print(f" - {wt:<{max_word_type_length}}")
+    else:
+        word_types = set()
+        for lang_dir in DATA_DIR.iterdir():
+            if lang_dir.is_dir():
+                word_types.update(wt.stem for wt in lang_dir.glob('*.json'))
+
+        if not word_types:
+            print("No word types available.")
+            return
+
+        word_types = sorted(word_types)
+        print("Available word types:")
+        for wt in word_types:
+            print(f" - {wt}")
+
+def list_all() -> None:
+    list_languages()
+    print()
+    list_word_types()
+
+def list_languages_for_word_type(word_type: str) -> None:
+    available_languages = []
+    for lang_dir in DATA_DIR.iterdir():
+        if lang_dir.is_dir():
+            wt_path = lang_dir / f"{word_type}.json"
+            if wt_path.exists():
+                available_languages.append(lang_dir.name)
+
+    if not available_languages:
+        print(f"No languages found with word type '{word_type}'.")
+        return
+
+    available_languages.sort()
+    print(f"Languages with word type '{word_type}':")
+    for lang in available_languages:
+        print(f"- {lang}")
+
+def list_wrapper(language: str = None, word_type: str = None) -> None:
+    if language is None and word_type is None:
+        list_all()
+    elif language is True and word_type is None:
+        list_languages()
+    elif language is None and word_type is True:
+        list_word_types()
+    elif language is True and word_type is True:
+        print("Please specify both a language and a word type.")
+    elif language is True and word_type is not None:
+        list_languages_for_word_type(word_type)
+    elif language is not None and word_type is True:
+        normalized_language = LANGUAGE_MAP.get(language.lower())
+        if not normalized_language:
+            print(f"Language '{language}' is not recognized.")
+            return
+        list_word_types(normalized_language)
+    elif language is not None and word_type is not None:
+        normalized_language = LANGUAGE_MAP.get(language.lower())
+        if not normalized_language:
+            print(f"Language '{language}' is not recognized.")
+            return
+        list_word_types(normalized_language)

diff --git a/src/scribe_data/cli/cli_main.py b/src/scribe_data/cli/main.py
similarity index 70%
rename from src/scribe_data/cli/cli_main.py
rename to src/scribe_data/cli/main.py
index dd857ca70..68bc86534 100644
--- a/src/scribe_data/cli/cli_main.py
+++ b/src/scribe_data/cli/main.py
@@ -21,19 +21,17 @@
 """
 
 #!/usr/bin/env python3
-
 import argparse
-from .cli_list import list_languages, list_word_types
-from .cli_query import query_data
+from .list import list_wrapper
+from .query import query_data
 
 def main() -> None:
     parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool')
     subparsers = parser.add_subparsers(dest='command', required=True)
 
-    subparsers.add_parser('list-languages', aliases=['ll'], help='List available languages')
-
-    list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types')
-    list_word_types_parser.add_argument('-l', '--language', help='Language code')
+    list_parser = subparsers.add_parser('list', help='List languages and word types')
+    list_parser.add_argument('--language', '-l', nargs='?', const=True, help='List all languages or filter by language code')
+    list_parser.add_argument('--word-type', '-wt', nargs='?', const=True, help='List all word types or filter by word type')
 
     query_parser = subparsers.add_parser('query', help='Query data for a specific language and word type')
     query_parser.add_argument('--all', action='store_true', help='Query all data')
@@ -42,17 +40,12 @@ def main() -> None:
 
     args = parser.parse_args()
 
-    if args.command in ['list-languages', 'll']:
-        list_languages()
-    elif args.command in ['list-word-types', 'lwt']:
-        if args.language:
-            list_word_types(args.language)
-        else:
-            list_word_types()
+    if args.command == 'list':
+        list_wrapper(args.language, args.word_type)
     elif args.command == 'query':
         query_data(args.all, args.language, args.word_type)
     else:
         parser.print_help()
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file

diff --git a/src/scribe_data/cli/cli_query.py b/src/scribe_data/cli/query.py
similarity index 91%
rename from src/scribe_data/cli/cli_query.py
rename to src/scribe_data/cli/query.py
index b35ebff3d..bd4f629c1 100644
--- a/src/scribe_data/cli/cli_query.py
+++ b/src/scribe_data/cli/query.py
@@ -1,6 +1,7 @@
+# src/scribe_data/cli/query.py
 import json
 from pathlib import Path
-from .cli_utils import print_formatted_data, LANGUAGE_MAP
+from .utils import LANGUAGE_MAP, print_formatted_data
 
 DATA_DIR = Path('scribe_data_json_export')
 
@@ -56,8 +57,4 @@ def query_and_print_data(language: str, word_type: str) -> None:
 
     print(f"Data for language '{normalized_language}' and word type '{word_type}':")
     print_formatted_data(data, word_type)
-
-    if word_type.lower() == 'nouns':
-        print("\nLegend:")
-        print("PL : Plural")
-        print("empty : Singular\n")
+    
\ No newline at end of file

diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/utils.py
similarity index 85%
rename from src/scribe_data/cli/cli_utils.py
rename to src/scribe_data/cli/utils.py
index 2531de9a2..4ceac0f80 100644
--- a/src/scribe_data/cli/cli_utils.py
+++ b/src/scribe_data/cli/utils.py
@@ -1,5 +1,7 @@
-from typing import Dict, List, Union
+from typing import Dict, List, Union, Optional
+from difflib import SequenceMatcher
 
+# Mapping of possible inputs to standardized language names
 LANGUAGE_MAP = {
     'en': 'English', 'english': 'English',
     'fr': 'French', 'french': 'French',
@@ -12,10 +14,6 @@
 }
 
 def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
-    if not data:
-        print("No data available.")
-        return
-
     if word_type == 'autosuggestions':
         max_key_length = max(len(key) for key in data.keys())
         for key, value in data.items():
@@ -50,4 +48,10 @@ def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
             print(f"{key:<{max_key_length}} : {value}")
     elif isinstance(data, list):
         for item in data:
-            print(item)
+            if isinstance(item, dict):
+                for key, value in item.items():
+                    print(f"{key} : {value}")
+            else:
+                print(item)
+    else:
+        print(data)

From bc6c7da9c70a19d6197cfa29f9cf61b6f0e16eb8 Mon Sep 17 00:00:00 2001
From: Mahfuza Humayra Mohona
Date: Sun, 16 Jun 2024 07:43:26 +0600
Subject: [PATCH 08/18] changed alias for query into q

---
 src/scribe_data/cli/main.py  | 11 ++++++++---
 src/scribe_data/cli/utils.py | 17 ++++++++++-------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py
index 68bc86534..d6a15ee06 100644
--- a/src/scribe_data/cli/main.py
+++ b/src/scribe_data/cli/main.py
@@ -33,7 +33,10 @@ def main() -> None:
     list_parser.add_argument('--language', '-l', nargs='?', const=True, help='List all languages or filter by language code')
     list_parser.add_argument('--word-type', '-wt', nargs='?', const=True, help='List all word types or filter by word type')
 
-    query_parser = subparsers.add_parser('query', help='Query data for a specific language and word type')
+    list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types')
+    list_word_types_parser.add_argument('-l', '--language', help='Language code')
+
+    query_parser = subparsers.add_parser('query', aliases=['q'], help='Query data for a specific language and word type')
     query_parser.add_argument('--all', action='store_true', help='Query all data')
     query_parser.add_argument('-l', '--language', help='Language code')
     query_parser.add_argument('-wt', '--word-type', help='Word type')
@@ -42,10 +45,12 @@ def main() -> None:
 
     args = parser.parse_args()
 
     if args.command == 'list':
         list_wrapper(args.language, args.word_type)
-    elif args.command == 'query':
+    elif args.command in ['list-word-types', 'lwt']:
+        list_wrapper(None, args.language)
+    elif args.command in ['query', 'q']:
         query_data(args.all, args.language, args.word_type)
     else:
         parser.print_help()
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()

diff --git a/src/scribe_data/cli/utils.py b/src/scribe_data/cli/utils.py
index 4ceac0f80..fe5a71c5d 100644
--- a/src/scribe_data/cli/utils.py
+++ b/src/scribe_data/cli/utils.py
@@ -1,5 +1,4 @@
-from typing import Dict, List, Union, Optional
-from difflib import SequenceMatcher
+from typing import Dict, List, Union
 
 # Mapping of possible inputs to standardized language names
 LANGUAGE_MAP = {
     'en': 'English', 'english': 'English',
@@ -14,26 +13,30 @@
 }
 
 def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
+    if not data:
+        print(f"No data available for word type '{word_type}'.")
+        return
+
     if word_type == 'autosuggestions':
-        max_key_length = max(len(key) for key in data.keys())
+        max_key_length = max((len(key) for key in data.keys()), default=0)
         for key, value in data.items():
             print(f"{key:<{max_key_length}} : {', '.join(value)}")
     elif word_type == 'emoji_keywords':
-        max_key_length = max(len(key) for key in data.keys())
+        max_key_length = max((len(key) for key in data.keys()), default=0)
         for key, value in data.items():
             emojis = [item['emoji'] for item in value]
             print(f"{key:<{max_key_length}} : {' '.join(emojis)}")
     elif word_type == 'prepositions' or word_type == 'translations':
-        max_key_length = max(len(key) for key in data.keys())
+        max_key_length = max((len(key) for key in data.keys()), default=0)
         for key, value in data.items():
             print(f"{key:<{max_key_length}} : {value}")
     else:
         if isinstance(data, dict):
-            max_key_length = max(len(key) for key in data.keys())
+            max_key_length = max((len(key) for key in data.keys()), default=0)
             for key, value in data.items():
                 if isinstance(value, dict):
                     print(f"{key:<{max_key_length}} : ")
-                    max_sub_key_length = max(len(sub_key) for sub_key in value.keys())
+                    max_sub_key_length = max((len(sub_key) for sub_key in value.keys()), default=0)
                     for sub_key, sub_value in value.items():
                         print(f" {sub_key:<{max_sub_key_length}} : {sub_value}")
                 elif isinstance(value, list):
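
Besides the q alias, the commit above swaps every bare max() in utils.py for max(..., default=0). That guards the empty-collection case: max() over an empty generator raises ValueError, which is exactly what the old code did when a JSON file held an empty object. A two-line standalone illustration:

    empty = {}
    # max(len(key) for key in empty.keys())  # would raise ValueError: max() arg is an empty sequence
    print(max((len(key) for key in empty.keys()), default=0))  # prints 0, so formatting can proceed
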
From c4348569c47240834a297c7db17e1804511f8339 Mon Sep 17 00:00:00 2001
From: Mahfuza Humayra Mohona
Date: Sun, 16 Jun 2024 08:12:18 +0600
Subject: [PATCH 09/18] getting lang info from language_meta_data.json

---
 src/scribe_data/cli/list.py            | 52 ++++++++-----
 src/scribe_data/cli/query.py           | 26 +++++----
 src/scribe_data/cli/utils.py           | 24 ++++-----
 .../resources/language_meta_data.json  | 29 +++++++----
 4 files changed, 65 insertions(+), 66 deletions(-)

diff --git a/src/scribe_data/cli/list.py b/src/scribe_data/cli/list.py
index 5f5abc784..6519d50d1 100644
--- a/src/scribe_data/cli/list.py
+++ b/src/scribe_data/cli/list.py
@@ -1,18 +1,14 @@
 from pathlib import Path
-from.utils import LANGUAGE_MAP
+from .utils import LANGUAGE_METADATA, LANGUAGE_MAP
 
 DATA_DIR = Path('scribe_data_json_export')
 
 def list_languages() -> None:
-    if not DATA_DIR.exists() or not DATA_DIR.is_dir():
-        print(f"Directory '{DATA_DIR}' does not exist.")
-        return
-
-    languages = [lang.name for lang in DATA_DIR.iterdir() if lang.is_dir()]
+    languages = [lang['language'] for lang in LANGUAGE_METADATA['languages']]
     languages.sort()
     print("Available languages:")
     for lang in languages:
-        print(f"- {lang}")
+        print(f"- {lang.capitalize()}")
 
 def list_word_types(language: str = None) -> None:
     if language:
@@ -21,25 +17,26 @@ def list_word_types(language: str = None) -> None:
         print(f"Language '{language}' is not recognized.")
         return
 
-        language_dir = DATA_DIR / normalized_language
+        language_dir = DATA_DIR / normalized_language['language'].capitalize()
         if not language_dir.exists() or not language_dir.is_dir():
-            print(f"No data found for language '{normalized_language}'.")
+            print(f"No data found for language '{normalized_language['language']}'.")
             return
 
         word_types = [wt.stem for wt in language_dir.glob('*.json')]
         if not word_types:
-            print(f"No word types available for language '{normalized_language}'.")
+            print(f"No word types available for language '{normalized_language['language']}'.")
            return
 
-        max_word_type_length = max(len(wt) for wt in word_types)
-        print(f"Word types for language '{normalized_language}':")
+        word_types = sorted(word_types)
+        print(f"Word types for language '{normalized_language['language']}':")
         for wt in word_types:
-            print(f" - {wt:<{max_word_type_length}}")
+            print(f" - {wt}")
     else:
         word_types = set()
-        for lang_dir in DATA_DIR.iterdir():
-            if lang_dir.is_dir():
-                word_types.update(wt.stem for wt in lang_dir.glob('*.json'))
+        for lang in LANGUAGE_METADATA['languages']:
+            language_dir = DATA_DIR / lang['language'].capitalize()
+            if language_dir.is_dir():
+                word_types.update(wt.stem for wt in language_dir.glob('*.json'))
 
         if not word_types:
             print("No word types available.")
@@ -57,11 +54,12 @@ def list_all() -> None:
 
 def list_languages_for_word_type(word_type: str) -> None:
     available_languages = []
-    for lang_dir in DATA_DIR.iterdir():
-        if lang_dir.is_dir():
-            wt_path = lang_dir / f"{word_type}.json"
+    for lang in LANGUAGE_METADATA['languages']:
+        language_dir = DATA_DIR / lang['language'].capitalize()
+        if language_dir.is_dir():
+            wt_path = language_dir / f"{word_type}.json"
             if wt_path.exists():
-                available_languages.append(lang_dir.name)
+                available_languages.append(lang['language'])
 
     if not available_languages:
         print(f"No languages found with word type '{word_type}'.")
@@ -70,7 +68,7 @@ def list_languages_for_word_type(word_type: str) -> None:
     available_languages.sort()
     print(f"Languages with word type '{word_type}':")
     for lang in available_languages:
-        print(f"- {lang}")
+        print(f"- {lang.capitalize()}")
 
 def list_wrapper(language: str = None, word_type: str = None) -> None:
     if language is None and word_type is None:
@@ -84,14 +82,6 @@ def list_wrapper(language: str = None, word_type: str = None) -> None:
     elif language is True and word_type is not None:
         list_languages_for_word_type(word_type)
     elif language is not None and word_type is True:
-        normalized_language = LANGUAGE_MAP.get(language.lower())
-        if not normalized_language:
-            print(f"Language '{language}' is not recognized.")
-            return
-        list_word_types(normalized_language)
+        list_word_types(language)
     elif language is not None and word_type is not None:
-        normalized_language = LANGUAGE_MAP.get(language.lower())
-        if not normalized_language:
-            print(f"Language '{language}' is not recognized.")
-            return
-        list_word_types(normalized_language)
+        list_word_types(language)

diff --git a/src/scribe_data/cli/query.py b/src/scribe_data/cli/query.py
index bd4f629c1..ed415a0cb 100644
--- a/src/scribe_data/cli/query.py
+++ b/src/scribe_data/cli/query.py
@@ -1,7 +1,6 @@
-# src/scribe_data/cli/query.py
 import json
 from pathlib import Path
-from .utils import LANGUAGE_MAP, print_formatted_data
+from .utils import LANGUAGE_METADATA, LANGUAGE_MAP, print_formatted_data
 
 DATA_DIR = Path('scribe_data_json_export')
 
@@ -11,10 +10,11 @@ def query_data(all_data: bool, language: str = None, word_type: str = None) -> N
     return
 
     if all_data:
-        for lang_dir in DATA_DIR.iterdir():
+        for lang in LANGUAGE_METADATA['languages']:
+            lang_dir = DATA_DIR / lang['language'].capitalize()
             if lang_dir.is_dir():
                 for wt in lang_dir.glob('*.json'):
-                    query_and_print_data(lang_dir.name, wt.stem)
+                    query_and_print_data(lang['language'], wt.stem)
     elif language and word_type:
         query_and_print_data(language, word_type)
     elif language:
@@ -23,19 +23,20 @@ def query_data(all_data: bool, language: str = None, word_type: str = None) -> N
     print(f"Language '{language}' is not recognized.")
     return
 
-        language_dir = DATA_DIR / normalized_language
+        language_dir = DATA_DIR / normalized_language['language'].capitalize()
         if not language_dir.exists() or not language_dir.is_dir():
-            print(f"No data found for language '{normalized_language}'.")
+            print(f"No data found for language '{normalized_language['language']}'.")
             return
 
         for wt in language_dir.glob('*.json'):
-            query_and_print_data(language, wt.stem)
+            query_and_print_data(normalized_language['language'], wt.stem)
     elif word_type:
-        for lang_dir in DATA_DIR.iterdir():
+        for lang in LANGUAGE_METADATA['languages']:
+            lang_dir = DATA_DIR / lang['language'].capitalize()
             if lang_dir.is_dir():
                 wt_path = lang_dir / f"{word_type}.json"
                 if wt_path.exists():
-                    query_and_print_data(lang_dir.name, word_type)
+                    query_and_print_data(lang['language'], word_type)
 
 def query_and_print_data(language: str, word_type: str) -> None:
     normalized_language = LANGUAGE_MAP.get(language.lower())
@@ -43,9 +44,9 @@ def query_and_print_data(language: str, word_type: str) -> None:
     print(f"Language '{language}' is not recognized.")
     return
 
-    data_file = DATA_DIR / normalized_language / f"{word_type}.json"
+    data_file = DATA_DIR / normalized_language['language'].capitalize() / f"{word_type}.json"
     if not data_file.exists():
-        print(f"No data found for language '{normalized_language}' and word type '{word_type}'.")
+        print(f"No data found for language '{normalized_language['language']}' and word type '{word_type}'.")
         return
 
     try:
@@ -55,6 +56,5 @@ def query_and_print_data(language: str, word_type: str) -> None:
         print(f"Error reading '{data_file}': {e}")
         return
 
-    print(f"Data for language '{normalized_language}' and word type '{word_type}':")
+    print(f"Data for language '{normalized_language['language']}' and word type '{word_type}':")
     print_formatted_data(data, word_type)
-    
\ No newline at end of file

diff --git a/src/scribe_data/cli/utils.py b/src/scribe_data/cli/utils.py
index fe5a71c5d..f38086043 100644
--- a/src/scribe_data/cli/utils.py
+++ b/src/scribe_data/cli/utils.py
@@ -1,16 +1,16 @@
+import json
+from pathlib import Path
 from typing import Dict, List, Union
 
-# Mapping of possible inputs to standardized language names
-LANGUAGE_MAP = {
-    'en': 'English', 'english': 'English',
-    'fr': 'French', 'french': 'French',
-    'de': 'German', 'german': 'German',
-    'it': 'Italian', 'italian': 'Italian',
-    'pt': 'Portuguese', 'portuguese': 'Portuguese',
-    'ru': 'Russian', 'russian': 'Russian',
-    'es': 'Spanish', 'spanish': 'Spanish',
-    'sv': 'Swedish', 'swedish': 'Swedish'
-}
+# Load language metadata from JSON file
+METADATA_FILE = Path(__file__).parent.parent / 'resources' / 'language_meta_data.json'
+
+def load_language_metadata() -> Dict:
+    with METADATA_FILE.open('r', encoding='utf-8') as file:
+        return json.load(file)
+
+LANGUAGE_METADATA = load_language_metadata()
+LANGUAGE_MAP = {lang['language'].lower(): lang for lang in LANGUAGE_METADATA['languages']}
 
 def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
     if not data:
@@ -26,7 +26,7 @@ def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
     for key, value in data.items():
         emojis = [item['emoji'] for item in value]
         print(f"{key:<{max_key_length}} : {' '.join(emojis)}")
-    elif word_type == 'prepositions' or word_type == 'translations':
+    elif word_type in ['prepositions', 'translations']:
         max_key_length = max((len(key) for key in data.keys()), default=0)
         for key, value in data.items():
             print(f"{key:<{max_key_length}} : {value}")

diff --git a/src/scribe_data/resources/language_meta_data.json b/src/scribe_data/resources/language_meta_data.json
index 27a8110ea..88ba732e1 100755
--- a/src/scribe_data/resources/language_meta_data.json
+++ b/src/scribe_data/resources/language_meta_data.json
@@ -3,10 +3,11 @@
   "description": {
     "entry": {
       "language": "the supported language. All lowercase",
-      "iso": "the ISO 639 code for 'language'. See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes . All lowercase",
+      "iso": "the ISO 639 code for 'language'. See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes. All lowercase",
       "qid": "the unique identifier of 'language' on Wikidata. 'Q' followed by one or more digits. See https://www.wikidata.org/wiki/Q43649390",
       "remove-words": "words that should not be included as autosuggestions for the given language.",
-      "ignore-words": "TODO. Case sensitive."
+      "ignore-words": "TODO. Case sensitive.",
+      "word-types": "A list of word types available for the given language."
     }
   },
   "languages": [
@@ -15,56 +16,64 @@
       "iso": "en",
       "qid": "Q1860",
       "remove-words": ["of", "the", "The", "and"],
-      "ignore-words": []
+      "ignore-words": [],
+      "word-types": ["nouns", "verbs", "translated_words"]
     },
     {
       "language": "french",
       "iso": "fr",
       "qid": "Q150",
       "remove-words": ["of", "the", "The", "and"],
-      "ignore-words": ["XXe"]
+      "ignore-words": ["XXe"],
+      "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"]
     },
     {
       "language": "german",
       "iso": "de",
       "qid": "Q188",
       "remove-words": ["of", "the", "The", "and", "NeinJa", "et", "redirect"],
-      "ignore-words": ["Gemeinde", "Familienname"]
+      "ignore-words": ["Gemeinde", "Familienname"],
+      "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"]
     },
     {
       "language": "italian",
       "iso": "it",
       "qid": "Q652",
       "remove-words": ["of", "the", "The", "and", "text", "from"],
-      "ignore-words": ["The", "ATP"]
+      "ignore-words": ["The", "ATP"],
+      "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"]
     },
     {
       "language": "portuguese",
       "iso": "pt",
       "qid": "Q5146",
       "remove-words": ["of", "the", "The", "and", "jbutadptflora"],
-      "ignore-words": []
+      "ignore-words": [],
+      "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"]
     },
     {
       "language": "russian",
       "iso": "ru",
       "qid": "Q7737",
       "remove-words": ["of", "the", "The", "and"],
-      "ignore-words": []
+      "ignore-words": [],
+      "word-types": ["nouns", "verbs", "translated_words", "translations", "emoji_keywords", "prepositions", "autosuggestions"]
     },
     {
       "language": "spanish",
       "iso": "es",
       "qid": "Q1321",
       "remove-words": ["of", "the", "The", "and"],
-      "ignore-words": []
+      "ignore-words": [],
+      "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"]
    },
     {
       "language": "swedish",
       "iso": "sv",
       "qid": "Q9027",
       "remove-words": ["of", "the", "The", "and", "Checklist", "Catalogue"],
-      "ignore-words": ["databasdump"]
+      "ignore-words": ["databasdump"],
+      "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"]
     }
   ]
 }

From e1e8e68112ff19a2a24f5b0437e8c341295e780d Mon Sep 17 00:00:00 2001
From: Mahfuza Humayra Mohona
Date: Sun, 16 Jun 2024 09:15:01 +0600
Subject: [PATCH 10/18] show formatted data from meta file

---
 src/scribe_data/cli/list.py | 76 ++++++++++++++++++++++++++++++++++---
 1 file changed, 71 insertions(+), 5 deletions(-)

diff --git a/src/scribe_data/cli/list.py b/src/scribe_data/cli/list.py
index 6519d50d1..0656a067a 100644
--- a/src/scribe_data/cli/list.py
+++ b/src/scribe_data/cli/list.py
@@ -1,14 +1,80 @@
+import json
 from pathlib import Path
-from .utils import LANGUAGE_METADATA, LANGUAGE_MAP
+from typing import Dict, List, Union
+
+# Load language metadata from JSON file
+METADATA_FILE = Path(__file__).parent.parent / 'resources' / 'language_meta_data.json'
+
+def load_language_metadata() -> Dict:
+    with METADATA_FILE.open('r', encoding='utf-8') as file:
+        return json.load(file)
+
+LANGUAGE_METADATA = load_language_metadata()
+LANGUAGE_MAP = {lang['language'].lower(): lang for lang in LANGUAGE_METADATA['languages']}
 
 DATA_DIR = Path('scribe_data_json_export')
 
+def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
+    if not data:
+        print(f"No data available for word type '{word_type}'.")
+        return
+
+    if word_type == 'autosuggestions':
+        max_key_length = max((len(key) for key in data.keys()), default=0)
+        for key, value in
data.items(): + print(f"{key:<{max_key_length}} : {', '.join(value)}") + elif word_type == 'emoji_keywords': + max_key_length = max((len(key) for key in data.keys()), default=0) + for key, value in data.items(): + emojis = [item['emoji'] for item in value] + print(f"{key:<{max_key_length}} : {' '.join(emojis)}") + elif word_type in ['prepositions', 'translations']: + max_key_length = max((len(key) for key in data.keys()), default=0) + for key, value in data.items(): + print(f"{key:<{max_key_length}} : {value}") + else: + if isinstance(data, dict): + max_key_length = max((len(key) for key in data.keys()), default=0) + for key, value in data.items(): + if isinstance(value, dict): + print(f"{key:<{max_key_length}} : ") + max_sub_key_length = max((len(sub_key) for sub_key in value.keys()), default=0) + for sub_key, sub_value in value.items(): + print(f" {sub_key:<{max_sub_key_length}} : {sub_value}") + elif isinstance(value, list): + print(f"{key:<{max_key_length}} : ") + for item in value: + if isinstance(item, dict): + for sub_key, sub_value in item.items(): + print(f" {sub_key:<{max_key_length}} : {sub_value}") + else: + print(f" {item}") + else: + print(f"{key:<{max_key_length}} : {value}") + elif isinstance(data, list): + for item in data: + if isinstance(item, dict): + for key, value in item.items(): + print(f"{key} : {value}") + else: + print(item) + else: + print(data) + def list_languages() -> None: - languages = [lang['language'] for lang in LANGUAGE_METADATA['languages']] - languages.sort() - print("Available languages:") + languages = [lang for lang in LANGUAGE_METADATA['languages']] + languages.sort(key=lambda x: x['language']) + + # Define column widths + language_col_width = max(len(lang['language']) for lang in languages) + 2 + iso_col_width = 5 # Length of "ISO" column header + padding + qid_col_width = 5 # Length of "QID" column header + padding + + print(f"{'Language':<{language_col_width}} {'ISO':<{iso_col_width}} {'QID':<{qid_col_width}}") + print('-' * (language_col_width + iso_col_width + qid_col_width)) + for lang in languages: - print(f"- {lang.capitalize()}") + print(f"{lang['language'].capitalize():<{language_col_width}} {lang['iso']:<{iso_col_width}} {lang['qid']:<{qid_col_width}}") def list_word_types(language: str = None) -> None: if language: From d39dd296b531d239500e30e89ce95cf09022dfbd Mon Sep 17 00:00:00 2001 From: Mahfuza Humayra Mohona Date: Tue, 18 Jun 2024 16:16:59 +0600 Subject: [PATCH 11/18] add not implemented function --- src/scribe_data/cli/main.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py index d6a15ee06..923c6f97c 100644 --- a/src/scribe_data/cli/main.py +++ b/src/scribe_data/cli/main.py @@ -25,21 +25,39 @@ from .list import list_wrapper from .query import query_data +def not_implemented(): + print("This command is not implemented yet.") + def main() -> None: parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool') subparsers = parser.add_subparsers(dest='command', required=True) + # List command list_parser = subparsers.add_parser('list', help='List languages and word types') list_parser.add_argument('--language', '-l', nargs='?', const=True, help='List all languages or filter by language code') list_parser.add_argument('--word-type', '-wt', nargs='?', const=True, help='List all word types or filter by word type') + # List word types command list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available 
word types') list_word_types_parser.add_argument('-l', '--language', help='Language code') + # Query command query_parser = subparsers.add_parser('query', aliases=['q'], help='Query data for a specific language and word type') query_parser.add_argument('--all', action='store_true', help='Query all data') query_parser.add_argument('-l', '--language', help='Language code') query_parser.add_argument('-wt', '--word-type', help='Word type') + query_parser.add_argument('-of', '--output-file', help='Output file') + query_parser.add_argument('-ot', '--output-type', help='Output type') + query_parser.add_argument('-ll', '--list-languages', action='store_true', help='List available language codes') + + # Poll command + poll_parser = subparsers.add_parser('poll', help='Check whether there is new data available') + + # Version command + version_parser = subparsers.add_parser('version', aliases=['v'], help='Show the version of the CLI tool') + + # Update command + update_parser = subparsers.add_parser('update', aliases=['u'], help='Update the CLI tool') args = parser.parse_args() @@ -49,8 +67,15 @@ def main() -> None: list_wrapper(None, args.language) elif args.command in ['query', 'q']: query_data(args.all, args.language, args.word_type) + elif args.command == 'poll': + not_implemented() + elif args.command in ['version', 'v']: + not_implemented() + elif args.command in ['update', 'u']: + not_implemented() else: parser.print_help() if __name__ == '__main__': main() + From 4f63cf0b5df32f232ef23f1d2c59ed4c54242c55 Mon Sep 17 00:00:00 2001 From: Mahfuza Humayra Mohona Date: Wed, 19 Jun 2024 07:50:51 +0600 Subject: [PATCH 12/18] added --output-dir and --overwrite - #144 --- src/scribe_data/cli/main.py | 26 +++----------------- src/scribe_data/cli/query.py | 46 +++++++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py index 923c6f97c..ef05beb9b 100644 --- a/src/scribe_data/cli/main.py +++ b/src/scribe_data/cli/main.py @@ -25,9 +25,6 @@ from .list import list_wrapper from .query import query_data -def not_implemented(): - print("This command is not implemented yet.") - def main() -> None: parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool') subparsers = parser.add_subparsers(dest='command', required=True) @@ -46,18 +43,8 @@ def main() -> None: query_parser.add_argument('--all', action='store_true', help='Query all data') query_parser.add_argument('-l', '--language', help='Language code') query_parser.add_argument('-wt', '--word-type', help='Word type') - query_parser.add_argument('-of', '--output-file', help='Output file') - query_parser.add_argument('-ot', '--output-type', help='Output type') - query_parser.add_argument('-ll', '--list-languages', action='store_true', help='List available language codes') - - # Poll command - poll_parser = subparsers.add_parser('poll', help='Check whether there is new data available') - - # Version command - version_parser = subparsers.add_parser('version', aliases=['v'], help='Show the version of the CLI tool') - - # Update command - update_parser = subparsers.add_parser('update', aliases=['u'], help='Update the CLI tool') + query_parser.add_argument('-od', '--output-dir', help='Output directory') + query_parser.add_argument('-o', '--overwrite', action='store_true', help='Overwrite existing files in output directory') args = parser.parse_args() @@ -66,16 +53,9 @@ def main() -> None: elif args.command in ['list-word-types', 'lwt']: list_wrapper(None, 
args.language) elif args.command in ['query', 'q']: - query_data(args.all, args.language, args.word_type) - elif args.command == 'poll': - not_implemented() - elif args.command in ['version', 'v']: - not_implemented() - elif args.command in ['update', 'u']: - not_implemented() + query_data(args.all, args.language, args.word_type, args.output_dir, args.overwrite) else: parser.print_help() if __name__ == '__main__': main() - diff --git a/src/scribe_data/cli/query.py b/src/scribe_data/cli/query.py index ed415a0cb..ddaec9ccd 100644 --- a/src/scribe_data/cli/query.py +++ b/src/scribe_data/cli/query.py @@ -4,19 +4,33 @@ DATA_DIR = Path('scribe_data_json_export') -def query_data(all_data: bool, language: str = None, word_type: str = None) -> None: +def query_data(all_data: bool, language: str = None, word_type: str = None, output_dir: str = None, overwrite: bool = False) -> None: if not (all_data or language or word_type): print("Error: You must provide at least one of --all, --language, or --word-type.") return + if output_dir: + output_dir = Path(output_dir).expanduser() # Ensure it's a Path object and expand user (~) + if output_dir.suffix: + print("Error: The output path should be a directory, not a file.") + return + + if not output_dir.exists(): + output_dir.mkdir(parents=True, exist_ok=True) # Create directories if they do not exist + elif not output_dir.is_dir(): + print(f"Error: {output_dir} is not a directory.") + return + else: + output_dir = None + if all_data: for lang in LANGUAGE_METADATA['languages']: lang_dir = DATA_DIR / lang['language'].capitalize() if lang_dir.is_dir(): for wt in lang_dir.glob('*.json'): - query_and_print_data(lang['language'], wt.stem) + query_and_print_data(lang['language'], wt.stem, output_dir, overwrite) elif language and word_type: - query_and_print_data(language, word_type) + query_and_print_data(language, word_type, output_dir, overwrite) elif language: normalized_language = LANGUAGE_MAP.get(language.lower()) if not normalized_language: @@ -29,16 +43,16 @@ def query_data(all_data: bool, language: str = None, word_type: str = None) -> N return for wt in language_dir.glob('*.json'): - query_and_print_data(normalized_language['language'], wt.stem) + query_and_print_data(normalized_language['language'], wt.stem, output_dir, overwrite) elif word_type: for lang in LANGUAGE_METADATA['languages']: lang_dir = DATA_DIR / lang['language'].capitalize() if lang_dir.is_dir(): wt_path = lang_dir / f"{word_type}.json" if wt_path.exists(): - query_and_print_data(lang['language'], word_type) + query_and_print_data(lang['language'], word_type, output_dir, overwrite) -def query_and_print_data(language: str, word_type: str) -> None: +def query_and_print_data(language: str, word_type: str, output_dir: Path, overwrite: bool) -> None: normalized_language = LANGUAGE_MAP.get(language.lower()) if not normalized_language: print(f"Language '{language}' is not recognized.") @@ -56,5 +70,21 @@ def query_and_print_data(language: str, word_type: str) -> None: print(f"Error reading '{data_file}': {e}") return - print(f"Data for language '{normalized_language['language']}' and word type '{word_type}':") - print_formatted_data(data, word_type) + if output_dir: + output_file = output_dir / f"{normalized_language['language']}_{word_type}.json" + if output_file.exists() and not overwrite: + user_input = input(f"File '{output_file}' already exists. Overwrite? 
(y/n): ") + if user_input.lower() != 'y': + print(f"Skipping {normalized_language['language']} - {word_type}") + return + + try: + with output_file.open('w') as file: + json.dump(data, file, indent=2) + except IOError as e: + print(f"Error writing to '{output_file}': {e}") + return + print(f"Data for language '{normalized_language['language']}' and word type '{word_type}' written to '{output_file}'") + else: + print(f"Data for language '{normalized_language['language']}' and word type '{word_type}':") + print_formatted_data(data, word_type) From afa4eefcaae2a6b0a4bbeb005107e6083550a5c5 Mon Sep 17 00:00:00 2001 From: Mahfuza Humayra Mohona Date: Thu, 20 Jun 2024 08:13:51 +0600 Subject: [PATCH 13/18] implementation of #146 --- src/scribe_data/cli/main.py | 14 ++-- src/scribe_data/cli/query.py | 150 +++++++++++++++++++++-------------- 2 files changed, 98 insertions(+), 66 deletions(-) diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py index ef05beb9b..d012d860c 100644 --- a/src/scribe_data/cli/main.py +++ b/src/scribe_data/cli/main.py @@ -39,12 +39,12 @@ def main() -> None: list_word_types_parser.add_argument('-l', '--language', help='Language code') # Query command - query_parser = subparsers.add_parser('query', aliases=['q'], help='Query data for a specific language and word type') - query_parser.add_argument('--all', action='store_true', help='Query all data') - query_parser.add_argument('-l', '--language', help='Language code') - query_parser.add_argument('-wt', '--word-type', help='Word type') - query_parser.add_argument('-od', '--output-dir', help='Output directory') - query_parser.add_argument('-o', '--overwrite', action='store_true', help='Overwrite existing files in output directory') + query_parser = subparsers.add_parser('query', help='Query data') + query_parser.add_argument('-l', '--language', type=str, help='Language for query') + query_parser.add_argument('-wt', '--word-type', type=str, help='Word type for query') + query_parser.add_argument('-od', '--output-dir', type=str, help='Output directory') + query_parser.add_argument('-o', '--overwrite', action='store_true', help='Overwrite existing files') + query_parser.add_argument('--output-type', type=str, choices=['json', 'csv', 'tsv'], help='Output file type') args = parser.parse_args() @@ -53,7 +53,7 @@ def main() -> None: elif args.command in ['list-word-types', 'lwt']: list_wrapper(None, args.language) elif args.command in ['query', 'q']: - query_data(args.all, args.language, args.word_type, args.output_dir, args.overwrite) + query_data(args.language, args.word_type, args.output_dir, args.overwrite, args.output_type) else: parser.print_help() diff --git a/src/scribe_data/cli/query.py b/src/scribe_data/cli/query.py index ddaec9ccd..d993eff15 100644 --- a/src/scribe_data/cli/query.py +++ b/src/scribe_data/cli/query.py @@ -1,58 +1,68 @@ import json +import csv from pathlib import Path -from .utils import LANGUAGE_METADATA, LANGUAGE_MAP, print_formatted_data +from typing import Optional +from .utils import LANGUAGE_METADATA, LANGUAGE_MAP DATA_DIR = Path('scribe_data_json_export') -def query_data(all_data: bool, language: str = None, word_type: str = None, output_dir: str = None, overwrite: bool = False) -> None: - if not (all_data or language or word_type): - print("Error: You must provide at least one of --all, --language, or --word-type.") +def query_data(language: str = None, word_type: str = None, output_dir: Optional[str] = None, overwrite: bool = False, output_type: Optional[str] = None) -> None: + if 
not (language and word_type): + print("Error: You must provide both --language (-l) and --word-type (-wt) options.") return if output_dir: - output_dir = Path(output_dir).expanduser() # Ensure it's a Path object and expand user (~) - if output_dir.suffix: - print("Error: The output path should be a directory, not a file.") - return - + output_dir = Path(output_dir) if not output_dir.exists(): - output_dir.mkdir(parents=True, exist_ok=True) # Create directories if they do not exist - elif not output_dir.is_dir(): - print(f"Error: {output_dir} is not a directory.") - return + output_dir.mkdir(parents=True, exist_ok=True) + + if output_type == 'json' or output_type is None: + export_json(language, word_type, output_dir, overwrite) + elif output_type in ['csv', 'tsv']: + export_csv_or_tsv(language, word_type, output_dir, overwrite, output_type) + else: + print("Error: Unsupported output type. Please use 'json', 'csv', or 'tsv'.") else: - output_dir = None - - if all_data: - for lang in LANGUAGE_METADATA['languages']: - lang_dir = DATA_DIR / lang['language'].capitalize() - if lang_dir.is_dir(): - for wt in lang_dir.glob('*.json'): - query_and_print_data(lang['language'], wt.stem, output_dir, overwrite) - elif language and word_type: - query_and_print_data(language, word_type, output_dir, overwrite) - elif language: - normalized_language = LANGUAGE_MAP.get(language.lower()) - if not normalized_language: - print(f"Language '{language}' is not recognized.") - return + print("Error: Please specify an output directory using --output-dir (-od).") + +def export_json(language: str, word_type: str, output_dir: Path, overwrite: bool) -> None: + normalized_language = LANGUAGE_MAP.get(language.lower()) + if not normalized_language: + print(f"Language '{language}' is not recognized.") + return + + data_file = DATA_DIR / normalized_language['language'].capitalize() / f"{word_type}.json" + if not data_file.exists(): + print(f"No data found for language '{normalized_language['language']}' and word type '{word_type}'.") + return + + try: + with data_file.open('r') as file: + data = json.load(file) + except (IOError, json.JSONDecodeError) as e: + print(f"Error reading '{data_file}': {e}") + return + + # Adjust the output directory for JSON exports + json_output_dir = output_dir / "scribe_data_json_export" / normalized_language['language'].capitalize() + json_output_dir.mkdir(parents=True, exist_ok=True) - language_dir = DATA_DIR / normalized_language['language'].capitalize() - if not language_dir.exists() or not language_dir.is_dir(): - print(f"No data found for language '{normalized_language['language']}'.") + output_file = json_output_dir / f"{word_type}.json" + if output_file.exists() and not overwrite: + user_input = input(f"File '{output_file}' already exists. Overwrite? 
(y/n): ") + if user_input.lower()!= 'y': + print(f"Skipping {normalized_language['language']} - {word_type}") return - for wt in language_dir.glob('*.json'): - query_and_print_data(normalized_language['language'], wt.stem, output_dir, overwrite) - elif word_type: - for lang in LANGUAGE_METADATA['languages']: - lang_dir = DATA_DIR / lang['language'].capitalize() - if lang_dir.is_dir(): - wt_path = lang_dir / f"{word_type}.json" - if wt_path.exists(): - query_and_print_data(lang['language'], word_type, output_dir, overwrite) - -def query_and_print_data(language: str, word_type: str, output_dir: Path, overwrite: bool) -> None: + try: + with output_file.open('w') as file: + json.dump(data, file, indent=2) + except IOError as e: + print(f"Error writing to '{output_file}': {e}") + return + print(f"Data for language '{normalized_language['language']}' and word type '{word_type}' written to '{output_file}'") + +def export_csv_or_tsv(language: str, word_type: str, output_dir: Path, overwrite: bool, output_type: str) -> None: normalized_language = LANGUAGE_MAP.get(language.lower()) if not normalized_language: print(f"Language '{language}' is not recognized.") @@ -70,21 +80,43 @@ def query_and_print_data(language: str, word_type: str, output_dir: Path, overwr print(f"Error reading '{data_file}': {e}") return - if output_dir: - output_file = output_dir / f"{normalized_language['language']}_{word_type}.json" - if output_file.exists() and not overwrite: - user_input = input(f"File '{output_file}' already exists. Overwrite? (y/n): ") - if user_input.lower() != 'y': - print(f"Skipping {normalized_language['language']} - {word_type}") - return - - try: - with output_file.open('w') as file: - json.dump(data, file, indent=2) - except IOError as e: - print(f"Error writing to '{output_file}': {e}") - return - print(f"Data for language '{normalized_language['language']}' and word type '{word_type}' written to '{output_file}'") + if output_type == 'csv': + delimiter = ',' + file_extension = 'csv' + elif output_type == 'tsv': + delimiter = '\t' + file_extension = 'tsv' else: - print(f"Data for language '{normalized_language['language']}' and word type '{word_type}':") - print_formatted_data(data, word_type) + print(f"Unsupported output type '{output_type}'.") + return + + # Adjust the output directory for CSV exports + csv_output_dir = output_dir / "scribe_data_csv_export" / normalized_language['language'].capitalize() + csv_output_dir.mkdir(parents=True, exist_ok=True) + + output_file = csv_output_dir / f"{word_type}.{file_extension}" + if output_file.exists() and not overwrite: + user_input = input(f"File '{output_file}' already exists. Overwrite? 
(y/n): ") + if user_input.lower()!= 'y': + print(f"Skipping {normalized_language['language']} - {word_type}") + return + + try: + with output_file.open('w', newline='', encoding='utf-8') as file: + writer = csv.writer(file, delimiter=delimiter) + if isinstance(data, dict): + for key, value in data.items(): + writer.writerow([key, value]) + elif isinstance(data, list): + for item in data: + if isinstance(item, dict): + writer.writerow(item.values()) + else: + writer.writerow([item]) + else: + print(f"Unsupported data format for {output_type} export.") + except IOError as e: + print(f"Error writing to '{output_file}': {e}") + return + + print(f"Data for language '{normalized_language['language']}' and word type '{word_type}' written to '{output_file}'") From 6958366327e07aac4bd60b6f4f4ce788e2d854b9 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Sat, 22 Jun 2024 19:52:26 +0200 Subject: [PATCH 14/18] Update CLI structure + refactoring --- .../workflows/pr_maintainer_checklist.yaml | 2 + src/scribe_data/cli/cli_utils.py | 121 ++++++++ src/scribe_data/cli/convert.py | 21 ++ src/scribe_data/cli/list.py | 260 ++++++++++-------- src/scribe_data/cli/main.py | 149 ++++++++-- src/scribe_data/cli/query.py | 149 +++++++--- src/scribe_data/cli/utils.py | 60 ---- .../resources/language_meta_data.json | 60 +++- src/scribe_data/wikipedia/extract_wiki.py | 2 +- 9 files changed, 578 insertions(+), 246 deletions(-) create mode 100644 src/scribe_data/cli/cli_utils.py create mode 100644 src/scribe_data/cli/convert.py delete mode 100644 src/scribe_data/cli/utils.py diff --git a/.github/workflows/pr_maintainer_checklist.yaml b/.github/workflows/pr_maintainer_checklist.yaml index ed33f7373..cd300ca5a 100644 --- a/.github/workflows/pr_maintainer_checklist.yaml +++ b/.github/workflows/pr_maintainer_checklist.yaml @@ -32,4 +32,6 @@ jobs: - The contributor's name and icon in remote commits should be the same as what appears in the PR - If there's a mismatch, the contributor needs to make sure that the [email they use for GitHub](https://github.com/settings/emails) matches what they have for `git config user.email` in their local Scribe-Data repo + - [ ] The linting and formatting workflow within the [PR checks](https://github.com/scribe-org/Scribe-Data/pull/${{ github.event.pull_request.number }}/checks) do not indicate new errors in the files changed + - [ ] The [CHANGELOG](https://github.com/scribe-org/Scribe-Data/blob/main/CHANGELOG.md) has been updated with a description of the changes for the upcoming release and the corresponding issue (if necessary) diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py new file mode 100644 index 000000000..18812015b --- /dev/null +++ b/src/scribe_data/cli/cli_utils.py @@ -0,0 +1,121 @@ +""" +Utility functions for the Scribe-Data CLI. + +.. raw:: html + +""" + +import json +from pathlib import Path +from typing import Dict, List, Union + +METADATA_FILE = Path(__file__).parent.parent / "resources" / "language_meta_data.json" +DATA_DIR = Path("scribe_data_json_export") + +with METADATA_FILE.open("r", encoding="utf-8") as file: + language_metadata = json.load(file) + +language_map = { + lang["language"].lower(): lang for lang in language_metadata["languages"] +} + + +def correct_word_type(word_type: str) -> str: + """ + Corrects common versions of word type arguments so users can choose between them. + + Parameters + ---------- + word_type : str + The word type to potentially correct. 
+
+
+def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
+    """
+    Prints a formatted output from the Scribe-Data CLI.
+    """
+    if not data:
+        print(f"No data available for word type '{word_type}'.")
+        return
+
+    max_key_length = max((len(key) for key in data.keys()), default=0)
+
+    if word_type == "autosuggestions":
+        for key, value in data.items():
+            print(f"{key:<{max_key_length}} : {', '.join(value)}")
+
+    elif word_type == "emoji_keywords":
+        for key, value in data.items():
+            emojis = [item["emoji"] for item in value]
+            print(f"{key:<{max_key_length}} : {' '.join(emojis)}")
+
+    elif word_type in {"prepositions", "translations"}:
+        for key, value in data.items():
+            print(f"{key:<{max_key_length}} : {value}")
+
+    elif isinstance(data, dict):
+        for key, value in data.items():
+            if isinstance(value, dict):
+                print(f"{key:<{max_key_length}} : ")
+                max_sub_key_length = max(
+                    (len(sub_key) for sub_key in value.keys()), default=0
+                )
+                for sub_key, sub_value in value.items():
+                    print(f"  {sub_key:<{max_sub_key_length}} : {sub_value}")
+
+            elif isinstance(value, list):
+                print(f"{key:<{max_key_length}} : ")
+                for item in value:
+                    if isinstance(item, dict):
+                        for sub_key, sub_value in item.items():
+                            print(f"  {sub_key:<{max_key_length}} : {sub_value}")
+
+                    else:
+                        print(f"  {item}")
+
+            else:
+                print(f"{key:<{max_key_length}} : {value}")
+
+    elif isinstance(data, list):
+        for item in data:
+            if isinstance(item, dict):
+                for key, value in item.items():
+                    print(f"{key} : {value}")
+
+            else:
+                print(item)
+
+    else:
+        print(data)
diff --git a/src/scribe_data/cli/convert.py b/src/scribe_data/cli/convert.py
new file mode 100644
index 000000000..cb7a3fc8a
--- /dev/null
+++ b/src/scribe_data/cli/convert.py
@@ -0,0 +1,21 @@
+"""
+Functions to convert data returned from the Scribe-Data CLI to other file types.
+
+.. raw:: html
+
+"""
diff --git a/src/scribe_data/cli/list.py b/src/scribe_data/cli/list.py
index 0656a067a..8f78a84db 100644
--- a/src/scribe_data/cli/list.py
+++ b/src/scribe_data/cli/list.py
@@ -1,153 +1,187 @@
+"""
+Functions for listing languages and word types for the Scribe-Data CLI.
+
+.. raw:: html
+
+"""
+
 import json
 from pathlib import Path
-from typing import Dict, List, Union
-
-# Load language metadata from JSON file
-METADATA_FILE = Path(__file__).parent.parent / 'resources' / 'language_meta_data.json'
-
-def load_language_metadata() -> Dict:
-    with METADATA_FILE.open('r', encoding='utf-8') as file:
-        return json.load(file)
-
-LANGUAGE_METADATA = load_language_metadata()
-LANGUAGE_MAP = {lang['language'].lower(): lang for lang in LANGUAGE_METADATA['languages']}
-
-DATA_DIR = Path('scribe_data_json_export')
-
-def print_formatted_data(data: Union[Dict, List], word_type: str) -> None:
-    if not data:
-        print(f"No data available for word type '{word_type}'.")
-        return
-
-    if word_type == 'autosuggestions':
-        max_key_length = max((len(key) for key in data.keys()), default=0)
-        for key, value in data.items():
-            print(f"{key:<{max_key_length}} : {', '.join(value)}")
-    elif word_type == 'emoji_keywords':
-        max_key_length = max((len(key) for key in data.keys()), default=0)
-        for key, value in data.items():
-            emojis = [item['emoji'] for item in value]
-            print(f"{key:<{max_key_length}} : {' '.join(emojis)}")
-    elif word_type in ['prepositions', 'translations']:
-        max_key_length = max((len(key) for key in data.keys()), default=0)
-        for key, value in data.items():
-            print(f"{key:<{max_key_length}} : {value}")
-    else:
-        if isinstance(data, dict):
-            max_key_length = max((len(key) for key in data.keys()), default=0)
-            for key, value in data.items():
-                if isinstance(value, dict):
-                    print(f"{key:<{max_key_length}} : ")
-                    max_sub_key_length = max((len(sub_key) for sub_key in value.keys()), default=0)
-                    for sub_key, sub_value in value.items():
-                        print(f"  {sub_key:<{max_sub_key_length}} : {sub_value}")
-                elif isinstance(value, list):
-                    print(f"{key:<{max_key_length}} : ")
-                    for item in value:
-                        if isinstance(item, dict):
-                            for sub_key, sub_value in item.items():
-                                print(f"  {sub_key:<{max_key_length}} : {sub_value}")
-                        else:
-                            print(f"  {item}")
-                else:
-                    print(f"{key:<{max_key_length}} : {value}")
-        elif isinstance(data, list):
-            for item in data:
-                if isinstance(item, dict):
-                    for key, value in item.items():
-                        print(f"{key} : {value}")
-                else:
-                    print(item)
-        else:
-            print(data)
+
+from .cli_utils import correct_word_type
+
+# Load language metadata from JSON file.
+METADATA_FILE = Path(__file__).parent.parent / "resources" / "language_meta_data.json"
+LANGUAGE_DATA_EXTRACTION_DIR = Path(__file__).parent.parent / "language_data_extraction"
+
+with METADATA_FILE.open("r", encoding="utf-8") as file:
+    language_metadata = json.load(file)
+
+language_map = {
+    lang["language"].lower(): lang for lang in language_metadata["languages"]
+}
+
 
 def list_languages() -> None:
+    """
+    Generates a table of languages, their ISO-2 codes and their Wikidata QIDs.
+    """
+    languages = list(language_metadata["languages"])
+    languages.sort(key=lambda x: x["language"])
-    languages = [lang for lang in LANGUAGE_METADATA['languages']]
-    languages.sort(key=lambda x: x['language'])
 
-    # Define column widths
-    language_col_width = max(len(lang['language']) for lang in languages) + 2
-    iso_col_width = 5  # Length of "ISO" column header + padding
-    qid_col_width = 5  # Length of "QID" column header + padding
+    language_col_width = max(len(lang["language"]) for lang in languages) + 2
+    iso_col_width = max(len(lang["iso"]) for lang in languages) + 2
+    qid_col_width = max(len(lang["qid"]) for lang in languages) + 2
 
-    print(f"{'Language':<{language_col_width}} {'ISO':<{iso_col_width}} {'QID':<{qid_col_width}}")
-    print('-' * (language_col_width + iso_col_width + qid_col_width))
+    table_line_length = language_col_width + iso_col_width + qid_col_width
+
+    print()
+    print(
+        f"{'Language':<{language_col_width}} {'ISO':<{iso_col_width}} {'QID':<{qid_col_width}}"
+    )
+    print("-" * table_line_length)
 
     for lang in languages:
-        print(f"{lang['language'].capitalize():<{language_col_width}} {lang['iso']:<{iso_col_width}} {lang['qid']:<{qid_col_width}}")
+        print(
+            f"{lang['language'].capitalize():<{language_col_width}} {lang['iso']:<{iso_col_width}} {lang['qid']:<{qid_col_width}}"
+        )
+
+    print("-" * table_line_length)
+    print()
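
With the eight languages currently in the metadata file, the table this prints comes out roughly as below; the column widths are computed from the data, so the exact spacing can differ:

    Language     ISO  QID
    -----------------------
    English      en   Q1860
    French       fr   Q150
    German       de   Q188
    ...
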
+ """ + languages = list(language_metadata["languages"]) + languages.sort(key=lambda x: x["language"]) - # Define column widths - language_col_width = max(len(lang['language']) for lang in languages) + 2 - iso_col_width = 5 # Length of "ISO" column header + padding - qid_col_width = 5 # Length of "QID" column header + padding + language_col_width = max(len(lang["language"]) for lang in languages) + 2 + iso_col_width = max(len(lang["iso"]) for lang in languages) + 2 + qid_col_width = max(len(lang["qid"]) for lang in languages) + 2 - print(f"{'Language':<{language_col_width}} {'ISO':<{iso_col_width}} {'QID':<{qid_col_width}}") - print('-' * (language_col_width + iso_col_width + qid_col_width)) + table_line_length = language_col_width + iso_col_width + qid_col_width + + print() + print( + f"{'Language':<{language_col_width}} {'ISO':<{iso_col_width}} {'QID':<{qid_col_width}}" + ) + print("-" * table_line_length) for lang in languages: - print(f"{lang['language'].capitalize():<{language_col_width}} {lang['iso']:<{iso_col_width}} {lang['qid']:<{qid_col_width}}") + print( + f"{lang['language'].capitalize():<{language_col_width}} {lang['iso']:<{iso_col_width}} {lang['qid']:<{qid_col_width}}" + ) + + print("-" * table_line_length) + print() + def list_word_types(language: str = None) -> None: + """ + Lists all word types or those available for a given language. + + Parameters + ---------- + language : str + The language to potentially list word types for. + """ if language: - normalized_language = LANGUAGE_MAP.get(language.lower()) - if not normalized_language: - print(f"Language '{language}' is not recognized.") - return + language_data = language_map.get(language.lower()) + language_capitalized = language.capitalize() + language_dir = LANGUAGE_DATA_EXTRACTION_DIR / language_capitalized - language_dir = DATA_DIR / normalized_language['language'].capitalize() - if not language_dir.exists() or not language_dir.is_dir(): - print(f"No data found for language '{normalized_language['language']}'.") - return + if not language_data: + raise ValueError(f"Language '{language}' is not recognized.") - word_types = [wt.stem for wt in language_dir.glob('*.json')] + word_types = [f.name for f in language_dir.iterdir() if f.is_dir()] if not word_types: - print(f"No word types available for language '{normalized_language['language']}'.") - return + raise ValueError( + f"No word types available for language '{language_capitalized}'." 
+ ) + + table_header = f"Available word types: {language_capitalized}" - word_types = sorted(word_types) - print(f"Word types for language '{normalized_language['language']}':") - for wt in word_types: - print(f" - {wt}") else: word_types = set() - for lang in LANGUAGE_METADATA['languages']: - language_dir = DATA_DIR / lang['language'].capitalize() + for lang in language_metadata["languages"]: + language_dir = LANGUAGE_DATA_EXTRACTION_DIR / lang["language"].capitalize() if language_dir.is_dir(): - word_types.update(wt.stem for wt in language_dir.glob('*.json')) + word_types.update(f.name for f in language_dir.iterdir() if f.is_dir()) - if not word_types: - print("No word types available.") - return + table_header = "Available word types: All languages" + + table_line_length = max(len(table_header), max(len(wt) for wt in word_types)) + + print() + print(table_header) + print("-" * table_line_length) + + word_types = sorted(word_types) + for wt in word_types: + print(wt) + + print("-" * table_line_length) + print() - word_types = sorted(word_types) - print("Available word types:") - for wt in word_types: - print(f" - {wt}") def list_all() -> None: + """ + Lists all available languages and word types. + """ list_languages() - print() list_word_types() + def list_languages_for_word_type(word_type: str) -> None: + """ + Lists the available languages for a given word type. + + Parameters + ---------- + word_type : str + The word type to check for. + """ + word_type = correct_word_type(word_type) available_languages = [] - for lang in LANGUAGE_METADATA['languages']: - language_dir = DATA_DIR / lang['language'].capitalize() + for lang in language_metadata["languages"]: + language_dir = LANGUAGE_DATA_EXTRACTION_DIR / lang["language"].capitalize() if language_dir.is_dir(): - wt_path = language_dir / f"{word_type}.json" + wt_path = language_dir / word_type if wt_path.exists(): - available_languages.append(lang['language']) - - if not available_languages: - print(f"No languages found with word type '{word_type}'.") - return + available_languages.append(lang["language"]) available_languages.sort() - print(f"Languages with word type '{word_type}':") + table_header = f"Available languages: {word_type}" + table_line_length = max( + len(table_header), max(len(lang) for lang in available_languages) + ) + + print() + print(table_header) + print("-" * table_line_length) + for lang in available_languages: - print(f"- {lang.capitalize()}") + print(f"{lang.capitalize()}") + + print("-" * table_line_length) + print() + def list_wrapper(language: str = None, word_type: str = None) -> None: - if language is None and word_type is None: + """ + Conditionally provides the full functionality of the list command. + + Parameters + ---------- + language : str + The language to potentially list word types for. + + word_type : str + The word type to check for. 
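
In effect, list_wrapper maps the list command's flag combinations onto the helpers above. Assuming the installed console script is named scribe-data (the entry point name itself is not shown in these patches), the dispatch works out roughly to:

    scribe-data list                      # list_all()
    scribe-data list --language           # list_languages()
    scribe-data list --word-type          # list_word_types()
    scribe-data list --language english   # list_word_types("english")
    scribe-data list --word-type nouns    # list_languages_for_word_type("nouns")
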
+ """ + if not language and not word_type: list_all() - elif language is True and word_type is None: + + elif language is True and not word_type: list_languages() - elif language is None and word_type is True: + + elif not language and word_type is True: list_word_types() + elif language is True and word_type is True: - print("Please specify both a language and a word type.") - elif language is True and word_type is not None: + print("Please specify either a language or a word type.") + + elif word_type is not None: list_languages_for_word_type(word_type) - elif language is not None and word_type is True: - list_word_types(language) - elif language is not None and word_type is not None: + + elif language is not None: list_word_types(language) diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py index d012d860c..32bd40a6d 100644 --- a/src/scribe_data/cli/main.py +++ b/src/scribe_data/cli/main.py @@ -22,40 +22,139 @@ #!/usr/bin/env python3 import argparse + from .list import list_wrapper from .query import query_data +LIST_DESCRIPTION = "List languages and word types that Scribe-Data can be used for." +QUERY_DESCRIPTION = "Query data from Wikidata for given languages and word types." +CONVERT_DESCRIPTION = "Convert data returned by Scribe-Data to different file types." +CLI_EPILOG = "Visit the codebase at https://github.com/scribe-org/Scribe-Data and documentation at https://scribe-data.readthedocs.io/en/latest/ to learn more!" + + def main() -> None: - parser = argparse.ArgumentParser(description='Scribe-Data CLI Tool') - subparsers = parser.add_subparsers(dest='command', required=True) - - # List command - list_parser = subparsers.add_parser('list', help='List languages and word types') - list_parser.add_argument('--language', '-l', nargs='?', const=True, help='List all languages or filter by language code') - list_parser.add_argument('--word-type', '-wt', nargs='?', const=True, help='List all word types or filter by word type') - - # List word types command - list_word_types_parser = subparsers.add_parser('list-word-types', aliases=['lwt'], help='List available word types') - list_word_types_parser.add_argument('-l', '--language', help='Language code') - - # Query command - query_parser = subparsers.add_parser('query', help='Query data') - query_parser.add_argument('-l', '--language', type=str, help='Language for query') - query_parser.add_argument('-wt', '--word-type', type=str, help='Word type for query') - query_parser.add_argument('-od', '--output-dir', type=str, help='Output directory') - query_parser.add_argument('-o', '--overwrite', action='store_true', help='Overwrite existing files') - query_parser.add_argument('--output-type', type=str, choices=['json', 'csv', 'tsv'], help='Output file type') + parser = argparse.ArgumentParser( + prog="Scribe-Data", + description="The Scribe-Data CLI is a tool to query language data from Wikidata and other sources.", + epilog=CLI_EPILOG, + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + parser._actions[0].help = "Show this help message and exit." + parser.add_argument( + "-v", "--verbose", help="Increase output verbosity.", action="store_true" + ) + parser.add_argument("-u", "--update", help="Update the Scribe-Data CLI.") + + # MARK: List + list_parser = subparsers.add_parser( + "list", + aliases=["l"], + help=LIST_DESCRIPTION, + description=LIST_DESCRIPTION, + epilog=CLI_EPILOG, + ) + list_parser._actions[0].help = "Show this help message and exit." 
+ list_parser.add_argument( + "--language", + "-lang", + nargs="?", + const=True, + help="Run list command on all or given languages.", + ) + list_parser.add_argument( + "--word-type", + "-wt", + nargs="?", + const=True, + help="Run list command on all or given word types.", + ) + + # MARK: Query + query_parser = subparsers.add_parser( + "query", + aliases=["q"], + help=QUERY_DESCRIPTION, + description=QUERY_DESCRIPTION, + epilog=CLI_EPILOG, + ) + query_parser._actions[0].help = "Show this help message and exit." + query_parser.add_argument( + "-lang", "--language", type=str, help="The language(s) to query." + ) + query_parser.add_argument( + "-wt", "--word-type", type=str, help="The word type(s) to query." + ) + query_parser.add_argument( + "-od", "--output-dir", type=str, help="The output directory path for results." + ) + query_parser.add_argument( + "-ot", + "--output-type", + type=str, + choices=["json", "csv", "tsv"], + help="The output file type.", + ) + query_parser.add_argument( + "-o", + "--overwrite", + action="store_true", + help="Whether to overwrite existing files (default: False).", + ) + + # MARK: Convert + convert_parser = subparsers.add_parser( + "convert", + aliases=["c"], + help=CONVERT_DESCRIPTION, + description=CONVERT_DESCRIPTION, + epilog=CLI_EPILOG, + ) + convert_parser._actions[0].help = "Show this help message and exit." + convert_parser.add_argument( + "-f", "--file", type=str, help="The file to convert to a new type." + ) + convert_parser.add_argument( + "-ko", + "--keep-original", + action="store_false", + help="Whether to keep the file to be converted (default: True).", + ) + convert_parser.add_argument( + "-json", "--to-json", type=str, help="Convert the file to JSON format." + ) + convert_parser.add_argument( + "-csv", "--to-csv", type=str, help="Convert the file to CSV format." + ) + convert_parser.add_argument( + "-tsv", "--to-tsv", type=str, help="Convert the file to TSV format." + ) + convert_parser.add_argument( + "-sqlite", "--to-sqlite", type=str, help="Convert the file to SQLite format." + ) + + # MARK: Setup CLI args = parser.parse_args() - if args.command == 'list': + if args.command in ["list", "l"]: list_wrapper(args.language, args.word_type) - elif args.command in ['list-word-types', 'lwt']: - list_wrapper(None, args.language) - elif args.command in ['query', 'q']: - query_data(args.language, args.word_type, args.output_dir, args.overwrite, args.output_type) + + elif args.command in ["query", "q"]: + query_data( + args.language, + args.word_type, + args.output_dir, + args.overwrite, + args.output_type, + ) + + elif args.command in ["convert", "c"]: + return + else: parser.print_help() -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/scribe_data/cli/query.py b/src/scribe_data/cli/query.py index d993eff15..b0c8334a0 100644 --- a/src/scribe_data/cli/query.py +++ b/src/scribe_data/cli/query.py @@ -1,122 +1,193 @@ -import json +""" +Functions for querying languages-word types packs for the Scribe-Data CLI. + +.. 
raw:: html + +""" + import csv +import json from pathlib import Path from typing import Optional -from .utils import LANGUAGE_METADATA, LANGUAGE_MAP -DATA_DIR = Path('scribe_data_json_export') +from .cli_utils import language_map -def query_data(language: str = None, word_type: str = None, output_dir: Optional[str] = None, overwrite: bool = False, output_type: Optional[str] = None) -> None: - if not (language and word_type): - print("Error: You must provide both --language (-l) and --word-type (-wt) options.") - return +DATA_DIR = Path("scribe_data_json_export") + + +def query_data( + language: str = None, + word_type: str = None, + output_dir: Optional[str] = None, + overwrite: bool = False, + output_type: Optional[str] = None, +) -> None: + if not (language or word_type): + raise ValueError( + "You must provide either a --language (-l) or --word-type (-wt) option." + ) if output_dir: output_dir = Path(output_dir) if not output_dir.exists(): output_dir.mkdir(parents=True, exist_ok=True) - if output_type == 'json' or output_type is None: + if output_type == "json" or output_type is None: export_json(language, word_type, output_dir, overwrite) - elif output_type in ['csv', 'tsv']: + + elif output_type in ["csv", "tsv"]: export_csv_or_tsv(language, word_type, output_dir, overwrite, output_type) + else: - print("Error: Unsupported output type. Please use 'json', 'csv', or 'tsv'.") + raise ValueError( + "Unsupported output type. Please use 'json', 'csv', or 'tsv'." + ) + else: - print("Error: Please specify an output directory using --output-dir (-od).") + raise ValueError("Please specify an output directory using --output-dir (-od).") + -def export_json(language: str, word_type: str, output_dir: Path, overwrite: bool) -> None: - normalized_language = LANGUAGE_MAP.get(language.lower()) +def export_json( + language: str, word_type: str, output_dir: Path, overwrite: bool +) -> None: + normalized_language = language_map.get(language.lower()) + language_capitalized = language.capitalize() if not normalized_language: - print(f"Language '{language}' is not recognized.") - return + raise ValueError(f"Language '{language_capitalized}' is not recognized.") + + data_file = ( + DATA_DIR / normalized_language["language"].capitalize() / f"{word_type}.json" + ) - data_file = DATA_DIR / normalized_language['language'].capitalize() / f"{word_type}.json" if not data_file.exists(): - print(f"No data found for language '{normalized_language['language']}' and word type '{word_type}'.") + print( + f"No data found for language '{normalized_language['language']}' and word type '{word_type}'." + ) return try: - with data_file.open('r') as file: + with data_file.open("r") as file: data = json.load(file) + except (IOError, json.JSONDecodeError) as e: print(f"Error reading '{data_file}': {e}") return # Adjust the output directory for JSON exports - json_output_dir = output_dir / "scribe_data_json_export" / normalized_language['language'].capitalize() + json_output_dir = ( + output_dir + / "scribe_data_json_export" + / normalized_language["language"].capitalize() + ) json_output_dir.mkdir(parents=True, exist_ok=True) output_file = json_output_dir / f"{word_type}.json" if output_file.exists() and not overwrite: user_input = input(f"File '{output_file}' already exists. Overwrite? 
(y/n): ") - if user_input.lower()!= 'y': + if user_input.lower() != "y": print(f"Skipping {normalized_language['language']} - {word_type}") return try: - with output_file.open('w') as file: + with output_file.open("w") as file: json.dump(data, file, indent=2) except IOError as e: - print(f"Error writing to '{output_file}': {e}") - return - print(f"Data for language '{normalized_language['language']}' and word type '{word_type}' written to '{output_file}'") + raise IOError(f"Error writing to '{output_file}': {e}") from e -def export_csv_or_tsv(language: str, word_type: str, output_dir: Path, overwrite: bool, output_type: str) -> None: - normalized_language = LANGUAGE_MAP.get(language.lower()) + print( + f"Data for language '{normalized_language['language']}' and word type '{word_type}' written to '{output_file}'" + ) + + +def export_csv_or_tsv( + language: str, word_type: str, output_dir: Path, overwrite: bool, output_type: str +) -> None: + normalized_language = language_map.get(language.lower()) if not normalized_language: print(f"Language '{language}' is not recognized.") return - data_file = DATA_DIR / normalized_language['language'].capitalize() / f"{word_type}.json" + data_file = ( + DATA_DIR / normalized_language["language"].capitalize() / f"{word_type}.json" + ) if not data_file.exists(): - print(f"No data found for language '{normalized_language['language']}' and word type '{word_type}'.") + print( + f"No data found for language '{normalized_language['language']}' and word type '{word_type}'." + ) return try: - with data_file.open('r') as file: + with data_file.open("r") as file: data = json.load(file) + except (IOError, json.JSONDecodeError) as e: print(f"Error reading '{data_file}': {e}") return - if output_type == 'csv': - delimiter = ',' - file_extension = 'csv' - elif output_type == 'tsv': - delimiter = '\t' - file_extension = 'tsv' + if output_type == "csv": + delimiter = "," + file_extension = "csv" + + elif output_type == "tsv": + delimiter = "\t" + file_extension = "tsv" + else: print(f"Unsupported output type '{output_type}'.") return # Adjust the output directory for CSV exports - csv_output_dir = output_dir / "scribe_data_csv_export" / normalized_language['language'].capitalize() + csv_output_dir = ( + output_dir + / "scribe_data_csv_export" + / normalized_language["language"].capitalize() + ) csv_output_dir.mkdir(parents=True, exist_ok=True) output_file = csv_output_dir / f"{word_type}.{file_extension}" if output_file.exists() and not overwrite: user_input = input(f"File '{output_file}' already exists. Overwrite? 
(y/n): ") - if user_input.lower()!= 'y': + if user_input.lower() != "y": print(f"Skipping {normalized_language['language']} - {word_type}") return try: - with output_file.open('w', newline='', encoding='utf-8') as file: + with output_file.open("w", newline="", encoding="utf-8") as file: writer = csv.writer(file, delimiter=delimiter) if isinstance(data, dict): for key, value in data.items(): writer.writerow([key, value]) + elif isinstance(data, list): for item in data: if isinstance(item, dict): writer.writerow(item.values()) + else: writer.writerow([item]) + else: print(f"Unsupported data format for {output_type} export.") + except IOError as e: print(f"Error writing to '{output_file}': {e}") return - print(f"Data for language '{normalized_language['language']}' and word type '{word_type}' written to '{output_file}'") + print( + f"Data for language '{normalized_language['language']}' and word type '{word_type}' written to '{output_file}'" + ) diff --git a/src/scribe_data/cli/utils.py b/src/scribe_data/cli/utils.py deleted file mode 100644 index f38086043..000000000 --- a/src/scribe_data/cli/utils.py +++ /dev/null @@ -1,60 +0,0 @@ -import json -from pathlib import Path -from typing import Dict, List, Union - -# Load language metadata from JSON file -METADATA_FILE = Path(__file__).parent.parent / 'resources' / 'language_meta_data.json' - -def load_language_metadata() -> Dict: - with METADATA_FILE.open('r', encoding='utf-8') as file: - return json.load(file) - -LANGUAGE_METADATA = load_language_metadata() -LANGUAGE_MAP = {lang['language'].lower(): lang for lang in LANGUAGE_METADATA['languages']} - -def print_formatted_data(data: Union[Dict, List], word_type: str) -> None: - if not data: - print(f"No data available for word type '{word_type}'.") - return - - if word_type == 'autosuggestions': - max_key_length = max((len(key) for key in data.keys()), default=0) - for key, value in data.items(): - print(f"{key:<{max_key_length}} : {', '.join(value)}") - elif word_type == 'emoji_keywords': - max_key_length = max((len(key) for key in data.keys()), default=0) - for key, value in data.items(): - emojis = [item['emoji'] for item in value] - print(f"{key:<{max_key_length}} : {' '.join(emojis)}") - elif word_type in ['prepositions', 'translations']: - max_key_length = max((len(key) for key in data.keys()), default=0) - for key, value in data.items(): - print(f"{key:<{max_key_length}} : {value}") - else: - if isinstance(data, dict): - max_key_length = max((len(key) for key in data.keys()), default=0) - for key, value in data.items(): - if isinstance(value, dict): - print(f"{key:<{max_key_length}} : ") - max_sub_key_length = max((len(sub_key) for sub_key in value.keys()), default=0) - for sub_key, sub_value in value.items(): - print(f" {sub_key:<{max_sub_key_length}} : {sub_value}") - elif isinstance(value, list): - print(f"{key:<{max_key_length}} : ") - for item in value: - if isinstance(item, dict): - for sub_key, sub_value in item.items(): - print(f" {sub_key:<{max_key_length}} : {sub_value}") - else: - print(f" {item}") - else: - print(f"{key:<{max_key_length}} : {value}") - elif isinstance(data, list): - for item in data: - if isinstance(item, dict): - for key, value in item.items(): - print(f"{key} : {value}") - else: - print(item) - else: - print(data) diff --git a/src/scribe_data/resources/language_meta_data.json b/src/scribe_data/resources/language_meta_data.json index 88ba732e1..7ef4faf60 100755 --- a/src/scribe_data/resources/language_meta_data.json +++ 
b/src/scribe_data/resources/language_meta_data.json @@ -17,7 +17,7 @@ "qid": "Q1860", "remove-words": ["of", "the", "The", "and"], "ignore-words": [], - "word-types": ["nouns", "verbs", "translated_words"] + "word-types": ["nouns", "verbs", "translations"] }, { "language": "french", @@ -25,7 +25,13 @@ "qid": "Q150", "remove-words": ["of", "the", "The", "and"], "ignore-words": ["XXe"], - "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"] + "word-types": [ + "nouns", + "verbs", + "translations", + "emoji_keywords", + "autosuggestions" + ] }, { "language": "german", @@ -33,7 +39,14 @@ "qid": "Q188", "remove-words": ["of", "the", "The", "and", "NeinJa", "et", "redirect"], "ignore-words": ["Gemeinde", "Familienname"], - "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"] + "word-types": [ + "nouns", + "verbs", + "translations", + "emoji_keywords", + "prepositions", + "autosuggestions" + ] }, { "language": "italian", @@ -41,7 +54,13 @@ "qid": "Q652", "remove-words": ["of", "the", "The", "and", "text", "from"], "ignore-words": ["The", "ATP"], - "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"] + "word-types": [ + "nouns", + "verbs", + "translations", + "emoji_keywords", + "autosuggestions" + ] }, { "language": "portuguese", @@ -49,7 +68,13 @@ "qid": "Q5146", "remove-words": ["of", "the", "The", "and", "jbutadptflora"], "ignore-words": [], - "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"] + "word-types": [ + "nouns", + "verbs", + "translations", + "emoji_keywords", + "autosuggestions" + ] }, { "language": "russian", @@ -57,7 +82,14 @@ "qid": "Q7737", "remove-words": ["of", "the", "The", "and"], "ignore-words": [], - "word-types": ["nouns", "verbs", "translated_words", "translations", "emoji_keywords", "prepositions", "autosuggestions"] + "word-types": [ + "nouns", + "verbs", + "translations", + "emoji_keywords", + "prepositions", + "autosuggestions" + ] }, { "language": "spanish", @@ -65,7 +97,13 @@ "qid": "Q1321", "remove-words": ["of", "the", "The", "and"], "ignore-words": [], - "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"] + "word-types": [ + "nouns", + "verbs", + "translations", + "emoji_keywords", + "autosuggestions" + ] }, { "language": "swedish", @@ -73,7 +111,13 @@ "qid": "Q9027", "remove-words": ["of", "the", "The", "and", "Checklist", "Catalogue"], "ignore-words": ["databasdump"], - "word-types": ["nouns", "verbs", "translations", "emoji_keywords", "prepositions", "autosuggestions"] + "word-types": [ + "nouns", + "verbs", + "translations", + "emoji_keywords", + "autosuggestions" + ] } ] } diff --git a/src/scribe_data/wikipedia/extract_wiki.py b/src/scribe_data/wikipedia/extract_wiki.py index ed97210c9..7c7581650 100644 --- a/src/scribe_data/wikipedia/extract_wiki.py +++ b/src/scribe_data/wikipedia/extract_wiki.py @@ -182,7 +182,7 @@ def iterate_and_parse_file(args): partitions_dir : str The path to where output file should be stored. - article_limit : int optional (default=None) + article_limit : int (default=None) An optional article_limit of the number of articles to find. 
    verbose : bool (default=True)

From 10511aea070cecdb665bc196237b20916975e298 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister
Date: Sat, 22 Jun 2024 20:16:56 +0200
Subject: [PATCH 15/18] Switch over word type correction + file rename

---
 src/scribe_data/cli/cli_utils.py               | 18 +++++++++++-------
 src/scribe_data/cli/list.py                    |  2 +-
 ...e_meta_data.json => language_metadata.json} |  0
 .../resources/word_type_metadata.json          | 10 ++++++++++
 src/scribe_data/utils.py                       |  2 +-
 5 files changed, 23 insertions(+), 9 deletions(-)
 rename src/scribe_data/resources/{language_meta_data.json => language_metadata.json} (100%)
 create mode 100644 src/scribe_data/resources/word_type_metadata.json

diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py
index 18812015b..7d3578aeb 100644
--- a/src/scribe_data/cli/cli_utils.py
+++ b/src/scribe_data/cli/cli_utils.py
@@ -24,12 +24,20 @@
 from pathlib import Path
 from typing import Dict, List, Union
 
-METADATA_FILE = Path(__file__).parent.parent / "resources" / "language_meta_data.json"
+LANGUAGE_METADATA_FILE = (
+    Path(__file__).parent.parent / "resources" / "language_metadata.json"
+)
+WORD_TYPE_METADATA_FILE = (
+    Path(__file__).parent.parent / "resources" / "word_type_metadata.json"
+)
 DATA_DIR = Path("scribe_data_json_export")
 
-with METADATA_FILE.open("r", encoding="utf-8") as file:
+with LANGUAGE_METADATA_FILE.open("r", encoding="utf-8") as file:
     language_metadata = json.load(file)
 
+with WORD_TYPE_METADATA_FILE.open("r", encoding="utf-8") as file:
+    word_type_metadata = json.load(file)
+
 language_map = {
     lang["language"].lower(): lang for lang in language_metadata["languages"]
 }
@@ -48,11 +56,7 @@ def correct_word_type(word_type: str) -> str:
     -------
     The word_type value or a corrected version of it.
     """
-    all_word_types = set()
-    for language in language_metadata["languages"]:
-        all_word_types.update(language["word-types"])
-
-    all_word_types = list(all_word_types)
+    all_word_types = word_type_metadata["word-types"]
 
     if word_type in all_word_types:
         return word_type
diff --git a/src/scribe_data/cli/list.py b/src/scribe_data/cli/list.py
index 8f78a84db..f79230ecd 100644
--- a/src/scribe_data/cli/list.py
+++ b/src/scribe_data/cli/list.py
@@ -26,7 +26,7 @@
 from .cli_utils import correct_word_type
 
 # Load language metadata from JSON file.
-METADATA_FILE = Path(__file__).parent.parent / "resources" / "language_meta_data.json"
+METADATA_FILE = Path(__file__).parent.parent / "resources" / "language_metadata.json"
 LANGUAGE_DATA_EXTRACTION_DIR = Path(__file__).parent.parent / "language_data_extraction"
 
 with METADATA_FILE.open("r", encoding="utf-8") as file:
diff --git a/src/scribe_data/resources/language_meta_data.json b/src/scribe_data/resources/language_metadata.json
similarity index 100%
rename from src/scribe_data/resources/language_meta_data.json
rename to src/scribe_data/resources/language_metadata.json
diff --git a/src/scribe_data/resources/word_type_metadata.json b/src/scribe_data/resources/word_type_metadata.json
new file mode 100644
index 000000000..3479d94c6
--- /dev/null
+++ b/src/scribe_data/resources/word_type_metadata.json
@@ -0,0 +1,10 @@
+{
+  "word-types": [
+    "autosuggestions",
+    "emoji_keywords",
+    "nouns",
+    "prepositions",
+    "translations",
+    "verbs"
+  ]
+}
diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py
index 13a0da4c0..ca056f8cc 100644
--- a/src/scribe_data/utils.py
+++ b/src/scribe_data/utils.py
@@ -62,7 +62,7 @@ def _load_json(package_path: str, file_name: str, root: str):
 
 _languages = _load_json(
     package_path="scribe_data.resources",
-    file_name="language_meta_data.json",
+    file_name="language_metadata.json",
     root="languages",
 )

From 78ae17ef73973ce70a0a2aa7c77675a189b34182 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister
Date: Sat, 22 Jun 2024 20:19:48 +0200
Subject: [PATCH 16/18] Remove word-type keys from language metadata

---
 .../resources/language_metadata.json | 68 +++----------------
 1 file changed, 8 insertions(+), 60 deletions(-)

diff --git a/src/scribe_data/resources/language_metadata.json b/src/scribe_data/resources/language_metadata.json
index 7ef4faf60..794ef4009 100755
--- a/src/scribe_data/resources/language_metadata.json
+++ b/src/scribe_data/resources/language_metadata.json
@@ -16,108 +16,56 @@
       "iso": "en",
       "qid": "Q1860",
       "remove-words": ["of", "the", "The", "and"],
-      "ignore-words": [],
-      "word-types": ["nouns", "verbs", "translations"]
+      "ignore-words": []
     },
     {
       "language": "french",
       "iso": "fr",
       "qid": "Q150",
       "remove-words": ["of", "the", "The", "and"],
-      "ignore-words": ["XXe"],
-      "word-types": [
-        "nouns",
-        "verbs",
-        "translations",
-        "emoji_keywords",
-        "autosuggestions"
-      ]
+      "ignore-words": ["XXe"]
     },
     {
       "language": "german",
       "iso": "de",
       "qid": "Q188",
       "remove-words": ["of", "the", "The", "and", "NeinJa", "et", "redirect"],
-      "ignore-words": ["Gemeinde", "Familienname"],
-      "word-types": [
-        "nouns",
-        "verbs",
-        "translations",
-        "emoji_keywords",
-        "prepositions",
-        "autosuggestions"
-      ]
+      "ignore-words": ["Gemeinde", "Familienname"]
     },
     {
       "language": "italian",
       "iso": "it",
       "qid": "Q652",
       "remove-words": ["of", "the", "The", "and", "text", "from"],
-      "ignore-words": ["The", "ATP"],
-      "word-types": [
-        "nouns",
-        "verbs",
-        "translations",
-        "emoji_keywords",
-        "autosuggestions"
-      ]
+      "ignore-words": ["The", "ATP"]
     },
     {
       "language": "portuguese",
       "iso": "pt",
       "qid": "Q5146",
       "remove-words": ["of", "the", "The", "and", "jbutadptflora"],
-      "ignore-words": [],
-      "word-types": [
-        "nouns",
-        "verbs",
-        "translations",
-        "emoji_keywords",
-        "autosuggestions"
-      ]
+      "ignore-words": []
     },
     {
       "language": "russian",
       "iso": "ru",
       "qid": "Q7737",
       "remove-words": ["of", "the", "The", "and"],
-      "ignore-words": [],
-      "word-types": [
-        "nouns",
-        "verbs",
-        "translations",
-        "emoji_keywords",
-        "prepositions",
-        "autosuggestions"
-      ]
+      "ignore-words": []
     },
     {
       "language": "spanish",
"spanish", "iso": "es", "qid": "Q1321", "remove-words": ["of", "the", "The", "and"], - "ignore-words": [], - "word-types": [ - "nouns", - "verbs", - "translations", - "emoji_keywords", - "autosuggestions" - ] + "ignore-words": [] }, { "language": "swedish", "iso": "sv", "qid": "Q9027", "remove-words": ["of", "the", "The", "and", "Checklist", "Catalogue"], - "ignore-words": ["databasdump"], - "word-types": [ - "nouns", - "verbs", - "translations", - "emoji_keywords", - "autosuggestions" - ] + "ignore-words": ["databasdump"] } ] } From eb74dff058d0300eb39c4476d1e14df696a9b9b9 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Sat, 22 Jun 2024 20:22:16 +0200 Subject: [PATCH 17/18] Remove word-type description from language metadata --- src/scribe_data/resources/language_metadata.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/scribe_data/resources/language_metadata.json b/src/scribe_data/resources/language_metadata.json index 794ef4009..e6d7de8a6 100755 --- a/src/scribe_data/resources/language_metadata.json +++ b/src/scribe_data/resources/language_metadata.json @@ -6,8 +6,7 @@ "iso": "the ISO 639 code for 'language'. See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes. All lowercase", "qid": "the unique identifier of 'language' on Wikidata. 'Q' followed by one or more digits. See https://www.wikidata.org/wiki/Q43649390", "remove-words": "words that should not be included as autosuggestions for the given language.", - "ignore-words": "TODO. Case sensitive.", - "word-types": "A list of word types available for the given language." + "ignore-words": "words that should be removed from the autosuggestion generation process." } }, "languages": [ From a212390cacf4d03ff8c4ff788c6d13a0a98e58be Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Sat, 22 Jun 2024 20:24:10 +0200 Subject: [PATCH 18/18] File spacing and comment formatting --- src/scribe_data/cli/main.py | 3 +++ src/scribe_data/cli/query.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py index 32bd40a6d..c057c53de 100644 --- a/src/scribe_data/cli/main.py +++ b/src/scribe_data/cli/main.py @@ -47,6 +47,7 @@ def main() -> None: parser.add_argument("-u", "--update", help="Update the Scribe-Data CLI.") # MARK: List + list_parser = subparsers.add_parser( "list", aliases=["l"], @@ -71,6 +72,7 @@ def main() -> None: ) # MARK: Query + query_parser = subparsers.add_parser( "query", aliases=["q"], @@ -103,6 +105,7 @@ def main() -> None: ) # MARK: Convert + convert_parser = subparsers.add_parser( "convert", aliases=["c"], diff --git a/src/scribe_data/cli/query.py b/src/scribe_data/cli/query.py index b0c8334a0..f2b629905 100644 --- a/src/scribe_data/cli/query.py +++ b/src/scribe_data/cli/query.py @@ -88,7 +88,7 @@ def export_json( print(f"Error reading '{data_file}': {e}") return - # Adjust the output directory for JSON exports + # Adjust the output directory for JSON exports. json_output_dir = ( output_dir / "scribe_data_json_export" @@ -151,7 +151,7 @@ def export_csv_or_tsv( print(f"Unsupported output type '{output_type}'.") return - # Adjust the output directory for CSV exports + # Adjust the output directory for CSV exports. csv_output_dir = ( output_dir / "scribe_data_csv_export"