From 04c3a825d914866b6e4870d50df56a6e4a4dfa48 Mon Sep 17 00:00:00 2001
From: kouloumos <kouloumosa@gmail.com>
Date: Thu, 27 Jun 2024 22:32:26 +0300
Subject: [PATCH] add support for configuration file

Support a `config.ini` file that sets default values for
various options and flags, selectable per profile, offering
greater control over the application's behavior.
---
 .gitignore               |  3 +++
 Readme.md => README.md   | 20 ++++++++++++++++++--
 app/config.py            | 12 ++++++++++++
 app/services/deepgram.py |  7 +++++--
 config.ini.example       | 12 ++++++++++++
 setup.py                 |  4 ++--
 transcriber.py           | 35 ++++++++++++++++-------------------
 7 files changed, 68 insertions(+), 25 deletions(-)
 rename Readme.md => README.md (89%)
 create mode 100644 app/config.py
 create mode 100644 config.ini.example

diff --git a/.gitignore b/.gitignore
index 7076465..c24424b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -112,6 +112,9 @@ ENV/
 env.bak/
 venv.bak/
 
+# Configurations
+config.ini
+
 # Spyder project settings
 .spyderproject
 .spyproject
diff --git a/Readme.md b/README.md
similarity index 89%
rename from Readme.md
rename to README.md
index 335aeb8..d57558d 100644
--- a/Readme.md
+++ b/README.md
@@ -60,6 +60,23 @@ This transcription tool operates through a structured four-stage process:
     - for other users, follow the instruction on
       their [site](https://ffmpeg.org/) to install
 
+- To use a specific [configuration profile](#configuration), set the `PROFILE` variable in your `.env` file to the name of a section in your `config.ini` (when unset, the `DEFAULT` section is used).
+
+
+## Configuration
+
+This application supports configuration via a `config.ini` file.
+This file allows you to set default values for various options and flags, reducing the need to specify them on the command line every time.
+Additionally, the configuration file can include options not available through the command line, offering greater flexibility and control over the application's behavior.
+
+### Creating a Configuration File
+
+An example configuration file named `config.ini.example` is included in the repository.
+To use it, copy it to `config.ini` and modify it according to your needs:
+```sh
+cp config.ini.example config.ini
+```
+
 ## Install/Uninstall
 
 Navigate to the application directory and run the below commands:
@@ -68,9 +85,8 @@ Navigate to the application directory and run the below commands:
 
 `source venv/bin/activate` activates the virtual environment
 
-`pip3 install . --use-pep517` to install the application
+`pip3 install .` to install the application
 
-To check the version:
 `tstbtc --version` view the application version
 
 `tstbtc --help` view the application help
diff --git a/app/config.py b/app/config.py
new file mode 100644
index 0000000..ae6af7e
--- /dev/null
+++ b/app/config.py
@@ -0,0 +1,12 @@
+import configparser
+from dotenv import dotenv_values
+
+def read_config(profile='DEFAULT'):
+    config = configparser.ConfigParser()
+    config.read('config.ini')
+
+    return config[profile]
+
+# Get the current profile from the PROFILE key of the `.env` file, or default to 'DEFAULT'
+env = dotenv_values(".env")
+config = read_config(env.get("PROFILE", "DEFAULT"))
diff --git a/app/services/deepgram.py b/app/services/deepgram.py
index e01a9d3..5ed9dac 100644
--- a/app/services/deepgram.py
+++ b/app/services/deepgram.py
@@ -11,6 +11,7 @@
     application,
     utils
 )
+from app.config import config as config_profile
 from app.data_writer import DataWriter
 from app.logging import get_logger
 from app.media_processor import MediaProcessor
@@ -33,14 +34,15 @@ def __init__(self, summarize, diarize, upload, data_writer: DataWriter):
         self.diarize = diarize
         self.upload = upload
         self.data_writer = data_writer
-        self.one_sentence_per_line = True
+        self.one_sentence_per_line = config_profile.getboolean('one_sentence_per_line', True)
         self.dev_mode = False  # Extra capabilities during development mode
         self.max_audio_length = 3600.0  # 60 minutes in seconds
         self.processor = MediaProcessor(chunk_length=1200.0)
 
     def audio_to_text(self, audio_file, chunk=None):
+        language = config_profile.get('language','en')
         logger.info(
-            f"Transcribing audio {f'(chunk {chunk}) ' if chunk else ''}to text using deepgram...")
+            f"Transcribing audio {f'(chunk {chunk}) ' if chunk else ''}to text using deepgram[{language}]...")
         try:
             config = dotenv_values(".env")
             dg_client = deepgram.Deepgram(config["DEEPGRAM_API_KEY"])
@@ -57,6 +59,7 @@ def audio_to_text(self, audio_file, chunk=None):
                         "smart_formatting": True,
                         "summarize": self.summarize,
                         "model": "whisper-large",
+                        "language": language,
                     },
                 )
                 audio.close()
diff --git a/config.ini.example b/config.ini.example
new file mode 100644
index 0000000..8a181c4
--- /dev/null
+++ b/config.ini.example
@@ -0,0 +1,12 @@
+[DEFAULT]
+deepgram = True
+diarize = True
+summarize = False
+github = none
+save_to_markdown = True
+noqueue = True
+needs_review = False
+one_sentence_per_line = True
+
+[development]
+verbose_logging = True
diff --git a/setup.py b/setup.py
index 023ed4e..d86c485 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import find_packages, setup
 
 
-with open("Readme.md", "r", encoding="utf-8") as fh:
+with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 with open("requirements.txt", "r", encoding="utf-8") as fh:
     requirements = fh.read()
@@ -15,7 +15,7 @@
     description="transcribes youtube videos/media to bitcointranscript",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    url="https://github.com/tvpeter/yt2btc",
+    url="https://github.com/bitcointranscripts/tstbtc",
     py_modules=["transcriber"],
     packages=find_packages(),
     install_requires=[requirements],
diff --git a/transcriber.py b/transcriber.py
index 899eace..22cac3b 100644
--- a/transcriber.py
+++ b/transcriber.py
@@ -8,9 +8,10 @@
 from app import (
     __app_name__,
     __version__,
+    commands,
     utils
 )
-from app.commands import queue
+from app.config import config
 from app.logging import configure_logger, get_logger
 from app.transcription import Transcription
 from app.types import GitHubMode
@@ -24,7 +25,6 @@ def print_version(ctx, param, value):
     click.echo(f"{__app_name__} v{__version__}")
     ctx.exit()
 
-
 @click.option(
     "-v",
     "--version",
@@ -38,14 +38,12 @@ def print_version(ctx, param, value):
 def cli():
     pass
 
-
 def print_help(ctx, param, value):
     if not value or ctx.resilient_parsing:
         return
     logging.info(ctx.get_help())
     ctx.exit()
 
-
 whisper = click.option(
     "-m",
     "--model",
@@ -62,7 +60,7 @@ def print_help(ctx, param, value):
             "large-v2",
         ]
     ),
-    default="tiny.en",
+    default=config.get('model', 'tiny.en'),
     show_default=True,
     help="Select which whisper model to use for the transcription",
 )
@@ -70,14 +68,14 @@ def print_help(ctx, param, value):
     "-D",
     "--deepgram",
     is_flag=True,
-    default=False,
+    default=config.getboolean('deepgram', False),
     help="Use deepgram for transcription",
 )
 diarize = click.option(
     "-M",
     "--diarize",
     is_flag=True,
-    default=False,
+    default=config.getboolean('diarize', False),
     help="Supply this flag if you have multiple speakers AKA "
     "want to diarize the content",
 )
@@ -85,12 +83,13 @@ def print_help(ctx, param, value):
     "-S",
     "--summarize",
     is_flag=True,
-    default=False,
+    default=config.getboolean('summarize', False),
     help="Summarize the transcript [only available with deepgram]",
 )
 cutoff_date = click.option(
     "--cutoff-date",
     type=str,
+    default=config.get('cutoff_date', None),
     help=("Specify a cutoff date (in YYYY-MM-DD format) to process only sources "
           "published after this date. Sources with a publication date on or before "
           "the cutoff will be excluded from processing. This option is useful for "
@@ -100,7 +99,7 @@ def print_help(ctx, param, value):
 github = click.option(
     "--github",
     type=click.Choice(["remote", "local", "none"]),
-    default="none",
+    default=config.get('github', 'none'),
     help=("Specify the GitHub operation mode."
           "'remote': Create a new branch, push changes to it, and push it to the origin bitcointranscripts repo. "
           "'local': Commit changes to the current local branch without pushing to the remote repo."
@@ -111,46 +110,46 @@ def print_help(ctx, param, value):
     "-u",
     "--upload",
     is_flag=True,
-    default=False,
+    default=config.getboolean('upload_to_s3', False),
     help="Upload processed model files to AWS S3",
 )
 save_to_markdown = click.option(
     "--markdown",
     is_flag=True,
-    default=False,
+    default=config.getboolean('save_to_markdown', False),
     help="Save the resulting transcript to a markdown format supported by bitcointranscripts",
 )
 noqueue = click.option(
     "--noqueue",
     is_flag=True,
-    default=False,
+    default=config.getboolean('noqueue', False),
     help="Do not push the resulting transcript to the Queuer backend",
 )
 needs_review = click.option(
     "--needs-review",
     is_flag=True,
-    default=False,
+    default=config.getboolean('needs_review', False),
     help="Add 'needs review' flag to the resulting transcript",
 )
 model_output_dir = click.option(
     "-o",
     "--model_output_dir",
     type=str,
-    default="local_models/",
+    default=config.get('model_output_dir', 'local_models/'),
     show_default=True,
     help="Set the directory for saving model outputs",
 )
 nocleanup = click.option(
     "--nocleanup",
     is_flag=True,
-    default=False,
+    default=config.getboolean('nocleanup', False),
     help="Do not remove temp files on exit",
 )
 verbose_logging = click.option(
     "-V",
     "--verbose",
     is_flag=True,
-    default=False,
+    default=config.getboolean('verbose_logging', False),
     help="Supply this flag to enable verbose logging",
 )
 
@@ -190,7 +189,6 @@ def print_help(ctx, param, value):
     help="Add a category to the transcript's metadata (can be used multiple times)",
 )
 
-
 @cli.command()
 @click.argument("source", nargs=1)
 # Available transcription models and services
@@ -462,8 +460,7 @@ def postprocess(
         logger.error(e)
         traceback.print_exc()
 
-
-cli.add_command(queue.commands)
+cli.add_command(commands.queue)
 
 if __name__ == '__main__':
     cli()