use module-level logging #80

Merged (2 commits) on Nov 7, 2024
31 changes: 0 additions & 31 deletions pywhispercpp/_logger.py

This file was deleted.

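The deleted helper exposed a set_log_level() function (it was imported in model.py, as shown below); it is replaced by the standard-library pattern of one logger per module. A minimal sketch of that pattern, for context only; the function name and message here are illustrative, not taken from this PR:

import logging

# Module-level logger: its name follows the module, e.g. "pywhispercpp.model"
# or "pywhispercpp.utils", so an application can tune the whole package at once.
logger = logging.getLogger(__name__)

def do_work():
    # Illustrative only; the real log calls appear in the diffs below.
    logger.info("doing work ...")
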
3 changes: 1 addition & 2 deletions pywhispercpp/constants.py
@@ -4,7 +4,6 @@
"""
Constants
"""
-import logging
from pathlib import Path
from typing import Tuple

@@ -21,10 +20,10 @@

PACKAGE_NAME = 'pywhispercpp'

-LOGGIN_LEVEL = logging.INFO

MODELS_DIR = Path(user_data_dir(PACKAGE_NAME)) / 'models'


AVAILABLE_MODELS = [
"base",
"base-q5_1",
14 changes: 5 additions & 9 deletions pywhispercpp/model.py
@@ -13,7 +13,6 @@
from typing import Union, Callable, List
import _pywhispercpp as pw
import numpy as np
-from pywhispercpp._logger import set_log_level
import pywhispercpp.utils as utils
import pywhispercpp.constants as constants
import subprocess
@@ -27,6 +26,8 @@
__version__ = importlib.metadata.version('pywhispercpp')


+logger = logging.getLogger(__name__)

class Segment:
"""
A small class representing a transcription segment
@@ -67,21 +68,16 @@ def __init__(self,
model: str = 'tiny',
models_dir: str = None,
params_sampling_strategy: int = 0,
-log_level: int = logging.INFO,
**params):
"""
:param model: The name of the model, one of the [AVAILABLE_MODELS](/pywhispercpp/#pywhispercpp.constants.AVAILABLE_MODELS),
(default to `tiny`), or a direct path to a `ggml` model.
:param models_dir: The directory where the models are stored, or where they will be downloaded if they don't
exist, default to [MODELS_DIR](/pywhispercpp/#pywhispercpp.constants.MODELS_DIR) <user_data_dir/pywhsipercpp/models>
:param params_sampling_strategy: 0 -> GREEDY, else BEAM_SEARCH
-:param log_level: logging level, set to INFO by default
:param params: keyword arguments for different whisper.cpp parameters,
see [PARAMS_SCHEMA](/pywhispercpp/#pywhispercpp.constants.PARAMS_SCHEMA)
"""
-# set logging level
-set_log_level(log_level)

if Path(model).is_file():
self.model_path = model
else:
@@ -129,10 +125,10 @@ def transcribe(self,

# run inference
start_time = time()
logging.info(f"Transcribing ...")
logger.info("Transcribing ...")
res = self._transcribe(audio, n_processors=n_processors)
end_time = time()
logging.info(f"Inference time: {end_time - start_time:.3f} s")
logger.info(f"Inference time: {end_time - start_time:.3f} s")
return res

@staticmethod
@@ -220,7 +216,7 @@ def _init_model(self) -> None:
Private method to initialize the method from the bindings, it will be called automatically from the __init__
:return:
"""
logging.info("Initializing the model ...")
logger.info("Initializing the model ...")
self._ctx = pw.whisper_init_from_file(self.model_path)
self._params = pw.whisper_full_default_params(pw.whisper_sampling_strategy.WHISPER_SAMPLING_GREEDY)

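With the log_level constructor argument removed, verbosity is now controlled by the calling application through the standard logging module. A rough usage sketch, assuming the package loggers are named under "pywhispercpp" via __name__ as added above; the model name follows the documented default:

import logging
from pywhispercpp.model import Model  # class shown in the diff above

# Application-side configuration replaces the removed log_level parameter.
logging.basicConfig(level=logging.WARNING)                  # quiet root handler by default
logging.getLogger("pywhispercpp").setLevel(logging.DEBUG)   # but verbose for this package

model = Model('tiny')  # no log_level argument any more; 'tiny' is the default model
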
11 changes: 7 additions & 4 deletions pywhispercpp/utils.py
@@ -12,6 +12,9 @@
from pywhispercpp.constants import MODELS_BASE_URL, MODELS_PREFIX_URL, AVAILABLE_MODELS, MODELS_DIR


+logger = logging.getLogger(__name__)


def _get_model_url(model_name: str) -> str:
"""
Returns the url of the `ggml` model
@@ -31,19 +34,19 @@ def download_model(model_name: str, download_dir=None, chunk_size=1024) -> str:
:return: Absolute path of the downloaded model
"""
if model_name not in AVAILABLE_MODELS:
-logging.error(f"Invalid model name `{model_name}`, available models are: {AVAILABLE_MODELS}")
+logger.error(f"Invalid model name `{model_name}`, available models are: {AVAILABLE_MODELS}")
return
if download_dir is None:
download_dir = MODELS_DIR
logging.info(f"No download directory was provided, models will be downloaded to {download_dir}")
logger.info(f"No download directory was provided, models will be downloaded to {download_dir}")

os.makedirs(download_dir, exist_ok=True)

url = _get_model_url(model_name=model_name)
file_path = Path(download_dir) / os.path.basename(url)
# check if the file is already there
if file_path.exists():
logging.info(f"Model {model_name} already exists in {download_dir}")
logger.info(f"Model {model_name} already exists in {download_dir}")
else:
# download it from huggingface
resp = requests.get(url, stream=True)
@@ -60,7 +63,7 @@ def download_model(model_name: str, download_dir=None, chunk_size=1024) -> str:
for data in resp.iter_content(chunk_size=chunk_size):
size = file.write(data)
progress_bar.update(size)
logging.info(f"Model downloaded to {file_path.absolute()}")
logger.info(f"Model downloaded to {file_path.absolute()}")
except Exception as e:
# error download, just remove the file
os.remove(file_path)