Skip to content

Commit

Permalink
refactor: use Magika v2 model
Browse files Browse the repository at this point in the history
Signed-off-by: Jack Cherng <[email protected]>
  • Loading branch information
jfcherng committed Aug 12, 2024
1 parent ca855a5 commit 39b2cb5
Show file tree
Hide file tree
Showing 18 changed files with 501 additions and 169 deletions.
12 changes: 9 additions & 3 deletions plugin/commands/auto_set_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,10 +277,16 @@ def _assign_syntax_with_magika(view_snapshot: ViewSnapshot, event: ListenerEvent
status_result = magika.identify_path(view_snapshot.path_obj)
else:
status_result = magika.identify_bytes(view_snapshot.content_bytes)
# Logger.log(f"🐛 Magika's prediction: {status_result.output!r}", window=window)
# Logger.log(f"🐛 Magika's prediction: {status_result.value!r}", window=window)

magika_label = status_result.output.ct_label
magika_score = status_result.output.score # range: 0.0 ~ 1.0
if not status_result.ok:
Logger.log(f"😢 Magika failed: {status_result.status}", window=window)
return False

result = status_result.value
print(f"{result!r}") # @todo remove debug print
magika_label = result.output.label
magika_score = result.score # range: 0.0 ~ 1.0

threadshold: float = settings.get("magika.min_confidence", 0.0)
if magika_score < threadshold or magika_label in {"directory", "empty", "txt", "unknown"}:
Expand Down
3 changes: 2 additions & 1 deletion typings/magika/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from magika import magika as magika, prediction_mode as prediction_mode
from magika import magika as magika
from magika.types import prediction_mode as prediction_mode

Magika = magika.Magika
MagikaError = magika.MagikaError
Expand Down
19 changes: 0 additions & 19 deletions typings/magika/cli/magika.pyi

This file was deleted.

15 changes: 15 additions & 0 deletions typings/magika/cli/magika_python_module_tester.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from _typeshed import Incomplete
from magika import Magika as Magika, MagikaError as MagikaError, PredictionMode as PredictionMode, colors as colors
from magika.logger import get_logger as get_logger
from magika.types import ContentTypeLabel as ContentTypeLabel, MagikaResult as MagikaResult, Status as Status, StatusOr as StatusOr
from pathlib import Path

VERSION: Incomplete
CONTACT_EMAIL: str
CONTEXT_SETTINGS: Incomplete
HELP_EPILOG: Incomplete

def main(file: list[Path], recursive: bool, json_output: bool, jsonl_output: bool, mime_output: bool, label_output: bool, magic_compatibility_mode: bool, output_score: bool, prediction_mode_str: str, batch_size: int, no_dereference: bool, with_colors: bool, verbose: bool, debug: bool, dump_performance_stats_flag: bool, output_version: bool, model_dir: Path | None) -> None: ...
def should_read_from_stdin(files_paths: list[Path]) -> bool:
    """Type stub: takes the list of input paths and is declared to return a ``bool``.

    NOTE(review): going by the name, the result says whether input should come
    from stdin instead of the given paths -- the rule itself is not visible here.
    """
    ...
def get_magika_result_from_stdin(magika: Magika) -> StatusOr[MagikaResult]:
    """Type stub: takes a ``Magika`` instance and is declared to return ``StatusOr[MagikaResult]``.

    NOTE(review): presumably identifies content read from stdin -- confirm
    against the implementation.
    """
    ...
def path_and_result_to_dict(file_path: Path, result: StatusOr[MagikaResult]) -> dict:
    """Type stub: takes a file path plus its ``StatusOr[MagikaResult]`` and is declared to return a ``dict``.

    NOTE(review): the keys of the returned dict are not visible in this stub.
    """
    ...
72 changes: 0 additions & 72 deletions typings/magika/content_types.pyi

This file was deleted.

19 changes: 10 additions & 9 deletions typings/magika/magika.pyi
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from magika.content_types import ContentType as ContentType, ContentTypesManager as ContentTypesManager
from magika.logger import get_logger as get_logger
from magika.prediction_mode import PredictionMode as PredictionMode
from magika.types import MagikaOutputFields as MagikaOutputFields, MagikaResult as MagikaResult, ModelFeatures as ModelFeatures, ModelOutput as ModelOutput, ModelOutputFields as ModelOutputFields
from magika.seekable import Buffer as Buffer, File as File, Seekable as Seekable
from magika.types import ContentTypeInfo as ContentTypeInfo, ContentTypeLabel as ContentTypeLabel, MagikaResult as MagikaResult, ModelConfig as ModelConfig, ModelFeatures as ModelFeatures, ModelOutput as ModelOutput, PredictionMode as PredictionMode, Status as Status, StatusOr as StatusOr
from pathlib import Path
from typing import List, Optional

DEFAULT_MODEL_NAME: str

# Type stub for the `Magika` class.
# NOTE(review): this is a rendered diff hunk without +/- markers, so replaced and
# replacement signatures both appear below; `__init__`, `identify_path`,
# `identify_paths` and `identify_bytes` are each listed twice.
class Magika:
# NOTE(review): the next four signatures use `Optional`/`List` and return bare
# `MagikaResult`; they look like the pre-change side of the hunk -- confirm.
def __init__(self, model_dir: Optional[Path] = None, prediction_mode: PredictionMode = ..., no_dereference: bool = False, verbose: bool = False, debug: bool = False, use_colors: bool = False) -> None: ...
def identify_path(self, path: Path) -> MagikaResult: ...
def identify_paths(self, paths: List[Path]) -> List[MagikaResult]: ...
def identify_bytes(self, content: bytes) -> MagikaResult: ...
# NOTE(review): the next four signatures use `Path | None`/`list[...]` and wrap
# every result in `StatusOr[...]`; they look like the post-change side -- confirm.
def __init__(self, model_dir: Path | None = None, prediction_mode: PredictionMode = ..., no_dereference: bool = False, verbose: bool = False, debug: bool = False, use_colors: bool = False) -> None: ...
def identify_path(self, path: Path) -> StatusOr[MagikaResult]: ...
def identify_paths(self, paths: list[Path]) -> list[StatusOr[MagikaResult]]: ...
def identify_bytes(self, content: bytes) -> StatusOr[MagikaResult]: ...
@staticmethod
def get_default_model_name() -> str: ...
def get_model_name(self) -> str: ...
def get_model_dir_name(self) -> str: ...
def dump_performance_stats(self) -> None: ...

class MagikaError(Exception):
    """Exception type declared by this stub; subclasses ``Exception`` and adds no members."""
10 changes: 0 additions & 10 deletions typings/magika/prediction_mode.pyi

This file was deleted.

19 changes: 19 additions & 0 deletions typings/magika/seekable.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import abc
from pathlib import Path

class Seekable(abc.ABC):
    """Abstract interface stub for a source that reports a size and serves offset-based reads.

    ``read_at`` is the only abstract method declared here, so concrete
    subclasses must override it. ``abc.ABC`` already supplies the
    ``ABCMeta`` metaclass, so it is not repeated in the class header.
    """

    def __init__(self) -> None: ...

    @property
    def size(self) -> int:
        """Size of the underlying content as an ``int`` (unit presumably bytes -- confirm)."""
        ...

    @abc.abstractmethod
    def read_at(self, offset: int, size: int) -> bytes:
        """Return the ``bytes`` for the region given by ``offset`` and ``size``."""
        ...

    def close(self) -> None: ...

class File(Seekable):
    """``Seekable`` stub constructed from a filesystem ``Path``.

    Declares its own ``read_at`` (abstract in ``Seekable``) and ``close``.
    """

    def __init__(self, path: Path) -> None: ...
    def read_at(self, offset: int, size: int) -> bytes: ...
    def close(self) -> None: ...

class Buffer(Seekable):
    """``Seekable`` stub constructed from an in-memory ``bytes`` object.

    Declares its own ``read_at`` (abstract in ``Seekable``); it does not
    redeclare ``close``, so that comes from ``Seekable``.
    """

    def __init__(self, buffer: bytes) -> None: ...
    def read_at(self, offset: int, size: int) -> bytes: ...
49 changes: 0 additions & 49 deletions typings/magika/types.pyi

This file was deleted.

9 changes: 9 additions & 0 deletions typings/magika/types/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from magika.types.content_type_info import ContentTypeInfo as ContentTypeInfo
from magika.types.content_type_label import ContentTypeLabel as ContentTypeLabel
from magika.types.magika_result import MagikaResult as MagikaResult
from magika.types.model import ModelConfig as ModelConfig, ModelFeatures as ModelFeatures, ModelOutput as ModelOutput
from magika.types.prediction_mode import PredictionMode as PredictionMode
from magika.types.status import Status as Status
from magika.types.statusor import StatusOr as StatusOr

__all__ = ['ContentTypeInfo', 'ContentTypeLabel', 'MagikaResult', 'ModelConfig', 'ModelFeatures', 'ModelOutput', 'PredictionMode', 'Status', 'StatusOr']
12 changes: 12 additions & 0 deletions typings/magika/types/content_type_info.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from dataclasses import dataclass
from magika.types.content_type_label import ContentTypeLabel as ContentTypeLabel

@dataclass(frozen=True)
class ContentTypeInfo:
    """Frozen dataclass stub holding the metadata declared for one content type."""

    label: ContentTypeLabel
    mime_type: str
    group: str
    description: str
    extensions: list[str]
    is_text: bool

    # Parameters are annotated to mirror the field declarations above, so
    # type checkers validate constructor calls instead of treating every
    # argument as untyped.
    def __init__(
        self,
        label: ContentTypeLabel,
        mime_type: str,
        group: str,
        description: str,
        extensions: list[str],
        is_text: bool,
    ) -> None: ...
Loading

0 comments on commit 39b2cb5

Please sign in to comment.