From 1deccc2288aa6e658e2324f81ab14d6db1902d13 Mon Sep 17 00:00:00 2001 From: Jack Cherng Date: Sun, 18 Feb 2024 07:00:54 +0800 Subject: [PATCH] WIP Signed-off-by: Jack Cherng --- AutoSetSyntax.sublime-settings | 20 ++- menus/Default.sublime-commands | 4 + plugin/__init__.py | 11 +- plugin/commands/__init__.py | 2 + plugin/commands/auto_set_syntax.py | 57 +++++++- .../auto_set_syntax_download_dependencies.py | 124 ++++++++++++++++++ plugin/constants.py | 8 ++ plugin/helpers.py | 17 ++- tests/files/this-is-rust | 15 +++ typings/magika/__init__.pyi | 5 + typings/magika/cli/magika.pyi | 19 +++ typings/magika/colors.pyi | 17 +++ typings/magika/content_types.pyi | 72 ++++++++++ typings/magika/logger.pyi | 17 +++ typings/magika/magika.pyi | 17 +++ typings/magika/prediction_mode.pyi | 10 ++ typings/magika/strenum.pyi | 6 + typings/magika/types.pyi | 49 +++++++ 18 files changed, 462 insertions(+), 8 deletions(-) create mode 100644 plugin/commands/auto_set_syntax_download_dependencies.py create mode 100644 tests/files/this-is-rust create mode 100644 typings/magika/__init__.pyi create mode 100644 typings/magika/cli/magika.pyi create mode 100644 typings/magika/colors.pyi create mode 100644 typings/magika/content_types.pyi create mode 100644 typings/magika/logger.pyi create mode 100644 typings/magika/magika.pyi create mode 100644 typings/magika/prediction_mode.pyi create mode 100644 typings/magika/strenum.pyi create mode 100644 typings/magika/types.pyi diff --git a/AutoSetSyntax.sublime-settings b/AutoSetSyntax.sublime-settings index 121a5ddc..a247517f 100644 --- a/AutoSetSyntax.sublime-settings +++ b/AutoSetSyntax.sublime-settings @@ -643,11 +643,21 @@ } ], - /////////////////////////////////////// - // Guesslang settings (experimental) // - ///////////////////////////////////////////////////////////////////////// - // You have to restart ST after modifying any of guesslang's settings. // - ///////////////////////////////////////////////////////////////////////// + ///////////////////// + // Magika settings // + ///////////////////// + + // To use this feature, you have to install the "magika" library. + // @see https://jfcherng-sublime.github.io/ST-AutoSetSyntax/dl-based-syntax-detection/#prerequisites + "magika.enabled": true, + "magika.min_confidence": 0.85, + // @see https://github.com/google/magika/blob/9e733e847ea0d93ea100d5d478a4b54c3ec5fd1c/docs/supported-content-types-list.md + "magika.syntax_map": { + "json": ["scope:source.json"], + "rs": ["scope:source.rust"], + // ... + "rust": ["=rs"], + }, // To use this feature, you have to install the server. // @see https://jfcherng-sublime.github.io/ST-AutoSetSyntax/experimental/ml-based-syntax-detection/#prerequisites diff --git a/menus/Default.sublime-commands b/menus/Default.sublime-commands index ea561952..b6bc4f4e 100644 --- a/menus/Default.sublime-commands +++ b/menus/Default.sublime-commands @@ -30,6 +30,10 @@ "caption": "AutoSetSyntax: Toggle Log Panel", "command": "auto_set_syntax_toggle_log_panel", }, + { + "caption": "AutoSetSyntax: Download Dependencies", + "command": "auto_set_syntax_download_dependencies", + }, { "caption": "AutoSetSyntax: Download Guesslang Server", "command": "auto_set_syntax_download_guesslang_server", diff --git a/plugin/__init__.py b/plugin/__init__.py index 40bde3eb..9c0cc07d 100644 --- a/plugin/__init__.py +++ b/plugin/__init__.py @@ -1,6 +1,7 @@ import importlib import importlib.machinery import pkgutil +import sys from pathlib import Path import sublime @@ -11,11 +12,12 @@ AutoSetSyntaxCreateNewConstraintCommand, AutoSetSyntaxCreateNewMatchCommand, AutoSetSyntaxDebugInformationCommand, + AutoSetSyntaxDownloadDependenciesCommand, AutoSetSyntaxDownloadGuesslangServerCommand, AutoSetSyntaxRestartGuesslangCommand, run_auto_set_syntax_on_view, ) -from .constants import PLUGIN_CUSTOM_MODULE_PATHS, PLUGIN_NAME +from .constants import PLUGIN_CUSTOM_MODULE_PATHS, PLUGIN_NAME, PLUGIN_PY_LIBS_DIR from .listener import ( AutoSetSyntaxEventListener, AutoSetSyntaxTextChangeListener, @@ -48,6 +50,7 @@ "AutoSetSyntaxCreateNewConstraintCommand", "AutoSetSyntaxCreateNewMatchCommand", "AutoSetSyntaxDebugInformationCommand", + "AutoSetSyntaxDownloadDependenciesCommand", "AutoSetSyntaxDownloadGuesslangServerCommand", "AutoSetSyntaxRestartGuesslangCommand", # ST: listeners @@ -67,6 +70,7 @@ def plugin_loaded() -> None: def _plugin_loaded() -> None: + _add_python_lib_path() _load_custom_implementations() AioSettings.plugin_name = PLUGIN_NAME @@ -98,6 +102,11 @@ def _settings_changed_callback(window: sublime.Window) -> None: compile_rules(window, is_update=True) +def _add_python_lib_path() -> None: + if (path := str(PLUGIN_PY_LIBS_DIR)) not in sys.path: + sys.path.insert(0, path) + + def _load_custom_implementations() -> None: for finder, name, _ in pkgutil.iter_modules(map(str, PLUGIN_CUSTOM_MODULE_PATHS.values())): assert isinstance(finder, importlib.machinery.FileFinder) diff --git a/plugin/commands/__init__.py b/plugin/commands/__init__.py index 624c8e51..57840f6e 100644 --- a/plugin/commands/__init__.py +++ b/plugin/commands/__init__.py @@ -4,6 +4,7 @@ AutoSetSyntaxCreateNewMatchCommand, ) from .auto_set_syntax_debug_information import AutoSetSyntaxDebugInformationCommand +from .auto_set_syntax_download_dependencies import AutoSetSyntaxDownloadDependenciesCommand from .auto_set_syntax_download_guesslang_server import AutoSetSyntaxDownloadGuesslangServerCommand from .auto_set_syntax_restart_guesslang import AutoSetSyntaxRestartGuesslangCommand @@ -13,6 +14,7 @@ "AutoSetSyntaxCreateNewConstraintCommand", "AutoSetSyntaxCreateNewMatchCommand", "AutoSetSyntaxDebugInformationCommand", + "AutoSetSyntaxDownloadDependenciesCommand", "AutoSetSyntaxDownloadGuesslangServerCommand", "AutoSetSyntaxRestartGuesslangCommand", # ... diff --git a/plugin/commands/auto_set_syntax.py b/plugin/commands/auto_set_syntax.py index d039632f..3571dd3c 100644 --- a/plugin/commands/auto_set_syntax.py +++ b/plugin/commands/auto_set_syntax.py @@ -13,7 +13,7 @@ from ..constants import PLUGIN_NAME, RE_ST_SYNTAX_TEST_LINE, RE_VIM_SYNTAX_LINE from ..guesslang.types import GuesslangServerPredictionItem, GuesslangServerResponse -from ..helpers import is_syntaxable_view +from ..helpers import is_syntaxable_view, resolve_magika_label_with_syntax_map from ..libs import websocket from ..logger import Logger from ..rules import SyntaxRuleCollection @@ -194,6 +194,17 @@ def run_auto_set_syntax_on_view( } and _assign_syntax_with_trimmed_filename(view, event): return True + if event in { + ListenerEvent.COMMAND, + ListenerEvent.INIT, + ListenerEvent.LOAD, + ListenerEvent.MODIFY, + ListenerEvent.PASTE, + ListenerEvent.SAVE, + ListenerEvent.UNTRANSIENTIZE, + } and _assign_syntax_with_magika(view, event): + return True + if _assign_syntax_with_heuristics(view, event): return True @@ -374,6 +385,50 @@ def is_json(view: sublime.View) -> bool: return False +def _assign_syntax_with_magika(view: sublime.View, event: ListenerEvent | None = None) -> bool: + if not ( + (window := view.window()) + and (settings := get_merged_plugin_settings(window=window)) + and settings.get("magika.enabled") + and (view_snapshot := G.view_snapshot_collection.get_by_view(view)) + # don't apply on those have an extension + and (event == ListenerEvent.COMMAND or "." not in view_snapshot.file_name_unhidden) + # only apply on plain text syntax + and ((syntax := view_snapshot.syntax) and is_plaintext_syntax(syntax)) + # we don't want to use AI model during typing when there is only one line + # that may result in unwanted behavior such as a new buffer may be assigned to Python + # right after "import" is typed but it could be JavaScript or TypeScript as well + and (event != ListenerEvent.MODIFY or "\n" in view_snapshot.content) + ): + return False + + try: + from magika import Magika + except ImportError as e: + Logger.log(f"💣 Error occured when importing Magika: {e}", window=window) + return False + + classifier = Magika() + output = classifier.identify_bytes(view_snapshot.content.encode()).output + Logger.log(f"🐛 Magika's prediction: {output}", window=window) + + threadshold: float = settings.get("magika.min_confidence", 0.0) + if output.score < threadshold or output.ct_label in {"empty", "txt", "unknown"}: + return False + + syntax_map: dict[str, list[str]] = settings.get("magika.syntax_map", {}) + if not (syntax_likes := resolve_magika_label_with_syntax_map(output.ct_label, syntax_map)): + Logger.log(f'🤔 Unknown "label" from Magika: {output.ct_label}', window=window) + return False + + if not (syntax := find_syntax_by_syntax_likes(syntax_likes, include_plaintext=False)): + Logger.log(f"😢 Failed finding syntax from Magika: {syntax_likes}", window=window) + return False + + sublime.status_message(f"Predicted syntax: {syntax.name} ({round(output.score * 100, 2)}% confidence)") + return assign_syntax_to_view(view, syntax, details={"event": event, "reason": "Magika (Deep Learning)"}) + + def _assign_syntax_with_guesslang_async(view: sublime.View, event: ListenerEvent | None = None) -> None: if not ( G.guesslang_client diff --git a/plugin/commands/auto_set_syntax_download_dependencies.py b/plugin/commands/auto_set_syntax_download_dependencies.py new file mode 100644 index 00000000..08bd20d6 --- /dev/null +++ b/plugin/commands/auto_set_syntax_download_dependencies.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +import gzip +import tarfile +import threading +import urllib.request +import zipfile +from collections.abc import Iterable +from pathlib import Path +from typing import Union + +import sublime +import sublime_plugin + +from ..constants import PLUGIN_NAME, PLUGIN_PY_LIBS_DIR, PLUGIN_PY_LIBS_URL, PLUGIN_PY_LIBS_ZIP_NAME +from ..utils import rmtree_ex + +PathLike = Union[Path, str] + + +class AutoSetSyntaxDownloadDependenciesCommand(sublime_plugin.ApplicationCommand): + # Dependencies are published on https://github.com/jfcherng-sublime/ST-AutoSetSyntax/tree/dependencies + + def description(self) -> str: + return f"{PLUGIN_NAME}: Download Dependencies" + + def run(self) -> None: + self.t = threading.Thread(target=self._worker) + self.t.start() + + @classmethod + def _worker(cls) -> None: + sublime.message_dialog(f"[{PLUGIN_NAME}] Start downloading dependencies...") + + cls._prepare_dependencies() + + if not (magika_dir := PLUGIN_PY_LIBS_DIR / "magika").is_dir(): + sublime.error_message(f"[{PLUGIN_NAME}] Cannot find magika: {str(magika_dir)}") + + sublime.message_dialog(f"[{PLUGIN_NAME}] Finish downloading dependencies!") + + @staticmethod + def _prepare_dependencies() -> None: + zip_path = PLUGIN_PY_LIBS_DIR.parent / PLUGIN_PY_LIBS_ZIP_NAME + rmtree_ex(PLUGIN_PY_LIBS_DIR, ignore_errors=True) + try: + download_file(PLUGIN_PY_LIBS_URL, zip_path) + except Exception as e: + sublime.error_message(f"[{PLUGIN_NAME}] {e} (Error while downloading: {PLUGIN_PY_LIBS_URL})") + decompress_file(zip_path) + zip_path.unlink(missing_ok=True) + + +def decompress_file(tarball: PathLike, dst_dir: PathLike | None = None) -> bool: + """ + Decompress the tarball. + + :param tarball: The tarball + :param dst_dir: The destination directory + + :returns: Successfully decompressed the tarball or not + """ + + def tar_safe_extract( + tar: tarfile.TarFile, + path: PathLike = ".", + members: Iterable[tarfile.TarInfo] | None = None, + *, + numeric_owner: bool = False, + ) -> None: + path = Path(path).resolve() + for member in tar.getmembers(): + member_path = (path / member.name).resolve() + if path not in member_path.parents: + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + tarball = Path(tarball) + dst_dir = Path(dst_dir) if dst_dir else tarball.parent + filename = tarball.name + + try: + if filename.endswith(".tar.gz"): + with tarfile.open(tarball, "r:gz") as f_1: + tar_safe_extract(f_1, dst_dir) + return True + + if filename.endswith(".tar"): + with tarfile.open(tarball, "r:") as f_2: + tar_safe_extract(f_2, dst_dir) + return True + + if filename.endswith(".zip"): + with zipfile.ZipFile(tarball) as f_3: + f_3.extractall(dst_dir) + return True + except Exception: + pass + return False + + +def download_file(url: str, save_path: PathLike) -> None: + """ + Downloads a file. + + :param url: The url + :param save_path: The path of the saved file + """ + + save_path = Path(save_path) + save_path.unlink(missing_ok=True) + save_path.parent.mkdir(parents=True, exist_ok=True) + save_path.write_bytes(simple_urlopen(url)) + + +def simple_urlopen(url: str, chunk_size: int = 512 * 1024) -> bytes: + response = urllib.request.urlopen(url) + data = b"" + while chunk := response.read(chunk_size): + data += chunk + if response.info().get("Content-Encoding") == "gzip": + data = gzip.decompress(data) + return data diff --git a/plugin/constants.py b/plugin/constants.py index bce2a959..dc325be0 100644 --- a/plugin/constants.py +++ b/plugin/constants.py @@ -44,6 +44,14 @@ ################################################################################ +PLUGIN_PY_LIBS_DIR_NAME = f"libs-py38@{ST_PLATFORM_ARCH}" +PLUGIN_PY_LIBS_DIR = PLUGIN_STORAGE_DIR / PLUGIN_PY_LIBS_DIR_NAME +PLUGIN_PY_LIBS_ZIP_NAME = f"{PLUGIN_PY_LIBS_DIR_NAME}.zip" +PLUGIN_PY_LIBS_URL = "https://github.com/{repo}/raw/dependencies/{file}".format( + repo="jfcherng-sublime/ST-AutoSetSyntax", + file=PLUGIN_PY_LIBS_ZIP_NAME, +) + GUESSLANG_SERVER_TAG = "server-0.1.7" GUESSLANG_SERVER_URL = "https://github.com/{repo}/archive/{ref}.zip".format( repo="jfcherng-sublime/ST-AutoSetSyntax", diff --git a/plugin/helpers.py b/plugin/helpers.py index 10520b6f..09e8a281 100644 --- a/plugin/helpers.py +++ b/plugin/helpers.py @@ -3,7 +3,7 @@ import sublime from .settings import get_st_setting -from .utils import is_plaintext_syntax, is_transient_view +from .utils import is_plaintext_syntax, is_transient_view, stable_unique def is_syntaxable_view(view: sublime.View, must_plaintext: bool = False) -> bool: @@ -15,3 +15,18 @@ def is_syntaxable_view(view: sublime.View, must_plaintext: bool = False) -> bool and (not must_plaintext or ((syntax := view.syntax()) and is_plaintext_syntax(syntax))) and ((size_max := get_st_setting("syntax_detection_size_limit", 0)) == 0 or size_max >= view.size()) ) + + +def resolve_magika_label_with_syntax_map(label: str, syntax_map: dict[str, list[str]]) -> list[str]: + res: list[str] = [] + queue: list[str] = syntax_map.get(label, []).copy() + + # @todo what if there are circular references? + while queue: + syntax_like = queue.pop() + if syntax_like.startswith("="): + queue.extend(syntax_map.get(syntax_like[1:], [])) + continue + res.append(syntax_like) + + return list(stable_unique(reversed(res))) diff --git a/tests/files/this-is-rust b/tests/files/this-is-rust new file mode 100644 index 00000000..53ea487e --- /dev/null +++ b/tests/files/this-is-rust @@ -0,0 +1,15 @@ +// This is a comment, and is ignored by the compiler. +// You can test this code by clicking the "Run" button over there -> +// or if you prefer to use your keyboard, you can use the "Ctrl + Enter" +// shortcut. + +// This code is editable, feel free to hack it! +// You can always return to the original code by clicking the "Reset" button -> + +// This is the main function. +fn main() { + // Statements here are executed when the compiled binary is called. + + // Print text to the console. + println!("Hello World!"); +} diff --git a/typings/magika/__init__.pyi b/typings/magika/__init__.pyi new file mode 100644 index 00000000..3c4eef3d --- /dev/null +++ b/typings/magika/__init__.pyi @@ -0,0 +1,5 @@ +from magika import magika as magika, prediction_mode as prediction_mode + +Magika = magika.Magika +MagikaError = magika.MagikaError +PredictionMode = prediction_mode.PredictionMode diff --git a/typings/magika/cli/magika.pyi b/typings/magika/cli/magika.pyi new file mode 100644 index 00000000..55e48598 --- /dev/null +++ b/typings/magika/cli/magika.pyi @@ -0,0 +1,19 @@ +from _typeshed import Incomplete +from magika import Magika as Magika, MagikaError as MagikaError, PredictionMode as PredictionMode, colors as colors +from magika.content_types import ContentTypesManager as ContentTypesManager +from magika.logger import get_logger as get_logger +from magika.types import FeedbackReport as FeedbackReport, MagikaResult as MagikaResult +from pathlib import Path +from typing import List, Optional + +VERSION: str +CONTACT_EMAIL: str +CONTEXT_SETTINGS: Incomplete +HELP_EPILOG: Incomplete + +def main(file: List[Path], recursive: bool, json_output: bool, jsonl_output: bool, mime_output: bool, label_output: bool, magic_compatibility_mode: bool, output_score: bool, prediction_mode_str: str, batch_size: int, no_dereference: bool, with_colors: bool, verbose: bool, debug: bool, generate_report_flag: bool, output_version: bool, list_output_content_types: bool, model_dir: Optional[Path]) -> None: ... +def should_read_from_stdin(files_paths: List[Path]) -> bool: ... +def get_magika_result_from_stdin(magika: Magika) -> MagikaResult: ... +def generate_feedback_report(magika: Magika, file_path: Path, magika_result: MagikaResult) -> FeedbackReport: ... +def print_feedback_report(magika: Magika, reports: List[FeedbackReport]) -> None: ... +def print_output_content_types_list() -> None: ... diff --git a/typings/magika/colors.pyi b/typings/magika/colors.pyi new file mode 100644 index 00000000..1f9dc70e --- /dev/null +++ b/typings/magika/colors.pyi @@ -0,0 +1,17 @@ +BLACK: str +RED: str +GREEN: str +YELLOW: str +BLUE: str +PURPLE: str +CYAN: str +LIGHT_GRAY: str +DARK_GRAY: str +LIGHT_RED: str +LIGHT_GREEN: str +LIGHT_YELLOW: str +LIGHT_BLUE: str +LIGHT_PURPLE: str +LIGHT_CYAN: str +WHITE: str +RESET: str diff --git a/typings/magika/content_types.pyi b/typings/magika/content_types.pyi new file mode 100644 index 00000000..5f376e95 --- /dev/null +++ b/typings/magika/content_types.pyi @@ -0,0 +1,72 @@ +from _typeshed import Incomplete +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional + +CONTENT_TYPES_CONFIG_PATH: Incomplete + +class ContentType: + UNKNOWN: str + UNKNOWN_MIME_TYPE: str + UNKNOWN_CONTENT_TYPE_GROUP: str + UNKNOWN_MAGIC: str + UNKNOWN_DESCRIPTION: str + UNSUPPORTED: str + ERROR: str + MISSING: str + EMPTY: str + CORRUPTED: str + TIMEOUT: str + NOT_VALID: str + FILE_DOES_NOT_EXIST: str + PERMISSION_ERROR: str + DIRECTORY: str + SYMLINK: str + GENERIC_TEXT: str + name: Incomplete + extensions: Incomplete + mime_type: Incomplete + group: Incomplete + magic: Incomplete + description: Incomplete + vt_type: Incomplete + datasets: Incomplete + parent: Incomplete + tags: Incomplete + model_target_label: Incomplete + target_label: Incomplete + correct_labels: Incomplete + in_scope_for_output_content_type: Incomplete + def __init__(self, name: str, extensions: List[str], mime_type: Optional[str], group: Optional[str], magic: Optional[str], description: Optional[str], vt_type: Optional[str], datasets: List[str], parent: Optional[str], tags: List[str], model_target_label: Optional[str], target_label: Optional[str], correct_labels: List[str], in_scope_for_output_content_type: bool, add_automatic_tags: bool = True) -> None: ... + @property + def is_text(self) -> bool: ... + @property + def in_scope_for_training(self) -> bool: ... + def to_dict(self) -> Dict[str, Any]: ... + @staticmethod + def from_dict(info_d: Dict, add_automatic_tags: bool = True) -> ContentType: ... + +class ContentTypesManager: + SPECIAL_CONTENT_TYPES: List[str] + SUPPORTED_TARGET_LABELS_SPEC: Incomplete + cts: Incomplete + tag2cts: Incomplete + ext2cts: Incomplete + def __init__(self, content_type_config_path: Path = ..., add_automatic_tags: bool = True) -> None: ... + def load_content_types_info(self, content_type_config_path: Path, add_automatic_tags: bool = True) -> None: ... + def get(self, content_type_name: str) -> Optional[ContentType]: ... + def get_or_raise(self, content_type_name: Optional[str]) -> ContentType: ... + def get_mime_type(self, content_type_name: str, default: str = ...) -> str: ... + def get_group(self, content_type_name: str, default: str = ...) -> str: ... + def get_magic(self, content_type_name: str, default: str = ..., fallback_to_label: bool = True) -> str: ... + def get_description(self, content_type_name: str, default: str = ..., fallback_to_label: bool = True) -> str: ... + def get_cts_by_ext(self, ext: str) -> List[ContentType]: ... + def get_cts_by_ext_or_raise(self, ext: str) -> List[ContentType]: ... + def get_valid_tags(self, only_explicit: bool = True) -> List[str]: ... + def is_valid_ct_label(self, label: str) -> bool: ... + def is_valid_tag(self, tag: str) -> bool: ... + def select(self, query: Optional[str] = None, must_be_in_scope_for_training: bool = True) -> List[ContentType]: ... + def select_names(self, query: Optional[str] = None, must_be_in_scope_for_training: bool = True) -> List[str]: ... + def get_content_types_space(self) -> List[str]: ... + def get_output_content_types(self) -> List[ContentType]: ... + def get_output_content_types_names(self) -> List[str]: ... + def get_invalid_labels(self, labels: Iterable[str]) -> List[str]: ... diff --git a/typings/magika/logger.pyi b/typings/magika/logger.pyi new file mode 100644 index 00000000..f1f3ce17 --- /dev/null +++ b/typings/magika/logger.pyi @@ -0,0 +1,17 @@ +from _typeshed import Incomplete +from magika import colors as colors +from typing import TextIO + +class SimpleLogger: + level: Incomplete + use_colors: Incomplete + def __init__(self, use_colors: bool = False) -> None: ... + def setLevel(self, level: int) -> None: ... + def raw_print_to_stdout(self, msg: str) -> None: ... + def raw_print(self, msg: str, file: TextIO = ...) -> None: ... + def debug(self, msg: str) -> None: ... + def info(self, msg: str) -> None: ... + def warning(self, msg: str) -> None: ... + def error(self, msg: str) -> None: ... + +def get_logger(use_colors: bool = False) -> SimpleLogger: ... diff --git a/typings/magika/magika.pyi b/typings/magika/magika.pyi new file mode 100644 index 00000000..c9873c78 --- /dev/null +++ b/typings/magika/magika.pyi @@ -0,0 +1,17 @@ +from magika.content_types import ContentType as ContentType, ContentTypesManager as ContentTypesManager +from magika.logger import get_logger as get_logger +from magika.prediction_mode import PredictionMode as PredictionMode +from magika.types import MagikaOutputFields as MagikaOutputFields, MagikaResult as MagikaResult, ModelFeatures as ModelFeatures, ModelOutput as ModelOutput, ModelOutputFields as ModelOutputFields +from pathlib import Path +from typing import List, Optional + +class Magika: + def __init__(self, model_dir: Optional[Path] = None, prediction_mode: PredictionMode = ..., no_dereference: bool = False, verbose: bool = False, debug: bool = False, use_colors: bool = False) -> None: ... + def identify_path(self, path: Path) -> MagikaResult: ... + def identify_paths(self, paths: List[Path]) -> List[MagikaResult]: ... + def identify_bytes(self, content: bytes) -> MagikaResult: ... + @staticmethod + def get_default_model_name() -> str: ... + def get_model_name(self) -> str: ... + +class MagikaError(Exception): ... diff --git a/typings/magika/prediction_mode.pyi b/typings/magika/prediction_mode.pyi new file mode 100644 index 00000000..f3ec6210 --- /dev/null +++ b/typings/magika/prediction_mode.pyi @@ -0,0 +1,10 @@ +from _typeshed import Incomplete +from magika.strenum import LowerCaseStrEnum as LowerCaseStrEnum +from typing import List + +class PredictionMode(LowerCaseStrEnum): + BEST_GUESS: Incomplete + MEDIUM_CONFIDENCE: Incomplete + HIGH_CONFIDENCE: Incomplete + @staticmethod + def get_valid_prediction_modes() -> List[str]: ... diff --git a/typings/magika/strenum.pyi b/typings/magika/strenum.pyi new file mode 100644 index 00000000..05cab4e5 --- /dev/null +++ b/typings/magika/strenum.pyi @@ -0,0 +1,6 @@ +import enum + +class StrEnum(str, enum.Enum): + def __new__(cls, value: str | StrEnum, *args, **kwargs): ... + +class LowerCaseStrEnum(StrEnum): ... diff --git a/typings/magika/types.pyi b/typings/magika/types.pyi new file mode 100644 index 00000000..c55fbe89 --- /dev/null +++ b/typings/magika/types.pyi @@ -0,0 +1,49 @@ +from dataclasses import dataclass +from typing import List, Optional + +@dataclass +class ModelFeatures: + beg: List[int] + mid: List[int] + end: List[int] + def __init__(self, beg, mid, end) -> None: ... + +@dataclass +class ModelOutput: + ct_label: str + score: float + def __init__(self, ct_label, score) -> None: ... + +@dataclass +class MagikaResult: + path: str + dl: ModelOutputFields + output: MagikaOutputFields + def __init__(self, path, dl, output) -> None: ... + +@dataclass +class ModelOutputFields: + ct_label: Optional[str] + score: Optional[float] + group: Optional[str] + mime_type: Optional[str] + magic: Optional[str] + description: Optional[str] + def __init__(self, ct_label, score, group, mime_type, magic, description) -> None: ... + +@dataclass +class MagikaOutputFields: + ct_label: str + score: float + group: str + mime_type: str + magic: str + description: str + def __init__(self, ct_label, score, group, mime_type, magic, description) -> None: ... + +@dataclass +class FeedbackReport: + hash: str + features: ModelFeatures + result: MagikaResult + def __init__(self, hash, features, result) -> None: ...