Skip to content

Commit

Permalink
refactor: improve magika detection performance for unmodified files
Browse files (browse the repository at this point in the history)
identify_path() is more performant than identify_bytes().

Signed-off-by: Jack Cherng <[email protected]>
  • Loading branch information
jfcherng committed Mar 4, 2024
1 parent cf2e51a commit 05c2632
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions plugin/commands/auto_set_syntax.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -314,23 +314,27 @@ def _assign_syntax_with_magika(view: sublime.View, event: ListenerEvent | None =
return False

classifier = Magika()
output = classifier.identify_bytes(view_snapshot.content.encode()).output
# Logger.log(f"🐛 Magika's prediction: {output}", window=window)
if not view.is_dirty() and view_snapshot.path_obj:
result = classifier.identify_path(view_snapshot.path_obj)
else:
result = classifier.identify_bytes(view_snapshot.content.encode())
# Logger.log(f"🐛 Magika's prediction: {result.output}", window=window)

threadshold: float = settings.get("magika.min_confidence", 0.0)
if output.score < threadshold or output.ct_label in {"directory", "empty", "txt", "unknown"}:
if result.output.score < threadshold or result.output.ct_label in {"directory", "empty", "txt", "unknown"}:
return False

syntax_map: dict[str, list[str]] = settings.get("magika.syntax_map", {})
if not (syntax_likes := resolve_magika_label_with_syntax_map(output.ct_label, syntax_map)):
Logger.log(f'🤔 Unknown "label" from Magika: {output.ct_label}', window=window)
if not (syntax_likes := resolve_magika_label_with_syntax_map(result.output.ct_label, syntax_map)):
Logger.log(f'🤔 Unknown "label" from Magika: {result.output.ct_label}', window=window)
return False

if not (syntax := find_syntax_by_syntax_likes(syntax_likes, include_plaintext=False)):
Logger.log(f"😢 Failed finding syntax from Magika: {syntax_likes}", window=window)
return False

sublime.status_message(f"Predicted syntax: {output.ct_label} ({round(output.score * 100, 2)}% confidence)")
confidence = round(result.output.score * 100, 2)
sublime.status_message(f"Predicted syntax: {result.output.ct_label} ({confidence}% confidence)")
return assign_syntax_to_view(view, syntax, details={"event": event, "reason": "Magika (Deep Learning)"})


Expand Down

0 comments on commit 05c2632

Please sign in to comment.