From 0a83ac43ccb37ddfc08d3554d9dd1c11693b0744 Mon Sep 17 00:00:00 2001 From: Zion Leonahenahe Basque Date: Mon, 22 Jul 2024 14:07:11 -0700 Subject: [PATCH] Allow headless loading with binary (Ghidra) (#95) * Allow headless loading with binary (Ghidra) * bump --- libbs/__init__.py | 2 +- libbs/decompilers/ghidra/compat/headless.py | 93 ++++++++++++++++++++- libbs/decompilers/ghidra/interface.py | 8 +- tests/test_decompilers.py | 1 - 4 files changed, 94 insertions(+), 10 deletions(-) diff --git a/libbs/__init__.py b/libbs/__init__.py index 7cf8a507..369f2ca5 100644 --- a/libbs/__init__.py +++ b/libbs/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.14.1" +__version__ = "1.15.0" import logging diff --git a/libbs/decompilers/ghidra/compat/headless.py b/libbs/decompilers/ghidra/compat/headless.py index cd4feb05..a05bd1a3 100644 --- a/libbs/decompilers/ghidra/compat/headless.py +++ b/libbs/decompilers/ghidra/compat/headless.py @@ -1,17 +1,18 @@ import logging from pathlib import Path -from typing import Union +from typing import Union, Optional, Tuple -from pyhidra.core import _setup_project, _analyze_program +from pyhidra.core import _analyze_program, _get_language, _get_compiler_spec from jpype import JClass _l = logging.getLogger(__name__) def open_program( - binary_path: Union[str, Path], + binary_path: Optional[Union[str, Path]] = None, project_location: Union[str, Path] = None, project_name: str = None, + program_name: str = None, analyze=True, language: str = None, compiler: str = None, @@ -22,6 +23,8 @@ def open_program( https://github.com/dod-cyber-crime-center/pyhidra/blob/c878e91b53498f65f2eb0255e22189a6d172917c/pyhidra/core.py#L178 """ from pyhidra.launcher import PyhidraLauncher, HeadlessPyhidraLauncher + if binary_path is None and project_location is None: + raise ValueError("You must provide either a binary path or a project location.") if not PyhidraLauncher.has_launched(): HeadlessPyhidraLauncher().start() @@ -29,9 +32,10 @@ def open_program( from ghidra.app.script import GhidraScriptUtil from ghidra.program.flatapi import FlatProgramAPI project, program = _setup_project( - binary_path, + binary_path=binary_path, project_location=project_location, project_name=project_name, + program_name=program_name if program_name else project_name, language=language, compiler=compiler, loader=loader @@ -44,6 +48,87 @@ def open_program( return flat_api, project, program +def _setup_project( + binary_path: Optional[Union[str, Path]] = None, + project_location: Union[str, Path] = None, + project_name: str = None, + program_name: str = None, + language: str = None, + compiler: str = None, + loader: Union[str, JClass] = None +) -> Tuple["GhidraProject", "Program"]: + from ghidra.base.project import GhidraProject + from java.lang import ClassLoader + from java.io import IOException + + if binary_path is not None: + binary_path = Path(binary_path) + if project_location: + project_location = Path(project_location) + else: + project_location = binary_path.parent + if not project_name: + project_name = f"{binary_path.name}_ghidra" + project_location /= project_name + project_location.mkdir(exist_ok=True, parents=True) + + if isinstance(loader, str): + from java.lang import ClassNotFoundException + try: + gcl = ClassLoader.getSystemClassLoader() + loader = JClass(loader, gcl) + except (TypeError, ClassNotFoundException) as e: + raise ValueError from e + + if isinstance(loader, JClass): + from ghidra.app.util.opinion import Loader + if not Loader.class_.isAssignableFrom(loader): + raise TypeError(f"{loader} does not implement ghidra.app.util.opinion.Loader") + + # Open/Create project + program: "Program" = None + try: + project = GhidraProject.openProject(project_location, project_name, True) + # XXX: binsync patch added here: + if binary_path is not None or program_name is not None: + if program_name is None: + program_name = binary_path.name + if project.getRootFolder().getFile(program_name): + program = project.openProgram("/", program_name, False) + except IOException: + project = GhidraProject.createProject(project_location, project_name, False) + + # NOTE: GhidraProject.importProgram behaves differently when a loader is provided + # loaderClass may not be null so we must use the correct method override + + if binary_path is not None and program is None: + if language is None: + if loader is None: + program = project.importProgram(binary_path) + else: + program = project.importProgram(binary_path, loader) + if program is None: + raise RuntimeError(f"Ghidra failed to import '{binary_path}'. Try providing a language manually.") + else: + lang = _get_language(language) + comp = _get_compiler_spec(lang, compiler) + if loader is None: + program = project.importProgram(binary_path, lang, comp) + else: + program = project.importProgram(binary_path, loader, lang, comp) + if program is None: + message = f"Ghidra failed to import '{binary_path}'. " + if compiler: + message += f"The provided language/compiler pair ({language} / {compiler}) may be invalid." + else: + message += f"The provided language ({language}) may be invalid." + raise ValueError(message) + if program_name: + program.setName(program_name) + project.saveAs(program, "/", program.getName(), True) + + return project, program + def close_program(program, project) -> bool: """ Returns true if closing was successful, false otherwise. diff --git a/libbs/decompilers/ghidra/interface.py b/libbs/decompilers/ghidra/interface.py index 0a5fd2a1..e6503f13 100644 --- a/libbs/decompilers/ghidra/interface.py +++ b/libbs/decompilers/ghidra/interface.py @@ -35,6 +35,7 @@ def __init__( analyze=True, project_location: Optional[Union[str, Path]] = None, project_name: Optional[str] = None, + program_name: Optional[str] = None, **kwargs ): self.loop_on_plugin = loop_on_plugin @@ -45,6 +46,7 @@ def __init__( self._headless_analyze = analyze self._headless_project_location = project_location self._headless_project_name = project_name + self._program_name = program_name self._project = None self._program = None @@ -91,18 +93,16 @@ def _shutdown_headless(self): self._program = None def _init_headless_components(self, *args, **kwargs): - if not self._binary_path.exists(): - raise FileNotFoundError(f"Binary path does not exist: {self._binary_path}") - if os.getenv("GHIDRA_INSTALL_DIR", None) is None: raise RuntimeError("GHIDRA_INSTALL_DIR must be set in the environment to use Ghidra headless.") from .compat.headless import open_program flat_api, project, program = open_program( - self._binary_path, + binary_path=self._binary_path, analyze=self._headless_analyze, project_location=self._headless_project_location, project_name=self._headless_project_name, + program_name=self._program_name, ) if flat_api is None: raise RuntimeError("Failed to open program with Pyhidra") diff --git a/tests/test_decompilers.py b/tests/test_decompilers.py index 118c059c..9da6f1c3 100644 --- a/tests/test_decompilers.py +++ b/tests/test_decompilers.py @@ -188,7 +188,6 @@ def test_ghidra_project_loading(self): deci = DecompilerInterface.discover( force_decompiler=GHIDRA_DECOMPILER, headless=True, - binary_path=binary_path, project_location=tmpdir, project_name=proj_name, analyze=False,