From db5157a6facdf4b991cb9b9add9ea5c1bbc490b2 Mon Sep 17 00:00:00 2001 From: Tiago Teixeira Date: Wed, 10 Jan 2024 12:53:35 +0100 Subject: [PATCH 1/5] Add support for "soft link interface", useful for binfmt_misc In order to be able to execute `.bmir` files directly from the command line, a "soft link interface" was added (similar to busybox), where the links `c2m-ei`, `c2m-eg` and `c2m-el` are created, pointing to `c2m`. These links, to correspond with the binfmt nomenclature, are referred to as `interpreters`. The c2mir driver source was adapted to check if the argv[0] is any of those links. Invoking `c2m` through those links is similar to invoking using ``` $ c2m /path/to/binary {-ei | -eg | -el} ... ``` To use with `binfmt_misc`, the driver expects the `P` flag on the binfmt format, that is, the first argument should be the full path to the executable binary. ``` ./a.bmir ... c2m-ei /path/to/a.mir ./a.mir ``` or if the binary is in PATH ``` a.bmir c2m-ei /usr/local/bin/a.mir a.mir ``` The binfmt line is ``` :mir:M::MIR::/usr/local/bin/c2m-el:P ``` Adapt the path to where the `c2m-*` links are installed and, if desired, change `-el` to your preferred interface (interpreted, generated or lazy). --- GNUmakefile | 14 ++++++++++++++ c2mir/c2mir-driver.c | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 8ccdeecc19..32435cc8d3 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -130,6 +130,7 @@ L2M_TEST += l2m-test$(EXE) endif EXECUTABLES=$(BUILD_DIR)/c2m$(EXE) $(BUILD_DIR)/m2b$(EXE) $(BUILD_DIR)/b2m$(EXE) $(BUILD_DIR)/b2ctab$(EXE) $(L2M_EXE) +LINKS=$(BUILD_DIR)/c2m-ei$(EXE) $(BUILD_DIR)/c2m-eg$(EXE) $(BUILD_DIR)/c2m-el$(EXE) Q=@ @@ -157,6 +158,9 @@ else endif endif install -m a+rx $(EXECUTABLES) $(PREFIX)/bin + -ln -s c2m $(PREFIX)/bin/c2m-ei + -ln -s c2m $(PREFIX)/bin/c2m-eg + -ln -s c2m $(PREFIX)/bin/c2m-el $(PREFIX)/include $(PREFIX)/lib $(PREFIX)/bin: mkdir -p $@ @@ -174,6 +178,7 @@ else endif endif $(RM) 
$(EXECUTABLES:$(BUILD_DIR)/%=$(PREFIX)/bin/%) + $(RM) $(LINKS:$(BUILD_DIR)/%=$(PREFIX)/bin/%) -rmdir $(PREFIX)/include $(PREFIX)/lib $(PREFIX)/bin -rmdir $(PREFIX) @@ -226,6 +231,15 @@ $(BUILD_DIR)/c2mir/%.$(OBJSUFF): $(SRC_DIR)/c2mir/%.c | $(BUILD_DIR)/c2mir $(BUILD_DIR)/c2m$(EXE): $(C2M_BUILD) $(BUILD_DIR)/libmir.$(LIBSUFF) | $(BUILD_DIR) $(LINK) $^ $(LDLIBS) $(EXEO)$@ +$(BUILD_DIR)/c2m-ei$(EXE): $(BUILD_DIR)/c2m$(EXE) + ln -s c2m$(EXE) $@ + +$(BUILD_DIR)/c2m-eg$(EXE): $(BUILD_DIR)/c2m$(EXE) + ln -s c2m$(EXE) $@ + +$(BUILD_DIR)/c2m-el$(EXE): $(BUILD_DIR)/c2m$(EXE) + ln -s c2m$(EXE) $@ + $(BUILD_DIR)/c2mir: mkdir -p $@ diff --git a/c2mir/c2mir-driver.c b/c2mir/c2mir-driver.c index 1043e49f4f..2cdc7dec31 100644 --- a/c2mir/c2mir-driver.c +++ b/c2mir/c2mir-driver.c @@ -226,6 +226,36 @@ static VARR (input_t) * inputs_to_compile; static void init_options (int argc, char *argv[]) { int incl_p, ldir_p = FALSE; /* to remove an uninitialized warning */ + VARR_CREATE (char, temp_string, 0); + VARR_CREATE (char_ptr_t, headers, 0); + VARR_CREATE (macro_command_t, macro_commands, 0); + + // see if called from a linked interface + { + char *slash_ptr = strrchr(argv[0], '/'); + if(slash_ptr == NULL) + slash_ptr = argv[0]; + else + slash_ptr++; + + if(!strcmp(slash_ptr, "c2m-ei")) + interp_exec_p = TRUE; + else if(!strcmp(slash_ptr, "c2m-eg")) + gen_exec_p = TRUE; + else if(!strcmp(slash_ptr, "c2m-el")) + lazy_gen_exec_p = TRUE; + + if(interp_exec_p || gen_exec_p || lazy_gen_exec_p) { + // (probably) called from binfmt_misc + // argv[1] should be full path to binary + // argv[2:] should be args, including binary name + VARR_PUSH (char_ptr_t, source_file_names, argv[1]); + VARR_TRUNC (char_ptr_t, exec_argv, 0); + for (int i=2; i < argc; i++) VARR_PUSH (char_ptr_t, exec_argv, argv[i]); + + return; + } + } options.message_file = stderr; options.output_file_name = NULL; @@ -233,9 +263,6 @@ static void init_options (int argc, char *argv[]) { options.asm_p = options.object_p = 
options.no_prepro_p = options.prepro_only_p = FALSE; options.syntax_only_p = options.pedantic_p = FALSE; gen_debug_level = -1; - VARR_CREATE (char, temp_string, 0); - VARR_CREATE (char_ptr_t, headers, 0); - VARR_CREATE (macro_command_t, macro_commands, 0); optimize_level = -1; threads_num = 1; curr_input.code = NULL; From 89cdc8964bdfc9443e71c2b8c47d2d2644e6ba1c Mon Sep 17 00:00:00 2001 From: Tiago Teixeira Date: Mon, 15 Jan 2024 10:50:34 +0100 Subject: [PATCH 2/5] Revert "Add support for "soft link interface", useful for binfmt_misc" This reverts commit db5157a6facdf4b991cb9b9add9ea5c1bbc490b2. --- GNUmakefile | 14 -------------- c2mir/c2mir-driver.c | 33 +++------------------------------ 2 files changed, 3 insertions(+), 44 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 32435cc8d3..8ccdeecc19 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -130,7 +130,6 @@ L2M_TEST += l2m-test$(EXE) endif EXECUTABLES=$(BUILD_DIR)/c2m$(EXE) $(BUILD_DIR)/m2b$(EXE) $(BUILD_DIR)/b2m$(EXE) $(BUILD_DIR)/b2ctab$(EXE) $(L2M_EXE) -LINKS=$(BUILD_DIR)/c2m-ei$(EXE) $(BUILD_DIR)/c2m-eg$(EXE) $(BUILD_DIR)/c2m-el$(EXE) Q=@ @@ -158,9 +157,6 @@ else endif endif install -m a+rx $(EXECUTABLES) $(PREFIX)/bin - -ln -s c2m $(PREFIX)/bin/c2m-ei - -ln -s c2m $(PREFIX)/bin/c2m-eg - -ln -s c2m $(PREFIX)/bin/c2m-el $(PREFIX)/include $(PREFIX)/lib $(PREFIX)/bin: mkdir -p $@ @@ -178,7 +174,6 @@ else endif endif $(RM) $(EXECUTABLES:$(BUILD_DIR)/%=$(PREFIX)/bin/%) - $(RM) $(LINKS:$(BUILD_DIR)/%=$(PREFIX)/bin/%) -rmdir $(PREFIX)/include $(PREFIX)/lib $(PREFIX)/bin -rmdir $(PREFIX) @@ -231,15 +226,6 @@ $(BUILD_DIR)/c2mir/%.$(OBJSUFF): $(SRC_DIR)/c2mir/%.c | $(BUILD_DIR)/c2mir $(BUILD_DIR)/c2m$(EXE): $(C2M_BUILD) $(BUILD_DIR)/libmir.$(LIBSUFF) | $(BUILD_DIR) $(LINK) $^ $(LDLIBS) $(EXEO)$@ -$(BUILD_DIR)/c2m-ei$(EXE): $(BUILD_DIR)/c2m$(EXE) - ln -s c2m$(EXE) $@ - -$(BUILD_DIR)/c2m-eg$(EXE): $(BUILD_DIR)/c2m$(EXE) - ln -s c2m$(EXE) $@ - -$(BUILD_DIR)/c2m-el$(EXE): $(BUILD_DIR)/c2m$(EXE) - ln -s c2m$(EXE) $@ 
- $(BUILD_DIR)/c2mir: mkdir -p $@ diff --git a/c2mir/c2mir-driver.c b/c2mir/c2mir-driver.c index 2cdc7dec31..1043e49f4f 100644 --- a/c2mir/c2mir-driver.c +++ b/c2mir/c2mir-driver.c @@ -226,36 +226,6 @@ static VARR (input_t) * inputs_to_compile; static void init_options (int argc, char *argv[]) { int incl_p, ldir_p = FALSE; /* to remove an uninitialized warning */ - VARR_CREATE (char, temp_string, 0); - VARR_CREATE (char_ptr_t, headers, 0); - VARR_CREATE (macro_command_t, macro_commands, 0); - - // see if called from a linked interface - { - char *slash_ptr = strrchr(argv[0], '/'); - if(slash_ptr == NULL) - slash_ptr = argv[0]; - else - slash_ptr++; - - if(!strcmp(slash_ptr, "c2m-ei")) - interp_exec_p = TRUE; - else if(!strcmp(slash_ptr, "c2m-eg")) - gen_exec_p = TRUE; - else if(!strcmp(slash_ptr, "c2m-el")) - lazy_gen_exec_p = TRUE; - - if(interp_exec_p || gen_exec_p || lazy_gen_exec_p) { - // (probably) called from binfmt_misc - // argv[1] should be full path to binary - // argv[2:] should be args, including binary name - VARR_PUSH (char_ptr_t, source_file_names, argv[1]); - VARR_TRUNC (char_ptr_t, exec_argv, 0); - for (int i=2; i < argc; i++) VARR_PUSH (char_ptr_t, exec_argv, argv[i]); - - return; - } - } options.message_file = stderr; options.output_file_name = NULL; @@ -263,6 +233,9 @@ static void init_options (int argc, char *argv[]) { options.asm_p = options.object_p = options.no_prepro_p = options.prepro_only_p = FALSE; options.syntax_only_p = options.pedantic_p = FALSE; gen_debug_level = -1; + VARR_CREATE (char, temp_string, 0); + VARR_CREATE (char_ptr_t, headers, 0); + VARR_CREATE (macro_command_t, macro_commands, 0); optimize_level = -1; threads_num = 1; curr_input.code = NULL; From 26369bc27e91a0606b85de95056a77ce77da3fbe Mon Sep 17 00:00:00 2001 From: Tiago Teixeira Date: Mon, 15 Jan 2024 12:40:58 +0100 Subject: [PATCH 3/5] Implement mir runner as standalone project As a follow up on `binfmt_misc`, a new `mir-run` tool is created to handle the running 
of bmir binaries. The MIR execution type is defined by the `MIR_TYPE` environment variable, defaulting to `interp` (other options are `jit` for generation and `lazy` for lazy code generation). This tool also accepts extra libraries to be loaded using the `MIR_LIBS` environment variable and extra library paths to search through environment variables `LD_LIBRARY_PATH` and `MIR_LIB_DIRS` (besides the default ones). --- .gitignore | 2 + CMakeLists.txt | 6 + GNUmakefile | 16 ++- mir-run.c | 361 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 mir-run.c diff --git a/.gitignore b/.gitignore index bf65da380a..52ff72b804 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ *.so *.so.* *.a +/mir-run +/build diff --git a/CMakeLists.txt b/CMakeLists.txt index 7eec01dbd4..ed62d67949 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,6 +63,12 @@ add_executable (c2m "c2mir/c2mir-driver.c") target_include_directories(c2m PRIVATE ${PROJECT_SOURCE_DIR}) target_link_libraries(c2m mir ${CMAKE_DL_LIBS} ) +# ------------------ MIR RUN ---------------------- + +add_executable (mir-run "mir-run.c") +target_include_directories (mir-run PRIVATE ${PROJECT_SOURCE_DIR}) +target_link_libraries(mir-run mir ${CMAKE_DL_LIBS} ) + # ------------------ MIR utils -------------------- add_executable (m2b "mir-utils/m2b.c") target_include_directories(m2b PRIVATE ${PROJECT_SOURCE_DIR}) diff --git a/GNUmakefile b/GNUmakefile index 8ccdeecc19..da921e977d 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -129,7 +129,7 @@ L2M_EXE += $(BUILD_DIR)/l2m$(EXE) L2M_TEST += l2m-test$(EXE) endif -EXECUTABLES=$(BUILD_DIR)/c2m$(EXE) $(BUILD_DIR)/m2b$(EXE) $(BUILD_DIR)/b2m$(EXE) $(BUILD_DIR)/b2ctab$(EXE) $(L2M_EXE) +EXECUTABLES=$(BUILD_DIR)/c2m$(EXE) $(BUILD_DIR)/m2b$(EXE) $(BUILD_DIR)/b2m$(EXE) $(BUILD_DIR)/b2ctab$(EXE) $(L2M_EXE) $(BUILD_DIR)/mir-run$(EXE) Q=@ @@ -235,6 +235,20 @@ clean-c2m: -include $(C2M_BUILD:.$(OBJSUFF)=.d) +# 
------------------ MIR RUN ---------------------- + +MIR_RUN_SRC:=$(SRC_DIR)/mir-run.c +MIR_RUN_BUILD:=$(MIR_RUN_SRC:$(SRC_DIR)/%.c=$(BUILD_DIR)/%.$(OBJSUFF)) + +$(BUILD_DIR)/mir-run$(EXE): $(MIR_RUN_BUILD) $(BUILD_DIR)/libmir.$(LIBSUFF) | $(BUILD_DIR) + $(LINK) $^ $(LDLIBS) $(EXEO)$@ $(BUILD_DIR)/libmir.$(LIBSUFF) + +.PHONY: clean-mir-run +clean-mir-run: + $(RM) $(MIR_RUN_BUILD) $(MIR_RUN_BUILD:.$(OBJSUFF)=.d) + +-include $(MIR_RUN_BUILD:.$(OBJSUFF)=.d) + # ------------------ L2M -------------------------- L2M_SRC:=$(SRC_DIR)/llvm2mir/llvm2mir.c $(SRC_DIR)/llvm2mir/llvm2mir-driver.c L2M_BUILD:=$(L2M_SRC:$(SRC_DIR)/%.c=$(BUILD_DIR)/%.$(OBJSUFF)) diff --git a/mir-run.c b/mir-run.c new file mode 100644 index 0000000000..c73bddbc58 --- /dev/null +++ b/mir-run.c @@ -0,0 +1,361 @@ +#include +#include +#include +#include +#include +#include +#include "mir-gen.h" // mir.h gets included as well + +#define MIR_TYPE_INTERP 1 +#define MIR_TYPE_INTERP_NAME "interp" +#define MIR_TYPE_JIT 2 +#define MIR_TYPE_JIT_NAME "jit" +#define MIR_TYPE_LAZY 3 +#define MIR_TYPE_LAZY_NAME "lazy" + +#define MIR_TYPE_DEFAULT MIR_TYPE_INTERP + +#define MIR_ENV_VAR_LIB_DIRS "MIR_LIB_DIRS" +#define MIR_ENV_VAR_EXTRA_LIBS "MIR_LIBS" +#define MIR_ENV_VAR_TYPE "MIR_TYPE" + +struct lib { + char *name; + void *handler; +}; +typedef struct lib lib_t; + +/* stdlibs according to c2mir */ +#if defined(__unix__) +#if UINTPTR_MAX == 0xffffffff +static lib_t std_libs[] + = {{"/lib/libc.so.6", NULL}, {"/lib32/libc.so.6", NULL}, {"/lib/libm.so.6", NULL}, + {"/lib32/libm.so.6", NULL}, {"/lib/libpthread.so.0", NULL}, {"/lib32/libpthread.so.0", NULL}}; +static const char *std_lib_dirs[] = {"/lib", "/lib32"}; +#elif UINTPTR_MAX == 0xffffffffffffffff +#if defined(__x86_64__) +static lib_t std_libs[] + = {{"/lib64/libc.so.6", NULL}, {"/lib/x86_64-linux-gnu/libc.so.6", NULL}, + {"/lib64/libm.so.6", NULL}, {"/lib/x86_64-linux-gnu/libm.so.6", NULL}, + {"/usr/lib64/libpthread.so.0", NULL}, 
{"/lib/x86_64-linux-gnu/libpthread.so.0", NULL}, + {"/usr/lib/libc.so", NULL}}; +static const char *std_lib_dirs[] = {"/lib64", "/lib/x86_64-linux-gnu"}; +#elif (__aarch64__) +static lib_t std_libs[] + = {{"/lib64/libc.so.6", NULL}, {"/lib/aarch64-linux-gnu/libc.so.6", NULL}, + {"/lib64/libm.so.6", NULL}, {"/lib/aarch64-linux-gnu/libm.so.6", NULL}, + {"/lib64/libpthread.so.0", NULL}, {"/lib/aarch64-linux-gnu/libpthread.so.0", NULL}}; +static const char *std_lib_dirs[] = {"/lib64", "/lib/aarch64-linux-gnu"}; +#elif (__PPC64__) +static lib_t std_libs[] = { + {"/lib64/libc.so.6", NULL}, + {"/lib64/libm.so.6", NULL}, + {"/lib64/libpthread.so.0", NULL}, +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + {"/lib/powerpc64le-linux-gnu/libc.so.6", NULL}, + {"/lib/powerpc64le-linux-gnu/libm.so.6", NULL}, + {"/lib/powerpc64le-linux-gnu/libpthread.so.0", NULL}, +#else + {"/lib/powerpc64-linux-gnu/libc.so.6", NULL}, + {"/lib/powerpc64-linux-gnu/libm.so.6", NULL}, + {"/lib/powerpc64-linux-gnu/libpthread.so.0", NULL}, +#endif +}; +static const char *std_lib_dirs[] = { + "/lib64", +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "/lib/powerpc64le-linux-gnu", +#else + "/lib/powerpc64-linux-gnu", +#endif +}; +#elif (__s390x__) +static lib_t std_libs[] + = {{"/lib64/libc.so.6", NULL}, {"/lib/s390x-linux-gnu/libc.so.6", NULL}, + {"/lib64/libm.so.6", NULL}, {"/lib/s390x-linux-gnu/libm.so.6", NULL}, + {"/lib64/libpthread.so.0", NULL}, {"/lib/s390x-linux-gnu/libpthread.so.0", NULL}}; +static const char *std_lib_dirs[] = {"/lib64", "/lib/s390x-linux-gnu"}; +#elif (__riscv) +static lib_t std_libs[] + = {{"/lib64/libc.so.6", NULL}, {"/lib/riscv64-linux-gnu/libc.so.6", NULL}, + {"/lib64/libm.so.6", NULL}, {"/lib/riscv64-linux-gnu/libm.so.6", NULL}, + {"/lib64/libpthread.so.0", NULL}, {"/lib/riscv64-linux-gnu/libpthread.so.0", NULL}}; +static const char *std_lib_dirs[] = {"/lib64", "/lib/riscv64-linux-gnu"}; +#else +#error cannot recognize 32- or 64-bit target +#endif +#endif +static const char 
*lib_suffix = ".so"; +#endif + +#ifdef _WIN32 +static const int slash = '\\'; +#else +static const int slash = '/'; +#endif + +#if defined(__APPLE__) +static lib_t std_libs[] = {{"/usr/lib/libc.dylib", NULL}, {"/usr/lib/libm.dylib", NULL}}; +static const char *std_lib_dirs[] = {"/usr/lib"}; +static const char *lib_suffix = ".dylib"; +#endif + +#ifdef _WIN32 +static lib_t std_libs[] = {{"C:\\Windows\\System32\\msvcrt.dll", NULL}, + {"C:\\Windows\\System32\\kernel32.dll", NULL}, + {"C:\\Windows\\System32\\ucrtbase.dll", NULL}}; +static const char *std_lib_dirs[] = {"C:\\Windows\\System32"}; +static const char *lib_suffix = ".dll"; +#define dlopen(n, f) LoadLibrary (n) +#define dlclose(h) FreeLibrary (h) +#define dlsym(h, s) GetProcAddress (h, s) +#endif + + +static void close_std_libs (void) { + for (int i = 0; i < sizeof (std_libs) / sizeof (lib_t); i++) + if (std_libs[i].handler != NULL) dlclose (std_libs[i].handler); +} + +static void open_std_libs (void) { + for (int i = 0; i < sizeof (std_libs) / sizeof (struct lib); i++) + std_libs[i].handler = dlopen (std_libs[i].name, RTLD_LAZY); +} + +DEF_VARR (lib_t); +static VARR (lib_t) * extra_libs; + +typedef const char *char_ptr_t; +DEF_VARR (char_ptr_t); +static VARR (char_ptr_t) * lib_dirs; + +DEF_VARR (char); +static VARR (char) * temp_string; + +static void *open_lib (const char *dir, const char *name) { + const char *last_slash = strrchr (dir, slash); + void *res; + FILE *f; + + VARR_TRUNC (char, temp_string, 0); + VARR_PUSH_ARR (char, temp_string, dir, strlen (dir)); + if (last_slash == NULL || last_slash[1] != '\0') VARR_PUSH (char, temp_string, slash); +#ifndef _WIN32 + VARR_PUSH_ARR (char, temp_string, "lib", 3); +#endif + VARR_PUSH_ARR (char, temp_string, name, strlen (name)); + VARR_PUSH_ARR (char, temp_string, lib_suffix, strlen (lib_suffix)); + VARR_PUSH (char, temp_string, 0); + if ((res = dlopen (VARR_ADDR (char, temp_string), RTLD_LAZY)) == NULL) { +#ifndef _WIN32 + if ((f = fopen (VARR_ADDR (char, 
temp_string), "r")) != NULL) { + fclose (f); + fprintf (stderr, "loading %s:%s\n", VARR_ADDR (char, temp_string), dlerror ()); + } +#endif + } + return res; +} + +static void process_extra_lib (char *lib_name) { + lib_t lib; + + lib.name = lib_name; + for (size_t i = 0; i < VARR_LENGTH (char_ptr_t, lib_dirs); i++) + if ((lib.handler = open_lib (VARR_GET (char_ptr_t, lib_dirs, i), lib_name)) != NULL) break; + if (lib.handler == NULL) { + fprintf (stderr, "cannot find library lib%s -- good bye\n", lib_name); + exit (1); + } + VARR_PUSH (lib_t, extra_libs, lib); +} + +static void close_extra_libs (void) { + void *handler; + + for (size_t i = 0; i < VARR_LENGTH (lib_t, extra_libs); i++) + if ((handler = VARR_GET (lib_t, extra_libs, i).handler) != NULL) dlclose (handler); +} + +#if defined(__APPLE__) && defined(__aarch64__) +float __nan (void) { + union { + uint32_t i; + float f; + } u = {0x7fc00000}; + return u.f; +} +#endif + +static void *import_resolver (const char *name) { + void *handler, *sym = NULL; + + for (int i = 0; i < sizeof (std_libs) / sizeof (struct lib); i++) + if ((handler = std_libs[i].handler) != NULL && (sym = dlsym (handler, name)) != NULL) break; + if (sym == NULL) + for (int i = 0; i < VARR_LENGTH (lib_t, extra_libs); i++) + if ((handler = VARR_GET (lib_t, extra_libs, i).handler) != NULL + && (sym = dlsym (handler, name)) != NULL) + break; + if (sym == NULL) { +#ifdef _WIN32 + if (strcmp (name, "LoadLibrary") == 0) return LoadLibrary; + if (strcmp (name, "FreeLibrary") == 0) return FreeLibrary; + if (strcmp (name, "GetProcAddress") == 0) return GetProcAddress; +#else + if (strcmp (name, "dlopen") == 0) return dlopen; + if (strcmp (name, "dlerror") == 0) return dlerror; + if (strcmp (name, "dlclose") == 0) return dlclose; + if (strcmp (name, "dlsym") == 0) return dlsym; + if (strcmp (name, "stat") == 0) return stat; + if (strcmp (name, "lstat") == 0) return lstat; + if (strcmp (name, "fstat") == 0) return fstat; +#if defined(__APPLE__) && 
defined(__aarch64__) + if (strcmp (name, "__nan") == 0) return __nan; + if (strcmp (name, "_MIR_set_code") == 0) return _MIR_set_code; +#endif +#endif + fprintf (stderr, "can not load symbol %s\n", name); + close_std_libs (); + exit (1); + } + return sym; +} + +void lib_dirs_from_env_var(const char *env_var) { + const char *var_value = getenv(env_var); + if (var_value == NULL || var_value[0] == '\0') + return; + + // copy to an allocated buffer + int value_len = strlen(var_value); + char *value = (char*)alloca(value_len+1); + strcpy(value, var_value); + + // colon separated list + char *value_ptr = value; + char *colon = NULL; + while ((colon = strchr(value_ptr, ':')) != NULL) { + colon[0] = '\0'; + VARR_PUSH (char_ptr_t, lib_dirs, value_ptr); + // goto next + value_ptr = colon + 1; + } + // final part of string + // colon == NULL + VARR_PUSH (char_ptr_t, lib_dirs, value_ptr); +} + +int get_mir_type(void) { + const char *type_value = getenv(MIR_ENV_VAR_TYPE); + if (type_value == NULL || type_value[0] == '\0') + return MIR_TYPE_DEFAULT; + + if (strcmp(type_value, MIR_TYPE_INTERP_NAME) == 0) + return MIR_TYPE_INTERP; + + if (strcmp(type_value, MIR_TYPE_JIT_NAME) == 0) + return MIR_TYPE_JIT; + + if (strcmp(type_value, MIR_TYPE_LAZY_NAME) == 0) + return MIR_TYPE_LAZY; + + fprintf(stderr, "warning: unknown MIR_TYPE '%s', using default one\n", type_value); + return MIR_TYPE_DEFAULT; +} + +void open_extra_libs(void) { + const char *var_value = getenv(MIR_ENV_VAR_EXTRA_LIBS); + if (var_value == NULL || var_value[0] == '\0') + return; + + int value_len = strlen(var_value); + char *value = (char*)alloca(value_len+1); + strcpy(value, var_value); + + char *value_ptr = value; + char *colon = NULL; + while ((colon = strchr(value_ptr, ':')) != NULL) { + colon[0] = '\0'; + process_extra_lib(value_ptr); + + value_ptr = colon + 1; + } + process_extra_lib(value_ptr); +} + + +int main (int argc, char **argv, char **envp) { + + // from binfmt_misc we expect the arguments to be: + // 
`mir-run /full/path/to/mir-binary mir-binary ` + if (argc < 3) { + fprintf(stderr, "usage: %s [...]\n", argv[0]); + return 1; + } + + int mir_type = get_mir_type(); + + MIR_val_t val; + int exit_code; + + VARR_CREATE (lib_t, extra_libs, 16); + VARR_CREATE (char_ptr_t, lib_dirs, 16); + for(int i=0; i < sizeof(std_lib_dirs) / sizeof(char_ptr_t); i++) + VARR_PUSH (char_ptr_t, lib_dirs, std_lib_dirs[i]); + lib_dirs_from_env_var("LD_LIBRARY_PATH"); + lib_dirs_from_env_var(MIR_ENV_VAR_LIB_DIRS); + + MIR_item_t main_func = NULL; + + MIR_context_t mctx = MIR_init(); + FILE *mir_file = fopen(argv[1], "r"); + if (!mir_file) { + fprintf(stderr, "failed to open file '%s'\n", argv[1]); + return 1; + } + MIR_read(mctx, mir_file); + + for (MIR_module_t module = DLIST_HEAD (MIR_module_t, *MIR_get_module_list (mctx)); module != NULL; + module = DLIST_NEXT (MIR_module_t, module)) { + for (MIR_item_t func = DLIST_HEAD (MIR_item_t, module->items); func != NULL; + func = DLIST_NEXT (MIR_item_t, func)) { + if (func->item_type != MIR_func_item) continue; + if (strcmp (func->u.func->name, "main") == 0) main_func = func; + } + MIR_load_module (mctx, module); + } + if (main_func == NULL) { + fprintf (stderr, "cannot execute program w/o main function\n"); + return 1; + } + + open_std_libs (); + open_extra_libs (); + + if (mir_type == MIR_TYPE_INTERP) { + MIR_link (mctx, MIR_set_interp_interface, import_resolver); + MIR_interp ( + mctx, main_func, &val, 3, + (MIR_val_t){.i = (argc-2)}, + (MIR_val_t){.a = (void*)(argv+2)}, + (MIR_val_t){.a = (void*)envp} + ); + exit_code = val.i; + } else { + MIR_gen_init (mctx, 1); + MIR_link (mctx, + mir_type == MIR_TYPE_JIT + ? 
MIR_set_gen_interface + : MIR_set_lazy_gen_interface, + import_resolver); + uint64_t (*fun_addr)(int, char**, char**) = MIR_gen(mctx, 0, main_func); + exit_code = fun_addr(argc-2, argv+2, envp); + MIR_gen_finish(mctx); + } + MIR_finish (mctx); + close_extra_libs(); + close_std_libs(); + + return exit_code; +} From 80392c753fe581cd1b6f454005b4baf96350d9c3 Mon Sep 17 00:00:00 2001 From: Tiago Teixeira Date: Mon, 15 Jan 2024 13:00:08 +0100 Subject: [PATCH 4/5] update README with mir-run documentation --- README.md | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 394914baa6..90a180f2cc 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ no warranty that MIR will not be changed in the future and the code will work for any tests except ones given here and on platforms other than x86_64 Linux/OSX, aarch64 Linux/OSX(Apple M1), and ppc64be/ppc64le/s390x/riscv64 Linux** - + ## MIR * MIR is strongly typed IR * MIR can represent machine 32-bit and 64-bit insns of different architectures @@ -168,7 +168,7 @@ ex100: func v, 0 * After linking, you can interpret functions from the modules or call machine code for the functions generated with MIR JIT compiler (generator). What way the function can be executed is usually defined by set up interface. How the generated code is produced (lazily on the first call or ahead of time) - can be also dependent on the interface + can be also dependent on the interface * Running code from the above example could look like the following (here `m1` and `m2` are modules `m_sieve` and `m_e100`, `func` is function `ex100`, `sieve` is function `sieve`): ```c @@ -183,6 +183,39 @@ ex100: func v, 0 /* or ((void (*) (void)) func->addr) (); to call interpr. or gen. 
code through the interface */ ``` +### Running through `binfmt_misc` + +The `mir-run` binary is prepared to be used from `binfmt_misc` with the +following line (example): + +```bash +line=:mir:M::MIR::/usr/local/bin/mir-run:P +echo $line > /proc/sys/fs/binfmt_misc/register +``` + +> Adapt the `mir-run` binary path to your system; the path shown is the default one + +And run with +```bash +c2m your-file.c -o your-file +chmod +x your-file +./your-file your args +``` + +The executable is "configurable" with environment variables: + +- `MIR_TYPE` sets the interface for code execution: `interp` (default), + `jit` (for generation) and `lazy` (for lazy generation); +- `MIR_LIBS` (colon separated list) defines a list of extra libraries to load; +- `MIR_LIB_DIRS` and `LD_LIBRARY_PATH` (colon separated lists) define extra + directories in which to search for the libraries. + + +> Because `mir-run` is tailored to `binfmt_misc`, calling it directly is a bit +> awkward: it expects the full path to the MIR binary, then the program name, then the arguments. +> The `P` flag in the binfmt_misc line is what passes the extra argument with the full path +> to the MIR binary. + ## The current state of MIR project ![Current MIR](mir3.svg) @@ -208,7 +241,7 @@ ex100: func v, 0 * Performance minded porting MIR JIT compiler to 32-bit targets will need an implementation of additional small analysis pass to get info what 64-bit variables are used only in 32-bit instructions - + ## MIR JIT compiler * Compiler **Performance Goals** relative to GCC -O2: * 70% of generated code speed @@ -256,7 +289,7 @@ ex100: func v, 0 * **Combine** (code selection): merging data-depended insns into one * **Dead Code Elimination**: removing insns with unused outputs * **Generate Machine Insns**: run machine-dependent code creating machine insns - + ## C to MIR translation * Currently work on 2 different ways of the translation are ongoing * Implementation of a small C11 (2011 ANSI C standard) to MIR compiler. 
@@ -278,13 +311,13 @@ ex100: func v, 0 * Files `mir-gen-x86_64.c`, `mir-gen-aarch64.c`, `mir-gen-ppc64.c`, `mir-gen-s390x.c`, and `mir-gen-riscv.c` is machine dependent code of JIT compiler * Files `mir-.c` contain simple machine dependent code common for interpreter and - JIT compiler + JIT compiler * Files `mir2c/mir2c.h` and `mir2c/mir2c.c` contain code for MIR to C compiler * Files `c2mir/c2mir.h`, `c2mir/c2mir.c`, `c2mir/c2mir-driver.c`, and `c2mir/mirc.h` contain code for C to MIR compiler. Files in directories `c2mir/x86_64` and `c2mir/aarch64`, `c2mir/ppc64`, `c2mir/s390x`, and `c2mir/riscv` contain correspondingly x86_64, aarch64, ppc64, s390x, and riscv machine-dependent code for C to MIR compiler - + ## Playing with current MIR project code * MIR project is far away from any serious usage * The current code can be used only to familiarize future users with the project From 7110b2194b7e0740256976df0e2c48646740b209 Mon Sep 17 00:00:00 2001 From: Tiago Teixeira Date: Mon, 15 Jan 2024 13:15:54 +0100 Subject: [PATCH 5/5] Fix for dynamic library loading in mir-run Initialize temp_string array and use malloc instead of alloca --- mir-run.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mir-run.c b/mir-run.c index c73bddbc58..32ca3be9ec 100644 --- a/mir-run.c +++ b/mir-run.c @@ -229,7 +229,7 @@ void lib_dirs_from_env_var(const char *env_var) { // copy to an allocated buffer int value_len = strlen(var_value); - char *value = (char*)alloca(value_len+1); + char *value = (char*)malloc(value_len+1); strcpy(value, var_value); // colon separated list @@ -270,7 +270,7 @@ void open_extra_libs(void) { return; int value_len = strlen(var_value); - char *value = (char*)alloca(value_len+1); + char *value = (char*)malloc(value_len+1); strcpy(value, var_value); char *value_ptr = value; @@ -299,6 +299,7 @@ int main (int argc, char **argv, char **envp) { MIR_val_t val; int exit_code; + VARR_CREATE (char, temp_string, 0); VARR_CREATE (lib_t, extra_libs, 
16); VARR_CREATE (char_ptr_t, lib_dirs, 16); for(int i=0; i < sizeof(std_lib_dirs) / sizeof(char_ptr_t); i++)