From e81b5dbc3e68a32b8df5e812d83ea8adfdeb4461 Mon Sep 17 00:00:00 2001 From: Aaron Bembenek Date: Thu, 17 Oct 2024 14:49:47 +1100 Subject: [PATCH 1/2] Create pretty documentation. --- .gitignore | 3 + docs/00_language_ref.md | 13 -- docs/Gemfile | 7 ++ docs/Gemfile.lock | 91 ++++++++++++++ docs/_config.yml | 8 ++ docs/eval_modes/compile.md | 54 ++++++++ docs/eval_modes/eager.md | 26 ++++ docs/eval_modes/index.md | 10 ++ docs/eval_modes/options.md | 98 +++++++++++++++ docs/eval_modes/smt.md | 34 ++++++ docs/index.md | 13 ++ .../goal_directed_eval.md} | 31 +++-- docs/lang_ref/index.md | 12 ++ .../lang_basics.md} | 25 ++-- .../logical_formulas.md} | 31 +++-- .../program_safety.md} | 17 ++- docs/pubs.md | 57 +++++++++ docs/starting.md | 115 ++++++++++++++++++ docs/tutorial/{tutorial.md => index.md} | 14 ++- 19 files changed, 604 insertions(+), 55 deletions(-) delete mode 100644 docs/00_language_ref.md create mode 100644 docs/Gemfile create mode 100644 docs/Gemfile.lock create mode 100644 docs/_config.yml create mode 100644 docs/eval_modes/compile.md create mode 100644 docs/eval_modes/eager.md create mode 100644 docs/eval_modes/index.md create mode 100644 docs/eval_modes/options.md create mode 100644 docs/eval_modes/smt.md create mode 100644 docs/index.md rename docs/{03_goal_directed_evaluation.md => lang_ref/goal_directed_eval.md} (74%) create mode 100644 docs/lang_ref/index.md rename docs/{01_language_basics.md => lang_ref/lang_basics.md} (96%) rename docs/{04_logical_formulas.md => lang_ref/logical_formulas.md} (97%) rename docs/{02_program_safety.md => lang_ref/program_safety.md} (95%) create mode 100644 docs/pubs.md create mode 100644 docs/starting.md rename docs/tutorial/{tutorial.md => index.md} (97%) diff --git a/.gitignore b/.gitignore index 449ef2e5..53ce57bc 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,6 @@ target/ /src/main/resources/codegen/build/ .idea/ + +# Jekyll +_site/ diff --git a/docs/00_language_ref.md b/docs/00_language_ref.md deleted file mode 100644 index 22835afd..00000000 --- a/docs/00_language_ref.md +++ /dev/null @@ -1,13 +0,0 @@ -# Formulog Language Reference - -This set of documents describes the Formulog language. -Please raise a [GitHub issue](https://github.com/HarvardPL/formulog/issues/new) if anything is unclear, incomplete, or incorrect :) - -For an overview of Formulog and its motivations, we recommend checking out our [OOPLSA'20 paper](https://aaronbembenek.github.io/papers/formulog-oopsla2020.pdf). - -## TOC - -1. [Language basics](./01_language_basics.md) -2. [Program safety](./02_program_safety.md) -3. [Goal-directed evaluation](./03_goal_directed_evaluation.md) -4. [Logical formulas](./04_logical_formulas.md) \ No newline at end of file diff --git a/docs/Gemfile b/docs/Gemfile new file mode 100644 index 00000000..157e9183 --- /dev/null +++ b/docs/Gemfile @@ -0,0 +1,7 @@ +source 'https://rubygems.org' + +gem "jekyll", "~> 4.3.4" # installed by `gem jekyll` +# gem "webrick" # required when using Ruby >= 3 and Jekyll <= 4.2.2 + +gem "just-the-docs", "0.10.0" # pinned to the current release +# gem "just-the-docs" # always download the latest release diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock new file mode 100644 index 00000000..dd64b4da --- /dev/null +++ b/docs/Gemfile.lock @@ -0,0 +1,91 @@ +GEM + remote: https://rubygems.org/ + specs: + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) + bigdecimal (3.1.8) + colorator (1.1.0) + concurrent-ruby (1.3.4) + em-websocket (0.5.3) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0) + eventmachine (1.2.7) + ffi (1.17.0-arm64-darwin) + ffi (1.17.0-x86_64-linux-gnu) + forwardable-extended (2.6.0) + google-protobuf (4.28.1-arm64-darwin) + bigdecimal + rake (>= 13) + google-protobuf (4.28.1-x86_64-linux) + bigdecimal + rake (>= 13) + http_parser.rb (0.8.0) + i18n (1.14.6) + concurrent-ruby (~> 1.0) + jekyll (4.3.4) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 1.0) + jekyll-sass-converter (>= 2.0, < 4.0) + jekyll-watch (~> 2.0) + kramdown (~> 2.3, >= 2.3.1) + kramdown-parser-gfm (~> 1.0) + liquid (~> 4.0) + mercenary (>= 0.3.6, < 0.5) + pathutil (~> 0.9) + rouge (>= 3.0, < 5.0) + safe_yaml (~> 1.0) + terminal-table (>= 1.8, < 4.0) + webrick (~> 1.7) + jekyll-include-cache (0.2.1) + jekyll (>= 3.7, < 5.0) + jekyll-sass-converter (3.0.0) + sass-embedded (~> 1.54) + jekyll-seo-tag (2.8.0) + jekyll (>= 3.8, < 5.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + just-the-docs (0.10.0) + jekyll (>= 3.8.5) + jekyll-include-cache + jekyll-seo-tag (>= 2.0) + rake (>= 12.3.1) + kramdown (2.4.0) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.4) + listen (3.9.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + mercenary (0.4.0) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + public_suffix (6.0.1) + rake (13.2.1) + rb-fsevent (0.11.2) + rb-inotify (0.11.1) + ffi (~> 1.0) + rexml (3.3.7) + rouge (4.3.0) + safe_yaml (1.0.5) + sass-embedded (1.78.0-arm64-darwin) + google-protobuf (~> 4.27) + sass-embedded (1.78.0-x86_64-linux-gnu) + google-protobuf (~> 4.27) + terminal-table (3.0.2) + unicode-display_width (>= 1.1.1, < 3) + unicode-display_width (2.6.0) + webrick (1.8.1) + +PLATFORMS + arm64-darwin + x86_64-linux-gnu + +DEPENDENCIES + jekyll (~> 4.3.4) + just-the-docs (= 0.10.0) + +BUNDLED WITH + 2.5.9 diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 00000000..cc03c6e3 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,8 @@ +title: Formulog +description: Datalog for SMT-based static analysis +theme: just-the-docs + +url: https://just-the-docs.github.io + +aux_links: + GitHub Repository: https://github.com/HarvardPL/formulog diff --git a/docs/eval_modes/compile.md b/docs/eval_modes/compile.md new file mode 100644 index 00000000..0469839e --- /dev/null +++ b/docs/eval_modes/compile.md @@ -0,0 +1,54 @@ +--- +title: Compilation +layout: page +parent: Evaluation Modes +nav_order: 3 +--- + +# Compilation + +As an alternative to being directly interpreted (the default), Formulog programs can be compiled into a mix of C++ and Souffle code, which can then in turn be compiled into an efficient executable. +To enable compilation, set the `--codegen` (`-c`) flag; generated code will be placed in the directory `./codegen/` (you can change this using the `--codegen-dir` option). +Within this directory you can use `cmake` to compile the generated code into a binary named `flg`. + +For example, to compile and execute the `greeting.flg` program from above, you can use these steps: + +``` +java -jar formulog.jar -c greeting.flg && \ + cd codegen && \ + cmake -B build -S . && \ + cmake --build build -j && \ + ./build/flg --dump-idb +``` + +This should produce output like the following: + +``` +Parsing... +Finished parsing (0.000s) +Evaluating... +Finished evaluating (0.029s) + +==================== SELECTED IDB RELATIONS ==================== + +---------- greeting (3) ---------- +greeting("Hello, Bob") +greeting("Hello, World") +greeting("Hello, Alice") +``` + +Use the command `./build/flg -h` see options available when running the executable. + +For more information about the Formulog compiler, see the OOPSLA'24 paper [Making Formulog Fast: An Argument for Unconventional Datalog Evaluation](https://dl.acm.org/doi/10.1145/3689754) by Aaron Bembenek, Michael Greenberg, and Stephen Chong. + +## Dependencies + +To build the generated code, you must have: + +- A C++ compiler that supports the C++17 standard (and OpenMP, if you want to produce a parallelized binary) +- `cmake` (v3.21+) +- [`boost`](https://www.boost.org/) (a version compatible with v1.81) +- [`oneTBB`](https://oneapi-src.github.io/oneTBB/) (v2021.8.0 is known to work) +- [`souffle`](https://souffle-lang.github.io/) (v2.3 is known to work; you have to use our [custom fork](https://github.com/aaronbembenek/souffle) if you want to combine compilation with [eager evaluation]({{ site.base_url }}{% link eval_modes/eager.md %}).) + +The Formulog Docker image already has these dependencies installed. \ No newline at end of file diff --git a/docs/eval_modes/eager.md b/docs/eval_modes/eager.md new file mode 100644 index 00000000..3ae5e514 --- /dev/null +++ b/docs/eval_modes/eager.md @@ -0,0 +1,26 @@ +--- +title: Eager Evaluation +layout: page +parent: Evaluation Modes +nav_order: 4 +--- + +# Eager Evaluation + +In addition to traditional semi-naive Datalog evaluation, Formulog supports _eager evaluation_, a novel concurrent evaluation algorithm for Datalog that is faster than semi-naive evaluation on some Formulog workloads (often because it induces a more favorable distribution of the SMT workload across SMT solvers). +Whereas semi-naive evaluation batches derived tuples to process them in explicit rounds, eager evaluation eagerly pursues the consequences of each tuple as it is derived. + +Using eager evaluation with the Formulog interpreter is easy: just add the `--eager-eval` flag. +Eager evaluation can also be used with the Formulog compiler, provided you install [our custom version of Souffle](https://github.com/aaronbembenek/souffle). +When you configure `cmake` on the generated code, you need to add `-DFLG_EAGER_EVAL=On`. +For example, to build a version of the greeting program that uses eager evaluation, use these commands: + +``` +java -jar formulog.jar -c greeting.flg && \ + cd codegen && \ + cmake -B build -S . -DFLG_EAGER_EVAL=On && \ + cmake --build build -j && \ + ./build/flg --dump-idb +``` + +For more information about eager evaluation, see the OOPSLA'24 paper [Making Formulog Fast: An Argument for Unconventional Datalog Evaluation](https://dl.acm.org/doi/10.1145/3689754) by Aaron Bembenek, Michael Greenberg, and Stephen Chong. \ No newline at end of file diff --git a/docs/eval_modes/index.md b/docs/eval_modes/index.md new file mode 100644 index 00000000..0e865b0b --- /dev/null +++ b/docs/eval_modes/index.md @@ -0,0 +1,10 @@ +--- +title: Evaluation Modes +layout: page +nav_order: 4 +--- + +# Evaluation Modes + +Given a Formulog program, there are many different ways to evaluate it. +These pages describe the primary options supported by our current implementation of Formulog. \ No newline at end of file diff --git a/docs/eval_modes/options.md b/docs/eval_modes/options.md new file mode 100644 index 00000000..d9665c62 --- /dev/null +++ b/docs/eval_modes/options.md @@ -0,0 +1,98 @@ +--- +title: Options and System Properties +layout: page +parent: Evaluation Modes +nav_order: 1 +--- + +# Options and System Properties + +Formulog evaluation is controlled by options and system properties. +For example, to interpret the test program with SMT debug information (the `debugSmt` property) and 2 +threads (the `-j 2` option), use + +``` +java -DdebugSmt -jar formulog.jar greeting.flg -j 2 +``` + +## Options + +Run Formulog with the `-h` flag to see a list of the command-line options that are currently available. +As of Formulog v0.8.0, they are: + +``` +Usage: formulog [-chV] [--dump-all] [--dump-idb] [--dump-query] [--dump-sizes] + [--eager-eval] [--smt-stats] [--codegen-dir=] + [-D=] [-j=] + [--smt-solver-mode=] + [--dump=]... [-F=]... +Runs Formulog. + Formulog program file. + -c, --codegen Compile the Formulog program. + --codegen-dir= + Directory for generated code (default: './codegen'). + -D, --output-dir= + Directory for .tsv output files (default: '.'). + --dump= + Print selected relations. + --dump-all Print all relations. + --dump-idb Print all IDB relations. + --dump-query Print query result. + --dump-sizes Print relation sizes. + --eager-eval Use eager evaluation (instead of traditional semi-naive + Datalog evaluation) + -F, --fact-dir= + Directory to look for fact .tsv files (default: '.'). + -h, --help Show this help message and exit. + -j, --parallelism= + Number of threads to use. + --smt-solver-mode= + Strategy to use when interacting with external SMT solvers + ('naive', 'push-pop', or 'check-sat-assuming'). + --smt-stats Report basic statistics related to SMT solver usage. + -V, --version Print version information and exit. +``` + +**Note:** Formulog does not print any results by default; use one of the +`--dump*` options to print results to the console, or annotate intensional +database (IDB) relations with `@disk` to dump them to disk. + +## System Properties + +In addition to options, there are many system properties that can be set using +the `-D` flag (as in `-DdebugSmt` or `-DuseDemandTransformation=false`). Some of +the most useful ones are: + +* `debugSmt` - log debugging information related to SMT calls to + the `solver_logs/` directory (defaults to false) +* `debugMst` - print debugging information related to the magic set + transformation (defaults to false) +* `debugRounds` - print statistics for each round of seminaive evaluation + (defaults to false) +* `useDemandTransformation` - apply the demand transformation as a + post-processing step after the magic set transformation (defaults to true) +* `softExceptions` - ignore exceptions during evaluation (i.e., treat them as + unification failures, and not as something that should stop evaluation; + defaults to false) +* `sequential` - run interpreter without a thread pool (helpful for debugging + runtime; defaults to false) +* `printRelSizes` - print final relation sizes (defaults to false) +* `printFinalRules` - print the final, transformed rules (defaults to false) +* `trackedRelations=REL_1,...,REL_n` - print facts from listed relations as they + are derived (defaults to the empty list) +* `smtLogic=LOGIC` - set the logic used by the external SMT solver (defaults to + `ALL`) +* `smtSolver=SOLVER` - set the external SMT solver to use; current options are + `z3` (default), `cvc4`, `yices`, and `boolector` +* `smtDeclareAdts` - whether to declare Formulog algebraic data types to the SMT + solver upon initialization; set this to false for logics that do not support + ADTs (defaults to true) + +### Alternative SMT Solvers + +While we have primarily used Formulog with Z3 as the backend solver, we also +have some experimental (not recently tested) support for other solvers; not all +these solvers handle the full range of Formulog features. To use a solver +besides Z3, you need to set the `smtSolver` system property (see above). For +each solver, the relevant binary needs to be on your path: `z3` for Z3, +`boolector` for Boolector, `cvc4` for CVC4, and `yices-smt2` for Yices. \ No newline at end of file diff --git a/docs/eval_modes/smt.md b/docs/eval_modes/smt.md new file mode 100644 index 00000000..cd87b498 --- /dev/null +++ b/docs/eval_modes/smt.md @@ -0,0 +1,34 @@ +--- +title: Solver Modes and Incremental SMT Solving +layout: page +parent: Evaluation Modes +nav_order: 2 +--- + +# Solver Modes and Incremental SMT Solving + +The Formulog runtime associates one external SMT solver process per Formulog +worker thread. Each SMT query is a list of conjuncts. If the SMT solver is +invoked via the `is_sat_opt` or `get_model_opt` function, this list is +explicitly given by the programmer; otherwise, if the solver is invoked via the +`is_sat` or `is_valid` function, the Formulog runtime breaks the supplied +proposition into conjuncts. + +Formulog supports three strategies for interacting with external SMT solvers; +they can be set using the `--smt-solver-mode` option. Consider two SMT queries +`x` and `y`, where `x` immediately precedes `y` and both are lists of conjuncts. + +- `naive`: reset the solver between queries `x` and `y` (do not use incremental +SMT solving). +- `push-pop`: try to use incremental SMT solving via the `push` and `pop` SMT +commands. This can work well when query `y` extends query `x`; e.g., `y = c :: +x`, where `c` is an additional conjunct; this situation most commonly occurs +when using [eager evaluation]({{ site.baseurl }}{% link eval_modes/eager.md %}). +- `check-sat-assuming`: try to use incremental SMT solving via the +`check-sat-assuming` SMT command. This caches conjuncts in the SMT solver in a +way such that they can be enabled or disabled per SMT query, and works well if +there are shared conjuncts between queries `x` and `y`, but query `x` is not +simply an extension of query `y` (e.g., it omits a conjunct in query `y`). + +For more info, see the ICLP'20 extended abstract [Datalog-Based Systems Can Use Incremental SMT Solving](https://aaronbembenek.github.io/papers/datalog-incr-smt-iclp2020.pdf) +by Aaron Bembenek, Michael Ballantyne, Michael Greenberg, and Nada Amin. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..5b7d9933 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,13 @@ +--- +title: Home +layout: home +nav_order: 1 +--- + +# Welcome to Formulog! + +Formulog is a programming language that extends the logic programming language Datalog with SMT solving and first-order functional programming. +Formulog was designed to be a domain-specific language for implementing SMT-based program analyses, like refinement type checking and symbolic execution. +But who knows---maybe it will also have other uses! + +Questions? Feedback? Please [raise a GitHub issue](https://github.com/HarvardPL/formulog/issues/new)! \ No newline at end of file diff --git a/docs/03_goal_directed_evaluation.md b/docs/lang_ref/goal_directed_eval.md similarity index 74% rename from docs/03_goal_directed_evaluation.md rename to docs/lang_ref/goal_directed_eval.md index 95f3ec47..cdf99261 100644 --- a/docs/03_goal_directed_evaluation.md +++ b/docs/lang_ref/goal_directed_eval.md @@ -1,4 +1,11 @@ -# Goal-directed evaluation +--- +title: Goal-Directed Evaluation +layout: page +parent: Language Reference +nav_order: 3 +--- + +# Goal-Directed Evaluation By default, Formulog uses a standard bottom-up, saturating evaluation strategy. However, you can also trigger a form of goal-directed, top-down execution if you @@ -6,14 +13,14 @@ include a query in your Formulog program. A query is a rule with an empty head and a single (non-negated) body atom; for example, `:- p(X, "a").` is a query. Each program can only have a single query. -The Formulog runtime will use the query to rewrite your program using the magic -set transformation technique. The rewritten program simulates top-down -evaluation when it is evaluated bottom-up. The rewriting happens after type -checking, but before program validation (i.e., before the checks described in -the "Program safety" section are run). This means that you are allowed to write -"invalid" Formulog programs, so long as that when they are rewritten, they pass -the validation checks. For example, this program is invalid evaluated bottom-up, -since there are unbound variables (in the head of the rules): +The Formulog runtime will use the query to rewrite your program using (a variant +of) the magic set transformation technique. The rewritten program simulates +top-down evaluation when it is evaluated bottom-up. The rewriting happens after +type checking, but before program validation (i.e., before the checks described +in the "Program safety" section are run). This means that you are allowed to +write "invalid" Formulog programs, so long as that when they are rewritten, they +pass the validation checks. For example, this program is invalid evaluated +bottom-up, since there are unbound variables (in the head of the rules): ``` rel member(i32, i32 list) @@ -38,17 +45,17 @@ also be stratified. However, the transformation can turn a program that terminates into a program that does not terminate. Also, it cannot be used with predicates that are "invoked" from the functional fragment of Formulog. -## Query syntax +## Query Syntax Queries are in the form `:- A.` where `A` is a positive (non-negated) atom. The typical rules about variable usage apply (see the "Anonymous variables" section -of the Program Safety document). If you want to have a query consisting of +of the [Program Safety page]({{ site.baseurl }}{% link lang_ref/program_safety.md %})). If you want to have a query consisting of multiple atoms, write a rule defining a new relation and then query that relation. For example, the hypothetical query `:- A, B.` could be rewritten as the rule `R :- A, B.` and query `:- R.`. There can be only one query per program. -## Partial goal-directed evaluation +## Partial Goal-Directed Evaluation It is also possible to only use goal-directed evaluation for part of a program. You can control this by annotating IDB relation declarations with diff --git a/docs/lang_ref/index.md b/docs/lang_ref/index.md new file mode 100644 index 00000000..a4e49da0 --- /dev/null +++ b/docs/lang_ref/index.md @@ -0,0 +1,12 @@ +--- +title: Language Reference +layout: page +nav_order: 5 +--- + +# Language Reference + +This set of documents describes the Formulog language (as opposed to its current implementation). +Please raise a [GitHub issue](https://github.com/HarvardPL/formulog/issues/new) if anything is unclear, incomplete, or incorrect :) + +For an overview of Formulog and its motivations, we recommend checking out our [publications]({{ site.baseurl }}{% link pubs.md %}). \ No newline at end of file diff --git a/docs/01_language_basics.md b/docs/lang_ref/lang_basics.md similarity index 96% rename from docs/01_language_basics.md rename to docs/lang_ref/lang_basics.md index 465042bf..40fd908f 100644 --- a/docs/01_language_basics.md +++ b/docs/lang_ref/lang_basics.md @@ -1,4 +1,11 @@ -# Language basics +--- +title: Language Basics +layout: page +parent: Language Reference +nav_order: 1 +--- + +# Language Basics Formulog is an extension of Datalog designed to support program analyses that use logical formulas, such as symbolic execution and refinement type checking. @@ -13,7 +20,7 @@ A Formulog program consists of three main components: Formulog has a strong, static type system. -### Built-in types +### Built-in Types Formulog has five built-in primitive types: @@ -46,9 +53,9 @@ type cmp = ``` It also has built-in types representing logical formulas, but a discussion of -these is delayed until the section on logical formulas. +these is delayed until the [section on logical formulas]({{ site.baseurl }}{% link lang_ref/logical_formulas.md %}). -#### List notation +#### List Notation Formulog provides special notation for terms of the `list` type. The `cons` constructor can be written using the infix notation `::`; i.e., `X :: Y` is @@ -59,7 +66,7 @@ term `[X, Y, Z]` is shorthand for `cons(X, cons(Y, cons(Z, nil)))`. The term Both notations can be used in pattern matching (described below). -### User-defined types +### User-Defined Types Formulog allows users to define their own (polymorphic) algebraic data types. For instance, this defines a list-like type: @@ -207,7 +214,7 @@ where the rule is translated to p :- foo(X), X = true. ``` -### Reading EDB relations from disk +### Reading EDB Relations from Disk It is possible to specify that an EDB relation should be read from an external file by annotating its declaration with `@disk`, as in @@ -262,7 +269,7 @@ bar("aloha"). Every fact directory must have a `.tsv` file for _every_ external input relation (the file can be empty). -### Writing IDB relations to disk +### Writing IDB Relations to Disk An IDB relation can be annotated with the annotation `@disk`, in which case Formulog will dump its contents into a `.tsv` file in the directory specified on @@ -347,7 +354,7 @@ const pi : fp64 = 3.14 (* same as `fun pi : fp64 = 3.14` *) ``` -### Lifted relations and aggregation +### Lifted Relations and Aggregation Formulog allows any relation (i.e., EDB relations, IDB relations, and the built-in relations `!=` and `=`) to be lifted to a boolean-returning function. @@ -375,7 +382,7 @@ relation `p` that relates a `bool` to an `i32`, we have: The use of lifted predicates must be stratified, as described in the "Program Safety" document. -### Built-in functions +### Built-in Functions Finally, Formulog already has a bunch of basic functions built-in (mostly to do with manipulating primitives): diff --git a/docs/04_logical_formulas.md b/docs/lang_ref/logical_formulas.md similarity index 97% rename from docs/04_logical_formulas.md rename to docs/lang_ref/logical_formulas.md index 61b43f4d..f215217d 100644 --- a/docs/04_logical_formulas.md +++ b/docs/lang_ref/logical_formulas.md @@ -1,4 +1,11 @@ -# Logical formulas +--- +title: Logical Formulas +layout: page +parent: Language Reference +nav_order: 4 +--- + +# Logical Formulas Formulog provides support for representing and reasoning about logical formulas. @@ -35,7 +42,7 @@ ok3 :- !is_sat(`E /\ F`). ``` -## Formula types +## Formula Types For every non-formula type τ, there are two corresponding types that are used to represent τ-valued logical formulas. The first is `τ smt`, which represents a @@ -79,7 +86,7 @@ of `q`). The second rule is fine since `X` is bound in a position that has type type checker currently uses the order that the rule was originally written; in the future, the type checker could try to reorder rules to make them well typed. -### Uninterpreted sorts +### Uninterpreted Sorts Formulog allows users to define uninterpreted sorts that can be used within formulas. For instance, you can declare a polymorphic uninterpreted sort like @@ -89,7 +96,7 @@ this: uninterpreted sort ('a, 'b) foo ``` -## Representing formulas +## Representing Formulas Formulas are constructed from a library of constructors and are typically quoted by backticks, which tells the type checker to use the "formula mode" of the @@ -133,13 +140,13 @@ There is also a shorthand syntax: a pound sign, followed by an identifier, followed by a type within square brackets, as in `#x[bool]`. This is the same thing as `#{"x"}[bool]`. -### Built-in formula terms +### Built-in Formula Terms Formulog provides built-in terms that are used to construct formulas that should be interpreted under a particular theory. For the most part, these constructors directly reflect the SMT-LIB standard. -#### Parameterized terms +#### Parameterized Terms You will see that many formula terms are parameterized with a type or natural number. For example, the constructor for formula equality, `smt_eq[τ]`, is @@ -180,7 +187,7 @@ In this case, one annotation is enough to clarify things: The parameters to a parameterized constructor have to be fully resolved at compile time. -#### Logical connectives +#### Logical Connectives Formulog has the standard first-order logic connectives: @@ -257,7 +264,7 @@ The notation supports patterns with multiple terms (they should be separated by commas); however, it does not support multiple patterns, in which case you need to use the explicit constructor described above. -#### Bit vectors +#### Bit Vectors We have bit vectors, where `bv[k] smt` is a `k`-bit symbolic bit vector: @@ -297,7 +304,7 @@ constraints on their parameters (for example, with `bv_concat[i,j,k]`, it must be that `i + j = k`). Illegal parameter choices are therefore not caught by the type system, and might lead to SMT solver crashes at runtime. -#### Floating point +#### Floating Point And we have floating point, where `fp[j,k] smt` is a symbolic floating point number with exponent length `j` and signficand length `k`: @@ -389,7 +396,7 @@ str_suffixof : string smt, string smt -> bool smt These operations follow the theory of strings supported by Z3 and CVC4. -### Using algebraic data types and records in formulas +### Using Algebraic Data Types and Records in Formulas Terms constructed from user-defined algebraic data types can also be used in formulas, where they are interpreted under the theory of algebraic data types. @@ -423,7 +430,7 @@ term. Symbolic getters are also created for records, where each getter is the name of the relevant label prefixed with `#`. -### Uninterpreted functions +### Uninterpreted Functions Formulog also provides a way to declare uninterpreted functions, as here: @@ -435,7 +442,7 @@ This effectively defines a new constructor for `bool smt` that expects a single argument of type `bv[32] smt`. Uninterpreted functions can only be used within formulas. -## Reasoning about formulas +## Reasoning about Formulas Formulog currently provides these functions for reasoning about/manipulating formulas: diff --git a/docs/02_program_safety.md b/docs/lang_ref/program_safety.md similarity index 95% rename from docs/02_program_safety.md rename to docs/lang_ref/program_safety.md index b07931f4..b58ee2b6 100644 --- a/docs/02_program_safety.md +++ b/docs/lang_ref/program_safety.md @@ -1,15 +1,22 @@ -# Program safety +--- +title: Program Safety +layout: page +parent: Language Reference +nav_order: 2 +--- + +# Program Safety Formulog imposes some restrictions on programs so that it can give guarantees about runtime behavior. Beyond the type system, these restrictions fall into two categories: restrictions on variable usage and restrictions on negation. -## Variable usage +## Variable Usage Correct variable usage is a tricky aspect of logic programming; this section describes Formulog's restrictions on this front. -### Anonymous variables +### Anonymous Variables To help catch bugs related to variable usage, Formulog requires that every variable that does not start with an underscore occurs more than once in its @@ -19,7 +26,7 @@ this reason, Formulog does not allow any variable that begins with an underscore to occur more than once in a scope, except for the traditional anonymous variable `_`. -### Binding variables +### Binding Variables Formulog requires that every variable in a rule is "bound." In what follows, we use the identifiers `p` for a relation, `c` for a constructor, and `f` for a @@ -81,7 +88,7 @@ any rewriting results in a rule that is still well-typed. Although we have not found our current approach to be a hindrance in practice, this is something that we could implement in the future. -## Negation, aggregation, and stratification +## Negation, Aggregation, and Stratification To ensure that every program has a least model, Formulog requires the use of stratified negation and aggregation, a common restriction in Datalog. diff --git a/docs/pubs.md b/docs/pubs.md new file mode 100644 index 00000000..a7d0787f --- /dev/null +++ b/docs/pubs.md @@ -0,0 +1,57 @@ +--- +title: Publications and Artifacts +layout: page +nav_order: 6 +--- + +# Publications and Artifacts + +Check out our academic papers to learn more about the science behind Formulog. + +## Formulog Language Design + +Our [OOPSLA'20 paper](https://dl.acm.org/doi/10.1145/3428209) introduces Formulog, motivates its language design, and demonstrates its use through three extensive case studies. + +To cite this paper, please use an entry like this one: + +``` +@article{Bembenek2020Formulog, + author = {Aaron Bembenek and Michael Greenberg and Stephen Chong}, + title = {Formulog: {D}atalog for {SMT}-{B}ased {S}tatic {A}nalysis}, + journal = {Proceedings of the {ACM} on Programming Languages}, + year = {2020}, + volume = {4}, + number = {OOPSLA}, + doi = {10.1145/3428209}, + pages = {141:1--141:31} +} +``` + +The refereed artifact for this paper is [available on Zenodo](https://zenodo.org/records/4039122). + +## Formulog Performance + +Our [OOPSLA'24 paper](https://dl.acm.org/doi/10.1145/3689754) describes speeding up Formulog via 1) compilation to Soufflé and 2) eager evaluation, a novel Datalog evaluation strategy that works well for some SMT-heavy workloads. + +To cite this paper, please use an entry like this one: + +``` +@article{Bembenek2024Making, + author = {Aaron Bembenek and Michael Greenberg and Stephen Chong}, + title = {Making {F}ormulog {F}ast: An {A}rgument for {U}nconventional {D}atalog {E}valuation}, + journal = {Proceedings of the {ACM} on Programming Languages}, + year = {2024}, + volume = {4}, + number = {OOPSLA2}, + doi = {10.1145/3689754}, + pages = {314:1--314:30} +} +``` + +The refereed artifact for this paper won a [distinguished artifact](https://2024.splashcon.org/track/splash-2024-oopsla-artifacts#distinguished-artifacts) award; it is [available on Zenodo](https://zenodo.org/records/13372573). + +## Short Papers + +For more information on how Formulog interfaces with external SMT solvers, see the ICLP'20 extended abstract [Datalog-Based Systems Can Use Incremental SMT Solving](https://arxiv.org/html/2009.09158v1/#EPTCS325.7) by Aaron Bembenek, Michael Ballantyne, Michael Greenberg, and Nada Amin ([PDF available here](https://aaronbembenek.github.io/papers/datalog-incr-smt-iclp2020.pdf)). + +For an introduction to Formulog geared towards an audience already well versed in Datalog, see the Datalog 2.0'22 short paper [Formulog: Datalog + SMT + FP](https://ceur-ws.org/Vol-3203/short2.pdf) by Aaron Bembenek, Michael Greenberg, and Stephen Chong. \ No newline at end of file diff --git a/docs/starting.md b/docs/starting.md new file mode 100644 index 00000000..dc0a8bf3 --- /dev/null +++ b/docs/starting.md @@ -0,0 +1,115 @@ +--- +title: Getting Started +layout: page +nav_order: 2 +--- + +# Getting Started + +Thanks for your interest in Formulog! +This page describes how to install Formulog and provides some pointers on writing Formulog programs. + +## Seting up Formulog + +There are three main ways to set up Formulog (listed in increasing order of number of dependencies): + +- Using the Docker image +- Downloading the JAR +- Building from source + +### Use the Docker image + +Prebuilt images are available on [Docker Hub](https://hub.docker.com/r/aaronbembenek/formulog). +If you have Docker installed, you can spin up an Ubuntu container with Formulog, our custom version of Soufflé, and some example programs by running this command (replace `X.Y.Z` with the latest version): + +```bash +docker run -it aaronbembenek/formulog:X.Y.Z # may require sudo +``` + +This should place you in the directory `/root/formulog/`, with a file `formulog.jar` and some example Formulog programs in the `examples/` directory. + +### Download the JAR + +Dependencies: + +- JRE 11+ +- Z3 (v4.12.2 is known to work; other recent versions should work too) + +You can find a prepackaged JAR file in the [Releases section](https://github.com/HarvardPL/formulog/releases) of the GitHub repository. + +### Build from Source + +Dependencies: + +- JDK 11+ +- Maven (v3.9.5 is known to work) +- Z3 (v4.12.2 is known to work; other recent versions should work too) + +To build an executable JAR, run the command `mvn package` from the project source directory. +This will create an executable JAR with a name like `formulog-X.Y.Z-SNAPSHOT-jar-with-dependencies.jar` in the `target/` directory. + +If `mvn package` fails during testing, it might mean that there is a problem connecting with Z3. +You can compile without testing by adding the `-DskipTests` flag. + +### Test Your Setup + +If you have set up Formulog, you should now have an executable JAR at your fingertips. +The JAR expects a single Formulog file as an argument. +Let's test it on the following Formulog program (available in the Docker image and the base repository directory as `examples/greeting.flg`): + +``` +@edb rel entity(string) +entity("Alice"). +entity("Bob"). +entity("World"). + +rel greeting(string) +greeting(Y) :- + entity(X), + some(M) = get_model([`#y[string] #= str_concat("Hello, ", X)`], none), + some(Y) = query_model(#y[string], M). +``` + +Run the following command (where `formulog.jar` is replaced with the name of the executable JAR): + +``` +java -jar formulog.jar examples/greeting.flg --dump-idb +``` + +You should get results like these, indicating that three `greeting` facts have been derived: + +``` +Parsing... +Finished parsing (0.202s) +Type checking... +Finished type checking (0.024s) +Rewriting and validating... +Finished rewriting and validating (0.253s) +Evaluating... +Finished evaluating (0.354s) + +==================== SELECTED IDB RELATIONS ==================== + +---------- greeting (3) ---------- +greeting("Hello, Alice") +greeting("Hello, Bob") +greeting("Hello, World") +``` + +## Writing Formulog Programs + +Now that you have Formulog set up, the fun part starts: writing Formulog programs! + +Check out our [tutorial]({{ site.baseurl }}{% link tutorial/index.md %}) for a walk-through of how to encode a refinement type system in Formulog. +Additional short-ish example programs can be found in the `examples/` directory (in the Docker image or repository base directory). +For examples of larger developments, see the case studies we have used in publications: + +- [a refinement type checker](https://github.com/aaronbembenek/making-formulog-fast/blob/main/benchmarks/dminor/bench.flg) +- [a bottom-up points-to analysis for Java](https://github.com/aaronbembenek/making-formulog-fast/blob/main/benchmarks/scuba/bench.flg) +- [a symbolic executor an LLVM fragment](https://github.com/aaronbembenek/making-formulog-fast/blob/main/benchmarks/symex/bench.flg) + +See the [language reference]({{ site.baseurl }}{% link lang_ref/index.md %}) for details about Formulog constructs. + +Syntax highlighting is available for Visual Studio Code (follow instructions [here](https://github.com/HarvardPL/formulog-syntax)) and Vim (install [misc/flg.vim](https://github.com/HarvardPL/formulog/blob/master/misc/flg.vim)). + +Finally, please raise a [GitHub issue](https://github.com/HarvardPL/formulog/issues/new) if you want to try out Formulog but would like additional information or assistance---we're happy to help! :) diff --git a/docs/tutorial/tutorial.md b/docs/tutorial/index.md similarity index 97% rename from docs/tutorial/tutorial.md rename to docs/tutorial/index.md index c841178b..a646b12a 100644 --- a/docs/tutorial/tutorial.md +++ b/docs/tutorial/index.md @@ -1,14 +1,20 @@ +--- +title: Tutorial +layout: page +nav_order: 3 +--- + # Tutorial: Building a Refinement Type Checker In this tutorial, we'll implement a type checker for a small (but still interesting) refinement type system in Formulog. In particular, we'll implement the declarative, bidirectional type checking rules for the first system in the article [Refinement Types: A Tutorial](https://arxiv.org/abs/2010.07763) by Ranjit Jhala and Niki Vazou [1]. Our hope is that our tutorial gives a good overview of many Formulog features, and a flavor of what it is like to program a nontrivial analysis in Formulog. -### Intended audience +### Intended Audience This tutorial is intended for the PL practitioner (e.g., a grad student, academic, or research engineer). We assume you are familiar with SMT solving, ML-like functional languages, and logic programming, and also have some level of comfort with formal programming language notation (like inference rules). -It is also probably helpful to have read one of our Formulog publications, which should (hopefully) give a good sense for the overall design of the language and its motivations. +It is also probably helpful to have read one of our [Formulog publications]({{ site.base_url }}{% link pubs.md %}), which should (hopefully) give a good sense for the overall design of the language and its motivations. If you are not familiar with refinement type checking or bidirectional type systems, you should probably skim the first few sections of the tutorial by Jhala and Vazou [1] (we'll focus on Sections 3.1 and 3.2). ### Help Improve This Tutorial @@ -743,7 +749,7 @@ We have found this trick to be useful in implementing more complex type systems. ### Check Out More Complex Formulog Examples -For our Formulog publications, we have built three substantial, relatively sophisticated SMT-based case studies. +For our [Formulog publications]({{ site.base_url }}{% link pubs.md %}), we have built three substantial, relatively sophisticated SMT-based case studies. After going through this tutorial, you might find it interesting to check out the code for these case studies. While the analyses are more complex than the tutorial example (and, admittedly, not as well documented as they could be), this tutorial will have hopefully armed you with the information to understand a lot of what's happening in them. @@ -759,7 +765,7 @@ It's neat to be able to program so close to the formal specification; as we've s Furthermore, now that you have a Formulog implementation of the analysis, you can rely on Formulog's language infrastructure to apply both high-level and low-level optimizations to the analysis. For example, Formulog's parallel evaluation techniques can speed up type checking in the presence of multiple code units. -Additionally, the compiler from Formulog to Soufflé makes it possible to automatically derive a decently efficient C++ version of the type checker. +Additionally, the [compiler]({{ site.base_url }}{% link eval_modes/compile.md %}) from Formulog to Soufflé makes it possible to automatically derive a decently efficient C++ version of the type checker. We hope you have enjoyed this dive into Formulog! As we mentioned earlier, please raise a [GitHub issue](https://github.com/HarvardPL/formulog/issues/new) for questions, comments, and feedback :) From 9f403ee832047b89e2cca50b522f1450f99bdbc9 Mon Sep 17 00:00:00 2001 From: Aaron Bembenek Date: Thu, 17 Oct 2024 15:12:21 +1100 Subject: [PATCH 2/2] Update GitHub Action. --- .github/workflows/pages.yml | 69 +++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 .github/workflows/pages.yml diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 00000000..c2b9358a --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,69 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# Sample workflow for building and deploying a Jekyll site to GitHub Pages +name: Deploy Jekyll site to Pages + +on: + push: + branches: ["main"] + paths: ["docs/**"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow one concurrent deployment +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + # Build job + build: + runs-on: ubuntu-latest + defaults: + run: + working-directory: docs + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.3' # Not needed with a .ruby-version file + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + cache-version: 0 # Increment this number if you need to re-download cached gems + working-directory: '${{ github.workspace }}/docs' + - name: Setup Pages + id: pages + uses: actions/configure-pages@v5 + - name: Build with Jekyll + # Outputs to the './_site' directory by default + run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" + env: + JEKYLL_ENV: production + - name: Upload artifact + # Automatically uploads an artifact from the './_site' directory by default + uses: actions/upload-pages-artifact@v3 + with: + path: "docs/_site/" + + # Deployment job + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4