From 492475a1b232497cd03fecba618ed0c09ef38bfc Mon Sep 17 00:00:00 2001 From: Derek Bailey Date: Mon, 23 Dec 2024 15:55:56 -0800 Subject: [PATCH] Add new Docs source files (#8461) --- docs/mkdocs.yml | 66 ++ docs/overrides/main.html | 6 + docs/source/assets/flatbuffers_logo.svg | 318 ++++++++++ docs/source/building.md | 55 ++ docs/source/evolution.md | 256 ++++++++ docs/source/flatc.md | 7 + docs/source/grammar.md | 73 +++ docs/source/index.md | 59 ++ docs/source/schema.md | 650 ++++++++++++++++++++ docs/source/tutorial.md | 765 ++++++++++++++++++++++++ 10 files changed, 2255 insertions(+) create mode 100644 docs/mkdocs.yml create mode 100644 docs/overrides/main.html create mode 100644 docs/source/assets/flatbuffers_logo.svg create mode 100644 docs/source/building.md create mode 100644 docs/source/evolution.md create mode 100644 docs/source/flatc.md create mode 100644 docs/source/grammar.md create mode 100644 docs/source/index.md create mode 100644 docs/source/schema.md create mode 100644 docs/source/tutorial.md diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml new file mode 100644 index 00000000000..83e22f6d990 --- /dev/null +++ b/docs/mkdocs.yml @@ -0,0 +1,66 @@ +site_name: FlatBuffers Docs +docs_dir: source +site_url: https://flatbuffers.dev +theme: + name: material + logo: assets/flatbuffers_logo.svg + custom_dir: overrides + palette: + # Palette toggle for light mode + - scheme: default + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - scheme: slate + toggle: + icon: material/brightness-4 + name: Switch to light mode + + features: + # Allows code block annotations + - content.code.annotate + + # Allows content tabs to link together + - content.tabs.link + + # Expand nav folders by default + - navigation.expand + + # Auto hide the header after scrolling + - header.autohide + + +markdown_extensions: + - admonition + - attr_list + - md_in_html + - pymdownx.critic + - pymdownx.details + - pymdownx.emoji: + 
emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.snippets: + # Allows direct embedded of remote files + url_download: true + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + slugify: !!python/object/apply:pymdownx.slugs.slugify + kwds: + case: lower + - tables + + + +nav: + - Overview: "index.md" + - Tutorial: "tutorial.md" + - Compiler (flatc): + - Building: "building.md" + - Using: "flatc.md" + - Schema (.fbs): + - Overview: "schema.md" + - Evolution: "evolution.md" + - Grammar: "grammar.md" diff --git a/docs/overrides/main.html b/docs/overrides/main.html new file mode 100644 index 00000000000..b97ab3d6c9c --- /dev/null +++ b/docs/overrides/main.html @@ -0,0 +1,6 @@ +{% extends "base.html" %} + + +{% block announce %} + View old documentation at https://dbaileychess.github.io/flatbuffers +{% endblock %} \ No newline at end of file diff --git a/docs/source/assets/flatbuffers_logo.svg b/docs/source/assets/flatbuffers_logo.svg new file mode 100644 index 00000000000..e302c07dff0 --- /dev/null +++ b/docs/source/assets/flatbuffers_logo.svg @@ -0,0 +1,318 @@ + + + + diff --git a/docs/source/building.md b/docs/source/building.md new file mode 100644 index 00000000000..cacca91afe6 --- /dev/null +++ b/docs/source/building.md @@ -0,0 +1,55 @@ +# Building + +## Building with CMake + +The distribution main build system is configured by +[`cmake`](https://www.cmake.org) which allows you to build the project for any +platform. + +### Configuration + +Use `cmake` to configure a project based on your environment and platform. + +=== "Unix" + + ```sh + cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release + ``` + + !!! note + + To use `clang` instead of `gcc` you may need to set prepend some + environment variables e.g. 
`CC=/usr/bin/clang CXX=/usr/bin/clang++ cmake + -G "Unix Makefiles"` + +=== "Windows" + + ```sh + cmake -G "Visual Studio 17 2022" -DCMAKE_BUILD_TYPE=Release + ``` + +=== "MacOS" + + ```sh + cmake -G "Xcode" -DCMAKE_BUILD_TYPE=Release + ``` + +### Building + +Once the project files are generated, build as normal for your platform. + +=== "Unix" + + ```sh + make flatc + ``` + +=== "Windows" + + ```sh + msbuild.exe FlatBuffers.sln + ``` + +## Building with Bazel + +## Building with VCPKG diff --git a/docs/source/evolution.md b/docs/source/evolution.md new file mode 100644 index 00000000000..4b8093d74ca --- /dev/null +++ b/docs/source/evolution.md @@ -0,0 +1,256 @@ +# Evolution + +FlatBuffers enables the [schema](schema.md) to evolve over time while still +maintaining forwards and backwards compatibility with old flatbuffers. + +Some rules must be followed to ensure the evolution of a schema is valid. + +## Rules + +Adding new tables, vectors, or structs to the schema is always allowed. It's only +when you add a new field to a [`table`](schema.md#tables) that certain rules +must be followed. + +### Addition + +**New fields MUST be added to the end of the table definition.** + +This allows older data to still be read correctly (giving you the default value +of the added field if accessed). + +Older code will simply ignore the new field in the flatbuffer. + +!!! tip "Use `id` attributes" + + You can ignore this rule if you use the `id` attribute on all the fields of a table. + +### Removal + +**You MUST NOT remove a field from the schema, even if you don't use it +anymore.** You simply stop writing it to the buffer. + +It's encouraged to mark the field deprecated by adding the `deprecated` +attribute. This will skip the generation of accessors and setters in the code, +to enforce the field not to be used any more. + +### Name Changes + +It's generally OK to change the name of tables and fields, as these are not +serialized to the buffer. 
It may break code that would have to be refactored +with the updated name. + +## Examples + +The following examples use a base schema and attempt to evolve it a few times. +The versions are tracked by `V1`, `V2`, etc., and `CodeV1` means code compiled +against the `V1` schema. + +### Table Evolution + +Let's start with a simple table `T` with two fields. + +```c++ title="Schema V1" +table T { + a:int; + b:int; +} +``` + +=== "Well Evolved" + + First let's extend the table with a new field. + + ```c++ title="Schema V2" + table T { + a:int; + b:int; + c:int; + } + ``` + + This is OK. `CodeV1` reading `V2` data will simply ignore the presence of the + new field `c`. `CodeV2` reading `V1` data will get a default value (0) when + reading `c`. + + ```c++ title="Schema V3" + table T { + a:int (deprecated); + b:int; + c:int; + } + ``` + + This is OK, removing field `a` via deprecation. `CodeV1`, `CodeV2` and `CodeV3` + reading `V3` data will now always get the default value of `a`, since it is not + present. `CodeV3` cannot write `a` anymore. `CodeV3` reading old data (`V1` or + `V2`) will not be able to access the field anymore, since its generated accessors + are omitted. + +=== "Improper Addition" + + Add a new field, but this time at the beginning. + + ```c++ title="Schema V2" + table T { + c:int; + a:int; + b:int; + } + ``` + + This is NOT OK, as it makes `V2` incompatible. `CodeV1` reading `V2` data + will access `a` but will read `c` data. + + `CodeV2` reading `V1` data will access `c` but will read `a` data. + +=== "Improper Deletion" + + Remove a field from the schema. + + ```c++ title="Schema V2" + table T { + b:int; + } + ``` + + This is NOT OK. `CodeV1` reading `V2` data will access `a` but read `b` data. + + `CodeV2` reading `V1` data will access `b` but will read `a` data. + +=== "Proper Reordering" + + Let's add a new field to the beginning, but use `id` attributes. 
+ + ```c++ title="Schema V2" + table T { + c:int (id: 2); + a:int (id: 0); + b:int (id: 1); + } + ``` + + This is OK. This adds a new field at the beginning, but because all the + `id` attributes were added, it is OK. + +=== "Changing Types" + + Let's change the types of the fields. + + ```c++ title="Schema V2" + table T { + a:uint; + b:uint; + } + ``` + + This is MAYBE OK, and only in the case where the type change is the same + width. This is tricky if the `V1` data contained any negative numbers. So + this should be done with care. + +=== "Changing Defaults" + + Let's change the default values of the existing fields. + + ```c++ title="Schema V2" + table T { + a:int = 1; + b:int = 2; + } + ``` + + This is NOT OK. Any `V1` data that did not have a value written to the + buffer relied on generated code to provide the default value. + + There MAY be cases where this is OK, if you control all the producers and + consumers, and you can update them in tandem. + +=== "Renaming Fields" + + Let's change the names of the fields. + + ```c++ title="Schema V2" + table T { + aa:int; + bb:int; + } + ``` + + This is generally OK. Renaming fields will break all code and JSON + files that use this schema, but you can refactor those without affecting the + binary data, since the binary only addresses fields by id and offset, not by + names. + +### Union Evolution + +Let's start with a simple union `U` with two members. + +```c++ title="Schema V1" +union U { + A, + B +} +``` + +=== "Well Evolved" + + Let's add another variant to the end. + + ```c++ title="Schema V2" + union U { + A, + B, + another_a: A + } + ``` + + This is OK. `CodeV1` will not recognize the `another_a`. + +=== "Improper Evolved" + + Let's add another variant to the middle. + + ```c++ title="Schema V2" + union U { + A, + another_a: A, + B + } + ``` + + This is NOT OK. `CodeV1` reading `V2` data will interpret `another_a` as `B`. + `CodeV2` reading `V1` data will interpret `B` as `another_a`. 
+ +=== "Evolved With Discriminant" + + Let's add another variant to the middle, this time adding a union "discriminant". + + ```c++ title="Schema V2" + union U { + A = 1, + another_a: A = 3, + B = 2 + } + ``` + + This is OK. It's like you added it to the end, but using the discriminant + value to physically place it elsewhere in the union. + +## Version Control + +FlatBuffers relies on new field declarations being added at the end, and earlier +declarations to not be removed, but be marked deprecated when needed. We think +this is an improvement over the manual number assignment that happens in +Protocol Buffers (and which is still an option using the `id` attribute +mentioned above). + +One place where this is possibly problematic however is source control. If user +`A` adds a field, generates new binary data with this new schema, then tries to +commit both to source control after user `B` already committed a new field also, +and just auto-merges the schema, the binary files are now invalid compared to +the new schema. + +The solution of course is that you should not be generating binary data before +your schema changes have been committed, ensuring consistency with the rest of +the world. If this is not practical for you, use explicit field `id`s, which +should always generate a merge conflict if two people try to allocate the same +id. diff --git a/docs/source/flatc.md b/docs/source/flatc.md new file mode 100644 index 00000000000..0cca67f3f56 --- /dev/null +++ b/docs/source/flatc.md @@ -0,0 +1,7 @@ +# FlatBuffers Compiler (`flatc`) + +The main compiler for FlatBuffers is called `flatc` and is used as follows: + +```sh +flatc [ GENERATOR_OPTIONS ] [ -o PATH ] [ -I PATH ] FILES... [ -- BINARY_FILES... 
] +``` diff --git a/docs/source/grammar.md b/docs/source/grammar.md new file mode 100644 index 00000000000..ab0a5bf2ad6 --- /dev/null +++ b/docs/source/grammar.md @@ -0,0 +1,73 @@ +## EBNF + +```ebnf +schema = include* ( namespace_decl | type_decl | enum_decl | root_decl | + file_extension_decl | file_identifier_decl | + attribute_decl | rpc_decl | object )* + +include = `include` string_constant `;` + +namespace_decl = `namespace` ident ( `.` ident )* `;` + +attribute_decl = `attribute` ident | `"` ident `"` `;` + +type_decl = ( `table` | `struct` ) ident metadata `{` field_decl+ `}` + +enum_decl = ( `enum` ident `:` type | `union` ident ) metadata `{` +commasep( enumval_decl ) `}` + +root_decl = `root_type` ident `;` + +field_decl = ident `:` type [ `=` scalar ] metadata `;` + +rpc_decl = `rpc_service` ident `{` rpc_method+ `}` + +rpc_method = ident `(` ident `)` `:` ident metadata `;` + +type = `bool` | `byte` | `ubyte` | `short` | `ushort` | `int` | `uint` | + `float` | `long` | `ulong` | `double` | `int8` | `uint8` | `int16` | + `uint16` | `int32` | `uint32`| `int64` | `uint64` | `float32` | + `float64` | `string` | `[` type `]` | ident + +enumval_decl = ident [ `=` integer_constant ] metadata + +metadata = [ `(` commasep( ident [ `:` single_value ] ) `)` ] + +scalar = boolean_constant | integer_constant | float_constant + +object = `{` commasep( ident `:` value ) `}` + +single_value = scalar | string_constant + +value = single_value | object | `[` commasep( value ) `]` + +commasep(x) = [ x ( `,` x )\* ] + +file_extension_decl = `file_extension` string_constant `;` + +file_identifier_decl = `file_identifier` string_constant `;` + +string_constant = `\".*?\"` + +ident = `[a-zA-Z_][a-zA-Z0-9_]*` + +`[:digit:]` = `[0-9]` + +`[:xdigit:]` = `[0-9a-fA-F]` + +dec_integer_constant = `[-+]?[:digit:]+` + +hex_integer_constant = `[-+]?0[xX][:xdigit:]+` + +integer_constant = dec_integer_constant | hex_integer_constant + +dec_float_constant = 
`[-+]?(([.][:digit:]+)|([:digit:]+[.][:digit:]*)|([:digit:]+))([eE][-+]?[:digit:]+)?` + +hex_float_constant = `[-+]?0[xX](([.][:xdigit:]+)|([:xdigit:]+[.][:xdigit:]*)|([:xdigit:]+))([pP][-+]?[:digit:]+)` + +special_float_constant = `[-+]?(nan|inf|infinity)` + +float_constant = dec_float_constant | hex_float_constant | special_float_constant + +boolean_constant = `true` | `false` +``` diff --git a/docs/source/index.md b/docs/source/index.md new file mode 100644 index 00000000000..269fda309b1 --- /dev/null +++ b/docs/source/index.md @@ -0,0 +1,59 @@ +# Overview + +FlatBuffers is an efficient cross platform serialization library for C++, C#, C, +Go, Java, Kotlin, JavaScript, Lobster, Lua, TypeScript, PHP, Python, Rust and +Swift. It was originally created at Google for game development and other +performance-critical applications. + +It is available as Open Source on +[GitHub](https://github.com/google/flatbuffers) under the Apache license v2.0. + +## Why Use FlatBuffers? + +
+ +- :material-clock-fast:{ .lg .middle } **Access to serialized data without + parsing/unpacking** + + --- + Access the data directly without unpacking or parsing. + +- :material-memory:{ .lg .middle } **Memory Efficiency and Speed** + + --- + The only memory needed to access your data is that of the buffer. No heap is + required. + +- :material-compare-horizontal:{ .lg .middle } **Backwards and Forwards + Compatibility** + + --- + The schema can evolve over time while still maintaining compatibility with + older data and code. + +- :material-scale-off:{ .lg .middle } **Small Footprint** + + --- + Minimal dependencies and small code footprint. + +
+ +## Why not use... + +=== "Protocol Buffers" + + Protocol Buffers is indeed relatively similar to FlatBuffers, with the primary + difference being that FlatBuffers does not need a parsing/unpacking step to a + secondary representation before you can access data, often coupled with + per-object memory allocation. The code is an order of magnitude bigger, too. + +=== "JSON" + + JSON is very readable (which is why we use it as our optional text format) and + very convenient when used together with dynamically typed languages (such as + JavaScript). When serializing data from statically typed languages, however, + JSON not only has the obvious drawback of runtime inefficiency, but also forces + you to write more code to access data (counterintuitively) due to its + dynamic-typing serialization system. In this context, it is only a better choice + for systems that have very little to no information ahead of time about what + data needs to be stored. diff --git a/docs/source/schema.md b/docs/source/schema.md new file mode 100644 index 00000000000..379ceac6f63 --- /dev/null +++ b/docs/source/schema.md @@ -0,0 +1,650 @@ +# Schema + +The syntax of the schema language (aka IDL, +[Interface Definition Language](https://en.wikipedia.org/wiki/Interface_description_language)) +should look quite familiar to users of any of the C family of languages, and +also to users of other IDLs. 
Let's look at an example first: + +```c title="monster.fbs" linenums="1" +// example IDL file + +namespace MyGame; + +attribute "priority"; + +enum Color : byte { Red = 1, Green, Blue } + +union Any { Monster, Weapon, Pickup } + +struct Vec3 { + x:float; + y:float; + z:float; +} + +table Monster { + pos:Vec3; + mana:short = 150; + hp:short = 100; + name:string; + friendly:bool = false (deprecated, priority: 1); + inventory:[ubyte]; + color:Color = Blue; + test:Any; +} + +table Weapon {} +table Pickup {} + +root_type Monster; +``` + +## Tables + +Tables are the main way of defining objects in FlatBuffers. + +```c title="monster.fbs - Example Table" linenums="17" +table Monster { + pos:Vec3; + mana:short = 150; + hp:short = 100; + name:string; + friendly:bool = false (deprecated, priority: 1); + inventory:[ubyte]; + color:Color = Blue; + test:Any; +} +``` + +They consist of a name (here `Monster`) and a list of [fields](#fields). This +field list can be appended to (and deprecated from) while still maintaining +compatibility. + +### Fields + +Table fields have a name identifier, a [type](#types), optional default value, +optional [attributes](#attributes) and end with a `;`. See the +[grammar](grammar.md) for full details. + +```ebnf +field_decl = ident `:` type [ `=` scalar ] metadata `;` +``` + +Fields do not have to appear in the wire representation, and you can choose to +omit fields when constructing an object. You have the flexibility to add fields +without fear of bloating your data. This design is also FlatBuffers' mechanism +for forward and backwards compatibility. + +There are three, mutually exclusive, reactions to the non-presence of a table's +field in the binary data. + +#### 1. Default + +Default value fields will return the default value as defined in the schema. If +the default value is not specified in the schema, it will be `0` for scalar +types, or `null` for other types. 
+ +```c++ +mana:short = 150; +hp:short; +inventory:[ubyte]; +``` + +Here `mana` would default to the value `150`, `hp` to value `0`, and `inventory` +to `null`, if those fields are not set. + +Only scalar values can have explicit defaults; non-scalar fields (strings, +vectors, tables) are `null` when not present. + +This is the normal mode that fields will take. + +??? danger "Don't change Default values" + + You generally do not want to change default values after they're initially + defined. Fields that have the default value are not actually stored in the + serialized data (see also Gotchas below). Values explicitly written by code + generated by the old schema version, if they happen to be the default, will + be read as a different value by code generated with the new schema. This is + slightly less bad when converting an optional scalar into a default valued + scalar since non-presence would not be overloaded with a previous default value. + There are situations, however, where this may be desirable, especially if you + can ensure a simultaneous rebuild of all code. + +#### 2. Optional + +Optional value fields will return some form of `null` in the language generated. + +=== "C++" + + ```c++ + std::optional field; + ``` + +For optional scalars, just set the field default value to `null`. If the +producer of the buffer does not explicitly set that field, it will be marked +`null`. + +```c++ + hp:short = null; +``` + +!!! note + + Not every language supports optional scalars yet. + +#### 3. Required + +Required valued fields will cause an error if they are not set. The FlatBuffers +verifier would consider the whole buffer invalid. + +This is enabled by the [`required` attribute](#required-1) on the field. + +``` + hp:short (required) +``` + +You cannot have `required` set with an explicit default value; it will result in +a compiler error. 
+ +## Structs + +Similar to a table, `structs` consist of fields, but all fields are required (so +no defaults either), and fields may not be added or be deprecated. + +```c title="monster.fbs - Example Struct" linenums="11" +struct Vec3 { + x:float; + y:float; + z:float; +} +``` + +Structs may only contain scalars or other structs. Use this for simple objects +where you are very sure no changes will ever be made (as is quite clear in the +example `Vec3`). Structs use less memory than tables and are even faster to +access (they are always stored in-line in their parent object, and use no +virtual table). + +### Arrays + +Arrays are a convenience short-hand for a fixed-length collection of elements. +Arrays allow the following syntax, while maintaining binary equivalency. + +
+ +- **Normal Syntax** + + === + + ```c++ + struct Vec3 { + x:float; + y:float; + z:float; + } + ``` + +- **Array Syntax** + + === + + ```c++ + struct Vec3 { + v:[float:3]; + } + ``` + +
+ +Arrays are currently only supported in a `struct`. + +## Types + +The following are the built-in types that can be used in FlatBuffers. + +### Scalars + +The standard assortment of fixed sized scalars are available. There are no +variable sized integers (e.g., `varints`). + +| Size | Signed | Unsigned | Floating Point | +| ------ | ----------------- | ------------------- | -------------------- | +| 8-bit | `byte`, `bool` | `ubyte` | | +| 16-bit | `short` (`int16`) | `ushort` (`uint16`) | +| 32-bit | `int` (`int32`) | `uint` (`uint32`) | `float` (`float32`) | +| 64-bit | `long` (`int64`) | `ulong` (`uint64`) | `double` (`float64`) | + +!!! note "Alias Types" + + The type names in parentheses are alias names such that for example `uint8` + can be used in place of `ubyte`, and `int32` can be used in place of `int` + without affecting code generation. + +### Non-scalars + +#### Vectors + +Vector of any other type (denoted with `[type]`). + +```c++ +inventory:[ubyte]; +``` + +!!! note "Nesting vectors" + + Nesting vectors is not supported, instead you can wrap the inner vector with + a table. + + ``` + table nest{ + a:[ubyte] + } + + table monster { + a:[nest] + } + ``` + +#### Strings + +Strings (indicated by `string`) are zero-terminated strings, prefixed by their +length. Strings may only hold UTF-8 or 7-bit ASCII. For other text encodings or +general binary data use vectors (`[byte]` or `[ubyte]`) instead. + +```c++ +name:string; +``` + +## Enums + +Define a sequence of named constants, each with a given value, or increasing by +one from the previous one. The default first value is `0`. As you can see in the +enum declaration, you specify the underlying integral type of the enum with `:` +(in this case `byte`), which then determines the type of any fields declared +with this enum type. + +Only integer types are allowed, i.e. `byte`, `ubyte`, `short` `ushort`, `int`, +`uint`, `long` and `ulong`. 
+ +Typically, enum values should only ever be added, never removed (there is no +deprecation for enums). This requires code to handle forwards compatibility +itself, by handling unknown enum values. + +## Unions + +Unions share a lot of properties with enums, but instead of new names for +constants, you use names of tables. You can then declare a union field, which +can hold a reference to any of those types, and additionally a field with the +suffix `_type` is generated that holds the corresponding enum value, allowing +you to know which type to cast to at runtime. + +It's possible to give an alias name to a type union. This way a type can even be +used to mean different things depending on the name used: + +```txt +table PointPosition { x:uint; y:uint; } +table MarkerPosition {} +union Position { + Start:MarkerPosition, + Point:PointPosition, + Finish:MarkerPosition +} +``` + +Unions contain a special `NONE` marker to denote that no value is stored so that +name cannot be used as an alias. + +Unions are a good way to be able to send multiple message types as a FlatBuffer. +Note that because a union field is really two fields, it must always be part of +a table, it cannot be the root of a FlatBuffer by itself. + +If you have a need to distinguish between different FlatBuffers in a more +open-ended way, for example for use as files, see the file identification +feature below. + +There is an experimental support only in C++ for a vector of unions (and types). +In the example IDL file above, use [Any] to add a vector of Any to Monster +table. There is also experimental support for other types besides tables in +unions, in particular structs and strings. There's no direct support for scalars +in unions, but they can be wrapped in a struct at no space cost. + +## Namespaces + +These will generate the corresponding namespace in C++ for all helper code, and +packages in Java. You can use `.` to specify nested namespaces / packages. 
+ +## Includes + +You can include other schemas files in your current one, e.g.: + +```txt +include "mydefinitions.fbs"; +``` + +This makes it easier to refer to types defined elsewhere. `include` +automatically ensures each file is parsed just once, even when referred to more +than once. + +When using the `flatc` compiler to generate code for schema definitions, only +definitions in the current file will be generated, not those from the included +files (those you still generate separately). + +## Root type + +This declares what you consider to be the root table of the serialized data. +This is particularly important for parsing JSON data, which doesn't include +object type information. + +## File identification and extension + +Typically, a FlatBuffer binary buffer is not self-describing, i.e. it needs you +to know its schema to parse it correctly. But if you want to use a FlatBuffer as +a file format, it would be convenient to be able to have a "magic number" in +there, like most file formats have, to be able to do a sanity check to see if +you're reading the kind of file you're expecting. + +Now, you can always prefix a FlatBuffer with your own file header, but +FlatBuffers has a built-in way to add an identifier to a FlatBuffer that takes +up minimal space, and keeps the buffer compatible with buffers that don't have +such an identifier. + +You can specify in a schema, similar to `root_type`, that you intend for this +type of FlatBuffer to be used as a file format: + +```txt +file_identifier "MYFI"; +``` + +Identifiers must always be exactly 4 characters long. These 4 characters will +end up as bytes at offsets 4-7 (inclusive) in the buffer. + +For any schema that has such an identifier, `flatc` will automatically add the +identifier to any binaries it generates (with `-b`), and generated calls like +`FinishMonsterBuffer` also add the identifier. 
If you have specified an +identifier and wish to generate a buffer without one, you can always still do so +by calling `FlatBufferBuilder::Finish` explicitly. + +After loading a buffer, you can use a call like `MonsterBufferHasIdentifier` to +check if the identifier is present. + +Note that this is best for open-ended uses such as files. If you simply wanted +to send one of a set of possible messages over a network for example, you'd be +better off with a union. + +Additionally, by default `flatc` will output binary files as `.bin`. This +declaration in the schema will change that to whatever you want: + +```txt +file_extension "ext"; +``` + +## RPC interface declarations + +You can declare RPC calls in a schema, that define a set of functions that take +a FlatBuffer as an argument (the request) and return a FlatBuffer as the +response (both of which must be table types): + +```txt +rpc_service MonsterStorage { + Store(Monster):StoreResponse; + Retrieve(MonsterId):Monster; +} +``` + +What code this produces and how it is used depends on language and RPC system +used, there is preliminary support for GRPC through the `--grpc` code generator, +see `grpc/tests` for an example. + +## Comments & documentation + +May be written as in most C-based languages. Additionally, a triple comment +(`///`) on a line by itself signals that a comment is documentation for whatever +is declared on the line after it (table/struct/field/enum/union/element), and +the comment is output in the corresponding C++ code. Multiple such lines per +item are allowed. + +## Attributes + +Attributes may be attached to a declaration, behind a field/enum value, or after +the name of a table/struct/enum/union. These may either have a value or not. +Some attributes like `deprecated` are understood by the compiler; user defined +ones need to be declared with the attribute declaration (like `priority` in the +example above), and are available to query if you parse the schema at runtime. 
+This is useful if you write your own code generators/editors etc., and you wish +to add additional information specific to your tool (such as a help text). + +Currently understood attributes: + +- `id: n` (on a table field): manually set the field identifier to `n`. If you + use this attribute, you must use it on ALL fields of this table, and the + numbers must be a contiguous range from 0 onwards. Additionally, since a union + type effectively adds two fields, its id must be that of the second field (the + first field is the type field and not explicitly declared in the schema). For + example, if the last field before the union field had id 6, the union field + should have id 8, and the union's type field will implicitly be 7. IDs allow + the fields to be placed in any order in the schema. When a new field is added + to the schema it must use the next available ID. +- `deprecated` (on a field): do not generate accessors for this field anymore, + code should stop using this data. Old data may still contain this field, but + it won't be accessible anymore by newer code. Note that if you deprecate a + field that was previously required, old code may fail to validate new data (when + using the optional verifier). + +### `required` + +- `required` (on a non-scalar table field): this field must always be set. By + default, fields do not need to be present in the binary. This is desirable, as + it helps with forwards/backwards compatibility, and flexibility of data + structures. By specifying this attribute, you make non-presence an error + for both reader and writer. The reading code may access the field directly, + without checking for null. If the constructing code does not initialize this + field, they will get an assert, and also the verifier will fail on buffers + that have missing required fields. 
Both adding and removing this attribute may + be forwards/backwards incompatible as readers will be unable read old or new + data, respectively, unless the data happens to always have the field set. +- `force_align: size` (on a struct): force the alignment of this struct to be + something higher than what it is naturally aligned to. Causes these structs to + be aligned to that amount inside a buffer, IF that buffer is allocated with + that alignment (which is not necessarily the case for buffers accessed + directly inside a `FlatBufferBuilder`). Note: currently not guaranteed to have + an effect when used with `--object-api`, since that may allocate objects at + alignments less than what you specify with `force_align`. +- `force_align: size` (on a vector): force the alignment of this vector to be + something different than what the element size would normally dictate. Note: + Now only work for generated C++ code. +- `bit_flags` (on an unsigned enum): the values of this field indicate bits, + meaning that any unsigned value N specified in the schema will end up + representing 1< weapon_one_name = builder.CreateString("Sword"); + flatbuffers::Offset weapon_two_name = builder.CreateString("Axe"); + ``` + +=== "C#" + + ```c# + Offset weaponOneName = builder.CreateString("Sword"); + Offset weaponTwoName = builder.CreateString("Axe"); + ``` + +This performs the actual serialization (the string data is copied into the +backing array) and returns an offset. Think of the offset as a handle to that +reference. It's just a "typed" numerical offset to where that data resides in +the buffer. + +#### Tables + +Now that we have some names serialized, we can serialize `Weapons`. Here we will +use one of the generated helper functions that was emitted by `flatc`. The +`CreateWeapon` function takes in the Builder object, as well as the offset to +the weapon's name and a numerical value for the damage field. 
+ +=== "C++" + + ```c++ + short weapon_one_damage = 3; + short weapon_two_damage = 5; + + // Use the `CreateWeapon()` shortcut to create Weapons with all the fields set. + flatbuffers::Offset<Weapon> sword = + CreateWeapon(builder, weapon_one_name, weapon_one_damage); + flatbuffers::Offset<Weapon> axe = + CreateWeapon(builder, weapon_two_name, weapon_two_damage); + ``` + +=== "C#" + + ```c# + short weaponOneDamage = 3; + short weaponTwoDamage = 5; + + // Use the `CreateWeapon()` helper function to create the weapons, since we set every field. + Offset<Weapon> sword = + Weapon.CreateWeapon(builder, weaponOneName, weaponOneDamage); + Offset<Weapon> axe = + Weapon.CreateWeapon(builder, weaponTwoName, weaponTwoDamage); + ``` + +!!! Tip + + The generated functions from `flatc`, like `CreateWeapon`, are just composed + of various Builder API methods. So it's not required to use the generated + code, but it does make things much simpler and more compact. + +Just like the `CreateString` methods, the table serialization functions return +an offset to the location of the serialized `Weapon` table. + +Now that we have some `Weapons` serialized, we can serialize a `Monster`. +Looking at the schema again, this table has a lot more fields of various types. +Some of these need to be serialized beforehand, for the same reason we +serialized the name string before the weapon table. + +!!! note inline end + + There is no prescribed ordering of which table fields must be serialized + first; you can serialize them in any order you want. You can also skip + serializing a field to provide a `null` value; this is done by using a 0 offset value. + +```c title="monster.fbs" linenums="15" +table Monster { + pos:Vec3; + mana:short = 150; + hp:short = 100; + name:string; + friendly:bool = false (deprecated); + inventory:[ubyte]; + color:Color = Blue; + weapons:[Weapon]; + equipped:Equipment; + path:[Vec3]; +} +``` + +#### Vectors + +The `weapons` field is a `vector` of `Weapon` tables.
We already have two +`Weapons` serialized, so we just need to serialize a `vector` of those offsets. +The Builder provides multiple ways to create `vectors`. + +=== "C++" + + ```c++ + // Create a std::vector of the offsets we had previously made. + std::vector<flatbuffers::Offset<Weapon>> weapons_vector; + weapons_vector.push_back(sword); + weapons_vector.push_back(axe); + + // Then serialize that std::vector into the buffer and again get an Offset + // to that vector. Use `auto` here since the full type is long, and it is just + // a "typed" number. + auto weapons = builder.CreateVector(weapons_vector); + ``` + +=== "C#" + + ```c# + // Create an array of the two weapon offsets. + var weaps = new Offset<Weapon>[2]; + weaps[0] = sword; + weaps[1] = axe; + + // Pass the `weaps` array into the `CreateWeaponsVector()` method to create + // a FlatBuffer vector. + var weapons = Monster.CreateWeaponsVector(builder, weaps); + ``` + +While we are at it, let us serialize the other two vector fields: the +`inventory` field is just a vector of scalars, and the `path` field is a vector +of structs (which are scalar data as well). So these vectors can be serialized a +bit more directly. + +=== "C++" + + ```c++ + // Construct an array of two `Vec3` structs. + Vec3 points[] = { Vec3(1.0f, 2.0f, 3.0f), Vec3(4.0f, 5.0f, 6.0f) }; + + // Serialize it as a vector of structs. + flatbuffers::Offset<flatbuffers::Vector<const Vec3*>> path = + builder.CreateVectorOfStructs(points, 2); + + // Create a `vector` representing the inventory of the Orc. Each number + // could correspond to an item that can be claimed after he is slain. + unsigned char treasure[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + flatbuffers::Offset<flatbuffers::Vector<unsigned char>> inventory = + builder.CreateVector(treasure, 10); + + ``` + +=== "C#" + + ```c# + // Start building a path vector of length 2.
+ Monster.StartPathVector(builder, 2); + + // Serialize the individual Vec3 structs + Vec3.CreateVec3(builder, 1.0f, 2.0f, 3.0f); + Vec3.CreateVec3(builder, 4.0f, 5.0f, 6.0f); + + // End the vector to get the offset + VectorOffset path = builder.EndVector(); + + // Create a `vector` representing the inventory of the Orc. Each number + // could correspond to an item that can be claimed after he is slain. + // Note: Since we prepend the bytes, this loop iterates in reverse order. + Monster.StartInventoryVector(builder, 10); + for (int i = 9; i >= 0; i--) + { + builder.AddByte((byte)i); + } + VectorOffset inventory = builder.EndVector(); + ``` + +#### Unions + +The last non-scalar data for the `Monster` table is the `equipped` `union` +field. For this case, we will reuse an already serialized `Weapon` (the only +type in the union), without needing to reserialize it. Union fields implicitly +add a hidden `_type` field that stores the type of value stored in the union. +When serializing a union, you must explicitly set this type field, along with +providing the union value. + +We will also serialize the other scalar data at the same time, since we have all +the necessary values and Offsets to make a `Monster`. + +=== "C++" + + ```c++ + // Create the remaining data needed for the Monster. + auto name = builder.CreateString("Orc"); + + // Create the position struct + auto position = Vec3(1.0f, 2.0f, 3.0f); + + // Set his hit points to 300 and his mana to 150. + int hp = 300; + int mana = 150; + + // Finally, create the monster using the `CreateMonster` helper function + // to set all fields. + // + // Here we set the union field by using the `.Union()` method of the + // `Offset<Weapon>` axe we already serialized above. We just have to specify + // which type of object we put in the union, and do that with the + // auto-generated `Equipment_Weapon` enum.
+ flatbuffers::Offset<Monster> orc = + CreateMonster(builder, &position, mana, hp, name, inventory, + Color_Red, weapons, Equipment_Weapon, axe.Union(), + path); + + ``` + +=== "C#" + + ```c# + // Create the remaining data needed for the Monster. + var name = builder.CreateString("Orc"); + + // Create our monster using `StartMonster()` and `EndMonster()`. + Monster.StartMonster(builder); + Monster.AddPos(builder, Vec3.CreateVec3(builder, 1.0f, 2.0f, 3.0f)); + Monster.AddHp(builder, (short)300); + Monster.AddName(builder, name); + Monster.AddInventory(builder, inventory); + Monster.AddColor(builder, Color.Red); + Monster.AddWeapons(builder, weapons); + // For union fields, we explicitly add the auto-generated enum for the type + // of value stored in the union. + Monster.AddEquippedType(builder, Equipment.Weapon); + // And we just use the `.Value` property of the already serialized axe. + Monster.AddEquipped(builder, axe.Value); // Axe + Monster.AddPath(builder, path); + Offset<Monster> orc = Monster.EndMonster(builder); + ``` + +!!! warning + + When serializing a table, you must fully serialize it before attempting to + serialize another reference type. If you try to serialize in a nested + manner, you will get an assert/exception/panic depending on your language. + +### Finishing + +At this point, we have serialized a `Monster` we've named "orc" to the +flatbuffer and have its offset. The `root_type` of the schema is also a +`Monster`, so we have everything we need to finish the serialization step. + +This is done by calling the appropriate `finish` method on the Builder, passing +in the orc offset to indicate this `table` is the "entry" point when +deserializing the buffer later. + +=== "C++" + + ```c++ + // Call `Finish()` to instruct the builder that this monster is complete. + // You could also call `FinishMonsterBuffer(builder, orc);` + builder.Finish(orc); + ``` + +=== "C#" + + ```c# + // Call `Finish()` to instruct the builder that this monster is complete.
+ // You could also call `Monster.FinishMonsterBuffer(builder, orc);` + builder.Finish(orc.Value); + ``` + +Once you finish a Builder, you can no longer serialize more data to it. + +#### Buffer Access + +The flatbuffer is now ready to be stored somewhere, sent over the network, +compressed, or whatever you would like to do with it. You access the raw buffer +like so: + +=== "C++" + + ```c++ + // This must be called after `Finish()`. + uint8_t *buf = builder.GetBufferPointer(); + + // Returns the size of the buffer that `GetBufferPointer()` points to. + int size = builder.GetSize(); + ``` + +=== "C#" + + ```c# + // This must be called after `Finish()`. + // + // The data in this ByteBuffer does NOT start at 0, but at dataBuffer.Position. + // The end of the data is marked by dataBuffer.Length, so the size is + // dataBuffer.Length - dataBuffer.Position. + FlatBuffers.ByteBuffer dataBuffer = builder.DataBuffer; + + // Alternatively this copies the above data out of the ByteBuffer for you: + byte[] buf = builder.SizedByteArray(); + ``` + +Now you can write the bytes to a file or send them over the network. The buffer +stays valid until the Builder is cleared or destroyed. + +!!! warning "BINARY Mode" + + Make sure your file mode (or transfer protocol) is set to BINARY, and not + TEXT. If you try to transfer a flatbuffer in TEXT mode, the buffer will be + corrupted, and the problem will be hard to diagnose. + +## Deserialization + +!!! note "Misnomer" + + Deserialization is a bit of a misnomer, since FlatBuffers doesn't + deserialize the whole buffer when accessed. It just "decodes" the data that + is requested, leaving all the other data untouched. It is up to the + application to decide if the data is copied out or even read in the first + place. However, we continue to use the word `deserialize` to mean accessing + data from a binary flatbuffer. + +Now that we have successfully created an orc FlatBuffer, the data can be saved, +sent over a network, etc.
At some point, the buffer will be accessed to obtain +the underlying data. + +The same application setup used for serialization is needed for deserialization +(see [application integration](#application-integration)). + +### Root Access + +All access to the data in the flatbuffer must first go through the root object. +There is only one root object per flatbuffer. The generated code provides +functions to get the root object given the buffer. + +=== "C++" + + ```c++ + uint8_t *buffer_pointer = /* the data you just read */; + + // Get a view to the root object inside the buffer. + const Monster *monster = GetMonster(buffer_pointer); + ``` + +=== "C#" + + ```c# + byte[] bytes = /* the data you just read */; + + // Get a view to the root object inside the buffer. + Monster monster = Monster.GetRootAsMonster(new ByteBuffer(bytes)); + ``` + +!!! warning "BINARY mode" + + Again, make sure you read the bytes in BINARY mode, otherwise the buffer may + be corrupted. + +In most languages, the returned object is just a "view" of the data with helpful +accessors. Data is typically not copied out of the backing buffer. This also +means the backing buffer must remain alive for the duration of the views. + +### Table Access + +If you look in the generated files emitted by `flatc`, you will see it generated, +for each `table`, accessors for all of its non-`deprecated` fields. For example, +some of the accessors of the `Monster` root table would look like: + +=== "C++" + + ```c++ + auto hp = monster->hp(); + auto mana = monster->mana(); + auto name = monster->name()->c_str(); + ``` + +=== "C#" + + ```c# + // For C#, unlike most other languages supported by FlatBuffers, most values + // (except for vectors and unions) are available as properties instead of + // accessor methods. + var hp = monster.Hp; + var mana = monster.Mana; + var name = monster.Name; + ``` + +These accessors should hold the values `300`, `150`, and `"Orc"` respectively. + +!!!
note "Default Values" + + The default value of `150` wasn't stored in the `mana` field, but we are + still able to retrieve it. That is because the generated accessors return a + hard-coded default value when they don't find the value in the buffer. + +#### Nested Object Access + +Accessing nested objects is very similar, with the nested field pointing to +another object type. Be careful: the field could be `null` if not present. + +For example, to access the `pos` `struct`, which is of type `Vec3`, you would do: + +=== "C++" + + ```c++ + auto pos = monster->pos(); + auto x = pos->x(); + auto y = pos->y(); + auto z = pos->z(); + ``` + +=== "C#" + + ```c# + var pos = monster.Pos.Value; + var x = pos.X; + var y = pos.Y; + var z = pos.Z; + ``` + +Where `x`, `y`, and `z` will contain `1.0`, `2.0`, and `3.0` respectively. + +### Vector Access + +Similarly, we can access elements of the `inventory` `vector` by indexing it. +You can also iterate over the length of the vector. + +=== "C++" + + ```c++ + const flatbuffers::Vector<uint8_t> *inv = monster->inventory(); + auto inv_len = inv->size(); + auto third_item = inv->Get(2); + ``` + +=== "C#" + + ```c# + int invLength = monster.InventoryLength; + var thirdItem = monster.Inventory(2); + ``` + +For vectors of tables, you can access the elements like any other vector, except +you need to handle the result as a FlatBuffer table. Here we access the +`weapons` vector that houses `Weapon` `tables`. + +=== "C++" + + ```c++ + const flatbuffers::Vector<flatbuffers::Offset<Weapon>> *weapons = monster->weapons(); + auto weapon_len = weapons->size(); + auto second_weapon_name = weapons->Get(1)->name()->str(); + auto second_weapon_damage = weapons->Get(1)->damage(); + ``` + +=== "C#" + + ```c# + int weaponsLength = monster.WeaponsLength; + var secondWeaponName = monster.Weapons(1).Value.Name; + var secondWeaponDamage = monster.Weapons(1).Value.Damage; + ``` + +### Union Access + +Lastly, we can access our `equipped` `union` field.
Just like when we created +the union, we need to get both parts of the union: the type and the data. + +We can access the type to dynamically cast the data as needed (since the union +only stores a FlatBuffer `table`). + +=== "C++" + + ```c++ + auto union_type = monster->equipped_type(); + + if (union_type == Equipment_Weapon) { + // Requires `static_cast` to type `const Weapon*`. + auto weapon = static_cast<const Weapon*>(monster->equipped()); + + auto weapon_name = weapon->name()->str(); // "Axe" + auto weapon_damage = weapon->damage(); // 5 + } + ``` + +=== "C#" + + ```c# + var unionType = monster.EquippedType; + + if (unionType == Equipment.Weapon) { + var weapon = monster.Equipped<Weapon>().Value; + + var weaponName = weapon.Name; // "Axe" + var weaponDamage = weapon.Damage; // 5 + } + ```