From 733b4e3a9f30d1c4b886d594760d5d75efa5fac5 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Mon, 5 Feb 2024 16:06:06 +0100 Subject: [PATCH 1/6] Add compression to object store This commit also cleans up formatting of the spec. Signed-off-by: Tomasz Pietrek --- adr/ADR-20.md | 85 ++++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/adr/ADR-20.md b/adr/ADR-20.md index 2369a40c..5002bc62 100644 --- a/adr/ADR-20.md +++ b/adr/ADR-20.md @@ -12,6 +12,7 @@ |--------|----|------|----| |1 |2021-11-03|@scottf|Initial design| |2 |2023-06-14|@Jarema|Add metadata| +|3 |2024-02-05|@Jarema|Add Compression| ## Context @@ -21,16 +22,17 @@ This document describes a design of a JetStream backed object store. This ADR is We intend to hit a basic initial feature set as below, with some future facing goals as indicated: -Initial feature list: +Current feature list: - Represent an object store. - Store a large quantity of related bytes in chunks as a single object. - Retrieve all the bytes from a single object - Store metadata regarding each object -- Store multiple objects in a single store +- Store multiple objects in a single store - Ability to specify chunk size - Ability to delete an object -- Ability to understand the state of the object store. +- Ability to understand the state of the object store +- Store compression (via Stream compression) Possible future features @@ -39,10 +41,10 @@ Possible future features - Archiving (tiered storage) - Searching/Indexing (tagging) - Versioning / Revisions -- Overriding digest algorithm +- Overriding digest algorithm - Capturing Content-Type (mime type) - Per chunk Content-Encoding (i.e. gzip) -- Read an individual chunk. +- Read an individual chunk. 
## Basic Design @@ -57,7 +59,7 @@ Possible future features Protocol Naming Conventions are fully defined in [ADR-6](ADR-6.md) ### Object Store -The object store name or bucket name (`bucket`) will be used to formulate a stream name +The object store name or bucket name (`bucket`) will be used to formulate a stream name and is specified as: `restricted-term` (1 or more of `A-Z, a-z, 0-9, dash, underscore`) ### Object Id @@ -71,9 +73,9 @@ Currently `SHA-256` is the only supported digest. Please use the uppercase form when specifying the digest as in `SHA-256=IdgP4UYMGt47rgecOqFoLrd24AXukHf5-SVzqQ5Psg8=`. ### Modified Time -Modified time is never stored. +Modified time is never stored. * When putting an object or link into the store, the client should populate the ModTime with the current UTC time before returning it to the user. -* When getting an object or getting an object or link's info, the client should populate the ModTime with message time from the server. +* When getting an object or getting an object or link's info, the client should populate the ModTime with message time from the server. ### Default Settings @@ -98,6 +100,7 @@ type ObjectStoreConfig struct { Storage StorageType // stream storate_type Replicas int // stream replicas Placement Placement // stream placement + Compression bool // stream compression } ``` @@ -132,7 +135,8 @@ type ObjectStoreConfig struct { "placement": { "cluster": "clstr", "tags": ["tag1", "tag2"] - } + }, + compression: true } ``` @@ -144,7 +148,7 @@ type ObjectStoreConfig struct { type ObjectLink struct { // Bucket is the name of the other object store. Bucket string `json:"bucket"` - + // Name can be used to link to a single object. // If empty means this is a link to the whole store, like a directory. Name string `json:"name,omitempty"` @@ -160,7 +164,7 @@ type ObjectMetaOptions struct { } ``` -### ObjectMeta +### ObjectMeta Object Meta is high level information about an object. 
@@ -176,31 +180,31 @@ type ObjectMeta struct { } ``` -### ObjectInfo +### ObjectInfo -Object Info is meta plus instance information. -The fields in ObjectMeta are serialized in line as if they were -direct fields of ObjectInfo +Object Info is meta plus instance information. +The fields in ObjectMeta are serialized in line as if they were +direct fields of ObjectInfo ```go type ObjectInfo struct { ObjectMeta - + Bucket string `json:"bucket"` - + NUID string `json:"nuid"` - + // the total object size in bytes Size uint64 `json:"size"` - + ModTime time.Time `json:"mtime"` - + // the total number of chunks Chunks uint32 `json:"chunks"` - + // as in http, = Digest string `json:"digest,omitempty"` - + Deleted bool `json:"deleted,omitempty"` } ``` @@ -248,37 +252,40 @@ The status of an object type ObjectStoreStatus interface { // Bucket is the name of the bucket Bucket() string - + // Description is the description supplied when creating the bucket Description() string // Bucket-level metadata Metadata() map[string]string - + // TTL indicates how long objects are kept in the bucket TTL() time.Duration - + // Storage indicates the underlying JetStream storage technology used to store data Storage() StorageType - + // Replicas indicates how many storage replicas are kept for the data in the bucket Replicas() int - + // Sealed indicates the stream is sealed and cannot be modified in any way Sealed() bool - + // Size is the combined size of all data in the bucket including metadata, in bytes Size() uint64 - - // BackingStore provides details about the underlying storage. + + // BackingStore provides details about the underlying storage. 
// Currently the only supported value is `JetStream` BackingStore() string -} + + // IsCompressed indicates if the data is compressed on disk + IsCompressed() bool +} ``` ## Functional Interfaces -### ObjectStoreManager +### ObjectStoreManager Object Store Manager creates, loads and deletes Object Stores @@ -295,7 +302,7 @@ CreateObjectStore(cfg ObjectStoreConfig) -> ObjectStore DeleteObjectStore(bucket string) ``` -### ObjectStore +### ObjectStore Storing large objects efficiently. API are required unless noted as "Optional/Convenience". @@ -320,7 +327,7 @@ PutFile(file [string/file reference]) -> ObjectInfo _Notes_ On convenience methods accepting file information only, consider that the reference could have -operating specific path information that is not transferable. One solution would be to only +operating specific path information that is not transferable. One solution would be to only use the actual file name as the object name and discard any path information. **Get** @@ -347,8 +354,8 @@ GetFile(name string, file string) **GetInfo** -GetInfo will retrieve the current information for the object. -* Do not return info for deleted objects, except with optional convenience methods. +GetInfo will retrieve the current information for the object. +* Do not return info for deleted objects, except with optional convenience methods. ``` GetInfo(name string) -> ObjectInfo @@ -424,11 +431,11 @@ Status() -> ObjectStoreStatus ### ObjectStore Links -Links are currently under discussion whether they are necessary. +Links are currently under discussion whether they are necessary. Here is the required API as proposed. -Please note that in this version of the api, it is possible that +Please note that in this version of the api, it is possible that `obj ObjectInfo` or `bucket ObjectStore` could be stale, meaning their state -has changed since they were read, i.e. the object was deleted after it's info was read. +has changed since they were read, i.e. 
the object was deleted after it's info was read. **AddLink** From 317072a5f8c250f1c1d68d5f13c0116eb8f63431 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Mon, 5 Feb 2024 16:15:24 +0100 Subject: [PATCH 2/6] Add Object Store to spec list Signed-off-by: Tomasz Pietrek --- README.md | 7 ++++--- adr/ADR-20.md | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bdcf2802..86bd4821 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ This repository captures Architecture, Design Specifications and Feature Guidanc |[ADR-17](adr/ADR-17.md)|jetstream, client|Ordered Consumer| |[ADR-18](adr/ADR-18.md)|client|URL support for all client options| |[ADR-19](adr/ADR-19.md)|jetstream, client, kv, objectstore|API prefixes for materialized JetStream views| -|[ADR-20](adr/ADR-20.md)|jetstream, client, objectstore|JetStream based Object Stores| +|[ADR-20](adr/ADR-20.md)|jetstream, client, objectstore, spec|JetStream based Object Stores| |[ADR-21](adr/ADR-21.md)|client|NATS Configuration Contexts| |[ADR-22](adr/ADR-22.md)|jetstream, client|JetStream Publish Retries on No Responders| |[ADR-31](adr/ADR-31.md)|jetstream, client, server|JetStream Direct Get| @@ -51,7 +51,7 @@ This repository captures Architecture, Design Specifications and Feature Guidanc |[ADR-15](adr/ADR-15.md)|jetstream, client|JetStream Subscribe Workflow| |[ADR-17](adr/ADR-17.md)|jetstream, client|Ordered Consumer| |[ADR-19](adr/ADR-19.md)|jetstream, client, kv, objectstore|API prefixes for materialized JetStream views| -|[ADR-20](adr/ADR-20.md)|jetstream, client, objectstore|JetStream based Object Stores| +|[ADR-20](adr/ADR-20.md)|jetstream, client, objectstore, spec|JetStream based Object Stores| |[ADR-22](adr/ADR-22.md)|jetstream, client|JetStream Publish Retries on No Responders| |[ADR-28](adr/ADR-28.md)|jetstream, server|JetStream RePublish| |[ADR-31](adr/ADR-31.md)|jetstream, client, server|JetStream Direct Get| @@ -72,7 +72,7 @@ This repository captures 
Architecture, Design Specifications and Feature Guidanc |Index|Tags|Description| |-----|----|-----------| |[ADR-19](adr/ADR-19.md)|jetstream, client, kv, objectstore|API prefixes for materialized JetStream views| -|[ADR-20](adr/ADR-20.md)|jetstream, client, objectstore|JetStream based Object Stores| +|[ADR-20](adr/ADR-20.md)|jetstream, client, objectstore, spec|JetStream based Object Stores| ## Observability @@ -117,6 +117,7 @@ This repository captures Architecture, Design Specifications and Feature Guidanc |Index|Tags|Description| |-----|----|-----------| |[ADR-8](adr/ADR-8.md)|jetstream, client, kv, spec|JetStream based Key-Value Stores| +|[ADR-20](adr/ADR-20.md)|jetstream, client, objectstore, spec|JetStream based Object Stores| |[ADR-32](adr/ADR-32.md)|client, spec|Service API| |[ADR-37](adr/ADR-37.md)|jetstream, client, spec|JetStream Simplification| |[ADR-40](adr/ADR-40.md)|client, server, spec|NATS Connection| diff --git a/adr/ADR-20.md b/adr/ADR-20.md index 5002bc62..e0d52e1d 100644 --- a/adr/ADR-20.md +++ b/adr/ADR-20.md @@ -5,7 +5,7 @@ |Date |2021-11-03| |Author |@scottf| |Status |Partially Implemented| -|Tags |jetstream, client, objectstore| +|Tags |jetstream, client, objectstore, spec| |Revision|Date|Author|Info| From 011ba2d85aae247ab9a1a7cd711c560660ca01f0 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Mon, 5 Feb 2024 16:30:29 +0100 Subject: [PATCH 3/6] Fix indentation, add comment about interface method name Signed-off-by: Tomasz Pietrek --- adr/ADR-20.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/adr/ADR-20.md b/adr/ADR-20.md index e0d52e1d..1bc88be1 100644 --- a/adr/ADR-20.md +++ b/adr/ADR-20.md @@ -32,7 +32,7 @@ Current feature list: - Ability to specify chunk size - Ability to delete an object - Ability to understand the state of the object store -- Store compression (via Stream compression) +- Data Compression of Object Stores for NATS Server 2.10 Possible future features @@ -100,7 +100,7 @@ type 
ObjectStoreConfig struct { Storage StorageType // stream storate_type Replicas int // stream replicas Placement Placement // stream placement - Compression bool // stream compression + Compression bool // stream compression } ``` @@ -283,6 +283,9 @@ type ObjectStoreStatus interface { } ``` +The choice of `IsCompressed()` as a method name is idiomatic for Go; maintainers of other language clients can make a similarly idiomatic +choice. + ## Functional Interfaces ### ObjectStoreManager From c483cd621952d271d326395f2cad5dc50488beb1 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Mon, 5 Feb 2024 16:34:27 +0100 Subject: [PATCH 4/6] Add info about s2 compression Signed-off-by: Tomasz Pietrek --- adr/ADR-20.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/adr/ADR-20.md b/adr/ADR-20.md index 1bc88be1..08c6098d 100644 --- a/adr/ADR-20.md +++ b/adr/ADR-20.md @@ -100,7 +100,7 @@ type ObjectStoreConfig struct { Storage StorageType // stream storate_type Replicas int // stream replicas Placement Placement // stream placement - Compression bool // stream compression + Compression bool // stream compression, s2 or none } ``` @@ -136,7 +136,7 @@ type ObjectStoreConfig struct { "cluster": "clstr", "tags": ["tag1", "tag2"] }, - compression: true + "compression": "s2" } ``` From 359e60ada13fedfb12895e51b3abb9b26b71df2f Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Mon, 5 Feb 2024 16:37:08 +0100 Subject: [PATCH 5/6] Remove empty lines in Object Store struct definition Signed-off-by: Tomasz Pietrek --- adr/ADR-20.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/adr/ADR-20.md b/adr/ADR-20.md index 08c6098d..82dfbad7 100644 --- a/adr/ADR-20.md +++ b/adr/ADR-20.md @@ -189,22 +189,15 @@ direct fields of ObjectInfo ```go type ObjectInfo struct { ObjectMeta - Bucket string `json:"bucket"` - NUID string `json:"nuid"` - // the total object size in bytes Size uint64 `json:"size"` - ModTime time.Time `json:"mtime"` - // the total number of chunks Chunks uint32
`json:"chunks"` - // as in http, = Digest string `json:"digest,omitempty"` - Deleted bool `json:"deleted,omitempty"` } ``` From de869cffd6d62e197228fa52a999185dccc67894 Mon Sep 17 00:00:00 2001 From: Tomasz Pietrek Date: Mon, 5 Feb 2024 18:05:40 +0100 Subject: [PATCH 6/6] Clarify the Compression details Signed-off-by: Tomasz Pietrek --- adr/ADR-20.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/adr/ADR-20.md b/adr/ADR-20.md index 82dfbad7..192bd7bc 100644 --- a/adr/ADR-20.md +++ b/adr/ADR-20.md @@ -100,10 +100,13 @@ type ObjectStoreConfig struct { Storage StorageType // stream storate_type Replicas int // stream replicas Placement Placement // stream placement - Compression bool // stream compression, s2 or none + Compression bool // stream compression } ``` +* If Compression is requested in the configuration, set the `compression` value in the Stream config to `s2`. +Object Store does not expose the internals of the Stream config, therefore a bool value is used. + ### Stream Configuration and Subject Templates | Component | Template |