-
Notifications
You must be signed in to change notification settings - Fork 67
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add DB serialization support #132
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,153 @@ | ||||||
package sqlite | ||||||
|
||||||
// #include <sqlite3.h> | ||||||
// #include <stdlib.h> | ||||||
import "C" | ||||||
import ( | ||||||
"runtime" | ||||||
"unsafe" | ||||||
) | ||||||
|
||||||
// Serialized pairs a serialized database image with the name of the schema
// it was taken from (or is destined for).
type Serialized struct {
	// schema is the schema name; Schema reports "main" when it is empty.
	schema string
	// data is the serialized database image.
	data []byte
	// sqliteOwnsData is set when data lives in memory handed out by SQLite
	// (sqlite3_malloc or sqlite3_serialize) rather than in a Go slice
	// supplied by the caller.
	sqliteOwnsData bool
	// shouldFreeData is set while this value is responsible for releasing
	// data with sqlite3_free.
	shouldFreeData bool
}
|
||||||
// NewSerialized creates a new serialized DB from the given schema and data. | ||||||
// | ||||||
// If copyToSqlite is true, the data will be copied. This should be set to true | ||||||
// if this will be used with SQLITE_DESERIALIZE_FREEONCLOSE. | ||||||
func NewSerialized(schema string, data []byte, copyToSqlite bool) *Serialized { | ||||||
s := &Serialized{ | ||||||
schema: schema, | ||||||
data: data, | ||||||
} | ||||||
if copyToSqlite { | ||||||
sqliteData := (*[1 << 28]uint8)(unsafe.Pointer(C.sqlite3_malloc(C.int(len(data)))))[:len(data):len(data)] | ||||||
copy(sqliteData, data) | ||||||
s.data = sqliteData | ||||||
s.shouldFreeData = true | ||||||
s.sqliteOwnsData = true | ||||||
runtime.SetFinalizer(s, func(s *Serialized) { s.free() }) | ||||||
} | ||||||
return s | ||||||
} | ||||||
|
||||||
// Schema returns the schema for this serialized DB. | ||||||
func (s *Serialized) Schema() string { | ||||||
if s.schema == "" { | ||||||
return "main" | ||||||
} | ||||||
return s.schema | ||||||
} | ||||||
|
||||||
// Bytes returns the serialized bytes. Do not mutate this value. This is only | ||||||
// valid for the life of its receiver and should be copied for any other | ||||||
// longer-term use. | ||||||
func (s *Serialized) Bytes() []byte { return s.data } | ||||||
|
||||||
func (s *Serialized) free() { | ||||||
if len(s.data) > 0 && s.shouldFreeData { | ||||||
s.shouldFreeData = false | ||||||
s.sqliteOwnsData = false | ||||||
C.sqlite3_free(unsafe.Pointer(&s.data[0])) | ||||||
} | ||||||
Comment on lines
+53
to
+57
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. complex logic with no comments. Why is this necessary? |
||||||
s.data = nil | ||||||
} | ||||||
|
||||||
// SerializeFlags are flags used for Serialize. | ||||||
type SerializeFlags int | ||||||
|
||||||
const ( | ||||||
SQLITE_SERIALIZE_NOCOPY SerializeFlags = C.SQLITE_SERIALIZE_NOCOPY | ||||||
) | ||||||
|
||||||
// Serialize serializes the given schema. Returns nil on error. If | ||||||
// SQLITE_SERIALIZE_NOCOPY flag is set, the data may only be valid as long as | ||||||
// the database. | ||||||
// | ||||||
// https://www.sqlite.org/c3ref/serialize.html | ||||||
func (conn *Conn) Serialize(schema string, flags ...SerializeFlags) *Serialized { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Accept a writer and write the copied serialized data to it. |
||||||
var serializeFlags SerializeFlags | ||||||
for _, f := range flags { | ||||||
serializeFlags |= f | ||||||
} | ||||||
|
||||||
cschema := cmain | ||||||
if schema != "" && schema != "main" { | ||||||
cschema = C.CString(schema) | ||||||
defer C.free(unsafe.Pointer(cschema)) | ||||||
} | ||||||
|
||||||
var csize C.sqlite3_int64 | ||||||
res := C.sqlite3_serialize(conn.conn, cschema, &csize, C.uint(serializeFlags)) | ||||||
if res == nil { | ||||||
return nil | ||||||
} | ||||||
|
||||||
s := &Serialized{ | ||||||
schema: schema, | ||||||
data: (*[1 << 28]uint8)(unsafe.Pointer(res))[:csize:csize], | ||||||
sqliteOwnsData: true, | ||||||
} | ||||||
// Free the memory only if they didn't specify nocopy | ||||||
if serializeFlags&SQLITE_SERIALIZE_NOCOPY == 0 { | ||||||
s.shouldFreeData = true | ||||||
runtime.SetFinalizer(s, func(s *Serialized) { s.free() }) | ||||||
} | ||||||
return s | ||||||
} | ||||||
|
||||||
// DeserializeFlags are flags used for Deserialize. | ||||||
type DeserializeFlags int | ||||||
|
||||||
const ( | ||||||
SQLITE_DESERIALIZE_FREEONCLOSE DeserializeFlags = C.SQLITE_DESERIALIZE_FREEONCLOSE | ||||||
SQLITE_DESERIALIZE_RESIZEABLE DeserializeFlags = C.SQLITE_DESERIALIZE_RESIZEABLE | ||||||
SQLITE_DESERIALIZE_READONLY DeserializeFlags = C.SQLITE_DESERIALIZE_READONLY | ||||||
) | ||||||
Comment on lines
+107
to
+111
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is too low level to expose in the Go API here. When we deserialize we are taking go memory with the serialized DB and malloc-ing a space of C memory for which sqlite can operate an in-memory database. I think the only option we should expose is READONLY and maybe RESIZEABLE. I think since we are managing the C memory, we should control FREEONCLOSE, which we should probably set, so we don't have to continue to manage that memory. |
||||||
|
||||||
// Deserialize reopens the database as an in-memory representation of the given serialized bytes. | ||||||
// The given *Serialized instance should remain referenced (i.e. not GC'd) for | ||||||
// the life of the DB since the bytes within are referenced directly. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not something the caller can guarantee, short of importing go4.org/unsafe/assume-no-moving-gc. Generally, it's probably not a good idea to let C and Go memory cross the boundaries like this. It adds complexity including the need for the finalizers, and the invalid state of Serialized values after being used. In Serialize, I'd always make a copy into Go memory unless SQLITE_SERIALIZE_NOCOPY is set. That way there is never a need for freeing the memory. If an application can't stand copies for performance reasons, they can use SQLITE_SERIALIZE_NOCOPY. Otherwise, they can probably tolerate two as well as one. In Deserialize, I would always copy the input into sqlite3_malloc64'd memory, and force SQLITE_DESERIALIZE_FREEONCLOSE on. This is unfortunate in terms of memory usage, especially when using //go:embed which is my use case, and basically precludes the use of the szBuf > szDb feature, but it is the only safe API because we can't rely on Go memory staying still. The good news is that this removes any need for the Serialized type. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hrmm, yeah all my machinations here were to avoid copies, but it sounds like I can't have a stable pointer. So I guess we have to have copies. I did this for a side project (https://github.com/cretz/temporal-sdk-go-advanced/tree/main/temporalsqlite for SQLite on https://temporal.io/) so it may be a few days until I can get to simplifying. If needing sooner or you have the bandwidth, feel free to steal the impl here and simplify w/ always-copy. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
Oh, actually it might be worth waiting for https://go.dev/issue/46787. I wonder if we can provide the same API I describe above, and implement it with copies in Go 1.18, and with Pinner in Go 1.19. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. golang/go#46787 has landed. |
||||||
// | ||||||
// Callers should only use SQLITE_DESERIALIZE_FREEONCLOSE and | ||||||
// SQLITE_DESERIALIZE_RESIZEABLE if the param came from Serialize or | ||||||
// copyToSqlite was given to NewSerialized. | ||||||
// | ||||||
// The Serialized parameter should no longer be used after this call. | ||||||
// | ||||||
// https://www.sqlite.org/c3ref/deserialize.html | ||||||
func (conn *Conn) Deserialize(s *Serialized, flags ...DeserializeFlags) error { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I recommend this function signature:
Suggested change
And then use size+additional to malloc the C memory and then copy the contents of serialized into that memory. Set FREEONCLOSE so we don't have to continue to manage that C memory. |
||||||
var deserializeFlags DeserializeFlags | ||||||
for _, f := range flags { | ||||||
deserializeFlags |= f | ||||||
} | ||||||
|
||||||
cschema := cmain | ||||||
if s.schema != "" && s.schema != "main" { | ||||||
cschema = C.CString(s.schema) | ||||||
defer C.free(unsafe.Pointer(cschema)) | ||||||
} | ||||||
|
||||||
// If they set to free on close, remove the free flag from the param | ||||||
if deserializeFlags&SQLITE_DESERIALIZE_FREEONCLOSE == 1 { | ||||||
s.shouldFreeData = false | ||||||
} | ||||||
|
||||||
res := C.sqlite3_deserialize( | ||||||
conn.conn, | ||||||
cschema, | ||||||
(*C.uchar)(unsafe.Pointer(&s.data[0])), | ||||||
C.sqlite3_int64(len(s.data)), | ||||||
C.sqlite3_int64(len(s.data)), | ||||||
C.uint(deserializeFlags), | ||||||
) | ||||||
if res != C.SQLITE_OK { | ||||||
return conn.extreserr("Conn.Deserialize", "", res) | ||||||
} | ||||||
return nil | ||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
package sqlite_test | ||
|
||
import (
	"reflect"
	"runtime"
	"strconv"
	"strings"
	"testing"

	"crawshaw.io/sqlite"
	"crawshaw.io/sqlite/sqlitex"
)
|
||
func TestSerialize(t *testing.T) { | ||
conn, err := sqlite.OpenConn(":memory:", 0) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
defer conn.Close() | ||
|
||
// Create table and insert a few records | ||
err = sqlitex.Exec(conn, "CREATE TABLE mytable (v1 PRIMARY KEY, v2, v3);", nil) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
err = sqlitex.Exec(conn, | ||
"INSERT INTO mytable (v1, v2, v3) VALUES ('foo', 'bar', 'baz'), ('foo2', 'bar2', 'baz2');", nil) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
// Serialize | ||
ser := conn.Serialize("") | ||
if ser == nil { | ||
t.Fatal("unexpected nil") | ||
} | ||
origLen := len(ser.Bytes()) | ||
t.Logf("Initial serialized size: %v", origLen) | ||
|
||
// Create new connection, confirm table not there | ||
conn, err = sqlite.OpenConn(":memory:", 0) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
defer conn.Close() | ||
err = sqlitex.Exec(conn, "SELECT * FROM mytable ORDER BY v1;", nil) | ||
if err == nil || !strings.Contains(err.Error(), "no such table") { | ||
t.Fatalf("expected no-table error, got: %v", err) | ||
} | ||
|
||
// Deserialize into connection and allow resizing | ||
err = conn.Deserialize(ser, sqlite.SQLITE_DESERIALIZE_FREEONCLOSE|sqlite.SQLITE_DESERIALIZE_RESIZEABLE) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
// Confirm data there | ||
data := [][3]string{} | ||
err = sqlitex.Exec(conn, "SELECT * FROM mytable ORDER BY v1;", func(stmt *sqlite.Stmt) error { | ||
data = append(data, [3]string{stmt.ColumnText(0), stmt.ColumnText(1), stmt.ColumnText(2)}) | ||
return nil | ||
}) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
expected := [][3]string{{"foo", "bar", "baz"}, {"foo2", "bar2", "baz2"}} | ||
if !reflect.DeepEqual(expected, data) { | ||
t.Fatalf("expected %v, got %v", expected, data) | ||
} | ||
|
||
// Confirm 1000 inserts can be made | ||
for i := 0; i < 1000; i++ { | ||
toAppend := strconv.Itoa(i + 3) | ||
err = sqlitex.Exec(conn, "INSERT INTO mytable (v1, v2, v3) VALUES ('foo"+ | ||
toAppend+"', 'bar"+toAppend+"', 'baz3"+toAppend+"')", nil) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
} | ||
|
||
// Serialize again, this time with no-copy | ||
ser = conn.Serialize("") | ||
if ser == nil { | ||
t.Fatal("unexpected nil") | ||
} | ||
newLen := len(ser.Bytes()) | ||
if newLen <= origLen { | ||
t.Fatalf("expected %v > %v", newLen, origLen) | ||
} | ||
t.Logf("New serialized size: %v", newLen) | ||
|
||
// Copy the serialized bytes but to not let sqlite own them | ||
ser = sqlite.NewSerialized(ser.Schema(), ser.Bytes(), false) | ||
|
||
// Create new conn, deserialize read only | ||
conn, err = sqlite.OpenConn(":memory:", 0) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
defer conn.Close() | ||
err = conn.Deserialize(ser, sqlite.SQLITE_DESERIALIZE_READONLY) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
// Count | ||
var total int64 | ||
err = sqlitex.Exec(conn, "SELECT COUNT(1) FROM mytable;", func(stmt *sqlite.Stmt) error { | ||
total = stmt.ColumnInt64(0) | ||
return nil | ||
}) | ||
if err != nil { | ||
t.Fatal(err) | ||
} else if total != 1002 { | ||
t.Fatalf("expected 1002, got %v", total) | ||
} | ||
|
||
// Try to insert again | ||
err = sqlitex.Exec(conn, "INSERT INTO mytable (v1, v2, v3) VALUES ('a', 'b', 'c');", nil) | ||
if err == nil || !strings.Contains(err.Error(), "readonly") { | ||
t.Fatalf("expected readonly error, got: %v", err) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would prefer more robust API semantics for this. This is too error-prone. Assume the user of this API is not going to read your docs. How can we make it foolproof? Consider that there is virtually no case where someone is going to instantiate a
Serialized
object and not copy the data. So why bother with the intermediary? Why even have a Serialized object at all? IMO the ideal API should just accept a Writer where the serialized data is written, or accept a Reader where the serialized data can be read from so it can be instantiated into the database.