Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement: Alternative approach to avoid reflection on load #448

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

rafaeljusto
Copy link
Contributor

When loading information from the database gorp uses reflection to identify the field that matches the returned column name. This search can cause some CPU/memory overhead on large systems.

image
image

An alternative approach allows the target object to bypass this logic, forwarding the responsibility of building the slice of attribute pointers to the caller.

For example, the following type would bypass the reflection search with an extra method:

// Example is a sample row type. Its fields are filled from the columns
// "fieldA", "fieldB" and "fieldC" respectively.
type Example struct {
	FieldA string
	FieldB int
	FieldC time.Time
}

// DBColumns returns, for every name in columnNames and in the same order, a
// pointer to the field that receives that column's value. Implementing this
// method lets the type bypass the reflection-based field lookup.
//
// It returns a nil slice and an error as soon as an unknown column name is
// encountered.
func (e *Example) DBColumns(columnNames []string) ([]interface{}, error) {
	columns := make([]interface{}, 0, len(columnNames))
	for _, columnName := range columnNames {
		// Each entry must be the field's address, not its current value:
		// the caller writes the loaded column through these pointers.
		switch columnName {
		case "fieldA":
			columns = append(columns, &e.FieldA)
		case "fieldB":
			columns = append(columns, &e.FieldB)
		case "fieldC":
			columns = append(columns, &e.FieldC)
		default:
			return nil, fmt.Errorf("unknown column name %q", columnName)
		}
	}
	return columns, nil
}

Furthermore, the application could generate these DBColumns methods with go generate, making this an automatic, opt-in optimization that speeds up data loading.

PS: I've kept the use of interface{} instead of any to keep compatibility with old compilers.

@rafaeljusto
Copy link
Contributor Author

You can see that we have good improvements regarding memory usage and allocations:

$ GORP_TEST_DSN=root:dev@tcp(127.0.0.1)/gorp_test GORP_TEST_DIALECT=gomysql go test -tags integration -bench="Benchmark.*Holder" -run=^$ -benchmem -memprofile memprofile.out -cpuprofile profile.out
goos: darwin
goarch: arm64
pkg: github.com/go-gorp/gorp/v3
BenchmarkClassicHolder-10    	       3	 412402458 ns/op	    6226 B/op	     210 allocs/op
BenchmarkSmartHolder-10      	       3	 411568181 ns/op	    3128 B/op	      45 allocs/op
PASS
ok  	github.com/go-gorp/gorp/v3	19.910s

@rafaeljusto
Copy link
Contributor Author

rafaeljusto commented Aug 22, 2023

Example of a program to generate the required method:

package main

import (
	"bytes"
	"fmt"
	"go/ast"
	"go/parser"
	"go/token"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
)

// reIgnore matches the doc-comment marker that excludes a type from code
// generation.
var reIgnore = regexp.MustCompile(`gorp_reflection:\s*ignore`)

// main parses the Go file named by the GOFILE environment variable (set by
// go generate) and writes a sibling "<name>_gorp_gen<ext>" file containing a
// DBColumns method for every top-level struct type that is not marked with
// the ignore comment. Any parse or write failure terminates the program.
func main() {
	filename := os.Getenv("GOFILE")
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, filename, nil, parser.ParseComments)
	if err != nil {
		log.Fatal(err)
	}

	packageName := f.Name.Name
	structTypes := collectStructTypes(f)

	// make sure we always generate the same output
	structTypeKeys := make([]string, 0, len(structTypes))
	for name := range structTypes {
		structTypeKeys = append(structTypeKeys, name)
	}
	sort.Strings(structTypeKeys)

	var buffer bytes.Buffer
	buffer.WriteString("// Code generated by gorp-reflection; DO NOT EDIT.\n\n")
	fmt.Fprintf(&buffer, "// Package %s is a package.\n", packageName)
	fmt.Fprintf(&buffer, "package %s\n\n", packageName)
	// The imports are only referenced by generated methods; emitting them
	// without any method would produce a file that fails to compile.
	if len(structTypeKeys) > 0 {
		buffer.WriteString("import (\n")
		buffer.WriteString("\t\"fmt\"\n")
		buffer.WriteString("\t\"strings\"\n")
		buffer.WriteString(")\n\n")
	}
	for _, name := range structTypeKeys {
		writeDBColumns(&buffer, name, structTypes[name])
	}

	outputFile := strings.TrimSuffix(filename, filepath.Ext(filename)) + "_gorp_gen" + filepath.Ext(filename)
	if err := os.WriteFile(outputFile, buffer.Bytes(), 0644); err != nil {
		log.Fatal(err)
	}
}

// collectStructTypes returns the top-level struct types declared in f, keyed
// by type name. A type is skipped when the ignore marker appears in the doc
// comment of its declaration or, inside a grouped declaration, of its own
// spec. Types declared inside function bodies are not considered because no
// method can be generated for them.
func collectStructTypes(f *ast.File) map[string]*ast.StructType {
	structTypes := make(map[string]*ast.StructType)
	for _, decl := range f.Decls {
		genDecl, isGenDecl := decl.(*ast.GenDecl)
		if !isGenDecl || genDecl.Tok != token.TYPE {
			continue
		}
		// CommentGroup.Text is nil-safe, so a missing doc comment is fine.
		declIgnored := reIgnore.MatchString(genDecl.Doc.Text())
		for _, spec := range genDecl.Specs {
			typeSpec, isTypeSpec := spec.(*ast.TypeSpec)
			if !isTypeSpec {
				continue
			}
			structType, isStructType := typeSpec.Type.(*ast.StructType)
			if !isStructType {
				continue
			}
			if declIgnored || reIgnore.MatchString(typeSpec.Doc.Text()) {
				continue
			}
			structTypes[typeSpec.Name.Name] = structType
		}
	}
	return structTypes
}

// writeDBColumns appends to buffer the source of a DBColumns method for the
// struct type called name. Every named field gets a case keyed by its
// lower-cased name; columns that match no field are delegated, in declaration
// order, to the DBColumns method of each embedded type.
func writeDBColumns(buffer *bytes.Buffer, name string, structType *ast.StructType) {
	receiver := strings.ToLower(name[:1])

	buffer.WriteString("// DBColumns returns the attribute references of the given columns.\n")
	fmt.Fprintf(buffer, "func (%s *%s) DBColumns(columnNames []string) ([]any, error) {\n", receiver, name)
	buffer.WriteString("\tvar columns []any\n")
	buffer.WriteString("\tfor _, columnName := range columnNames {\n")
	buffer.WriteString("\t\tswitch strings.ToLower(columnName) {\n")

	var embeddedTypes []string
	for _, field := range structType.Fields.List {
		// embedded types
		if len(field.Names) == 0 {
			switch fieldType := field.Type.(type) {
			case *ast.Ident:
				embeddedTypes = append(embeddedTypes, fieldType.Name)
			case *ast.SelectorExpr:
				embeddedTypes = append(embeddedTypes, fieldType.Sel.Name)
			}
			continue
		}
		// A single declaration such as "A, B int" carries several names;
		// each one is a distinct field and needs its own case.
		for _, fieldName := range field.Names {
			// Blank fields cannot be addressed.
			if fieldName.Name == "_" {
				continue
			}
			fmt.Fprintf(buffer, "\t\tcase \"%s\":\n", strings.ToLower(fieldName.Name))
			fmt.Fprintf(buffer, "\t\t\tcolumns = append(columns, &%s.%s)\n", receiver, fieldName.Name)
		}
	}

	buffer.WriteString("\t\tdefault:\n")
	buffer.WriteString("\t\t\tvar found bool\n")
	if len(embeddedTypes) > 0 {
		buffer.WriteString("\t\t\tvar err error\n")
		buffer.WriteString("\t\t\tvar embeddedColumns []any\n\n")
	}
	for _, embeddedType := range embeddedTypes {
		buffer.WriteString("\t\t\tif !found {\n")
		fmt.Fprintf(buffer, "\t\t\t\tembeddedColumns, err = %s.%s.DBColumns([]string{columnName})\n", receiver, embeddedType)
		buffer.WriteString("\t\t\t\tif err == nil {\n")
		buffer.WriteString("\t\t\t\t\tcolumns = append(columns, embeddedColumns...)\n")
		buffer.WriteString("\t\t\t\t\tfound = true\n")
		buffer.WriteString("\t\t\t\t}\n")
		buffer.WriteString("\t\t\t}\n")
	}
	buffer.WriteString("\t\t\tif !found {\n")
	buffer.WriteString("\t\t\t\treturn columns, fmt.Errorf(\"unknown column name: %s\", columnName)\n")
	buffer.WriteString("\t\t\t}\n")
	buffer.WriteString("\t\t}\n")
	buffer.WriteString("\t}\n")
	buffer.WriteString("\treturn columns, nil\n")
	buffer.WriteString("}\n\n")
}

Then each file that needs the generated methods only has to add the go:generate directive:

//go:generate gorp-reflection

When loading information from the database, gorp uses reflection to identify
the field that matches the returned column name. This search can cause some
CPU/memory overhead on large systems.

An alternative approach allows the target object to bypass this logic,
forwarding the responsibility of building the slice of attribute pointers to
the caller.

For example, the following type would bypass the reflection search with an extra method:

```go
// Example is a sample row type. Its fields are filled from the columns
// "fieldA", "fieldB" and "fieldC" respectively.
type Example struct {
	FieldA string
	FieldB int
	FieldC time.Time
}

// DBColumns returns, for every name in columnNames and in the same order, a
// pointer to the field that receives that column's value. Implementing this
// method lets the type bypass the reflection-based field lookup.
//
// It returns a nil slice and an error as soon as an unknown column name is
// encountered.
func (e *Example) DBColumns(columnNames []string) ([]interface{}, error) {
	columns := make([]interface{}, 0, len(columnNames))
	for _, columnName := range columnNames {
		// Each entry must be the field's address, not its current value:
		// the caller writes the loaded column through these pointers.
		switch columnName {
		case "fieldA":
			columns = append(columns, &e.FieldA)
		case "fieldB":
			columns = append(columns, &e.FieldB)
		case "fieldC":
			columns = append(columns, &e.FieldC)
		default:
			return nil, fmt.Errorf("unknown column name %q", columnName)
		}
	}
	return columns, nil
}
```

Furthermore, the application could generate these `DBColumns` methods with
`go generate`, making this an automatic, opt-in optimization that speeds up
data loading.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant