diff --git a/README.md b/README.md index 74ef7c8..c7e8129 100644 --- a/README.md +++ b/README.md @@ -160,7 +160,7 @@ func segment(ctx context.Context, data string) *Searches { Model: openai.GPT4o, Messages: []openai.ChatCompletionMessage{ { - Role: instructor.RoleUser, + Role: openai.ChatMessageRoleUser, Content: fmt.Sprintf("Consider the data below: '\n%s' and segment it into multiple search queries", data), }, }, @@ -298,7 +298,7 @@ func assert(condition bool, message string) {
Images with OpenAI -![List of movies](https://raw.githubusercontent.com/instructor-ai/instructor-go/main/examples/vision/openai/books.png) +![List of books](https://raw.githubusercontent.com/instructor-ai/instructor-go/main/examples/vision/openai/books.png)
Running @@ -356,7 +356,7 @@ func main() { Model: openai.GPT4o, Messages: []openai.ChatCompletionMessage{ { - Role: instructor.RoleUser, + Role: openai.ChatMessageRoleUser, MultiContent: []openai.ChatMessagePart{ { Type: openai.ChatMessagePartTypeText, @@ -437,7 +437,7 @@ func main() {
Images with Anthropic -![List of books](https://raw.githubusercontent.com/instructor-ai/instructor-go/main/examples/vision/anthropic/movies.png) +![List of movies](https://raw.githubusercontent.com/instructor-ai/instructor-go/main/examples/vision/anthropic/movies.png)
Running @@ -504,7 +504,7 @@ func main() { Model: "claude-3-haiku-20240307", Messages: []anthropic.Message{ { - Role: instructor.RoleUser, + Role: anthropic.RoleUser, Content: []anthropic.MessageContent{ anthropic.NewImageMessageContent(anthropic.MessageContentImageSource{ Type: "base64", @@ -696,7 +696,7 @@ Preferred Shopping Times: Weekend Evenings Model: openai.GPT4o20240513, Messages: []openai.ChatCompletionMessage{ { - Role: instructor.RoleSystem, + Role: openai.ChatMessageRoleSystem, Content: fmt.Sprintf(` Generate the product recommendations from the product list based on the customer profile. Return in order of highest recommended first. @@ -704,7 +704,7 @@ Product list: %s`, productList), }, { - Role: instructor.RoleUser, + Role: openai.ChatMessageRoleUser, Content: fmt.Sprintf("User profile:\n%s", profileData), }, }, @@ -1090,6 +1090,183 @@ func main() { ```
+
+Receipt Item Extraction from Image (using OpenAI GPT-4o) + +

+ Receipt 1 + Receipt 2 +

+ +
+Running + +```bash +go run examples/vision/receipt/main.go +``` + +
+ +```go +package main + +import ( + "context" + "fmt" + "math" + "os" + + "github.com/instructor-ai/instructor-go/pkg/instructor" + openai "github.com/sashabaranov/go-openai" +) + +type Item struct { + Name string `json:"name" jsonschema:"title=Item Name,description=The name of the item,example=Apple,example=Banana"` + Price float64 `json:"price" jsonschema:"title=Item Price,description=The price of the item in dollars,example=1.99,example=2.50"` +} + +func (i Item) String() string { + return fmt.Sprintf(" Item: %s, Price: $%.2f", i.Name, i.Price) +} + +type Receipt struct { + Items []Item `json:"items" jsonschema:"title=Receipt Items,description=The list of items in the receipt"` + Total float64 `json:"total" jsonschema:"title=Receipt Total,description=The total cost of all items in the receipt,example=10.99,example=25.50"` +} + +func (r Receipt) String() string { + var result string + for _, item := range r.Items { + result += item.String() + "\n" + } + result += fmt.Sprintf("Total: $%.2f", r.Total) + return result +} + +func (r *Receipt) Validate() error { + calculatedTotal := 0.0 + for _, item := range r.Items { + calculatedTotal += item.Price + } + + calculatedTotal = math.Round(calculatedTotal*10) / 10 + expectedTotal := math.Round(r.Total*10) / 10 + + if calculatedTotal != expectedTotal { + return fmt.Errorf("total %f does not match the sum of item prices %f", r.Total, calculatedTotal) + } + return nil +} + +func extract(ctx context.Context, client *instructor.InstructorOpenAI, url string) (*Receipt, error) { + + var receipt Receipt + _, err := client.CreateChatCompletion( + ctx, + openai.ChatCompletionRequest{ + Model: openai.GPT4o, + Messages: []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleSystem, + Content: `Analyze the image and return the items (include tax and coupons as their own items) in the receipt and the total amount.`, + }, + { + Role: openai.ChatMessageRoleUser, + MultiContent: []openai.ChatMessagePart{ + { + Type: openai.ChatMessagePartTypeImageURL, + ImageURL: &openai.ChatMessageImageURL{ + URL: url, + }, + }, + }, + }, + }, + }, + &receipt, + ) + if err != nil { + return nil, err + } + + if err := receipt.Validate(); err != nil { + return &receipt, err + } + + return &receipt, nil +} + +func main() { + ctx := context.Background() + + client := instructor.FromOpenAI( + openai.NewClient(os.Getenv("OPENAI_API_KEY")), + instructor.WithMode(instructor.ModeJSON), + instructor.WithMaxRetries(3), + ) + + urls := []string{ + "https://templates.mediamodifier.com/645124ff36ed2f5227cbf871/supermarket-receipt-template.jpg", + "https://ocr.space/Content/Images/receipt-ocr-original.jpg", + } + + for _, url := range urls { + receipt, err := extract(ctx, client, url) + fmt.Printf("Receipt:\n%s\n", receipt) + if err != nil { + fmt.Printf("Error: %v\n", err) + continue + } + fmt.Println("\n--------------------------------\n") + } +/* +Receipt: + Item: Lorem ipsum, Price: $9.20 + Item: Lorem ipsum dolor sit, Price: $19.20 + Item: Lorem ipsum dolor sit amet, Price: $15.00 + Item: Lorem ipsum, Price: $15.00 + Item: Lorem ipsum, Price: $15.00 + Item: Lorem ipsum dolor sit, Price: $15.00 + Item: Lorem ipsum, Price: $19.20 +Total: $107.60 + +-------------------------------- + +Receipt: + Item: PET TOY, Price: $1.97 + Item: FLOPPY PUPPY, Price: $1.97 + Item: SSSUPREME S, Price: $4.97 + Item: 2.5 SQUEAK, Price: $5.92 + Item: MUNCHY DMBEL, Price: $3.77 + Item: DOG TREAT, Price: $2.92 + Item: PED PCH 1, Price: $0.50 + Item: PED PCH 1, Price: $0.50 + Item: HNYMD SMORES, Price: $3.98 + Item: FRENCH DRSNG, Price: $1.98 + Item: 3 ORANGES, Price: $5.47 + Item: BABY CARROTS, Price: $1.48 + Item: COLLARDS, Price: $1.24 + Item: CALZONE, Price: $2.50 + Item: MM RVW MNT, Price: $19.77 + Item: STKOBRLPLIABL, Price: $1.97 + Item: STKOBRLPLIABL, Price: $1.97 + Item: STKO SUNFLWR, Price: $0.97 + Item: STKO SUNFLWR, Price: $0.97 + Item: STKO SUNFLWR, Price: $0.97 + Item: STKO SUNFLWR, Price: $0.97 + Item: BLING BEADS, Price: $0.97 + Item: GREAT VALUE, Price: $9.97 + Item: LIPTON, Price: $4.44 + Item: DRY DOG, Price: $12.44 + Item: COUPON 2310652, Price: $-1.00 + Item: TAX, Price: $4.59 +Total: $98.21 +*/ +} +``` + +
+ ## Providers Instructor Go supports the following LLM provider APIs: diff --git a/examples/vision/receipt/README.md b/examples/vision/receipt/README.md new file mode 100644 index 0000000..79caeed --- /dev/null +++ b/examples/vision/receipt/README.md @@ -0,0 +1,6 @@ +# Receipt Image Extractor + +

+ Receipt 1 + Receipt 2 +

diff --git a/examples/vision/receipt/main.go b/examples/vision/receipt/main.go index e88832d..147249d 100644 --- a/examples/vision/receipt/main.go +++ b/examples/vision/receipt/main.go @@ -21,7 +21,11 @@ type Receipt struct { Total float64 `json:"total"` } -// Validate method similar to the Pydantic model validator +func (r *Receipt) String() string { + s, _ := json.MarshalIndent(r, "", " ") + return string(s) +} + func (r *Receipt) Validate() error { calculatedTotal := 0.0 for _, item := range r.Items { @@ -33,7 +37,6 @@ func (r *Receipt) Validate() error { return nil } -// Function to extract receipt information from a URL func extract(ctx context.Context, client *instructor.InstructorOpenAI, url string) (*Receipt, error) { var receipt Receipt @@ -58,7 +61,6 @@ func extract(ctx context.Context, client *instructor.InstructorOpenAI, url strin return nil, err } - // Validate the receipt total if err := receipt.Validate(); err != nil { return &receipt, err } @@ -76,17 +78,66 @@ func main() { ) urls := []string{ - "https://templates.mediamodifier.com/645124ff36ed2f5227cbf871/supermarket-receipt-template.jpg", - "https://ocr.space/Content/Images/receipt-ocr-original.jpg", + // source: https://templates.mediamodifier.com/645124ff36ed2f5227cbf871/supermarket-receipt-template.jpg + "https://raw.githubusercontent.com/instructor-ai/instructor-go/main/examples/vision/receipt/supermarket-receipt-template.jpg", + // source: https://ocr.space/Content/Images/receipt-ocr-original.jpg + "https://raw.githubusercontent.com/instructor-ai/instructor-go/main/examples/vision/receipt/receipt-ocr-original.jpg", } for _, url := range urls { receipt, err := extract(ctx, client, url) + println("Receipt: ") + println(receipt.String()) if err != nil { - fmt.Printf("Error: %v\n", err) - continue + println("Error: " + err.Error()) } - receiptJson, _ := json.MarshalIndent(receipt, "", " ") - fmt.Printf("Receipt: %s\n", receiptJson) + println("\n--------------------------------\n") } + /* + Receipt: + + Item: Lorem ipsum, Price: $9.20 + Item: Lorem ipsum dolor sit, Price: $19.20 + Item: Lorem ipsum dolor sit amet, Price: $15.00 + Item: Lorem ipsum, Price: $15.00 + Item: Lorem ipsum, Price: $15.00 + Item: Lorem ipsum dolor sit, Price: $15.00 + Item: Lorem ipsum, Price: $19.20 + + Total: $107.60 + + -------------------------------- + + Receipt: + + Item: PET TOY, Price: $1.97 + Item: FLOPPY PUPPY, Price: $1.97 + Item: SSSUPREME S, Price: $4.97 + Item: 2.5 SQUEAK, Price: $5.92 + Item: MUNCHY DMBEL, Price: $3.77 + Item: DOG TREAT, Price: $2.92 + Item: PED PCH 1, Price: $0.50 + Item: PED PCH 1, Price: $0.50 + Item: HNYMD SMORES, Price: $3.98 + Item: FRENCH DRSNG, Price: $1.98 + Item: 3 ORANGES, Price: $5.47 + Item: BABY CARROTS, Price: $1.48 + Item: COLLARDS, Price: $1.24 + Item: CALZONE, Price: $2.50 + Item: MM RVW MNT, Price: $19.77 + Item: STKOBRLPLIABL, Price: $1.97 + Item: STKOBRLPLIABL, Price: $1.97 + Item: STKO SUNFLWR, Price: $0.97 + Item: STKO SUNFLWR, Price: $0.97 + Item: STKO SUNFLWR, Price: $0.97 + Item: STKO SUNFLWR, Price: $0.97 + Item: BLING BEADS, Price: $0.97 + Item: GREAT VALUE, Price: $9.97 + Item: LIPTON, Price: $4.44 + Item: DRY DOG, Price: $12.44 + Item: COUPON 2310652, Price: $-1.00 + Item: TAX, Price: $4.59 + + Total: $98.21 + */ } diff --git a/examples/vision/receipt/receipt-ocr-original.jpg b/examples/vision/receipt/receipt-ocr-original.jpg new file mode 100644 index 0000000..8c5374d Binary files /dev/null and b/examples/vision/receipt/receipt-ocr-original.jpg differ diff --git a/examples/vision/receipt/supermarket-receipt-template.jpg b/examples/vision/receipt/supermarket-receipt-template.jpg new file mode 100644 index 0000000..8a1909e Binary files /dev/null and b/examples/vision/receipt/supermarket-receipt-template.jpg differ