From bd2a07b35a3778f250919a7686817d0a1631e87c Mon Sep 17 00:00:00 2001
From: xxj <346944475@qq.com>
Date: Thu, 26 Oct 2023 15:58:20 +0800
Subject: [PATCH 1/2] add text bytes support

---
 src/grpc_generated/go/grpc_simple_client.go | 73 +++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/src/grpc_generated/go/grpc_simple_client.go b/src/grpc_generated/go/grpc_simple_client.go
index 8a79a3490..662f09eca 100644
--- a/src/grpc_generated/go/grpc_simple_client.go
+++ b/src/grpc_generated/go/grpc_simple_client.go
@@ -157,6 +157,58 @@ func ModelInferRequest(client triton.GRPCInferenceServiceClient, rawInput [][]by
 	return modelInferResponse
 }
 
+func ModelInferRequestFromText(client triton.GRPCInferenceServiceClient, rawInput []string, modelName string, modelVersion string) *triton.ModelInferResponse {
+	/*
+		length in first 4 bytes followed,and little endian
+		text not set LittleEndian
+	*/
+	var _bytes []byte
+	for _, text := range rawInput {
+		l := len(text)
+		b := make([]byte, 4)
+		// BUGFIX(review): the length prefix is 4 bytes, so it must be written with
+		// PutUint32/uint32. The previous PutUint16(uint16(l)) only filled 2 of the
+		// 4 allocated bytes and silently truncated lengths >= 64 KiB.
+		binary.LittleEndian.PutUint32(b, uint32(l))
+		b = append(b, []byte(text)...)
+		_bytes = append(_bytes, b...)
+	}
+
+	// Create context for our request with 10 second timeout
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	// Create request input tensors
+	inferInputs := []*triton.ModelInferRequest_InferInputTensor{
+		&triton.ModelInferRequest_InferInputTensor{
+			Name:     "INPUT0",
+			Datatype: "BYTES",
+			Shape:    []int64{1, int64(len(rawInput))},
+		},
+	}
+
+	// Create request input output tensors
+	inferOutputs := []*triton.ModelInferRequest_InferRequestedOutputTensor{
+		&triton.ModelInferRequest_InferRequestedOutputTensor{
+			Name: "OUTPUT0",
+		},
+	}
+
+	// Create inference request for specific model/version
+	modelInferRequest := triton.ModelInferRequest{
+		ModelName:    modelName,
+		ModelVersion: modelVersion,
+		Inputs:       inferInputs,
+		Outputs:      inferOutputs,
+	}
+
+	modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, _bytes)
+
+	// Submit inference request to server
+	modelInferResponse, err := client.ModelInfer(ctx, &modelInferRequest)
+	if err != nil {
+		log.Fatalf("Error processing InferRequest: %v", err)
+	}
+	return modelInferResponse
+}
+
 // Convert int32 input data into raw bytes (assumes Little Endian)
 func Preprocess(inputs [][]int32) [][]byte {
 	inputData0 := inputs[0]
@@ -252,4 +304,25 @@ func main() {
 			log.Fatalf("Incorrect results from inference")
 		}
 	}
+
+	/* We use a simple model that takes 2 input utf-8 texts and returns 2 output tensors. */
+	inferResponse = ModelInferRequestFromText(client, []string{"hello", "world!"}, FLAGS.ModelName, FLAGS.ModelVersion)
+
+	/* We expect there to be 2 results (each with batch-size 1). Walk
+	   over all 16 result elements and print the sum and difference
+	   calculated by the model.
	*/
+	outputs = Postprocess(inferResponse)
+	outputData0 = outputs[0]
+	outputData1 = outputs[1]
+
+	fmt.Println("\nChecking Inference Outputs\n--------------------------")
+	for i := 0; i < outputSize; i++ {
+		fmt.Printf("%d + %d = %d\n", inputData0[i], inputData1[i], outputData0[i])
+		fmt.Printf("%d - %d = %d\n", inputData0[i], inputData1[i], outputData1[i])
+		if (inputData0[i]+inputData1[i] != outputData0[i]) ||
+			inputData0[i]-inputData1[i] != outputData1[i] {
+			log.Fatalf("Incorrect results from inference")
+		}
+	}
 }

From 75e6d1264f431cf249545d098634eaa3f05f64f4 Mon Sep 17 00:00:00 2001
From: xxj <346944475@qq.com>
Date: Thu, 26 Oct 2023 16:09:13 +0800
Subject: [PATCH 2/2] add text bytes support

---
 src/grpc_generated/go/grpc_simple_client.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/grpc_generated/go/grpc_simple_client.go b/src/grpc_generated/go/grpc_simple_client.go
index 662f09eca..e06b4f2e0 100644
--- a/src/grpc_generated/go/grpc_simple_client.go
+++ b/src/grpc_generated/go/grpc_simple_client.go
@@ -159,8 +159,9 @@ func ModelInferRequest(client triton.GRPCInferenceServiceClient, rawInput [][]by
 
 func ModelInferRequestFromText(client triton.GRPCInferenceServiceClient, rawInput []string, modelName string, modelVersion string) *triton.ModelInferResponse {
 	/*
-		length in first 4 bytes followed,and little endian
-		text not set LittleEndian
+		Encodes a BYTES tensor: each element carries its length in the first 4 bytes, followed by its content.
+		The 4 length bytes must be little-endian;
+		the content itself is raw UTF-8 and needs no byte-order conversion.
 	*/
 	var _bytes []byte
 	for _, text := range rawInput {