Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add text bytes support #426

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions src/grpc_generated/go/grpc_simple_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,59 @@ func ModelInferRequest(client triton.GRPCInferenceServiceClient, rawInput [][]by
return modelInferResponse
}

// ModelInferRequestFromText sends rawInput to the given model as a single
// BYTES input tensor named "INPUT0" with shape [1, len(rawInput)], requests
// the "OUTPUT0" tensor, and returns the server's response.
//
// The strings are serialized into one raw buffer in which every element is
// written as a 4-byte little-endian length followed by the element's bytes
// (the content itself needs no endianness conversion).
//
// The request runs with a 10 second timeout. If the inference call returns an
// error, the process is terminated through log.Fatalf.
func ModelInferRequestFromText(client triton.GRPCInferenceServiceClient, rawInput []string, modelName string, modelVersion string) *triton.ModelInferResponse {
	// Every element is prefixed by its byte length stored in exactly 4 bytes.
	const lengthPrefixSize = 4

	// Compute the final size up front so the buffer is allocated only once.
	encodedSize := 0
	for _, text := range rawInput {
		encodedSize += lengthPrefixSize + len(text)
	}

	encoded := make([]byte, 0, encodedSize)
	var prefix [lengthPrefixSize]byte
	for _, text := range rawInput {
		// Use the full 32-bit width of the prefix: writing only 16 bits would
		// silently truncate the length of any element longer than 65535 bytes
		// and corrupt the serialized tensor.
		binary.LittleEndian.PutUint32(prefix[:], uint32(len(text)))
		encoded = append(encoded, prefix[:]...)
		encoded = append(encoded, text...)
	}

	// Create context for our request with 10 second timeout
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Create request input tensors
	inferInputs := []*triton.ModelInferRequest_InferInputTensor{
		&triton.ModelInferRequest_InferInputTensor{
			Name:     "INPUT0",
			Datatype: "BYTES",
			Shape:    []int64{1, int64(len(rawInput))},
		},
	}

	// Create request output tensors
	inferOutputs := []*triton.ModelInferRequest_InferRequestedOutputTensor{
		&triton.ModelInferRequest_InferRequestedOutputTensor{
			Name: "OUTPUT0",
		},
	}

	// Create inference request for specific model/version; the serialized
	// strings travel as the raw contents of the single input tensor.
	modelInferRequest := triton.ModelInferRequest{
		ModelName:        modelName,
		ModelVersion:     modelVersion,
		Inputs:           inferInputs,
		Outputs:          inferOutputs,
		RawInputContents: [][]byte{encoded},
	}

	// Submit inference request to server
	modelInferResponse, err := client.ModelInfer(ctx, &modelInferRequest)
	if err != nil {
		log.Fatalf("Error processing InferRequest: %v", err)
	}
	return modelInferResponse
}

// Convert int32 input data into raw bytes (assumes Little Endian)
func Preprocess(inputs [][]int32) [][]byte {
inputData0 := inputs[0]
Expand Down Expand Up @@ -252,4 +305,25 @@ func main() {
log.Fatalf("Incorrect results from inference")
}
}

/* We use a simple model that takes 2 input utf-8 texts and returns 2 output tensors. */
inferResponse = ModelInferRequestFromText(client, []string{"hello", "world!"}, FLAGS.ModelName, FLAGS.ModelVersion)

/* We expect there to be 2 results (each with batch-size 1). Walk
over all 16 result elements and print the sum and difference
calculated by the model. */
outputs = Postprocess(inferResponse)
outputData0 = outputs[0]
outputData1 = outputs[1]

fmt.Println("\nChecking Inference Outputs\n--------------------------")
for i := 0; i < outputSize; i++ {
fmt.Printf("%d + %d = %d\n", inputData0[i], inputData1[i], outputData0[i])
fmt.Printf("%d - %d = %d\n", inputData0[i], inputData1[i], outputData1[i])
if (inputData0[i]+inputData1[i] != outputData0[i]) ||
inputData0[i]-inputData1[i] != outputData1[i] {
log.Fatalf("Incorrect results from inference")
}
}

}