Skip to content

Commit

Permalink
enh: increase core CPU, reduce replicas and split tokenize batch requests (#6238)
Browse files Browse the repository at this point in the history

* enh: increase core CPU and reduce replicas

* chunk requests

---------

Co-authored-by: Henry Fontanier <[email protected]>
  • Loading branch information
fontanierh and Henry Fontanier authored Jul 16, 2024
1 parent 9895bc6 commit 8d1d3b1
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 13 deletions.
34 changes: 24 additions & 10 deletions front/lib/tokenization.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { Result } from "@dust-tt/types";
import { CoreAPI, Err, Ok, safeSubstring } from "@dust-tt/types";
import _ from "lodash";

import logger from "@app/logger/logger";

Expand All @@ -9,20 +10,33 @@ export async function tokenCountForTexts(
texts: string[],
model: { providerId: string; modelId: string }
): Promise<Result<Array<number>, Error>> {
const BATCHES_COUNT = 3;
try {
const coreAPI = new CoreAPI(config.getCoreAPIConfig(), logger);
const res = await coreAPI.tokenizeBatch({
texts,
providerId: model.providerId,
modelId: model.modelId,
});
if (res.isErr()) {
return new Err(
new Error(`Error tokenizing model message: ${res.error.message}`)
);
const batches = _.chunk(texts, Math.ceil(texts.length / BATCHES_COUNT));
const batchResults = await Promise.all(
batches.map((batch) =>
coreAPI.tokenizeBatch({
texts: batch,
providerId: model.providerId,
modelId: model.modelId,
})
)
);

const counts: number[] = [];
for (const res of batchResults) {
if (res.isErr()) {
return new Err(
new Error(`Error tokenizing model message: ${res.error.message}`)
);
}
for (const tokens of res.value.tokens) {
counts.push(tokens.length);
}
}

return new Ok(res.value.tokens.map((t) => t.length));
return new Ok(counts);
} catch (err) {
return new Err(new Error(`Error tokenizing model message: ${err}`));
}
Expand Down
6 changes: 3 additions & 3 deletions k8s/deployments/core-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: core-deployment
spec:
replicas: 6
replicas: 3
selector:
matchLabels:
app: core
Expand Down Expand Up @@ -48,10 +48,10 @@ spec:

resources:
requests:
cpu: 1250m
cpu: 4000m
memory: 8Gi
limits:
cpu: 1250m
cpu: 4000m
memory: 8Gi

volumes:
Expand Down

0 comments on commit 8d1d3b1

Please sign in to comment.