Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Chroma support for vector db #63

Merged
merged 4 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions javascript-sdk/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "rebuff",
"version": "0.0.4",
"version": "0.1.0",
"description": "Rebuff's Javascript client SDK for Node and the browser",
"main": "src/index.ts",
"type": "module",
Expand All @@ -25,8 +25,9 @@
"dependencies": {
"@pinecone-database/pinecone": "^0.1.6",
"chai": "^4.3.7",
"chromadb": "^1.5.6",
"crypto": "^1.0.1",
"langchain": "^0.0.146",
"langchain": "^0.0.159",
"node-fetch": "^3.3.1",
"openai": "^3.2.1",
"string-similarity": "^4.0.4"
Expand Down
13 changes: 11 additions & 2 deletions javascript-sdk/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,25 @@ export interface ApiConfig {
apiUrl?: string;
}

export interface SdkConfig {
export type VectorDbConfig = {
pinecone: {
apikey: string;
environment: string;
index: string;
};
} | {
chroma: {
url: string;
collectionName: string;
};
};

export interface SdkConfig {
vectorDB: VectorDbConfig
openai: {
apikey: string;
model: string;
};
}
};

export type RebuffConfig = ApiConfig | SdkConfig;
6 changes: 6 additions & 0 deletions javascript-sdk/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
export { default as RebuffApi } from "./api";
export { default as RebuffSdk } from "./sdk";
// These four names are interfaces / type aliases and have no runtime value.
// Re-exporting them with `export type` guarantees the statement is erased from
// the emitted JavaScript. A plain `export { ... } from` can be emitted verbatim
// by per-file transpilers (isolatedModules, esbuild, swc, transpile-only
// ts-node), which then fails at ESM link time because "./config" provides no
// such runtime bindings.
export type {
  ApiConfig,
  SdkConfig,
  RebuffConfig,
  VectorDbConfig,
} from "./config";
84 changes: 77 additions & 7 deletions javascript-sdk/src/lib/vectordb.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,47 @@
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { PineconeClient } from "@pinecone-database/pinecone";
import { PineconeStore } from "langchain/vectorstores/pinecone";
import { VectorStore } from "langchain/vectorstores/base";
import { PineconeClient } from "@pinecone-database/pinecone";
import { Chroma } from "langchain/vectorstores/chroma";
import { Document } from "langchain/document.js";
import { SdkConfig } from "../config";

/**
 * Chroma vector store that reports similarity scores instead of distances.
 *
 * The rest of this code expects a score close to 1 for similar vectors, while
 * Chroma returns a distance close to 0 for them, so every score coming back
 * from the base class is converted to `1 - distance`. Note that this may not
 * work if the collection uses a distance metric other than cosine.
 */
class ChromaCosineSimilarity extends Chroma {
  /**
   * Runs the base-class vector search and flips each returned score.
   *
   * @param query - Query vector forwarded unchanged to the base class.
   * @param k - Result count forwarded unchanged to the base class.
   * @param filter - Optional filter forwarded unchanged to the base class.
   * @returns The base-class `[document, score]` pairs, in the same order, with
   *   each score replaced by `1 - score`.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: this["FilterType"]
  ): Promise<[Document<Record<string, any>>, number][]> {
    const byDistance = await super.similaritySearchVectorWithScore(query, k, filter);
    const bySimilarity: [Document<Record<string, any>>, number][] = [];
    for (const [doc, distance] of byDistance) {
      bySimilarity.push([doc, 1 - distance]);
    }
    return bySimilarity;
  }
}

export default async function initPinecone(
async function initPinecone(
environment: string,
apiKey: string,
index: string,
openaiApiKey: string,
openaiEmbeddings: OpenAIEmbeddings,
): Promise<PineconeStore> {
if (!environment) {
throw new Error("Pinecone environment definition missing");
}
if (!apiKey) {
throw new Error("Pinecone apikey definition missing");
}
if (!index) {
throw new Error("Pinecone index definition missing");
}
try {
const pinecone = new PineconeClient();

await pinecone.init({
environment,
apiKey,
});
const openaiEmbeddings = new OpenAIEmbeddings({
openAIApiKey: openaiApiKey,
modelName: "text-embedding-ada-002"
});
const pineconeIndex = pinecone.Index(index);
const vectorStore = await PineconeStore.fromExistingIndex(
openaiEmbeddings,
Expand All @@ -38,3 +54,57 @@ export default async function initPinecone(
throw new Error("Failed to initialize Pinecone Client");
}
}

/**
 * Builds a Chroma-backed vector store and ensures its collection exists.
 *
 * @param collectionName - Name of the Chroma collection; must be non-empty.
 * @param url - URL of the Chroma server; must be non-empty.
 * @param openaiEmbeddings - Embeddings instance handed to the vector store.
 * @returns The initialized store, after `ensureCollection()` has resolved.
 * @throws Error when `url` or `collectionName` is falsy (url is checked first),
 *   or a generic "Failed to initialize Chroma client" error when construction
 *   or `ensureCollection()` fails; the underlying error is only logged.
 */
async function initChroma(
  collectionName: string,
  url: string,
  openaiEmbeddings: OpenAIEmbeddings,
): Promise<ChromaCosineSimilarity> {
  // Guard clauses: fail fast on missing connection settings.
  if (!url) throw new Error("Chroma url definition missing");
  if (!collectionName) throw new Error("Chroma collectionName definition missing");

  const storeOptions = {
    collectionName,
    url,
    // NOTE(review): presumably the output size of the embedding model in use — confirm.
    numDimensions: 1536,
    // Requests cosine space so that ChromaCosineSimilarity's `1 - distance` is meaningful.
    collectionMetadata: {
      "hnsw:space": "cosine"
    },
  };

  try {
    const store = new ChromaCosineSimilarity(openaiEmbeddings, storeOptions);
    await store.ensureCollection();
    return store;
  } catch (failure) {
    console.log("error", failure);
    throw new Error("Failed to initialize Chroma client");
  }
}

/**
 * Creates the vector store selected by `config.vectorDB`.
 *
 * An OpenAI embeddings client (model "text-embedding-ada-002") is built from
 * `config.openai.apikey` and shared with whichever backend is initialized.
 *
 * @param config - SDK configuration; `vectorDB` must contain either a
 *   `pinecone` or a `chroma` section.
 * @returns The initialized Pinecone or Chroma vector store.
 * @throws Error when `vectorDB` is missing or contains neither a `pinecone`
 *   nor a `chroma` section, plus whatever the backend initializer throws.
 */
export default async function initVectorStore(
  config: SdkConfig
): Promise<VectorStore> {
  const openaiEmbeddings = new OpenAIEmbeddings({
    openAIApiKey: config.openai.apikey,
    modelName: "text-embedding-ada-002"
  });

  const vectorDB = config.vectorDB;
  // The static type promises an object, but plain-JS callers or casts can still
  // hand us nothing; `in` on a non-object would throw an opaque TypeError.
  if (typeof vectorDB !== "object" || vectorDB === null) {
    throw new Error("vectorDB definition missing");
  }

  if ("pinecone" in vectorDB) {
    return await initPinecone(
      vectorDB.pinecone.environment,
      vectorDB.pinecone.apikey,
      vectorDB.pinecone.index,
      openaiEmbeddings
    );
  }
  // Check for Chroma explicitly instead of treating it as the fallback, so an
  // unrecognized config is reported clearly rather than failing on a property
  // read of `undefined`.
  if ("chroma" in vectorDB) {
    return await initChroma(
      vectorDB.chroma.collectionName,
      vectorDB.chroma.url,
      openaiEmbeddings
    );
  }
  throw new Error("vectorDB must define either a pinecone or a chroma configuration");
}
9 changes: 2 additions & 7 deletions javascript-sdk/src/sdk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import {
} from "./interface";
import crypto from "crypto";
import { SdkConfig } from "./config";
import initPinecone from "./lib/vectordb";
import initVectorStore from "./lib/vectordb";
import {
callOpenAiToDetectPI,
detectPiUsingVectorDatabase,
Expand Down Expand Up @@ -160,12 +160,7 @@ export default class RebuffSdk implements Rebuff {
if (this.vectorStore) {
return this.vectorStore;
}
this.vectorStore = await initPinecone(
this.sdkConfig.pinecone.environment,
this.sdkConfig.pinecone.apikey,
this.sdkConfig.pinecone.index,
this.sdkConfig.openai.apikey
);
this.vectorStore = await initVectorStore(this.sdkConfig);
return this.vectorStore
}

Expand Down
13 changes: 8 additions & 5 deletions javascript-sdk/tests/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ const rb = new RebuffSDK({
apikey: getEnvironmentVariable("OPENAI_API_KEY"),
model: "gpt-3.5-turbo",
},
pinecone: {
environment: getEnvironmentVariable("PINECONE_ENVIRONMENT"),
apikey: getEnvironmentVariable("PINECONE_API_KEY"),
index: getEnvironmentVariable("PINECONE_INDEX_NAME"),
},
vectorDB: {
pinecone: {
environment: getEnvironmentVariable("PINECONE_ENVIRONMENT"),
apikey: getEnvironmentVariable("PINECONE_API_KEY"),
index: getEnvironmentVariable("PINECONE_INDEX_NAME"),
}
}
});

const benign_inputs = ["How many customers bought more than 10 items in the last month?",
Expand Down Expand Up @@ -357,3 +359,4 @@ describe("Rebuff API tests", function () {
});
});
});

81 changes: 47 additions & 34 deletions javascript-sdk/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,17 @@
integrity sha512-RsOPImTriV/OE4A9qKjMtk2MnXiuLLbcO3nCXK+kvq4nr0iMfFgpjaX3MPLb6f7+EL1FGSelYvuJMV6REH+ZPQ==

"@types/node-fetch@^2.6.4":
version "2.6.8"
resolved "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.8.tgz"
integrity sha512-nnH5lV9QCMPsbEVdTb5Y+F3GQxLSw1xQgIydrb2gSfEavRPs50FnMr+KUaa+LoPSqibm2N+ZZxH7lavZlAT4GA==
version "2.6.7"
resolved "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.7.tgz"
integrity sha512-lX17GZVpJ/fuCjguZ5b3TjEbSENxmEk1B2z02yoXSK9WMEWRivhdSY73wWMn6bpcCDAOh6qAdktpKHIlkDk2lg==
dependencies:
"@types/node" "*"
form-data "^4.0.0"

"@types/node@*", "@types/node@^18.11.18":
version "18.18.8"
resolved "https://registry.npmjs.org/@types/node/-/node-18.18.8.tgz"
integrity sha512-OLGBaaK5V3VRBS1bAkMVP2/W9B+H8meUfl866OrMNQqt7wDgdpWPp5o6gmIc9pB+lIQHSq4ZL8ypeH1vPxcPaQ==
dependencies:
undici-types "~5.26.4"
version "18.18.6"
resolved "https://registry.npmjs.org/@types/node/-/node-18.18.6.tgz"
integrity sha512-wf3Vz+jCmOQ2HV1YUJuCWdL64adYxumkrxtc+H1VUQlnQI04+5HtH+qZCOE21lBE7gIrt+CwX2Wv8Acrw5Ak6w==

"@types/[email protected]":
version "0.12.0"
Expand Down Expand Up @@ -143,14 +141,7 @@ ansi-regex@^5.0.1:
resolved "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz"
integrity sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==

ansi-styles@^4.0.0:
version "4.3.0"
resolved "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz"
integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
dependencies:
color-convert "^2.0.1"

ansi-styles@^4.1.0:
ansi-styles@^4.0.0, ansi-styles@^4.1.0:
version "4.3.0"
resolved "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz"
integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
Expand Down Expand Up @@ -302,6 +293,13 @@ [email protected]:
optionalDependencies:
fsevents "~2.3.2"

chromadb@*, chromadb@^1.5.6:
version "1.5.11"
resolved "https://registry.npmjs.org/chromadb/-/chromadb-1.5.11.tgz"
integrity sha512-mIg0DtWZFsf4hl05orRixIw3lBYKUc4AGydbUQVwd7sjwaCsgP1j0qHwYxoopj2H1L7yc8OUyFKhi79MoFMoMQ==
dependencies:
isomorphic-fetch "^3.0.0"

cliui@^7.0.2:
version "7.0.4"
resolved "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz"
Expand Down Expand Up @@ -624,6 +622,14 @@ is-unicode-supported@^0.1.0:
resolved "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz"
integrity sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==

isomorphic-fetch@^3.0.0:
version "3.0.0"
resolved "https://registry.npmjs.org/isomorphic-fetch/-/isomorphic-fetch-3.0.0.tgz"
integrity sha512-qvUtwJ3j6qwsF3jLxkZ72qCgjMysPzDfeV240JHiGZsANBYd+EEuu35v7dfrJ9Up0Ak07D7GGSkGhCHTqg/5wA==
dependencies:
node-fetch "^2.6.1"
whatwg-fetch "^3.4.1"

js-tiktoken@^1.0.7:
version "1.0.7"
resolved "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.7.tgz"
Expand All @@ -643,10 +649,10 @@ jsonpointer@^5.0.1:
resolved "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz"
integrity sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==

langchain@^0.0.146:
version "0.0.146"
resolved "https://registry.npmjs.org/langchain/-/langchain-0.0.146.tgz"
integrity sha512-WPkvlYNBH1cqPH0/mcDzfNjht5VJSD7dJMHCjhagLneVD7v69yMiNNkrme+DKHOqnPBbNUHDmlXGLAnTs4gpDQ==
langchain@^0.0.159:
version "0.0.159"
resolved "https://registry.npmjs.org/langchain/-/langchain-0.0.159.tgz"
integrity sha512-+etcFu4QiFkYsGPMtWv3ggBU4LSQxVi/0h648Z7RippqaOuqlIm1lV+U+HCaJOLuMVF4ddEyXnxRVFug4JMkMg==
dependencies:
"@anthropic-ai/sdk" "^0.6.2"
ansi-styles "^5.0.0"
Expand All @@ -668,7 +674,7 @@ langchain@^0.0.146:
p-retry "4"
uuid "^9.0.0"
yaml "^2.2.1"
zod "^3.21.4"
zod "^3.22.3"
zod-to-json-schema "^3.20.4"

langchainhub@~0.0.6:
Expand All @@ -677,9 +683,9 @@ langchainhub@~0.0.6:
integrity sha512-SW6105T+YP1cTe0yMf//7kyshCgvCTyFBMTgH2H3s9rTAR4e+78DA/BBrUL/Mt4Q5eMWui7iGuAYb3pgGsdQ9w==

langsmith@~0.0.31:
version "0.0.48"
resolved "https://registry.npmjs.org/langsmith/-/langsmith-0.0.48.tgz"
integrity sha512-s0hW8iZ90Q9XLTnDK0Pgee245URV3b1cXQjPDj5OKm1+KN7iSK1pKx+4CO7RcFLz58Ixe7Mt+mVcomYqUuryxQ==
version "0.0.44"
resolved "https://registry.npmjs.org/langsmith/-/langsmith-0.0.44.tgz"
integrity sha512-y0K3g6Lua7bvwxLB1Wn8kJR1LYp6cumRg8xX8TooLBmvK8srfv/tyCtlhZ+w1daOQmm1otVMKQ7CHJf83f9ryQ==
dependencies:
"@types/uuid" "^9.0.1"
commander "^10.0.1"
Expand Down Expand Up @@ -832,6 +838,13 @@ node-domexception@^1.0.0, [email protected]:
resolved "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz"
integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==

node-fetch@^2.6.1:
version "2.7.0"
resolved "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz"
integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==
dependencies:
whatwg-url "^5.0.0"

node-fetch@^2.6.12:
version "2.7.0"
resolved "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz"
Expand Down Expand Up @@ -877,7 +890,7 @@ once@^1.3.0:
dependencies:
wrappy "1"

openai@^3.2.1:
"openai@^3.0.0 || ^4.0.0", openai@^3.2.1:
version "3.3.0"
resolved "https://registry.npmjs.org/openai/-/openai-3.3.0.tgz"
integrity sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==
Expand Down Expand Up @@ -1083,11 +1096,6 @@ typescript@>=2.7:
resolved "https://registry.npmjs.org/typescript/-/typescript-5.2.2.tgz"
integrity sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==

undici-types@~5.26.4:
version "5.26.5"
resolved "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz"
integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==

uuid@^9.0.0:
version "9.0.1"
resolved "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz"
Expand All @@ -1113,6 +1121,11 @@ webidl-conversions@^3.0.0:
resolved "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz"
integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==

whatwg-fetch@^3.4.1:
version "3.6.19"
resolved "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.19.tgz"
integrity sha512-d67JP4dHSbm2TrpFj8AbO8DnL1JXL5J9u0Kq2xW6d0TFDbCA3Muhdt8orXC22utleTVj7Prqt82baN6RBvnEgw==

whatwg-url@^5.0.0:
version "5.0.0"
resolved "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz"
Expand Down Expand Up @@ -1146,9 +1159,9 @@ y18n@^5.0.5:
integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==

yaml@^2.2.1:
version "2.3.4"
resolved "https://registry.npmjs.org/yaml/-/yaml-2.3.4.tgz"
integrity sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA==
version "2.3.3"
resolved "https://registry.npmjs.org/yaml/-/yaml-2.3.3.tgz"
integrity sha512-zw0VAJxgeZ6+++/su5AFoqBbZbrEakwu+X0M5HmcwUiBL7AzcuPKjj5we4xfQLp78LkEMpD0cOnUhmgOVy3KdQ==

yargs-parser@^20.2.2, [email protected]:
version "20.2.4"
Expand Down Expand Up @@ -1193,7 +1206,7 @@ zod-to-json-schema@^3.20.4:
resolved "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.4.tgz"
integrity sha512-fjUZh4nQ1s6HMccgIeE0VP4QG/YRGPmyjO9sAh890aQKPEk3nqbfUXhMFaC+Dr5KvYBm8BCyvfpZf2jY9aGSsw==

zod@^3.21.4:
zod@^3.21.4, zod@^3.22.3:
version "3.22.4"
resolved "https://registry.npmjs.org/zod/-/zod-3.22.4.tgz"
integrity sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==
Loading