Skip to content

Commit

Permalink
Add Chroma support for vector db
Browse files Browse the repository at this point in the history
  • Loading branch information
Risto McGehee committed Oct 29, 2023
1 parent 90630cb commit e343f94
Show file tree
Hide file tree
Showing 6 changed files with 334 additions and 234 deletions.
3 changes: 2 additions & 1 deletion javascript-sdk/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
"dependencies": {
"@pinecone-database/pinecone": "^0.1.6",
"chai": "^4.3.7",
"chromadb": "^1.5.6",
"crypto": "^1.0.1",
"langchain": "^0.0.146",
"langchain": "^0.0.159",
"node-fetch": "^3.3.1",
"openai": "^3.2.1",
"string-similarity": "^4.0.4"
Expand Down
13 changes: 11 additions & 2 deletions javascript-sdk/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,25 @@ export interface ApiConfig {
apiUrl?: string;
}

export interface SdkConfig {
/**
 * Selects and configures the vector database backend.
 *
 * The value is an object keyed by either `pinecone` or `chroma`; consumers
 * distinguish the two shapes by checking which key is present.
 */
export type VectorDbConfig =
  | {
      /** Connection settings used when the backend is Pinecone. */
      pinecone: {
        apikey: string;
        environment: string;
        index: string;
      };
    }
  | {
      /** Connection settings used when the backend is Chroma. */
      chroma: {
        url: string;
        collectionName: string;
      };
    };

/**
 * Configuration for running Rebuff through the SDK: which vector database
 * to use plus the OpenAI credentials and model name.
 */
export interface SdkConfig {
  /** Vector database backend and its connection settings. */
  vectorDB: VectorDbConfig;
  /** OpenAI API key and model name. */
  openai: {
    apikey: string;
    model: string;
  };
}
};

export type RebuffConfig = ApiConfig | SdkConfig;
6 changes: 6 additions & 0 deletions javascript-sdk/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
export { default as RebuffApi } from "./api";
export { default as RebuffSdk } from "./sdk";
// These four names are declared in "./config" as an interface or type alias,
// so they have no runtime value. Re-export them with `export type` so that
// single-file transpilers and `isolatedModules` builds can erase the
// statement instead of emitting a re-export of bindings that do not exist.
export type {
  ApiConfig,
  SdkConfig,
  RebuffConfig,
  VectorDbConfig,
} from "./config";
84 changes: 77 additions & 7 deletions javascript-sdk/src/lib/vectordb.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,47 @@
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { PineconeClient } from "@pinecone-database/pinecone";
import { PineconeStore } from "langchain/vectorstores/pinecone";
import { VectorStore } from "langchain/vectorstores/base";
import { PineconeClient } from "@pinecone-database/pinecone";
import { Chroma } from "langchain/vectorstores/chroma";
import { Document } from "langchain/document.js";
import { SdkConfig } from "../config";

/**
 * Chroma vector store that reports similarity rather than distance.
 *
 * The rest of the code expects scores where similar vectors are close to 1,
 * while Chroma returns a distance where similar vectors are close to 0. Every
 * score coming back from the parent class is therefore flipped with
 * `1 - distance`. Note that this may not work when the collection uses a
 * distance metric other than cosine.
 */
class ChromaCosineSimilarity extends Chroma {
  /**
   * Delegates the search to the parent class and rewrites each score.
   *
   * @param query - Embedding vector to search with.
   * @param k - Result count forwarded to the parent search.
   * @param filter - Optional filter forwarded unchanged to the parent search.
   * @returns The parent's `[document, score]` pairs with every score replaced
   *   by `1 - score`.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: this["FilterType"]
  ): Promise<[Document<Record<string, any>>, number][]> {
    const matches = await super.similaritySearchVectorWithScore(query, k, filter);
    const rescored: [Document<Record<string, any>>, number][] = [];
    for (const [doc, distance] of matches) {
      rescored.push([doc, 1 - distance]);
    }
    return rescored;
  }
}

export default async function initPinecone(
async function initPinecone(
environment: string,
apiKey: string,
index: string,
openaiApiKey: string,
openaiEmbeddings: OpenAIEmbeddings,
): Promise<PineconeStore> {
if (!environment) {
throw new Error("Pinecone environment definition missing");
}
if (!apiKey) {
throw new Error("Pinecone apikey definition missing");
}
if (!index) {
throw new Error("Pinecone index definition missing");
}
try {
const pinecone = new PineconeClient();

await pinecone.init({
environment,
apiKey,
});
const openaiEmbeddings = new OpenAIEmbeddings({
openAIApiKey: openaiApiKey,
modelName: "text-embedding-ada-002"
});
const pineconeIndex = pinecone.Index(index);
const vectorStore = await PineconeStore.fromExistingIndex(
openaiEmbeddings,
Expand All @@ -38,3 +54,57 @@ export default async function initPinecone(
throw new Error("Failed to initialize Pinecone Client");
}
}

/**
 * Creates a `ChromaCosineSimilarity` store for the given collection and calls
 * `ensureCollection()` on it before handing it back.
 *
 * @param collectionName - Value passed as the store's `collectionName` option.
 * @param url - Value passed as the store's `url` option.
 * @param openaiEmbeddings - Embeddings instance given to the store.
 * @returns The store after `ensureCollection()` has resolved.
 * @throws Error if `url` or `collectionName` is empty, or if constructing the
 *   store or ensuring the collection fails (the underlying error is logged).
 */
async function initChroma(
  collectionName: string,
  url: string,
  openaiEmbeddings: OpenAIEmbeddings,
): Promise<ChromaCosineSimilarity> {
  // Guard clauses run in this order so the url problem is reported first.
  if (!url) {
    throw new Error("Chroma url definition missing");
  }
  if (!collectionName) {
    throw new Error("Chroma collectionName definition missing");
  }

  const storeOptions = {
    collectionName,
    url,
    // NOTE(review): 1536 presumably matches the embedding vector size — confirm.
    numDimensions: 1536,
    // Request the cosine space, which the 1 - distance conversion relies on.
    collectionMetadata: { "hnsw:space": "cosine" },
  };

  try {
    const store = new ChromaCosineSimilarity(openaiEmbeddings, storeOptions);
    await store.ensureCollection();
    return store;
  } catch (cause) {
    console.log("error", cause);
    throw new Error("Failed to initialize Chroma client");
  }
}

/**
 * Creates the vector store selected by `config.vectorDB`.
 *
 * A single `OpenAIEmbeddings` instance (model `text-embedding-ada-002`, key
 * taken from `config.openai.apikey`) is built and handed to whichever backend
 * initializer matches: `initPinecone` when `vectorDB` has a `pinecone` key,
 * `initChroma` when it has a `chroma` key. When both keys are present,
 * `pinecone` takes precedence.
 *
 * @param config - SDK configuration holding the backend and OpenAI settings.
 * @returns The initialized vector store.
 * @throws Error if `config.vectorDB` is missing or names neither backend, or
 *   if the selected backend fails to initialize.
 */
export default async function initVectorStore(
  config: SdkConfig
): Promise<VectorStore> {
  const vectorDB = config.vectorDB;

  // Callers that are not type-checked can arrive here without `vectorDB`.
  // The `in` checks below would then throw an opaque TypeError, so fail with
  // an actionable message instead.
  if (typeof vectorDB !== "object" || vectorDB === null) {
    throw new Error("vectorDB definition missing");
  }

  const openaiEmbeddings = new OpenAIEmbeddings({
    openAIApiKey: config.openai.apikey,
    modelName: "text-embedding-ada-002",
  });

  if ("pinecone" in vectorDB) {
    const { environment, apikey, index } = vectorDB.pinecone;
    return initPinecone(environment, apikey, index, openaiEmbeddings);
  }

  // Check for `chroma` explicitly rather than assuming it in an `else`, so an
  // unrecognized shape is reported instead of dereferencing `undefined`.
  if ("chroma" in vectorDB) {
    const { collectionName, url } = vectorDB.chroma;
    return initChroma(collectionName, url, openaiEmbeddings);
  }

  throw new Error("vectorDB must define either pinecone or chroma");
}
9 changes: 2 additions & 7 deletions javascript-sdk/src/sdk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import {
} from "./interface";
import crypto from "crypto";
import { SdkConfig } from "./config";
import initPinecone from "./lib/vectordb";
import initVectorStore from "./lib/vectordb";
import {
callOpenAiToDetectPI,
detectPiUsingVectorDatabase,
Expand Down Expand Up @@ -160,12 +160,7 @@ export default class RebuffSdk implements Rebuff {
if (this.vectorStore) {
return this.vectorStore;
}
this.vectorStore = await initPinecone(
this.sdkConfig.pinecone.environment,
this.sdkConfig.pinecone.apikey,
this.sdkConfig.pinecone.index,
this.sdkConfig.openai.apikey
);
this.vectorStore = await initVectorStore(this.sdkConfig);
return this.vectorStore
}

Expand Down
Loading

0 comments on commit e343f94

Please sign in to comment.