From d369d85a160a75e83c1c98712e26346d90053256 Mon Sep 17 00:00:00 2001 From: Fei-Guo Date: Tue, 17 Sep 2024 10:41:21 -0700 Subject: [PATCH 01/42] feat: Add RAGEngine CRD --- api/v1alpha1/ragengine_types.go | 109 +++++++++ api/v1alpha1/zz_generated.deepcopy.go | 205 +++++++++++++++++ config/crd/bases/kaito.sh_ragengines.yaml | 269 ++++++++++++++++++++++ presets/models/falcon/model.go | 4 +- presets/models/mistral/model.go | 4 +- presets/models/phi2/model.go | 4 +- presets/models/phi3/model.go | 4 +- 7 files changed, 591 insertions(+), 8 deletions(-) create mode 100644 api/v1alpha1/ragengine_types.go create mode 100644 config/crd/bases/kaito.sh_ragengines.yaml diff --git a/api/v1alpha1/ragengine_types.go b/api/v1alpha1/ragengine_types.go new file mode 100644 index 000000000..a5d35205e --- /dev/null +++ b/api/v1alpha1/ragengine_types.go @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type StorageSpec struct { + //TODO: add vendor specific APIs for accessing vector DB services here. +} + +type RemoteEmbeddingSpec struct { + // URL points to a publicly available embedding service, such as OpenAI. + URL string `json:"url"` + // AccessSecret is the name of the secret that contains the service access token. + // +optional + AccessSecret string `json:"accessSecret,omitempty"` +} + +type LocalEmbeddingSpec struct { + // Image is the name of the containerized embedding model image. + // +optional + Image string `json:"image,omitempty"` + // +optional + ImagePullSecret string `json:"imagePullSecret,omitempty"` + // ModelID is the ID of the embedding model hosted by huggingface, e.g., BAAI/bge-small-en-v1.5. + // When this field is specified, the RAG engine will download the embedding model + // from huggingface repository during startup. The embedding model will not persist in local storage. 
+ // Note that if Image is specified, ModelID should not be specified and vice versa. + // +optional + ModelID string `json:"modelID,omitempty"` + // ModelAccessSecret is the name of the secret that contains the huggingface access token. + // +optional + ModelAccessSecret string `json:"modelAccessSecret,omitempty"` +} + +type EmbeddingSpec struct { + // Remote specifies how to generate embeddings for index data using a remote service. + // Note that either Remote or Local needs to be specified, not both. + // +optional + Remote *RemoteEmbeddingSpec `json:"remote,omitempty"` + // Local specifies how to generate embeddings for index data using a model run locally. + // +optional + Local *LocalEmbeddingSpec `json:"local,omitempty"` +} + +type InferenceServiceSpec struct { + // URL points to a running inference service endpoint which accepts http(s) payload. + URL string `json:"url"` + // AccessSecret is the name of the secret that contains the service access token. + // +optional + AccessSecret string `json:"accessSecret,omitempty"` +} + +type RAGEngineSpec struct { + // Compute specifies the dedicated GPU resource used by an embedding model running locally if required. + // +optional + Compute *ResourceSpec `json:"compute,omitempty"` + // Storage specifies how to access the vector database used to save the embedding vectors. + // If this field is not specified, by default, an in-memory vector DB will be used. + // The data will not be persisted. + // +optional + Storage *StorageSpec `json:"storage,omitempty"` + // Embedding specifies whether the RAG engine generates embedding vectors using a remote service + // or using a embedding model running locally. + Embedding *EmbeddingSpec `json:"embedding"` + InferenceService *InferenceServiceSpec `json:"inferenceService"` + // QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the + // inference service. 
If not specified, a default service name will be created by the RAG engine. + // +optional + QueryServiceName string `json:"queryServiceName,omitempty"` + // IndexServiceName is the name of the service which exposes the endpoint for user to input the index data + // to generate embeddings. If not specified, a default service name will be created by the RAG engine. + // +optional + IndexServiceName string `json:"indexServiceName,omitempty"` +} + +// RAGEngineStatus defines the observed state of RAGEngine +type RAGEngineStatus struct { + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// RAGEngine is the Schema for the ragengine API +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:path=ragengines,scope=Namespaced,categories=ragengine +// +kubebuilder:storageversion +type RAGEngine struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec *RAGEngineSpec `json:"spec,omitempty"` + + Status RAGEngineStatus `json:"status,omitempty"` +} + +// RAGEngineList contains a list of RAGEngine +// +kubebuilder:object:root=true +type RAGEngineList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []RAGEngine `json:"items"` +} + +func init() { + SchemeBuilder.Register(&RAGEngine{}, &RAGEngineList{}) +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 4a0517171..ef55fed6a 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -104,6 +104,31 @@ func (in *DataSource) DeepCopy() *DataSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *EmbeddingSpec) DeepCopyInto(out *EmbeddingSpec) { + *out = *in + if in.Remote != nil { + in, out := &in.Remote, &out.Remote + *out = new(RemoteEmbeddingSpec) + **out = **in + } + if in.Local != nil { + in, out := &in.Local, &out.Local + *out = new(LocalEmbeddingSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingSpec. +func (in *EmbeddingSpec) DeepCopy() *EmbeddingSpec { + if in == nil { + return nil + } + out := new(EmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPUConfig) DeepCopyInto(out *GPUConfig) { *out = *in @@ -124,6 +149,21 @@ func (in *GPUConfig) DeepCopy() *GPUConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceSpec. +func (in *InferenceServiceSpec) DeepCopy() *InferenceServiceSpec { + if in == nil { + return nil + } + out := new(InferenceServiceSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferenceSpec) DeepCopyInto(out *InferenceSpec) { *out = *in @@ -156,6 +196,21 @@ func (in *InferenceSpec) DeepCopy() *InferenceSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LocalEmbeddingSpec) DeepCopyInto(out *LocalEmbeddingSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalEmbeddingSpec. 
+func (in *LocalEmbeddingSpec) DeepCopy() *LocalEmbeddingSpec { + if in == nil { + return nil + } + out := new(LocalEmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PresetMeta) DeepCopyInto(out *PresetMeta) { *out = *in @@ -208,6 +263,141 @@ func (in *PresetSpec) DeepCopy() *PresetSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngine) DeepCopyInto(out *RAGEngine) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Spec != nil { + in, out := &in.Spec, &out.Spec + *out = new(RAGEngineSpec) + (*in).DeepCopyInto(*out) + } + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngine. +func (in *RAGEngine) DeepCopy() *RAGEngine { + if in == nil { + return nil + } + out := new(RAGEngine) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RAGEngine) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineList) DeepCopyInto(out *RAGEngineList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]RAGEngine, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineList. 
+func (in *RAGEngineList) DeepCopy() *RAGEngineList { + if in == nil { + return nil + } + out := new(RAGEngineList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RAGEngineList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineSpec) DeepCopyInto(out *RAGEngineSpec) { + *out = *in + if in.Compute != nil { + in, out := &in.Compute, &out.Compute + *out = new(ResourceSpec) + (*in).DeepCopyInto(*out) + } + if in.Storage != nil { + in, out := &in.Storage, &out.Storage + *out = new(StorageSpec) + **out = **in + } + if in.Embedding != nil { + in, out := &in.Embedding, &out.Embedding + *out = new(EmbeddingSpec) + (*in).DeepCopyInto(*out) + } + if in.InferenceService != nil { + in, out := &in.InferenceService, &out.InferenceService + *out = new(InferenceServiceSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineSpec. +func (in *RAGEngineSpec) DeepCopy() *RAGEngineSpec { + if in == nil { + return nil + } + out := new(RAGEngineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineStatus) DeepCopyInto(out *RAGEngineStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineStatus. 
+func (in *RAGEngineStatus) DeepCopy() *RAGEngineStatus { + if in == nil { + return nil + } + out := new(RAGEngineStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoteEmbeddingSpec) DeepCopyInto(out *RemoteEmbeddingSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteEmbeddingSpec. +func (in *RemoteEmbeddingSpec) DeepCopy() *RemoteEmbeddingSpec { + if in == nil { + return nil + } + out := new(RemoteEmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ResourceSpec) DeepCopyInto(out *ResourceSpec) { *out = *in @@ -238,6 +428,21 @@ func (in *ResourceSpec) DeepCopy() *ResourceSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StorageSpec) DeepCopyInto(out *StorageSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageSpec. +func (in *StorageSpec) DeepCopy() *StorageSpec { + if in == nil { + return nil + } + out := new(StorageSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *TrainingConfig) DeepCopyInto(out *TrainingConfig) { *out = *in diff --git a/config/crd/bases/kaito.sh_ragengines.yaml b/config/crd/bases/kaito.sh_ragengines.yaml new file mode 100644 index 000000000..7b1ec3f55 --- /dev/null +++ b/config/crd/bases/kaito.sh_ragengines.yaml @@ -0,0 +1,269 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: ragengines.kaito.sh +spec: + group: kaito.sh + names: + categories: + - ragengine + kind: RAGEngine + listKind: RAGEngineList + plural: ragengines + singular: ragengine + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: RAGEngine is the Schema for the ragengine API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + compute: + description: Compute specifies the dedicated GPU resource used by + an embedding model running locally if required. + properties: + count: + default: 1 + description: Count is the required number of GPU nodes. + type: integer + instanceType: + default: Standard_NC12s_v3 + description: |- + InstanceType specifies the GPU node SKU. + This field defaults to "Standard_NC12s_v3" if not specified. 
+ type: string + labelSelector: + description: LabelSelector specifies the required labels for the + GPU nodes. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + preferredNodes: + description: |- + PreferredNodes is an optional node list specified by the user. + If a node in the list does not have the required labels or + the required instanceType, it will be ignored. + items: + type: string + type: array + required: + - labelSelector + type: object + embedding: + description: |- + Embedding specifies whether the RAG engine generates embedding vectors using a remote service + or using a embedding model running locally. 
+ properties: + local: + description: Local specifies how to generate embeddings for index + data using a model run locally. + properties: + image: + description: Image is the name of the containerized embedding + model image. + type: string + imagePullSecret: + type: string + modelAccessSecret: + description: ModelAccessSecret is the name of the secret that + contains the huggingface access token. + type: string + modelID: + description: |- + ModelID is the ID of the embedding model hosted by huggingface. + When this field is specified, the RAG engine will download the embedding model + from huggingface repository during startup. The embedding model will not persist in local storage. + Note that if Image is specified, ModelID should not be specified and vice versa. + type: string + type: object + remote: + description: |- + Remote specifies how to generate embeddings for index data using a remote service. + Note that either Remote or Local needs to be specified, not both. + properties: + accessSecret: + description: AccessSecret is the name of the secret that contains + the service access token. + type: string + url: + description: URL points to a publicly available embedding + service, such as OpenAI. + type: string + required: + - url + type: object + type: object + indexServiceName: + description: |- + IndexServiceName is the name of the service which exposes the endpoint for user to input the index data + to generate embeddings. If not specified, a default service name will be created by the RAG engine. + type: string + inferencService: + properties: + accessSecret: + description: AccessSecret is the name of the secret that contains + the service access token. + type: string + url: + description: URL points to a running inference service endpoint + which accepts http(s) payload. 
+ type: string + required: + - url + type: object + queryServiceName: + description: |- + QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the + inference service. If not specified, a default service name will be created by the RAG engine. + type: string + storage: + description: |- + Storage specifies how to access the vector database used to save the embedding vectors. + If this field is not specified, by default, an in-memoty vector DB will be used. + The data will not be persisted. + type: object + required: + - embedding + - inferencService + type: object + status: + description: RAGEngineStatus defines the observed state of RAGEngine + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/presets/models/falcon/model.go b/presets/models/falcon/model.go index a94fa81f2..74c39995f 100644 --- a/presets/models/falcon/model.go +++ b/presets/models/falcon/model.go @@ -45,8 +45,8 @@ var ( } baseCommandPresetFalconInference = "accelerate launch" - baseCommandPresetFalconTuning = "python3 metrics_server.py & accelerate launch" - falconRunParams = map[string]string{ + baseCommandPresetFalconTuning = "python3 metrics_server.py & accelerate launch" + falconRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", } diff --git a/presets/models/mistral/model.go b/presets/models/mistral/model.go index ebab6fbe9..b4581d6f1 100644 --- a/presets/models/mistral/model.go +++ b/presets/models/mistral/model.go @@ -32,8 +32,8 @@ var ( } baseCommandPresetMistralInference = "accelerate launch" - baseCommandPresetMistralTuning = "python3 metrics_server.py & accelerate launch" - mistralRunParams = map[string]string{ + baseCommandPresetMistralTuning = "python3 metrics_server.py & accelerate launch" + mistralRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", } diff --git a/presets/models/phi2/model.go b/presets/models/phi2/model.go index 731043f11..07fb8e0d2 100644 --- a/presets/models/phi2/model.go +++ b/presets/models/phi2/model.go @@ -26,8 +26,8 @@ var ( } baseCommandPresetPhiInference = "accelerate launch" - baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" - phiRunParams = map[string]string{ + baseCommandPresetPhiTuning = "python3 metrics_server.py & 
accelerate launch" + phiRunParams = map[string]string{ "torch_dtype": "float16", "pipeline": "text-generation", } diff --git a/presets/models/phi3/model.go b/presets/models/phi3/model.go index c645b99e5..5656fc15a 100644 --- a/presets/models/phi3/model.go +++ b/presets/models/phi3/model.go @@ -44,8 +44,8 @@ var ( } baseCommandPresetPhiInference = "accelerate launch" - baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" - phiRunParams = map[string]string{ + baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" + phiRunParams = map[string]string{ "torch_dtype": "auto", "pipeline": "text-generation", "trust_remote_code": "", From 47c1ce6a030ce6d5c95eb5e16c10977fa0af3c40 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 20 Sep 2024 01:44:31 -0500 Subject: [PATCH 02/42] feat: New RAG Service Signed-off-by: ishaansehgal99 --- presets/rag_service/__init__.py | 0 presets/rag_service/config.py | 10 +++ presets/rag_service/crud/__init__.py | 0 presets/rag_service/crud/operations.py | 39 +++++++++ presets/rag_service/embedding/__init__.py | 0 presets/rag_service/embedding/base.py | 7 ++ .../embedding/huggingface_local.py | 11 +++ .../embedding/huggingface_remote.py | 12 +++ presets/rag_service/main.py | 81 +++++++++++++++++++ presets/rag_service/models.py | 29 +++++++ presets/rag_service/vector_store/__init__.py | 0 presets/rag_service/vector_store/base.py | 42 ++++++++++ .../rag_service/vector_store/faiss_store.py | 61 ++++++++++++++ 13 files changed, 292 insertions(+) create mode 100644 presets/rag_service/__init__.py create mode 100644 presets/rag_service/config.py create mode 100644 presets/rag_service/crud/__init__.py create mode 100644 presets/rag_service/crud/operations.py create mode 100644 presets/rag_service/embedding/__init__.py create mode 100644 presets/rag_service/embedding/base.py create mode 100644 presets/rag_service/embedding/huggingface_local.py create mode 100644 
presets/rag_service/embedding/huggingface_remote.py create mode 100644 presets/rag_service/main.py create mode 100644 presets/rag_service/models.py create mode 100644 presets/rag_service/vector_store/__init__.py create mode 100644 presets/rag_service/vector_store/base.py create mode 100644 presets/rag_service/vector_store/faiss_store.py diff --git a/presets/rag_service/__init__.py b/presets/rag_service/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/config.py b/presets/rag_service/config.py new file mode 100644 index 000000000..e5086fed0 --- /dev/null +++ b/presets/rag_service/config.py @@ -0,0 +1,10 @@ +# config.py +import os + +EMBEDDING_TYPE = os.getenv("EMBEDDING_TYPE", "local") +EMBEDDING_URL = os.getenv("EMBEDDING_URL") +MODEL_ID = os.getenv("MODEL_ID", "BAAI/bge-small-en-v1.5") +VECTOR_DB_TYPE = os.getenv("VECTOR_DB_TYPE", "faiss") +INDEX_SERVICE_NAME = os.getenv("INDEX_SERVICE_NAME", "default-index-service") +ACCESS_SECRET = os.getenv("ACCESS_SECRET") +PERSIST_DIR = "./storage" \ No newline at end of file diff --git a/presets/rag_service/crud/__init__.py b/presets/rag_service/crud/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/crud/operations.py b/presets/rag_service/crud/operations.py new file mode 100644 index 000000000..9a5003de7 --- /dev/null +++ b/presets/rag_service/crud/operations.py @@ -0,0 +1,39 @@ +from typing import Dict, List + +from models import Document +from vector_store.base import BaseVectorStore + + +class RAGOperations: + def __init__(self, vector_store: BaseVectorStore): + self.vector_store = vector_store + + def create(self, documents: List[Document]) -> List[str]: + return self.vector_store.index_documents(documents) + + def read(self, query: str, top_k: int): + return self.vector_store.query(query, top_k) + + def update(self, documents: List[Document]) -> Dict[str, List[str]]: + updated_docs = [] + new_docs = [] + for doc in documents: + if 
doc.doc_id and self.vector_store.document_exists(doc.doc_id): + self.vector_store.update_document(doc) + updated_docs.append(doc.doc_id) + else: + self.vector_store.add_document(doc) + new_docs.extend(doc.doc_id) + return {"updated": updated_docs, "inserted": new_docs} + + def delete(self, doc_id: str): + return self.vector_store.delete_document(doc_id) + + def get(self, doc_id: str) -> Document: + return self.vector_store.get_document(doc_id) + + def list_all(self) -> Dict[str, Document]: + return self.vector_store.list_documents() + + def refresh(self, documents: List[Document]) -> List[bool]: + return self.vector_store.refresh_documents(documents) diff --git a/presets/rag_service/embedding/__init__.py b/presets/rag_service/embedding/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/embedding/base.py b/presets/rag_service/embedding/base.py new file mode 100644 index 000000000..ba5a8573e --- /dev/null +++ b/presets/rag_service/embedding/base.py @@ -0,0 +1,7 @@ +from abc import ABC, abstractmethod + + +class BaseEmbeddingModel(ABC): + @abstractmethod + def get_text_embedding(self, text: str): + pass \ No newline at end of file diff --git a/presets/rag_service/embedding/huggingface_local.py b/presets/rag_service/embedding/huggingface_local.py new file mode 100644 index 000000000..be380a8d5 --- /dev/null +++ b/presets/rag_service/embedding/huggingface_local.py @@ -0,0 +1,11 @@ +from llama_index.embeddings.huggingface import HuggingFaceEmbedding + +from .base import BaseEmbeddingModel + + +class LocalHuggingFaceEmbedding(BaseEmbeddingModel): + def __init__(self, model_name: str): + self.model = HuggingFaceEmbedding(model_name=model_name) + + def get_text_embedding(self, text: str): + return self.model.get_text_embedding(text) diff --git a/presets/rag_service/embedding/huggingface_remote.py b/presets/rag_service/embedding/huggingface_remote.py new file mode 100644 index 000000000..c3314ccb6 --- /dev/null +++ 
b/presets/rag_service/embedding/huggingface_remote.py @@ -0,0 +1,12 @@ +from llama_index.embeddings.huggingface_api import \ + HuggingFaceInferenceAPIEmbedding + +from .base import BaseEmbeddingModel + + +class RemoteHuggingFaceEmbedding(BaseEmbeddingModel): + def __init__(self, model_name: str, api_key: str): + self.model = HuggingFaceInferenceAPIEmbedding(model_name=model_name, api_key=api_key) + + def get_text_embedding(self, text: str): + return self.model.get_text_embedding(text) diff --git a/presets/rag_service/main.py b/presets/rag_service/main.py new file mode 100644 index 000000000..953926cdf --- /dev/null +++ b/presets/rag_service/main.py @@ -0,0 +1,81 @@ +from typing import Dict, List + +from crud.operations import RAGOperations +from embedding import get_embedding_model +from fastapi import FastAPI, HTTPException +from models import (DocumentResponse, IndexRequest, ListDocumentsResponse, + QueryRequest, RefreshRequest, UpdateRequest) +from vector_store.faiss_store import FaissVectorStoreManager + +from config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID + +app = FastAPI() + +# Initialize embedding model +embed_model = get_embedding_model(EMBEDDING_TYPE, MODEL_ID, ACCESS_SECRET) + +# Initialize vector store +vector_store = FaissVectorStoreManager(dimension=384, embed_model=embed_model) + +# Initialize RAG operations +rag_ops = RAGOperations(vector_store) + +@app.post("/index", response_model=List[str]) +async def index_documents(request: IndexRequest): + try: + doc_ids = rag_ops.create(request.documents) + return doc_ids + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/query") +async def query_index(request: QueryRequest): + try: + response = rag_ops.read(request.query, request.top_k) + return {"response": str(response)} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.put("/update", response_model=Dict[str, List[str]]) +async def update_documents(request: 
UpdateRequest): + try: + result = rag_ops.update(request.documents) + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/refresh", response_model=List[bool]) +async def refresh_documents(request: RefreshRequest): + try: + result = rag_ops.refresh(request.documents) + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.delete("/document/{doc_id}") +async def delete_document(doc_id: str): + try: + rag_ops.delete(doc_id) + return {"message": "Document deleted successfully"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/document/{doc_id}", response_model=DocumentResponse) +async def get_document(doc_id: str): + try: + document = rag_ops.get(doc_id) + return DocumentResponse(doc_id=doc_id, document=document) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/documents", response_model=ListDocumentsResponse) +async def list_documents(): + try: + documents = rag_ops.list_all() + return ListDocumentsResponse(documents=documents) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/presets/rag_service/models.py b/presets/rag_service/models.py new file mode 100644 index 000000000..a1d21537b --- /dev/null +++ b/presets/rag_service/models.py @@ -0,0 +1,29 @@ +from typing import Dict, List, Optional + +from pydantic import BaseModel + + +class Document(BaseModel): + text: str + metadata: Optional[dict] = {} + doc_id: Optional[str] = None + +class IndexRequest(BaseModel): + documents: List[Document] + +class QueryRequest(BaseModel): + query: str + top_k: int = 10 + +class UpdateRequest(BaseModel): + documents: List[Document] + +class RefreshRequest(BaseModel): + documents: List[Document] + +class DocumentResponse(BaseModel): + 
doc_id: str + document: Document + +class ListDocumentsResponse(BaseModel): + documents: Dict[str, Document] \ No newline at end of file diff --git a/presets/rag_service/vector_store/__init__.py b/presets/rag_service/vector_store/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/vector_store/base.py b/presets/rag_service/vector_store/base.py new file mode 100644 index 000000000..b448bc213 --- /dev/null +++ b/presets/rag_service/vector_store/base.py @@ -0,0 +1,42 @@ +from abc import ABC, abstractmethod +from typing import Dict, List + +from models import Document + + +class BaseVectorStore(ABC): + @abstractmethod + def index_documents(self, documents: List[Document]) -> List[str]: + pass + + @abstractmethod + def query(self, query: str, top_k: int): + pass + + @abstractmethod + def add_document(self, document: Document): + pass + + @abstractmethod + def delete_document(self, doc_id: str): + pass + + @abstractmethod + def update_document(self, document: Document) -> str: + pass + + @abstractmethod + def get_document(self, doc_id: str) -> Document: + pass + + @abstractmethod + def list_documents(self) -> Dict[str, Document]: + pass + + @abstractmethod + def document_exists(self, doc_id: str) -> bool: + pass + + @abstractmethod + def refresh_documents(self, documents: List[Document]) -> List[bool]: + pass \ No newline at end of file diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py new file mode 100644 index 000000000..6024f024d --- /dev/null +++ b/presets/rag_service/vector_store/faiss_store.py @@ -0,0 +1,61 @@ +import os + +import faiss +from llama_index.core import Document as LlamaDocument +from llama_index.core import StorageContext, VectorStoreIndex +from llama_index.vector_stores.faiss import FaissVectorStore +from models import Document + +from config import PERSIST_DIR + +from .base import BaseVectorStore + + +class FaissVectorStoreManager(BaseVectorStore): 
+ def __init__(self, dimension: int, embed_model): + self.dimension = dimension + self.embed_model = embed_model + self.faiss_index = faiss.IndexFlatL2(self.dimension) + self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) + self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + + if not os.path.exists(PERSIST_DIR): + os.makedirs(PERSIST_DIR) + + def index_documents(self, documents: List[Document]): + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + self.storage_context.persist(persist_dir=PERSIST_DIR) + return index + + def query(self, query: str, top_k: int): + index = self._load_index() + query_engine = index.as_query_engine(top_k=top_k) + return query_engine.query(query) + + def add_document(self, document: Document): + index = self._load_index() + index.insert(document) + + def delete_document(self, doc_id: str): + index = self._load_index() + index.delete_ref_doc(doc_id, delete_from_docstore=True) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def update_document(self, document: Document): + index = self._load_index() + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + index.update_ref_doc(llama_doc) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def get_document(self, doc_id: str): + index = self._load_index() + doc = index.docstore.get_document(doc_id) + if not doc: + raise ValueError(f"Document with ID {doc_id} not found.") + return doc + + def _load_index(self): + vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR) + storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir=PERSIST_DIR) + return VectorStoreIndex.from_storage(storage_context) From c8bfa185f1a26a588c24a56419834ecfd6ed9eb1 Mon Sep 17 00:00:00 2001 From: 
ishaansehgal99 Date: Fri, 20 Sep 2024 02:06:30 -0500 Subject: [PATCH 03/42] feat: New RAG Service Signed-off-by: ishaansehgal99 --- presets/rag_service/crud/operations.py | 11 ++++- presets/rag_service/vector_store/base.py | 8 ++++ .../rag_service/vector_store/faiss_store.py | 45 ++++++++++++++++--- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/presets/rag_service/crud/operations.py b/presets/rag_service/crud/operations.py index 9a5003de7..e5c670e0e 100644 --- a/presets/rag_service/crud/operations.py +++ b/presets/rag_service/crud/operations.py @@ -9,12 +9,15 @@ def __init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store def create(self, documents: List[Document]) -> List[str]: + """Index new documents.""" return self.vector_store.index_documents(documents) def read(self, query: str, top_k: int): + """Query the indexed documents.""" return self.vector_store.query(query, top_k) def update(self, documents: List[Document]) -> Dict[str, List[str]]: + """Update existing documents, or insert new ones if they don’t exist.""" updated_docs = [] new_docs = [] for doc in documents: @@ -22,18 +25,22 @@ def update(self, documents: List[Document]) -> Dict[str, List[str]]: self.vector_store.update_document(doc) updated_docs.append(doc.doc_id) else: - self.vector_store.add_document(doc) - new_docs.extend(doc.doc_id) + self.vector_store.add_document(doc) # Only inserts new document, no reindex + new_docs.append(doc.doc_id) return {"updated": updated_docs, "inserted": new_docs} def delete(self, doc_id: str): + """Delete a document by ID.""" return self.vector_store.delete_document(doc_id) def get(self, doc_id: str) -> Document: + """Retrieve a document by ID.""" return self.vector_store.get_document(doc_id) def list_all(self) -> Dict[str, Document]: + """List all documents.""" return self.vector_store.list_documents() def refresh(self, documents: List[Document]) -> List[bool]: + """Dummy method for refresh, if needed.""" return 
self.vector_store.refresh_documents(documents) diff --git a/presets/rag_service/vector_store/base.py b/presets/rag_service/vector_store/base.py index b448bc213..b791bb7e6 100644 --- a/presets/rag_service/vector_store/base.py +++ b/presets/rag_service/vector_store/base.py @@ -39,4 +39,12 @@ def document_exists(self, doc_id: str) -> bool: @abstractmethod def refresh_documents(self, documents: List[Document]) -> List[bool]: + pass + + @abstractmethod + def list_documents(self) -> Dict[str, Document]: + pass + + @abstractmethod + def document_exists(self, doc_id: str) -> bool: pass \ No newline at end of file diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index 6024f024d..b7c274345 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -1,4 +1,5 @@ import os +from typing import Dict, List import faiss from llama_index.core import Document as LlamaDocument @@ -18,44 +19,76 @@ def __init__(self, dimension: int, embed_model): self.faiss_index = faiss.IndexFlatL2(self.dimension) self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) - + if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) def index_documents(self, documents: List[Document]): + """Indexes new documents.""" llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) self.storage_context.persist(persist_dir=PERSIST_DIR) - return index + return [doc.doc_id for doc in documents] + + def add_document(self, document: Document): + """Inserts a single document into the existing FAISS index.""" + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + index = self._load_index() + 
index.insert(llama_doc) + self.storage_context.persist(persist_dir=PERSIST_DIR) def query(self, query: str, top_k: int): + """Queries the FAISS vector store.""" index = self._load_index() query_engine = index.as_query_engine(top_k=top_k) return query_engine.query(query) - - def add_document(self, document: Document): - index = self._load_index() - index.insert(document) def delete_document(self, doc_id: str): + """Deletes a document from the FAISS vector store.""" index = self._load_index() index.delete_ref_doc(doc_id, delete_from_docstore=True) self.storage_context.persist(persist_dir=PERSIST_DIR) def update_document(self, document: Document): + """Updates an existing document in the FAISS vector store.""" index = self._load_index() llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) index.update_ref_doc(llama_doc) self.storage_context.persist(persist_dir=PERSIST_DIR) def get_document(self, doc_id: str): + """Retrieves a document by its ID.""" index = self._load_index() doc = index.docstore.get_document(doc_id) if not doc: raise ValueError(f"Document with ID {doc_id} not found.") return doc + def refresh_documents(self, documents: List[Document]) -> List[bool]: + """Updates existing documents and inserts new documents in the vector store.""" + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + refresh_results = self.index.refresh_ref_docs(llama_docs) + self._persist() + # Returns a list of booleans indicating whether each document was successfully refreshed. 
+ return refresh_results + + def list_documents(self) -> Dict[str, Document]: + """Lists all documents in the vector store.""" + index = self._load_index() + return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) + for doc_id, doc in index.docstore.docs.items()} + + def document_exists(self, doc_id: str) -> bool: + """Checks if a document exists in the vector store.""" + index = self._load_index() + return doc_id in index.docstore.docs + def _load_index(self): + """Loads the existing FAISS index from disk.""" vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR) storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir=PERSIST_DIR) return VectorStoreIndex.from_storage(storage_context) + + def _persist(self): + """Saves the existing FAISS index to disk.""" + self.storage_context.persist(persist_dir=PERSIST_DIR) From a28a8d5ec0743dd3e466e7481e25facd39437693 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 20 Sep 2024 11:07:24 -0500 Subject: [PATCH 04/42] fix: Use local index object Signed-off-by: ishaansehgal99 --- .../rag_service/vector_store/faiss_store.py | 45 ++++++++++++------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index b7c274345..a75ce00b7 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -19,53 +19,62 @@ def __init__(self, dimension: int, embed_model): self.faiss_index = faiss.IndexFlatL2(self.dimension) self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + self.index = None # Use to store the in-memory index if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) def index_documents(self, documents: List[Document]): - """Indexes new documents.""" + """Recreates the entire FAISS index and vector store 
with new documents.""" llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) - self.storage_context.persist(persist_dir=PERSIST_DIR) + self.index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + self._persist() + # Return the document IDs that were indexed return [doc.doc_id for doc in documents] def add_document(self, document: Document): """Inserts a single document into the existing FAISS index.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - index = self._load_index() - index.insert(llama_doc) + self.index.insert(llama_doc) self.storage_context.persist(persist_dir=PERSIST_DIR) def query(self, query: str, top_k: int): """Queries the FAISS vector store.""" - index = self._load_index() - query_engine = index.as_query_engine(top_k=top_k) + if self.index is None: + self.index = self._load_index() # Load if not already in memory + query_engine = self.index.as_query_engine(top_k=top_k) return query_engine.query(query) def delete_document(self, doc_id: str): """Deletes a document from the FAISS vector store.""" - index = self._load_index() - index.delete_ref_doc(doc_id, delete_from_docstore=True) + if self.index is None: + self.index = self._load_index() # Load if not already in memory + self.index.delete_ref_doc(doc_id, delete_from_docstore=True) self.storage_context.persist(persist_dir=PERSIST_DIR) def update_document(self, document: Document): """Updates an existing document in the FAISS vector store.""" - index = self._load_index() + if self.index is None: + self.index = self._load_index() # Load if not already in memory llama_doc = LlamaDocument(text=document.text, 
metadata=document.metadata, id_=document.doc_id) - index.update_ref_doc(llama_doc) + self.index.update_ref_doc(llama_doc) self.storage_context.persist(persist_dir=PERSIST_DIR) def get_document(self, doc_id: str): """Retrieves a document by its ID.""" - index = self._load_index() - doc = index.docstore.get_document(doc_id) + if self.index is None: + self.index = self._load_index() # Load if not already in memory + doc = self.index.docstore.get_document(doc_id) if not doc: raise ValueError(f"Document with ID {doc_id} not found.") return doc def refresh_documents(self, documents: List[Document]) -> List[bool]: """Updates existing documents and inserts new documents in the vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] refresh_results = self.index.refresh_ref_docs(llama_docs) self._persist() @@ -74,14 +83,16 @@ def refresh_documents(self, documents: List[Document]) -> List[bool]: def list_documents(self) -> Dict[str, Document]: """Lists all documents in the vector store.""" - index = self._load_index() + if self.index is None: + self.index = self._load_index() # Load if not already in memory return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - for doc_id, doc in index.docstore.docs.items()} + for doc_id, doc in self.index.docstore.docs.items()} def document_exists(self, doc_id: str) -> bool: """Checks if a document exists in the vector store.""" - index = self._load_index() - return doc_id in index.docstore.docs + if self.index is None: + self.index = self._load_index() # Load if not already in memory + return doc_id in self.index.docstore.docs def _load_index(self): """Loads the existing FAISS index from disk.""" From 63ef83d8d925286efa978bc62687165436d4811f Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 20 Sep 2024 11:11:04 -0500 Subject: [PATCH 05/42] fix: Load Index 
Signed-off-by: ishaansehgal99 --- presets/rag_service/vector_store/faiss_store.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index a75ce00b7..ef6e5483d 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -3,7 +3,9 @@ import faiss from llama_index.core import Document as LlamaDocument -from llama_index.core import StorageContext, VectorStoreIndex +from llama_index.core import (StorageContext, VectorStoreIndex, + load_graph_from_storage, load_index_from_storage, + load_indices_from_storage) from llama_index.vector_stores.faiss import FaissVectorStore from models import Document @@ -97,8 +99,10 @@ def document_exists(self, doc_id: str) -> bool: def _load_index(self): """Loads the existing FAISS index from disk.""" vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR) - storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir=PERSIST_DIR) - return VectorStoreIndex.from_storage(storage_context) + storage_context = StorageContext.from_defaults( + vector_store=vector_store, persist_dir=PERSIST_DIR + ) + return load_index_from_storage(storage_context=storage_context) def _persist(self): """Saves the existing FAISS index to disk.""" From ff03456f422652ec4cf6caabb38c33d17e830db0 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 23 Sep 2024 19:07:36 -0700 Subject: [PATCH 06/42] fix: Add ChromaDB VectorStore Signed-off-by: ishaansehgal99 --- .../vector_store/chromadb_playground.py | 62 ++++++++++ .../vector_store/chromadb_store.py | 110 ++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 presets/rag_service/vector_store/chromadb_playground.py create mode 100644 presets/rag_service/vector_store/chromadb_store.py diff --git a/presets/rag_service/vector_store/chromadb_playground.py 
b/presets/rag_service/vector_store/chromadb_playground.py new file mode 100644 index 000000000..31a5af077 --- /dev/null +++ b/presets/rag_service/vector_store/chromadb_playground.py @@ -0,0 +1,62 @@ +from llama_index.core import Settings +from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI + +remote_llm_api = HuggingFaceInferenceAPI( + model_name="HuggingFaceH4/zephyr-7b-alpha" +) + +Settings.llm = remote_llm_api + +import logging + +import chromadb +from IPython.display import Markdown, display +from llama_index.core import (SimpleDirectoryReader, StorageContext, + VectorStoreIndex) +from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.vector_stores.chroma import ChromaVectorStore + +# Enable DEBUG logging for ChromaDB +logging.basicConfig(level=logging.DEBUG) + +# create ChromaDB client and a new collection +chroma_client = chromadb.EphemeralClient() +chroma_collection = chroma_client.create_collection("quickstart") + +# define embedding function +embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5") + +# load documents from directory +documents = SimpleDirectoryReader("./data/paul_graham/").load_data() + +# set up ChromaVectorStore and load in data +vector_store = ChromaVectorStore(chroma_collection=chroma_collection) +storage_context = StorageContext.from_defaults(vector_store=vector_store) +index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context, embed_model=embed_model +) + +# Log collection contents before querying +logging.debug("Documents in ChromaDB collection before querying:") +all_documents = chroma_collection.get(include=["documents"]) +logging.debug(all_documents["documents"]) + +# Query Data +query_engine = index.as_query_engine() +response = query_engine.query("What did the author do growing up?") +display(Markdown(f"{response}")) + +# Log collection contents after querying +logging.debug("Documents in ChromaDB collection after querying:") 
+all_documents_after_query = chroma_collection.get(include=["documents"]) +logging.debug(all_documents_after_query["documents"]) + +# Log embeddings stored in ChromaDB +logging.debug("Embeddings stored in ChromaDB:") +all_embeddings = chroma_collection.get(include=["embeddings"]) +logging.debug(all_embeddings["embeddings"]) + +# Log metadata stored in ChromaDB +logging.debug("Metadata stored in ChromaDB:") +all_metadata = chroma_collection.get(include=["metadatas"]) +logging.debug(all_metadata["metadatas"]) diff --git a/presets/rag_service/vector_store/chromadb_store.py b/presets/rag_service/vector_store/chromadb_store.py new file mode 100644 index 000000000..acb940747 --- /dev/null +++ b/presets/rag_service/vector_store/chromadb_store.py @@ -0,0 +1,110 @@ +import os +from typing import Dict, List + +import chromadb +from llama_index.core import Document as LlamaDocument +from llama_index.core import (StorageContext, VectorStoreIndex, + load_index_from_storage) +from llama_index.vector_stores.chroma import ChromaVectorStore +from models import Document + +from config import PERSIST_DIR + +from .base import BaseVectorStore + + +class ChromaDBVectorStoreManager(BaseVectorStore): + def __init__(self, embed_model): + self.embed_model = embed_model + # Initialize ChromaDB client and collection + self.chroma_client = chromadb.EphemeralClient() + self.collection_name = "quickstart" + self.chroma_collection = self.chroma_client.create_collection(self.collection_name) + self.vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) + self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + self.index = None # Use to store the in-memory index # TODO: Multiple indexes via name (e.g. 
namespace) + + if not os.path.exists(PERSIST_DIR): + os.makedirs(PERSIST_DIR) + + def index_documents(self, documents: List[Document]): + """Recreates the entire ChromaDB index and vector store with new documents.""" + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + self.index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + self._persist() + # Return the document IDs that were indexed + return [doc.doc_id for doc in documents] + + def add_document(self, document: Document): + """Inserts a single document into the existing ChromaDB index.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + self.index.insert(llama_doc) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def query(self, query: str, top_k: int): + """Queries the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + query_engine = self.index.as_query_engine(top_k=top_k) + return query_engine.query(query) + + def delete_document(self, doc_id: str): + """Deletes a document from the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + self.index.delete_ref_doc(doc_id, delete_from_docstore=True) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def update_document(self, document: Document): + """Updates an existing document in the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + self.index.update_ref_doc(llama_doc) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def get_document(self, doc_id: str): + """Retrieves a document 
by its ID from ChromaDB.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + doc = self.index.docstore.get_document(doc_id) + if not doc: + raise ValueError(f"Document with ID {doc_id} not found.") + return doc + + def refresh_documents(self, documents: List[Document]) -> List[bool]: + """Updates existing documents and inserts new documents in the vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + refresh_results = self.index.refresh_ref_docs(llama_docs) + self._persist() + # Returns a list of booleans indicating whether each document was successfully refreshed. + return refresh_results + + def list_documents(self) -> Dict[str, Document]: + """Lists all documents in the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) + for doc_id, doc in self.index.docstore.docs.items()} + + def document_exists(self, doc_id: str) -> bool: + """Checks if a document exists in the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + return doc_id in self.index.docstore.docs + + def _load_index(self): + """Loads the existing ChromaDB index from disk.""" + vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) + storage_context = StorageContext.from_defaults( + vector_store=vector_store, persist_dir=PERSIST_DIR + ) + return load_index_from_storage(storage_context=storage_context) + + def _persist(self): + """Saves the existing ChromaDB index to disk.""" + self.storage_context.persist(persist_dir=PERSIST_DIR) From d02391aa735fb0d6606930cc0f8f3ffcb3088a95 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 23 Sep 2024 19:08:30 -0700 Subject: [PATCH 
07/42] fix: Add TODOs and comments Signed-off-by: ishaansehgal99 --- presets/rag_service/crud/operations.py | 2 +- presets/rag_service/main.py | 2 +- presets/rag_service/vector_store/faiss_store.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/presets/rag_service/crud/operations.py b/presets/rag_service/crud/operations.py index e5c670e0e..5218e4508 100644 --- a/presets/rag_service/crud/operations.py +++ b/presets/rag_service/crud/operations.py @@ -42,5 +42,5 @@ def list_all(self) -> Dict[str, Document]: return self.vector_store.list_documents() def refresh(self, documents: List[Document]) -> List[bool]: - """Dummy method for refresh, if needed.""" + """Refresh Documents.""" return self.vector_store.refresh_documents(documents) diff --git a/presets/rag_service/main.py b/presets/rag_service/main.py index 953926cdf..80f6da87f 100644 --- a/presets/rag_service/main.py +++ b/presets/rag_service/main.py @@ -29,7 +29,7 @@ async def index_documents(request: IndexRequest): raise HTTPException(status_code=500, detail=str(e)) @app.post("/query") -async def query_index(request: QueryRequest): +async def query_index(request: QueryRequest): # TODO: Research async/sync what to use (inference is calling) try: response = rag_ops.read(request.query, request.top_k) return {"response": str(response)} diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index ef6e5483d..df44e6c8f 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -16,12 +16,12 @@ class FaissVectorStoreManager(BaseVectorStore): def __init__(self, dimension: int, embed_model): - self.dimension = dimension + self.dimension = dimension # TODO: Automatically needs to configure dim based on embed_model self.embed_model = embed_model self.faiss_index = faiss.IndexFlatL2(self.dimension) self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) self.storage_context = 
StorageContext.from_defaults(vector_store=self.vector_store) - self.index = None # Use to store the in-memory index + self.index = None # Use to store the in-memory index # TODO: Multiple indexes via name (e.g. namespace) if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) From d82897d8419ed3e9f2fcf9fdafc45867030a7fcf Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 24 Sep 2024 11:44:18 -0700 Subject: [PATCH 08/42] fix: Add function for getting embedding dim Signed-off-by: ishaansehgal99 --- presets/rag_service/embedding/base.py | 6 ++++++ presets/rag_service/embedding/huggingface_local.py | 9 +++++++++ presets/rag_service/embedding/huggingface_remote.py | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/presets/rag_service/embedding/base.py b/presets/rag_service/embedding/base.py index ba5a8573e..a1c371937 100644 --- a/presets/rag_service/embedding/base.py +++ b/presets/rag_service/embedding/base.py @@ -4,4 +4,10 @@ class BaseEmbeddingModel(ABC): @abstractmethod def get_text_embedding(self, text: str): + """Returns the text embedding for a given input string.""" + pass + + @abstractmethod + def get_embedding_dimension(self) -> int: + """Returns the embedding dimension for the model.""" pass \ No newline at end of file diff --git a/presets/rag_service/embedding/huggingface_local.py b/presets/rag_service/embedding/huggingface_local.py index be380a8d5..a18798a2c 100644 --- a/presets/rag_service/embedding/huggingface_local.py +++ b/presets/rag_service/embedding/huggingface_local.py @@ -8,4 +8,13 @@ def __init__(self, model_name: str): self.model = HuggingFaceEmbedding(model_name=model_name) def get_text_embedding(self, text: str): + """Returns the text embedding for a given input string.""" return self.model.get_text_embedding(text) + + def get_embedding_dimension(self) -> int: + """Infers the embedding dimension by making a local call to get the embedding of a dummy text.""" + dummy_input = "This is a dummy sentence." 
+ embedding = self.get_text_embedding(dummy_input) + + # TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size) + return len(embedding) \ No newline at end of file diff --git a/presets/rag_service/embedding/huggingface_remote.py b/presets/rag_service/embedding/huggingface_remote.py index c3314ccb6..341a1d03b 100644 --- a/presets/rag_service/embedding/huggingface_remote.py +++ b/presets/rag_service/embedding/huggingface_remote.py @@ -9,4 +9,13 @@ def __init__(self, model_name: str, api_key: str): self.model = HuggingFaceInferenceAPIEmbedding(model_name=model_name, api_key=api_key) def get_text_embedding(self, text: str): + """Returns the text embedding for a given input string.""" return self.model.get_text_embedding(text) + + def get_embedding_dimension(self) -> int: + """Infers the embedding dimension by making a remote call to get the embedding of a dummy text.""" + dummy_input = "This is a dummy sentence." + embedding = self.get_text_embedding(dummy_input) + + # TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size) + return len(embedding) From cd9cbab69800322ce5fe3f917b4e86027b6a04d7 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 24 Sep 2024 11:46:04 -0700 Subject: [PATCH 09/42] fix: Updates faiss store to handle multiple indices and dynamically get embedding dim Signed-off-by: ishaansehgal99 --- presets/rag_service/vector_store/base.py | 22 ++-- .../vector_store/chromadb_store.py | 9 +- .../rag_service/vector_store/faiss_store.py | 119 ++++++++++-------- .../{ => playground}/chromadb_playground.py | 0 4 files changed, 84 insertions(+), 66 deletions(-) rename presets/rag_service/vector_store/{ => playground}/chromadb_playground.py (100%) diff --git a/presets/rag_service/vector_store/base.py b/presets/rag_service/vector_store/base.py index b791bb7e6..d9b92315c 100644 --- a/presets/rag_service/vector_store/base.py +++ b/presets/rag_service/vector_store/base.py @@ -6,45 
+6,45 @@ class BaseVectorStore(ABC): @abstractmethod - def index_documents(self, documents: List[Document]) -> List[str]: + def index_documents(self, documents: List[Document], index_name: str) -> List[str]: pass @abstractmethod - def query(self, query: str, top_k: int): + def query(self, query: str, top_k: int, index_name: str): pass @abstractmethod - def add_document(self, document: Document): + def add_document(self, document: Document, index_name: str): pass @abstractmethod - def delete_document(self, doc_id: str): + def delete_document(self, doc_id: str, index_name: str): pass @abstractmethod - def update_document(self, document: Document) -> str: + def update_document(self, document: Document, index_name: str) -> str: pass @abstractmethod - def get_document(self, doc_id: str) -> Document: + def get_document(self, doc_id: str, index_name: str) -> Document: pass @abstractmethod - def list_documents(self) -> Dict[str, Document]: + def list_documents(self, index_name: str) -> Dict[str, Document]: pass @abstractmethod - def document_exists(self, doc_id: str) -> bool: + def document_exists(self, doc_id: str, index_name: str) -> bool: pass @abstractmethod - def refresh_documents(self, documents: List[Document]) -> List[bool]: + def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: pass @abstractmethod - def list_documents(self) -> Dict[str, Document]: + def list_documents(self, index_name: str) -> Dict[str, Document]: pass @abstractmethod - def document_exists(self, doc_id: str) -> bool: + def document_exists(self, doc_id: str, index_name: str) -> bool: pass \ No newline at end of file diff --git a/presets/rag_service/vector_store/chromadb_store.py b/presets/rag_service/vector_store/chromadb_store.py index acb940747..927318202 100644 --- a/presets/rag_service/vector_store/chromadb_store.py +++ b/presets/rag_service/vector_store/chromadb_store.py @@ -22,15 +22,16 @@ def __init__(self, embed_model): self.chroma_collection = 
self.chroma_client.create_collection(self.collection_name) self.vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) - self.index = None # Use to store the in-memory index # TODO: Multiple indexes via name (e.g. namespace) + self.indices = {} # Use to store the in-memory index via namespace (e.g. namespace -> index) if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) - def index_documents(self, documents: List[Document]): - """Recreates the entire ChromaDB index and vector store with new documents.""" + def index_documents(self, documents: List[Document], index_name: str): + """Recreates the entire FAISS index and vector store with new documents.""" llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - self.index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above + self.indices[index_name] = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) self._persist() # Return the document IDs that were indexed return [doc.doc_id for doc in documents] diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index df44e6c8f..e33b3904a 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -15,95 +15,112 @@ class FaissVectorStoreManager(BaseVectorStore): - def __init__(self, dimension: int, embed_model): - self.dimension = dimension # TODO: Automatically needs to configure dim based on embed_model + def __init__(self, embed_model): self.embed_model = embed_model - self.faiss_index = faiss.IndexFlatL2(self.dimension) - self.vector_store = 
FaissVectorStore(faiss_index=self.faiss_index) - self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) - self.index = None # Use to store the in-memory index # TODO: Multiple indexes via name (e.g. namespace) + self.dimension = self.embed_model.get_embedding_dimension() + # TODO: Consider allowing user custom indexing method e.g. + """ + # Choose the FAISS index type based on the provided index_method + if index_method == 'FlatL2': + faiss_index = faiss.IndexFlatL2(self.dimension) # L2 (Euclidean distance) index + elif index_method == 'FlatIP': + faiss_index = faiss.IndexFlatIP(self.dimension) # Inner product (cosine similarity) index + elif index_method == 'IVFFlat': + quantizer = faiss.IndexFlatL2(self.dimension) # Quantizer for IVF + faiss_index = faiss.IndexIVFFlat(quantizer, self.dimension, 100) # IVF with flat quantization + elif index_method == 'HNSW': + faiss_index = faiss.IndexHNSWFlat(self.dimension, 32) # HNSW index with 32 neighbors + else: + raise ValueError(f"Unknown index method: {index_method}") + """ + # TODO: We need to test if sharing storage_context is viable/correct or if we should make a new one for each index + self.faiss_index = faiss.IndexFlatL2(self.dimension) # Specifies FAISS indexing method (https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) + self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) # Specifies in-memory data structure for storing and retrieving document embeddings + self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) # Used to persist the vector store and its underlying data across sessions + self.indices = {} # Use to store the in-memory index via namespace (e.g. 
namespace -> index) if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) - def index_documents(self, documents: List[Document]): + def index_documents(self, documents: List[Document], index_name: str): """Recreates the entire FAISS index and vector store with new documents.""" + if index_name in self.indices: + print(f"Index {index_name} already exists. Overwriting.") llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - self.index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) - self._persist() + # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above + self.indices[index_name] = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + self._persist(index_name) # Return the document IDs that were indexed return [doc.doc_id for doc in documents] - def add_document(self, document: Document): + def add_document(self, document: Document, index_name: str): """Inserts a single document into the existing FAISS index.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory + assert index_name in self.indices, f"No such index: '{index_name}' exists." 
llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index.insert(llama_doc) - self.storage_context.persist(persist_dir=PERSIST_DIR) + self.indices[index_name].insert(llama_doc) + self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) - def query(self, query: str, top_k: int): + def query(self, query: str, top_k: int, index_name: str): """Queries the FAISS vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - query_engine = self.index.as_query_engine(top_k=top_k) + assert index_name in self.indices, f"No such index: '{index_name}' exists." + query_engine = self.indices[index_name].as_query_engine(top_k=top_k) return query_engine.query(query) - def delete_document(self, doc_id: str): + def delete_document(self, doc_id: str, index_name: str): """Deletes a document from the FAISS vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - self.index.delete_ref_doc(doc_id, delete_from_docstore=True) - self.storage_context.persist(persist_dir=PERSIST_DIR) + assert index_name in self.indices, f"No such index: '{index_name}' exists." + self.indices[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) + self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) - def update_document(self, document: Document): + def update_document(self, document: Document, index_name: str): """Updates an existing document in the FAISS vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory + assert index_name in self.indices, f"No such index: '{index_name}' exists." 
llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index.update_ref_doc(llama_doc) - self.storage_context.persist(persist_dir=PERSIST_DIR) + self.indices[index_name].update_ref_doc(llama_doc) + self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) - def get_document(self, doc_id: str): + def get_document(self, doc_id: str, index_name: str): """Retrieves a document by its ID.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - doc = self.index.docstore.get_document(doc_id) + assert index_name in self.indices, f"No such index: '{index_name}' exists." + doc = self.indices[index_name].docstore.get_document(doc_id) if not doc: raise ValueError(f"Document with ID {doc_id} not found.") return doc - def refresh_documents(self, documents: List[Document]) -> List[bool]: + def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: """Updates existing documents and inserts new documents in the vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory + assert index_name in self.indices, f"No such index: '{index_name}' exists." llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - refresh_results = self.index.refresh_ref_docs(llama_docs) - self._persist() + refresh_results = self.indices[index_name].refresh_ref_docs(llama_docs) + self._persist(index_name) # Returns a list of booleans indicating whether each document was successfully refreshed. return refresh_results - def list_documents(self) -> Dict[str, Document]: + def list_documents(self, index_name: str) -> Dict[str, Document]: """Lists all documents in the vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory + assert index_name in self.indices, f"No such index: '{index_name}' exists." 
return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - for doc_id, doc in self.index.docstore.docs.items()} + for doc_id, doc in self.indices[index_name].docstore.docs.items()} - def document_exists(self, doc_id: str) -> bool: + def document_exists(self, doc_id: str, index_name: str) -> bool: """Checks if a document exists in the vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - return doc_id in self.index.docstore.docs + assert index_name in self.indices, f"No such index: '{index_name}' exists." + return doc_id in self.indices[index_name].docstore.docs - def _load_index(self): + def _load_index(self, index_name: str): """Loads the existing FAISS index from disk.""" - vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR) + persist_dir = os.path.join(PERSIST_DIR, index_name) + if not os.path.exists(persist_dir): + raise ValueError(f"No persisted index found for '{index_name}'") + vector_store = FaissVectorStore.from_persist_dir(persist_dir) storage_context = StorageContext.from_defaults( - vector_store=vector_store, persist_dir=PERSIST_DIR + vector_store=vector_store, persist_dir=persist_dir ) - return load_index_from_storage(storage_context=storage_context) + self.indices[index_name] = load_index_from_storage(storage_context=storage_context) + return self.indices[index_name] - def _persist(self): + def _persist(self, index_name: str): """Saves the existing FAISS index to disk.""" - self.storage_context.persist(persist_dir=PERSIST_DIR) + assert index_name in self.indices, f"No such index: '{index_name}' exists." 
+ storage_context = self.indices[index_name].storage_context + storage_context.persist(persist_dir=os.path.join(PERSIST_DIR, index_name)) diff --git a/presets/rag_service/vector_store/chromadb_playground.py b/presets/rag_service/vector_store/playground/chromadb_playground.py similarity index 100% rename from presets/rag_service/vector_store/chromadb_playground.py rename to presets/rag_service/vector_store/playground/chromadb_playground.py From 33669fcbde2613ace53b05f681e7fa7788977cdc Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 27 Sep 2024 16:06:08 -0700 Subject: [PATCH 10/42] feat: Add requirements Signed-off-by: ishaansehgal99 --- presets/rag_service/requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 presets/rag_service/requirements.txt diff --git a/presets/rag_service/requirements.txt b/presets/rag_service/requirements.txt new file mode 100644 index 000000000..bd210b6c8 --- /dev/null +++ b/presets/rag_service/requirements.txt @@ -0,0 +1,6 @@ +llama-index +llama-index-embeddings-huggingface +fastapi +faiss-cpu +llama-index-vector-stores-faiss +uvicorn From 7165ccf50dbc6842edd26ccac282406643e66888 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 27 Sep 2024 16:06:43 -0700 Subject: [PATCH 11/42] feat: fix typos, syntax errors and bugs --- presets/rag_service/embedding/huggingface_remote.py | 2 +- presets/rag_service/main.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/presets/rag_service/embedding/huggingface_remote.py b/presets/rag_service/embedding/huggingface_remote.py index 341a1d03b..f45e08c2c 100644 --- a/presets/rag_service/embedding/huggingface_remote.py +++ b/presets/rag_service/embedding/huggingface_remote.py @@ -6,7 +6,7 @@ class RemoteHuggingFaceEmbedding(BaseEmbeddingModel): def __init__(self, model_name: str, api_key: str): - self.model = HuggingFaceInferenceAPIEmbedding(model_name=model_name, api_key=api_key) + self.model = 
HuggingFaceInferenceAPIEmbedding(model_name=model_name, token=api_key) def get_text_embedding(self, text: str): """Returns the text embedding for a given input string.""" diff --git a/presets/rag_service/main.py b/presets/rag_service/main.py index 80f6da87f..97fed9151 100644 --- a/presets/rag_service/main.py +++ b/presets/rag_service/main.py @@ -1,7 +1,7 @@ from typing import Dict, List from crud.operations import RAGOperations -from embedding import get_embedding_model +from embedding.huggingface_local import LocalHuggingFaceEmbedding from fastapi import FastAPI, HTTPException from models import (DocumentResponse, IndexRequest, ListDocumentsResponse, QueryRequest, RefreshRequest, UpdateRequest) @@ -12,10 +12,10 @@ app = FastAPI() # Initialize embedding model -embed_model = get_embedding_model(EMBEDDING_TYPE, MODEL_ID, ACCESS_SECRET) +embed_model = LocalHuggingFaceEmbedding(MODEL_ID) # Initialize vector store -vector_store = FaissVectorStoreManager(dimension=384, embed_model=embed_model) +vector_store = FaissVectorStoreManager(embed_model=embed_model) # Initialize RAG operations rag_ops = RAGOperations(vector_store) From 7f399399d525df7af67b2fb371e3fb2b3063bd8b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 27 Sep 2024 17:59:51 -0700 Subject: [PATCH 12/42] fix: Bugs fixed for managing embeddings Signed-off-by: ishaansehgal99 --- .../embedding/huggingface_local.py | 1 - .../embedding/huggingface_remote.py | 1 - presets/rag_service/tests/__init__.py | 0 presets/rag_service/tests/conftest.py | 5 ++ presets/rag_service/tests/test_faiss_store.py | 64 +++++++++++++++++++ .../rag_service/vector_store/faiss_store.py | 7 +- 6 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 presets/rag_service/tests/__init__.py create mode 100644 presets/rag_service/tests/conftest.py create mode 100644 presets/rag_service/tests/test_faiss_store.py diff --git a/presets/rag_service/embedding/huggingface_local.py b/presets/rag_service/embedding/huggingface_local.py 
index a18798a2c..cf58c7a3e 100644 --- a/presets/rag_service/embedding/huggingface_local.py +++ b/presets/rag_service/embedding/huggingface_local.py @@ -16,5 +16,4 @@ def get_embedding_dimension(self) -> int: dummy_input = "This is a dummy sentence." embedding = self.get_text_embedding(dummy_input) - # TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size) return len(embedding) \ No newline at end of file diff --git a/presets/rag_service/embedding/huggingface_remote.py b/presets/rag_service/embedding/huggingface_remote.py index f45e08c2c..0f8e79181 100644 --- a/presets/rag_service/embedding/huggingface_remote.py +++ b/presets/rag_service/embedding/huggingface_remote.py @@ -17,5 +17,4 @@ def get_embedding_dimension(self) -> int: dummy_input = "This is a dummy sentence." embedding = self.get_text_embedding(dummy_input) - # TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size) return len(embedding) diff --git a/presets/rag_service/tests/__init__.py b/presets/rag_service/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/tests/conftest.py b/presets/rag_service/tests/conftest.py new file mode 100644 index 000000000..3c7c9c6ab --- /dev/null +++ b/presets/rag_service/tests/conftest.py @@ -0,0 +1,5 @@ +import sys +import os +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Force CPU-only execution for testing +os.environ["OMP_NUM_THREADS"] = "1" # Force single-threaded for testing to prevent segfault while loading embedding model diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py new file mode 100644 index 000000000..49c33273f --- /dev/null +++ b/presets/rag_service/tests/test_faiss_store.py @@ -0,0 +1,64 @@ +import os +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import 
pytest +from vector_store.faiss_store import FaissVectorStoreManager +from models import Document +from embedding.huggingface_local import LocalHuggingFaceEmbedding +from config import MODEL_ID + +@pytest.fixture(scope='session') +def init_embed_manager(): + return LocalHuggingFaceEmbedding(MODEL_ID) + +@pytest.fixture +def vector_store_manager(init_embed_manager): + with TemporaryDirectory() as temp_dir: + # Mock the persistence directory + os.environ['PERSIST_DIR'] = temp_dir + yield FaissVectorStoreManager(init_embed_manager) + + +def test_index_documents(vector_store_manager): + documents = [ + Document(doc_id="1", text="First document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", metadata={"type": "text"}) + ] + + doc_ids = vector_store_manager.index_documents(documents, index_name="test_index") + + assert len(doc_ids) == 2 + assert doc_ids == ["1", "2"] + + +def test_query_documents(vector_store_manager): + # Add documents to index + documents = [ + Document(doc_id="1", text="First document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", metadata={"type": "text"}) + ] + vector_store_manager.index_documents(documents, index_name="test_index") + + # Mock query and results + query_result = vector_store_manager.query("First", top_k=1, index_name="test_index") + + assert query_result is not None + + +def test_add_and_delete_document(vector_store_manager): + document = Document(doc_id="3", text="Third document", metadata={"type": "text"}) + vector_store_manager.index_documents([document], index_name="test_index") + + # Add a document to the existing index + new_document = Document(doc_id="4", text="Fourth document", metadata={"type": "text"}) + vector_store_manager.add_document(new_document, index_name="test_index") + + # Assert that the document exists + assert vector_store_manager.document_exists("4", "test_index") + + # Delete the document + vector_store_manager.delete_document("4", "test_index") + + # 
Assert that the document no longer exists + assert not vector_store_manager.document_exists("4", "test_index") diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index e33b3904a..b6c79292b 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -15,9 +15,10 @@ class FaissVectorStoreManager(BaseVectorStore): - def __init__(self, embed_model): - self.embed_model = embed_model - self.dimension = self.embed_model.get_embedding_dimension() + def __init__(self, embedding_manager): + self.embedding_manager = embedding_manager + self.embed_model = self.embedding_manager.model + self.dimension = self.embedding_manager.get_embedding_dimension() # TODO: Consider allowing user custom indexing method e.g. """ # Choose the FAISS index type based on the provided index_method From 1e07beb034f2e8453ef16ba03010cc380214c45f Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 30 Sep 2024 21:38:39 -0700 Subject: [PATCH 13/42] feat: Use a global SimpleIndexStore and separate StorageContexts Signed-off-by: ishaansehgal99 --- presets/rag_service/tests/conftest.py | 1 + presets/rag_service/tests/test_faiss_store.py | 26 ++++ .../rag_service/vector_store/faiss_store.py | 117 ++++++++++++------ 3 files changed, 105 insertions(+), 39 deletions(-) diff --git a/presets/rag_service/tests/conftest.py b/presets/rag_service/tests/conftest.py index 3c7c9c6ab..afb6c4713 100644 --- a/presets/rag_service/tests/conftest.py +++ b/presets/rag_service/tests/conftest.py @@ -3,3 +3,4 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Force CPU-only execution for testing os.environ["OMP_NUM_THREADS"] = "1" # Force single-threaded for testing to prevent segfault while loading embedding model +os.environ["MKL_NUM_THREADS"] = "1" # Force MKL to use a single thread diff --git 
a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 49c33273f..1cf63bdcf 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -15,6 +15,7 @@ def init_embed_manager(): @pytest.fixture def vector_store_manager(init_embed_manager): with TemporaryDirectory() as temp_dir: + print(f"Saving Temporary Test Storage at: {temp_dir}") # Mock the persistence directory os.environ['PERSIST_DIR'] = temp_dir yield FaissVectorStoreManager(init_embed_manager) @@ -31,6 +32,31 @@ def test_index_documents(vector_store_manager): assert len(doc_ids) == 2 assert doc_ids == ["1", "2"] +def test_index_documents_isolation(vector_store_manager): + doc_1_id, doc_2_id = "1", "2" + documents1 = [ + Document(doc_id=doc_1_id, text="First document in index1", metadata={"type": "text"}), + ] + documents2 = [ + Document(doc_id=doc_2_id, text="First document in index2", metadata={"type": "text"}), + ] + + # Index documents in separate indices + index_name_1, index_name_2 = "index1", "index2" + vector_store_manager.index_documents(documents1, index_name=index_name_1) + vector_store_manager.index_documents(documents2, index_name=index_name_2) + + # Ensure documents are correctly persisted and separated by index + doc_1 = vector_store_manager.get_document(doc_1_id, index_name=index_name_1) + assert doc_1 and doc_1.node_ids # Ensure documents were created + + doc_2 = vector_store_manager.get_document(doc_2_id, index_name=index_name_2) + assert doc_2 and doc_2.node_ids # Ensure documents were created + + # Ensure that the documents do not mix between indices + assert vector_store_manager.get_document(doc_1_id, index_name=index_name_2) is None, f"Document {doc_1_id} should not exist in {index_name_2}" + assert vector_store_manager.get_document(doc_2_id, index_name=index_name_1) is None, f"Document {doc_2_id} should not exist in {index_name_1}" + def test_query_documents(vector_store_manager): # Add 
documents to index diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index b6c79292b..8a89977a9 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -6,7 +6,9 @@ from llama_index.core import (StorageContext, VectorStoreIndex, load_graph_from_storage, load_index_from_storage, load_indices_from_storage) +from llama_index.core.storage.index_store import SimpleIndexStore from llama_index.vector_stores.faiss import FaissVectorStore +from llama_index.core.data_structs.data_structs import IndexStruct from models import Document from config import PERSIST_DIR @@ -34,79 +36,113 @@ def __init__(self, embedding_manager): else: raise ValueError(f"Unknown index method: {index_method}") """ - # TODO: We need to test if sharing storage_context is viable/correct or if we should make a new one for each index - self.faiss_index = faiss.IndexFlatL2(self.dimension) # Specifies FAISS indexing method (https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) - self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) # Specifies in-memory data structure for storing and retrieving document embeddings - self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) # Used to persist the vector store and its underlying data across sessions - self.indices = {} # Use to store the in-memory index via namespace (e.g. namespace -> index) - - if not os.path.exists(PERSIST_DIR): - os.makedirs(PERSIST_DIR) + self.index_map = {} # Used to store the in-memory index via namespace (e.g. 
namespace -> index) + self.index_store = SimpleIndexStore() # Use to store global index metadata def index_documents(self, documents: List[Document], index_name: str): """Recreates the entire FAISS index and vector store with new documents.""" - if index_name in self.indices: + if index_name in self.index_map: + del self.index_map[index_name] + self.index_store.delete_index_struct(self.index_map[index_name]) print(f"Index {index_name} already exists. Overwriting.") + + faiss_index = faiss.IndexFlatL2(self.dimension) # Specifies FAISS indexing method (https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) + vector_store = FaissVectorStore(faiss_index=faiss_index) # Specifies in-memory data structure for storing and retrieving document embeddings + storage_context = StorageContext.from_defaults(vector_store=vector_store) # Used to persist the vector store and its underlying data across sessions + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above - self.indices[index_name] = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) - self._persist(index_name) + index = VectorStoreIndex.from_documents( + llama_docs, + storage_context=storage_context, + embed_model=self.embed_model, + use_async=True # Indexing Process Performed Async + ) + index.set_index_id(index_name) # https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/indices/base.py#L138-L154 + self.index_map[index_name] = index + self.index_store.add_index_struct(index.index_struct) + self._persist(index_name) # TODO: Consider just persisting the index as opposed to shared index_store # Return the document IDs that were indexed return [doc.doc_id for doc in documents] def add_document(self, document: Document, index_name: str): """Inserts a 
single document into the existing FAISS index.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.indices[index_name].insert(llama_doc) - self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) + self.index_map[index_name].insert(llama_doc) + self._persist(index_name) def query(self, query: str, top_k: int, index_name: str): """Queries the FAISS vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - query_engine = self.indices[index_name].as_query_engine(top_k=top_k) + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + query_engine = self.index_map[index_name].as_query_engine(top_k=top_k) return query_engine.query(query) def delete_document(self, doc_id: str, index_name: str): """Deletes a document from the FAISS vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - self.indices[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) - self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + self.index_map[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) + self._persist(index_name) def update_document(self, document: Document, index_name: str): """Updates an existing document in the FAISS vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." 
+ if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.indices[index_name].update_ref_doc(llama_doc) - self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) + self.index_map[index_name].update_ref_doc(llama_doc) + self._persist(index_name) def get_document(self, doc_id: str, index_name: str): - """Retrieves a document by its ID.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - doc = self.indices[index_name].docstore.get_document(doc_id) - if not doc: - raise ValueError(f"Document with ID {doc_id} not found.") - return doc + """Retrieves a document's RefDocInfo by its ID.""" + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + + # Try to retrieve the RefDocInfo associated with the doc_id + ref_doc_info = self.index_map[index_name].ref_doc_info.get(doc_id) + + if ref_doc_info is None: + print(f"Document with ID {doc_id} not found in index '{index_name}'.") + return None + + return ref_doc_info + + def get_nodes_by_ref_doc_id(self, doc_id: str, index_name: str): + """Retrieve nodes associated with a given document's ref ID.""" + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + + ref_doc_info = self.get_document(doc_id, index_name) + if ref_doc_info is None: + return None + + return ref_doc_info.node_ids def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: """Updates existing documents and inserts new documents in the vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." 
+ if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - refresh_results = self.indices[index_name].refresh_ref_docs(llama_docs) + refresh_results = self.index_map[index_name].refresh_ref_docs(llama_docs) self._persist(index_name) # Returns a list of booleans indicating whether each document was successfully refreshed. return refresh_results def list_documents(self, index_name: str) -> Dict[str, Document]: """Lists all documents in the vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - for doc_id, doc in self.indices[index_name].docstore.docs.items()} + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + pass + # return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) + # for doc_id, doc in self.index_map[index_name].docstore.docs.items()} def document_exists(self, doc_id: str, index_name: str) -> bool: """Checks if a document exists in the vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." 
- return doc_id in self.indices[index_name].docstore.docs + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + return doc_id in self.index_map[index_name].ref_doc_info def _load_index(self, index_name: str): """Loads the existing FAISS index from disk.""" @@ -117,11 +153,14 @@ def _load_index(self, index_name: str): storage_context = StorageContext.from_defaults( vector_store=vector_store, persist_dir=persist_dir ) - self.indices[index_name] = load_index_from_storage(storage_context=storage_context) - return self.indices[index_name] + self.index_map[index_name] = load_index_from_storage(storage_context=storage_context) + return self.index_map[index_name] def _persist(self, index_name: str): """Saves the existing FAISS index to disk.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - storage_context = self.indices[index_name].storage_context + self.index_store.persist(os.path.join(PERSIST_DIR, "store.json")) # Persist global index store + assert index_name in self.index_map, f"No such index: '{index_name}' exists." 
+ + # Persist each index's storage context separately + storage_context = self.index_map[index_name].storage_context storage_context.persist(persist_dir=os.path.join(PERSIST_DIR, index_name)) From 746c1564d3a4f134293a9cf8edbb1745ad72a835 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 30 Sep 2024 21:55:07 -0700 Subject: [PATCH 14/42] feat: Add the load and list indexing functions Signed-off-by: ishaansehgal99 --- .../rag_service/vector_store/faiss_store.py | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index 8a89977a9..a5cc0e9f6 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -7,6 +7,7 @@ load_graph_from_storage, load_index_from_storage, load_indices_from_storage) from llama_index.core.storage.index_store import SimpleIndexStore +from llama_index.core.storage.docstore import RefDocInfo from llama_index.vector_stores.faiss import FaissVectorStore from llama_index.core.data_structs.data_structs import IndexStruct from models import Document @@ -130,13 +131,11 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ # Returns a list of booleans indicating whether each document was successfully refreshed. 
return refresh_results - def list_documents(self, index_name: str) -> Dict[str, Document]: + def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: """Lists all documents in the vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - pass - # return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - # for doc_id, doc in self.index_map[index_name].docstore.docs.items()} + return self.index_map[index_name].ref_doc_info def document_exists(self, doc_id: str, index_name: str) -> bool: """Checks if a document exists in the vector store.""" @@ -144,16 +143,45 @@ def document_exists(self, doc_id: str, index_name: str) -> bool: raise ValueError(f"No such index: '{index_name}' exists.") return doc_id in self.index_map[index_name].ref_doc_info + def _load_index_store(self): + """Loads the global SimpleIndexStore from disk.""" + store_path = os.path.join(PERSIST_DIR, "store.json") + + if not os.path.exists(store_path): + raise ValueError("No persisted index store found.") + + # Load the global index store from the persisted JSON + self.index_store = SimpleIndexStore.from_persist_path(store_path) + def _load_index(self, index_name: str): """Loads the existing FAISS index from disk.""" + # Load the global index store if it hasn't been loaded yet + if not self.index_store or not self.index_store.index_structs(): + self._load_index_store() + + # Now load the specific index persist_dir = os.path.join(PERSIST_DIR, index_name) + if not os.path.exists(persist_dir): raise ValueError(f"No persisted index found for '{index_name}'") + + # Load the vector store from the persisted directory vector_store = FaissVectorStore.from_persist_dir(persist_dir) + + # Create a new StorageContext using the loaded vector store storage_context = StorageContext.from_defaults( - vector_store=vector_store, persist_dir=persist_dir + vector_store=vector_store, + persist_dir=persist_dir # Ensure it uses the correct 
directory for persistence ) - self.index_map[index_name] = load_index_from_storage(storage_context=storage_context) + + # Load the VectorStoreIndex using the storage context + loaded_index = load_index_from_storage(storage_context=storage_context) + + # Set the index_id for the loaded index to the current index_name + loaded_index.set_index_id(index_name) + + # Update the in-memory index map with the loaded index + self.index_map[index_name] = loaded_index return self.index_map[index_name] def _persist(self, index_name: str): From 3a83f26904f6b4478e17a3f0617b10ae798b64d4 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 20:09:11 -0700 Subject: [PATCH 15/42] feat: Remove chromadb from PR Signed-off-by: ishaansehgal99 --- .../vector_store/chromadb_store.py | 111 ------------------ 1 file changed, 111 deletions(-) delete mode 100644 presets/rag_service/vector_store/chromadb_store.py diff --git a/presets/rag_service/vector_store/chromadb_store.py b/presets/rag_service/vector_store/chromadb_store.py deleted file mode 100644 index 927318202..000000000 --- a/presets/rag_service/vector_store/chromadb_store.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -from typing import Dict, List - -import chromadb -from llama_index.core import Document as LlamaDocument -from llama_index.core import (StorageContext, VectorStoreIndex, - load_index_from_storage) -from llama_index.vector_stores.chroma import ChromaVectorStore -from models import Document - -from config import PERSIST_DIR - -from .base import BaseVectorStore - - -class ChromaDBVectorStoreManager(BaseVectorStore): - def __init__(self, embed_model): - self.embed_model = embed_model - # Initialize ChromaDB client and collection - self.chroma_client = chromadb.EphemeralClient() - self.collection_name = "quickstart" - self.chroma_collection = self.chroma_client.create_collection(self.collection_name) - self.vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) - self.storage_context = 
StorageContext.from_defaults(vector_store=self.vector_store) - self.indices = {} # Use to store the in-memory index via namespace (e.g. namespace -> index) - - if not os.path.exists(PERSIST_DIR): - os.makedirs(PERSIST_DIR) - - def index_documents(self, documents: List[Document], index_name: str): - """Recreates the entire FAISS index and vector store with new documents.""" - llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above - self.indices[index_name] = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) - self._persist() - # Return the document IDs that were indexed - return [doc.doc_id for doc in documents] - - def add_document(self, document: Document): - """Inserts a single document into the existing ChromaDB index.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index.insert(llama_doc) - self.storage_context.persist(persist_dir=PERSIST_DIR) - - def query(self, query: str, top_k: int): - """Queries the ChromaDB vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - query_engine = self.index.as_query_engine(top_k=top_k) - return query_engine.query(query) - - def delete_document(self, doc_id: str): - """Deletes a document from the ChromaDB vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - self.index.delete_ref_doc(doc_id, delete_from_docstore=True) - self.storage_context.persist(persist_dir=PERSIST_DIR) - - def update_document(self, document: Document): - """Updates an existing document in the ChromaDB vector store.""" - if self.index is None: - self.index = 
self._load_index() # Load if not already in memory - llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index.update_ref_doc(llama_doc) - self.storage_context.persist(persist_dir=PERSIST_DIR) - - def get_document(self, doc_id: str): - """Retrieves a document by its ID from ChromaDB.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - doc = self.index.docstore.get_document(doc_id) - if not doc: - raise ValueError(f"Document with ID {doc_id} not found.") - return doc - - def refresh_documents(self, documents: List[Document]) -> List[bool]: - """Updates existing documents and inserts new documents in the vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - refresh_results = self.index.refresh_ref_docs(llama_docs) - self._persist() - # Returns a list of booleans indicating whether each document was successfully refreshed. 
- return refresh_results - - def list_documents(self) -> Dict[str, Document]: - """Lists all documents in the ChromaDB vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - for doc_id, doc in self.index.docstore.docs.items()} - - def document_exists(self, doc_id: str) -> bool: - """Checks if a document exists in the ChromaDB vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - return doc_id in self.index.docstore.docs - - def _load_index(self): - """Loads the existing ChromaDB index from disk.""" - vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) - storage_context = StorageContext.from_defaults( - vector_store=vector_store, persist_dir=PERSIST_DIR - ) - return load_index_from_storage(storage_context=storage_context) - - def _persist(self): - """Saves the existing ChromaDB index to disk.""" - self.storage_context.persist(persist_dir=PERSIST_DIR) From cb80f3e04fa64c170363b0feab1f6756091c4525 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 20:09:55 -0700 Subject: [PATCH 16/42] feat: Add CustomLLM Inference Signed-off-by: ishaansehgal99 --- presets/rag_service/inference/__init__.py | 0 .../rag_service/inference/custom_inference.py | 43 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 presets/rag_service/inference/__init__.py create mode 100644 presets/rag_service/inference/custom_inference.py diff --git a/presets/rag_service/inference/__init__.py b/presets/rag_service/inference/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/inference/custom_inference.py b/presets/rag_service/inference/custom_inference.py new file mode 100644 index 000000000..11ed0ad25 --- /dev/null +++ b/presets/rag_service/inference/custom_inference.py @@ -0,0 +1,43 @@ +from typing import Any, 
Optional +from llama_index.core.llms import CustomLLM, CompletionResponse, LLMMetadata, CompletionResponseGen +from llama_index.llms.openai import OpenAI +from llama_index.core.llms.callbacks import llm_completion_callback +import requests +from config import INFERENCE_URL, INFERENCE_ACCESS_SECRET, RESPONSE_FIELD + +class CustomInference(CustomLLM): + + @llm_completion_callback() + def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: + pass + + @llm_completion_callback() + def complete(self, prompt: str, **kwargs) -> CompletionResponse: + if "openai" in INFERENCE_URL: + return self._openai_complete(prompt, **kwargs) + else: + return self._custom_api_complete(prompt, **kwargs) + + def _openai_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: + llm = OpenAI( + api_key=INFERENCE_ACCESS_SECRET, + **kwargs # Pass all kwargs directly; kwargs may include model, temperature, max_tokens, etc. + ) + return llm.complete(prompt) + + def _custom_api_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: + headers = {"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} + data = {"prompt": prompt, **kwargs} + + response = requests.post(INFERENCE_URL, json=data, headers=headers) + response_data = response.json() + + # Dynamically extract the field from the response based on the specified response_field + completion_text = response_data.get(RESPONSE_FIELD, "No response field found") + + return CompletionResponse(text=completion_text) + + @property + def metadata(self) -> LLMMetadata: + """Get LLM metadata.""" + return LLMMetadata() From a0d1186ae4c857cec7300daaf15cbe04b9fd9a86 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 20:10:33 -0700 Subject: [PATCH 17/42] fix: Introduce Custom LLM class and top_k query Signed-off-by: ishaansehgal99 --- presets/rag_service/vector_store/faiss_store.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/presets/rag_service/vector_store/faiss_store.py 
b/presets/rag_service/vector_store/faiss_store.py index a5cc0e9f6..f82ece9a1 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -7,10 +7,11 @@ load_graph_from_storage, load_index_from_storage, load_indices_from_storage) from llama_index.core.storage.index_store import SimpleIndexStore -from llama_index.core.storage.docstore import RefDocInfo +from llama_index.core.storage.docstore.types import RefDocInfo from llama_index.vector_stores.faiss import FaissVectorStore -from llama_index.core.data_structs.data_structs import IndexStruct + from models import Document +from inference.custom_inference import CustomInference from config import PERSIST_DIR @@ -39,6 +40,7 @@ def __init__(self, embedding_manager): """ self.index_map = {} # Used to store the in-memory index via namespace (e.g. namespace -> index) self.index_store = SimpleIndexStore() # Use to store global index metadata + self.llm = CustomInference() def index_documents(self, documents: List[Document], index_name: str): """Recreates the entire FAISS index and vector store with new documents.""" @@ -78,7 +80,7 @@ def query(self, query: str, top_k: int, index_name: str): """Queries the FAISS vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - query_engine = self.index_map[index_name].as_query_engine(top_k=top_k) + query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) return query_engine.query(query) def delete_document(self, doc_id: str, index_name: str): From 4c663877ac5489572c5d80f9734b253fb0aab92c Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 20:44:46 -0700 Subject: [PATCH 18/42] fix: Update tests to handle faiss delete not implemented yet Signed-off-by: ishaansehgal99 --- presets/rag_service/config.py | 5 +++ presets/rag_service/tests/test_faiss_store.py | 39 ++++++++++++++----- 
.../rag_service/vector_store/faiss_store.py | 15 +++++-- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/presets/rag_service/config.py b/presets/rag_service/config.py index e5086fed0..0745084f3 100644 --- a/presets/rag_service/config.py +++ b/presets/rag_service/config.py @@ -3,6 +3,11 @@ EMBEDDING_TYPE = os.getenv("EMBEDDING_TYPE", "local") EMBEDDING_URL = os.getenv("EMBEDDING_URL") + +INFERENCE_URL = os.getenv("INFERENCE_URL", "https://api.test.com/v1") +INFERENCE_ACCESS_SECRET = os.getenv("AccessSecret") +RESPONSE_FIELD = os.getenv("RESPONSE_FIELD", "result") + MODEL_ID = os.getenv("MODEL_ID", "BAAI/bge-small-en-v1.5") VECTOR_DB_TYPE = os.getenv("VECTOR_DB_TYPE", "faiss") INDEX_SERVICE_NAME = os.getenv("INDEX_SERVICE_NAME", "default-index-service") diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 1cf63bdcf..72cb9524d 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -1,12 +1,12 @@ import os from tempfile import TemporaryDirectory -from unittest.mock import MagicMock +from unittest.mock import patch import pytest from vector_store.faiss_store import FaissVectorStoreManager from models import Document from embedding.huggingface_local import LocalHuggingFaceEmbedding -from config import MODEL_ID +from config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET @pytest.fixture(scope='session') def init_embed_manager(): @@ -57,8 +57,15 @@ def test_index_documents_isolation(vector_store_manager): assert vector_store_manager.get_document(doc_1_id, index_name=index_name_2) is None, f"Document {doc_1_id} should not exist in {index_name_2}" assert vector_store_manager.get_document(doc_2_id, index_name=index_name_1) is None, f"Document {doc_2_id} should not exist in {index_name_1}" +@patch('requests.post') +def test_query_documents(mock_post, vector_store_manager): + # Define Mock Response for Custom Inference API + 
mock_response = { + "result": "This is the completion from the API" + } + + mock_post.return_value.json.return_value = mock_response -def test_query_documents(vector_store_manager): # Add documents to index documents = [ Document(doc_id="1", text="First document", metadata={"type": "text"}), @@ -68,13 +75,19 @@ def test_query_documents(vector_store_manager): # Mock query and results query_result = vector_store_manager.query("First", top_k=1, index_name="test_index") - + assert query_result is not None + assert query_result.response == "This is the completion from the API" + mock_post.assert_called_once_with( + INFERENCE_URL, + json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True}, + headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} + ) -def test_add_and_delete_document(vector_store_manager): - document = Document(doc_id="3", text="Third document", metadata={"type": "text"}) - vector_store_manager.index_documents([document], index_name="test_index") +def test_add_and_delete_document(vector_store_manager, capsys): + documents = [Document(doc_id="3", text="Third document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") # Add a document to the existing index new_document = Document(doc_id="4", text="Fourth document", metadata={"type": "text"}) @@ -83,8 +96,14 @@ def test_add_and_delete_document(vector_store_manager): # Assert that the document exists assert vector_store_manager.document_exists("4", "test_index") - # Delete the document + # Delete the document - it should handle the NotImplementedError and not raise an exception vector_store_manager.delete_document("4", "test_index") - # Assert that the document no longer exists - assert not vector_store_manager.document_exists("4", "test_index") + # Capture the 
printed output (if any) + captured = capsys.readouterr() + + # Check if the expected message about NotImplementedError was printed + assert "Delete not yet implemented for Faiss index. Skipping document 4." in captured.out + + # Assert that the document still exists (since deletion wasn't implemented) + assert vector_store_manager.document_exists("4", "test_index") diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index f82ece9a1..13c925737 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -64,7 +64,7 @@ def index_documents(self, documents: List[Document], index_name: str): index.set_index_id(index_name) # https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/indices/base.py#L138-L154 self.index_map[index_name] = index self.index_store.add_index_struct(index.index_struct) - self._persist(index_name) # TODO: Consider just persisting the index as opposed to shared index_store + self._persist(index_name) # Return the document IDs that were indexed return [doc.doc_id for doc in documents] @@ -87,8 +87,17 @@ def delete_document(self, doc_id: str, index_name: str): """Deletes a document from the FAISS vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - self.index_map[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) - self._persist(index_name) + if not self.document_exists(doc_id, index_name): + print(f"Document with ID {doc_id} not found in index '{index_name}'. Skipping.") + return + try: + self.index_map[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) + except NotImplementedError as e: + print(f"Delete not yet implemented for Faiss index. Skipping document {doc_id}.") + except Exception as e: + print(f"Unable to Delete Document from the VectorStoreIndex. Skipping. 
Error: {e}") + finally: + self._persist(index_name) def update_document(self, document: Document, index_name: str): """Updates an existing document in the FAISS vector store.""" From 35b51133aff568df173010d2fc5317c9ba95cb5a Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 21:31:43 -0700 Subject: [PATCH 19/42] fix: Update tests to handle refresh documents Signed-off-by: ishaansehgal99 --- presets/rag_service/tests/test_faiss_store.py | 69 +++++++++++++++++++ .../rag_service/vector_store/faiss_store.py | 27 ++++++-- 2 files changed, 90 insertions(+), 6 deletions(-) diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 72cb9524d..0963e1a94 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -107,3 +107,72 @@ def test_add_and_delete_document(vector_store_manager, capsys): # Assert that the document still exists (since deletion wasn't implemented) assert vector_store_manager.document_exists("4", "test_index") + + +def test_update_document_not_implemented(vector_store_manager, capsys): + """Test that updating a document raises a NotImplementedError and is handled properly.""" + # Add a document to the index + documents = [Document(doc_id="1", text="First document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") + + # Attempt to update the existing document + updated_document = Document(doc_id="1", text="Updated first document", metadata={"type": "text"}) + vector_store_manager.update_document(updated_document, index_name="test_index") + + # Capture the printed output (if any) + captured = capsys.readouterr() + + # Check if the NotImplementedError message was printed + assert "Update is equivalent to deleting the document and then inserting it again." in captured.out + assert f"Update not yet implemented for Faiss index. Skipping document {updated_document.doc_id}." 
in captured.out + + # Ensure the document remains unchanged + original_doc = vector_store_manager.get_document("1", index_name="test_index") + assert original_doc is not None + + +def test_refresh_unchanged_documents(vector_store_manager, capsys): + """Test that refreshing documents does nothing on unchanged documents.""" + # Add documents to the index + documents = [Document(doc_id="1", text="First document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") + + refresh_results = vector_store_manager.refresh_documents(documents, index_name="test_index") + + # Capture the printed output (if any) + captured = capsys.readouterr() + assert captured.out == "" + assert refresh_results == [False, False] + +def test_refresh_new_documents(vector_store_manager): + """Test that refreshing new documents creates them.""" + vector_store_manager.index_documents([], index_name="test_index") + + # Add a document to the index + documents = [Document(doc_id="1", text="First document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", metadata={"type": "text"})] + + refresh_results = vector_store_manager.refresh_documents(documents, index_name="test_index") + + inserted_documents = vector_store_manager.list_documents(index_name="test_index") + + assert len(inserted_documents) == len(documents) + assert inserted_documents.keys() == {"1", "2"} + assert refresh_results == [True, True] + +def test_refresh_existing_documents(vector_store_manager, capsys): + """Test that refreshing existing documents prints error.""" + original_documents = [Document(doc_id="1", text="First document", metadata={"type": "text"})] + vector_store_manager.index_documents(original_documents, index_name="test_index") + + new_documents = [Document(doc_id="1", text="Updated document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", 
metadata={"type": "text"})] + + refresh_results = vector_store_manager.refresh_documents(new_documents, index_name="test_index") + + captured = capsys.readouterr() + + # Check if the NotImplementedError message was printed + assert "Refresh not yet fully implemented for index" in captured.out + assert not refresh_results diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index 13c925737..9e0922a97 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -104,8 +104,15 @@ def update_document(self, document: Document, index_name: str): if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index_map[index_name].update_ref_doc(llama_doc) - self._persist(index_name) + try: + self.index_map[index_name].update_ref_doc(llama_doc) + except NotImplementedError as e: + print("Update is equivalent to deleting the document and then inserting it again.") + print(f"Update not yet fully implemented for index. Skipping document {document.doc_id}. Error: {e}") + except Exception as e: + print(f"Unable to Update Document in the VectorStoreIndex. Skipping. 
Error: {e}") + finally: + self._persist(index_name) def get_document(self, doc_id: str, index_name: str): """Retrieves a document's RefDocInfo by its ID.""" @@ -136,11 +143,19 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ """Updates existing documents and inserts new documents in the vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - refresh_results = self.index_map[index_name].refresh_ref_docs(llama_docs) - self._persist(index_name) - # Returns a list of booleans indicating whether each document was successfully refreshed. - return refresh_results + try: + refresh_results = self.index_map[index_name].refresh_ref_docs(llama_docs) + # Returns a list of booleans indicating whether each document was successfully refreshed. + return refresh_results + except NotImplementedError as e: + print(f"Refresh is equivalent to insertion and update, which is equivalent to deletion and insertion.") + print(f"Refresh not yet fully implemented for index '{index_name}'. Error: {e}") + except Exception as e: + print(f"Unable to Refresh Documents in the VectorStoreIndex for index '{index_name}'. 
Error: {e}") + finally: + self._persist(index_name) def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: """Lists all documents in the vector store.""" From 742485e662d9e85e4fca2a141976d484157b2570 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 21:47:17 -0700 Subject: [PATCH 20/42] fix: Update tests for loading and persisting data Signed-off-by: ishaansehgal99 --- presets/rag_service/main.py | 9 ++++-- presets/rag_service/tests/test_faiss_store.py | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/presets/rag_service/main.py b/presets/rag_service/main.py index 97fed9151..49c77919e 100644 --- a/presets/rag_service/main.py +++ b/presets/rag_service/main.py @@ -2,6 +2,7 @@ from crud.operations import RAGOperations from embedding.huggingface_local import LocalHuggingFaceEmbedding +from embedding.huggingface_remote import RemoteHuggingFaceEmbedding from fastapi import FastAPI, HTTPException from models import (DocumentResponse, IndexRequest, ListDocumentsResponse, QueryRequest, RefreshRequest, UpdateRequest) @@ -12,10 +13,14 @@ app = FastAPI() # Initialize embedding model -embed_model = LocalHuggingFaceEmbedding(MODEL_ID) +if EMBEDDING_TYPE == "local": + embedding_manager = LocalHuggingFaceEmbedding(MODEL_ID) +elif EMBEDDING_TYPE == "remote": + embedding_manager = RemoteHuggingFaceEmbedding(MODEL_ID) # Initialize vector store -vector_store = FaissVectorStoreManager(embed_model=embed_model) +# TODO: Dynamically set VectorStore from EnvVars (which ultimately comes from CRD StorageSpec) +vector_store = FaissVectorStoreManager(embedding_manager) # Initialize RAG operations rag_ops = RAGOperations(vector_store) diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 0963e1a94..0196d5966 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -176,3 +176,34 @@ def 
test_refresh_existing_documents(vector_store_manager, capsys): # Check if the NotImplementedError message was printed assert "Refresh not yet fully implemented for index" in captured.out assert not refresh_results + +def test_persist_and_load_index_store(vector_store_manager): + """Test that the index store is persisted and loaded correctly.""" + # Add a document and persist the index + documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager._persist(index_name="test_index") + + # Simulate a fresh load of the index store (clearing in-memory state) + vector_store_manager.index_store = None # Clear current in-memory store + vector_store_manager._load_index_store() + + # Verify that the store was reloaded and contains the expected index structure + assert vector_store_manager.index_store is not None + assert len(vector_store_manager.index_store.index_structs()) > 0 + +# TODO: Prevent default re-indexing from load_index_from_storage +# def test_persist_and_load_index(vector_store_manager): +# """Test that an index is persisted and then loaded correctly.""" +# # Add a document and persist the index +# documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] +# vector_store_manager.index_documents(documents, index_name="test_index") +# vector_store_manager._persist(index_name="test_index") +# +# # Simulate a fresh load of the index (clearing in-memory state) +# vector_store_manager.index_map = {} # Clear current in-memory index map +# loaded_index = vector_store_manager._load_index(index_name="test_index") +# +# # Verify that the index was reloaded and contains the expected document +# assert loaded_index is not None +# assert vector_store_manager.document_exists("1", "test_index") From 51c70353dd986bf4f34057c12da3f33730fb54e3 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 2 Oct 2024 13:17:14 -0700 Subject: 
[PATCH 21/42] fix: Update tests for loading index Signed-off-by: ishaansehgal99 --- presets/rag_service/tests/test_faiss_store.py | 32 +++++++++++-------- .../rag_service/vector_store/faiss_store.py | 5 +-- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 0196d5966..3bace3f01 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -193,17 +193,21 @@ def test_persist_and_load_index_store(vector_store_manager): assert len(vector_store_manager.index_store.index_structs()) > 0 # TODO: Prevent default re-indexing from load_index_from_storage -# def test_persist_and_load_index(vector_store_manager): -# """Test that an index is persisted and then loaded correctly.""" -# # Add a document and persist the index -# documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] -# vector_store_manager.index_documents(documents, index_name="test_index") -# vector_store_manager._persist(index_name="test_index") -# -# # Simulate a fresh load of the index (clearing in-memory state) -# vector_store_manager.index_map = {} # Clear current in-memory index map -# loaded_index = vector_store_manager._load_index(index_name="test_index") -# -# # Verify that the index was reloaded and contains the expected document -# assert loaded_index is not None -# assert vector_store_manager.document_exists("1", "test_index") +def test_persist_and_load_index(vector_store_manager): + """Test that an index is persisted and then loaded correctly.""" + # Add a document and persist the index + documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") + + documents = [Document(doc_id="1", text="Another Test document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, 
index_name="another_test_index") + + vector_store_manager._persist(index_name="test_index") + + # Simulate a fresh load of the index (clearing in-memory state) + vector_store_manager.index_map = {} # Clear current in-memory index map + loaded_index = vector_store_manager._load_index(index_name="test_index") + + # Verify that the index was reloaded and contains the expected document + assert loaded_index is not None + assert vector_store_manager.document_exists("1", "test_index") diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index 9e0922a97..4ed314454 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -197,11 +197,12 @@ def _load_index(self, index_name: str): # Create a new StorageContext using the loaded vector store storage_context = StorageContext.from_defaults( vector_store=vector_store, - persist_dir=persist_dir # Ensure it uses the correct directory for persistence + index_store = self.index_store, + persist_dir=persist_dir, # Ensure it uses the correct directory for persistence ) # Load the VectorStoreIndex using the storage context - loaded_index = load_index_from_storage(storage_context=storage_context) + loaded_index = load_index_from_storage(storage_context=storage_context, embed_model=self.embed_model) # Set the index_id for the loaded index to the current index_name loaded_index.set_index_id(index_name) From 6e7b82731827bc19a357be33bbf90682bdb96802 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 2 Oct 2024 18:57:00 -0700 Subject: [PATCH 22/42] feat: Move to ragengine folder and remove unneeded CRUD operations (refresh, update, delete) --- presets/rag_service/config.py | 15 ------ .../__init__.py => ragengine/README.md | 0 .../crud => ragengine}/__init__.py | 0 .../embedding => ragengine/crud}/__init__.py | 0 .../crud/operations.py | 15 +++--- .../embedding}/__init__.py | 0 .../embedding/base.py | 0 
.../embedding/huggingface_local.py | 2 +- .../embedding/huggingface_remote.py | 0 .../tests => ragengine/inference}/__init__.py | 0 .../inference/custom_inference.py | 0 {presets/rag_service => ragengine}/main.py | 4 +- {presets/rag_service => ragengine}/models.py | 0 .../requirements.txt | 0 .../tests}/__init__.py | 0 .../tests/conftest.py | 0 .../tests/test_faiss_store.py | 1 + ragengine/vector_store/__init__.py | 0 .../vector_store/base.py | 12 +++-- .../vector_store/faiss_store.py | 51 ++++++++----------- ragengine/vector_store/playground/__init__.py | 0 .../playground/chromadb_playground.py | 0 22 files changed, 42 insertions(+), 58 deletions(-) delete mode 100644 presets/rag_service/config.py rename presets/rag_service/__init__.py => ragengine/README.md (100%) rename {presets/rag_service/crud => ragengine}/__init__.py (100%) rename {presets/rag_service/embedding => ragengine/crud}/__init__.py (100%) rename {presets/rag_service => ragengine}/crud/operations.py (85%) rename {presets/rag_service/inference => ragengine/embedding}/__init__.py (100%) rename {presets/rag_service => ragengine}/embedding/base.py (100%) rename {presets/rag_service => ragengine}/embedding/huggingface_local.py (93%) rename {presets/rag_service => ragengine}/embedding/huggingface_remote.py (100%) rename {presets/rag_service/tests => ragengine/inference}/__init__.py (100%) rename {presets/rag_service => ragengine}/inference/custom_inference.py (100%) rename {presets/rag_service => ragengine}/main.py (99%) rename {presets/rag_service => ragengine}/models.py (100%) rename {presets/rag_service => ragengine}/requirements.txt (100%) rename {presets/rag_service/vector_store => ragengine/tests}/__init__.py (100%) rename {presets/rag_service => ragengine}/tests/conftest.py (100%) rename {presets/rag_service => ragengine}/tests/test_faiss_store.py (99%) create mode 100644 ragengine/vector_store/__init__.py rename {presets/rag_service => ragengine}/vector_store/base.py (98%) rename 
{presets/rag_service => ragengine}/vector_store/faiss_store.py (93%) create mode 100644 ragengine/vector_store/playground/__init__.py rename {presets/rag_service => ragengine}/vector_store/playground/chromadb_playground.py (100%) diff --git a/presets/rag_service/config.py b/presets/rag_service/config.py deleted file mode 100644 index 0745084f3..000000000 --- a/presets/rag_service/config.py +++ /dev/null @@ -1,15 +0,0 @@ -# config.py -import os - -EMBEDDING_TYPE = os.getenv("EMBEDDING_TYPE", "local") -EMBEDDING_URL = os.getenv("EMBEDDING_URL") - -INFERENCE_URL = os.getenv("INFERENCE_URL", "https://api.test.com/v1") -INFERENCE_ACCESS_SECRET = os.getenv("AccessSecret") -RESPONSE_FIELD = os.getenv("RESPONSE_FIELD", "result") - -MODEL_ID = os.getenv("MODEL_ID", "BAAI/bge-small-en-v1.5") -VECTOR_DB_TYPE = os.getenv("VECTOR_DB_TYPE", "faiss") -INDEX_SERVICE_NAME = os.getenv("INDEX_SERVICE_NAME", "default-index-service") -ACCESS_SECRET = os.getenv("ACCESS_SECRET") -PERSIST_DIR = "./storage" \ No newline at end of file diff --git a/presets/rag_service/__init__.py b/ragengine/README.md similarity index 100% rename from presets/rag_service/__init__.py rename to ragengine/README.md diff --git a/presets/rag_service/crud/__init__.py b/ragengine/__init__.py similarity index 100% rename from presets/rag_service/crud/__init__.py rename to ragengine/__init__.py diff --git a/presets/rag_service/embedding/__init__.py b/ragengine/crud/__init__.py similarity index 100% rename from presets/rag_service/embedding/__init__.py rename to ragengine/crud/__init__.py diff --git a/presets/rag_service/crud/operations.py b/ragengine/crud/operations.py similarity index 85% rename from presets/rag_service/crud/operations.py rename to ragengine/crud/operations.py index 5218e4508..de45974dc 100644 --- a/presets/rag_service/crud/operations.py +++ b/ragengine/crud/operations.py @@ -4,7 +4,7 @@ from vector_store.base import BaseVectorStore -class RAGOperations: +class VectorStoreManager: def 
__init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store @@ -16,8 +16,9 @@ def read(self, query: str, top_k: int): """Query the indexed documents.""" return self.vector_store.query(query, top_k) + """ def update(self, documents: List[Document]) -> Dict[str, List[str]]: - """Update existing documents, or insert new ones if they don’t exist.""" + # Update existing documents, or insert new ones if they don’t exist. updated_docs = [] new_docs = [] for doc in documents: @@ -30,8 +31,13 @@ def update(self, documents: List[Document]) -> Dict[str, List[str]]: return {"updated": updated_docs, "inserted": new_docs} def delete(self, doc_id: str): - """Delete a document by ID.""" + # Delete a document by ID. return self.vector_store.delete_document(doc_id) + + def refresh(self, documents: List[Document]) -> List[bool]: + # Refresh Documents. + return self.vector_store.refresh_documents(documents) + """ def get(self, doc_id: str) -> Document: """Retrieve a document by ID.""" @@ -41,6 +47,3 @@ def list_all(self) -> Dict[str, Document]: """List all documents.""" return self.vector_store.list_documents() - def refresh(self, documents: List[Document]) -> List[bool]: - """Refresh Documents.""" - return self.vector_store.refresh_documents(documents) diff --git a/presets/rag_service/inference/__init__.py b/ragengine/embedding/__init__.py similarity index 100% rename from presets/rag_service/inference/__init__.py rename to ragengine/embedding/__init__.py diff --git a/presets/rag_service/embedding/base.py b/ragengine/embedding/base.py similarity index 100% rename from presets/rag_service/embedding/base.py rename to ragengine/embedding/base.py diff --git a/presets/rag_service/embedding/huggingface_local.py b/ragengine/embedding/huggingface_local.py similarity index 93% rename from presets/rag_service/embedding/huggingface_local.py rename to ragengine/embedding/huggingface_local.py index cf58c7a3e..3dab51e9a 100644 --- 
a/presets/rag_service/embedding/huggingface_local.py +++ b/ragengine/embedding/huggingface_local.py @@ -5,7 +5,7 @@ class LocalHuggingFaceEmbedding(BaseEmbeddingModel): def __init__(self, model_name: str): - self.model = HuggingFaceEmbedding(model_name=model_name) + self.model = HuggingFaceEmbedding(model_name=model_name) # TODO: Ensure/test loads on GPU (when available) def get_text_embedding(self, text: str): """Returns the text embedding for a given input string.""" diff --git a/presets/rag_service/embedding/huggingface_remote.py b/ragengine/embedding/huggingface_remote.py similarity index 100% rename from presets/rag_service/embedding/huggingface_remote.py rename to ragengine/embedding/huggingface_remote.py diff --git a/presets/rag_service/tests/__init__.py b/ragengine/inference/__init__.py similarity index 100% rename from presets/rag_service/tests/__init__.py rename to ragengine/inference/__init__.py diff --git a/presets/rag_service/inference/custom_inference.py b/ragengine/inference/custom_inference.py similarity index 100% rename from presets/rag_service/inference/custom_inference.py rename to ragengine/inference/custom_inference.py diff --git a/presets/rag_service/main.py b/ragengine/main.py similarity index 99% rename from presets/rag_service/main.py rename to ragengine/main.py index 49c77919e..350aa1a57 100644 --- a/presets/rag_service/main.py +++ b/ragengine/main.py @@ -41,6 +41,7 @@ async def query_index(request: QueryRequest): # TODO: Research async/sync what t except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +""" @app.put("/update", response_model=Dict[str, List[str]]) async def update_documents(request: UpdateRequest): try: @@ -56,7 +57,7 @@ async def refresh_documents(request: RefreshRequest): return result except Exception as e: raise HTTPException(status_code=500, detail=str(e)) - + @app.delete("/document/{doc_id}") async def delete_document(doc_id: str): try: @@ -64,6 +65,7 @@ async def delete_document(doc_id: str): 
return {"message": "Document deleted successfully"} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +""" @app.get("/document/{doc_id}", response_model=DocumentResponse) async def get_document(doc_id: str): diff --git a/presets/rag_service/models.py b/ragengine/models.py similarity index 100% rename from presets/rag_service/models.py rename to ragengine/models.py diff --git a/presets/rag_service/requirements.txt b/ragengine/requirements.txt similarity index 100% rename from presets/rag_service/requirements.txt rename to ragengine/requirements.txt diff --git a/presets/rag_service/vector_store/__init__.py b/ragengine/tests/__init__.py similarity index 100% rename from presets/rag_service/vector_store/__init__.py rename to ragengine/tests/__init__.py diff --git a/presets/rag_service/tests/conftest.py b/ragengine/tests/conftest.py similarity index 100% rename from presets/rag_service/tests/conftest.py rename to ragengine/tests/conftest.py diff --git a/presets/rag_service/tests/test_faiss_store.py b/ragengine/tests/test_faiss_store.py similarity index 99% rename from presets/rag_service/tests/test_faiss_store.py rename to ragengine/tests/test_faiss_store.py index 3bace3f01..eeaab1069 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/ragengine/tests/test_faiss_store.py @@ -81,6 +81,7 @@ def test_query_documents(mock_post, vector_store_manager): mock_post.assert_called_once_with( INFERENCE_URL, + # Auto-Generated by LlamaIndex json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True}, headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} ) diff --git a/ragengine/vector_store/__init__.py b/ragengine/vector_store/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/vector_store/base.py 
b/ragengine/vector_store/base.py similarity index 98% rename from presets/rag_service/vector_store/base.py rename to ragengine/vector_store/base.py index d9b92315c..789afe08a 100644 --- a/presets/rag_service/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -17,28 +17,30 @@ def query(self, query: str, top_k: int, index_name: str): def add_document(self, document: Document, index_name: str): pass + """ @abstractmethod def delete_document(self, doc_id: str, index_name: str): pass - + @abstractmethod def update_document(self, document: Document, index_name: str) -> str: pass @abstractmethod - def get_document(self, doc_id: str, index_name: str) -> Document: + def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: pass + """ @abstractmethod - def list_documents(self, index_name: str) -> Dict[str, Document]: + def get_document(self, doc_id: str, index_name: str) -> Document: pass @abstractmethod - def document_exists(self, doc_id: str, index_name: str) -> bool: + def list_documents(self, index_name: str) -> Dict[str, Document]: pass @abstractmethod - def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: + def document_exists(self, doc_id: str, index_name: str) -> bool: pass @abstractmethod diff --git a/presets/rag_service/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py similarity index 93% rename from presets/rag_service/vector_store/faiss_store.py rename to ragengine/vector_store/faiss_store.py index 4ed314454..63f1f535c 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -23,7 +23,7 @@ def __init__(self, embedding_manager): self.embedding_manager = embedding_manager self.embed_model = self.embedding_manager.model self.dimension = self.embedding_manager.get_embedding_dimension() - # TODO: Consider allowing user custom indexing method e.g. + # TODO: Consider allowing user custom indexing method (would require configmap?) 
e.g. """ # Choose the FAISS index type based on the provided index_method if index_method == 'FlatL2': @@ -38,7 +38,7 @@ def __init__(self, embedding_manager): else: raise ValueError(f"Unknown index method: {index_method}") """ - self.index_map = {} # Used to store the in-memory index via namespace (e.g. namespace -> index) + self.index_map = {} # Used to store the in-memory index via namespace (e.g. index_name -> VectorStoreIndex) self.index_store = SimpleIndexStore() # Use to store global index metadata self.llm = CustomInference() @@ -83,8 +83,23 @@ def query(self, query: str, top_k: int, index_name: str): query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) return query_engine.query(query) + def get_document(self, doc_id: str, index_name: str): + """Retrieves a document's RefDocInfo by its ID.""" + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + + # Try to retrieve the RefDocInfo associated with the doc_id + ref_doc_info = self.index_map[index_name].ref_doc_info.get(doc_id) + + if ref_doc_info is None: + print(f"Document with ID {doc_id} not found in index '{index_name}'.") + return None + + return ref_doc_info + + """ def delete_document(self, doc_id: str, index_name: str): - """Deletes a document from the FAISS vector store.""" + # Deletes a document from the FAISS vector store. if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") if not self.document_exists(doc_id, index_name): @@ -100,7 +115,7 @@ def delete_document(self, doc_id: str, index_name: str): self._persist(index_name) def update_document(self, document: Document, index_name: str): - """Updates an existing document in the FAISS vector store.""" + # Updates an existing document in the FAISS vector store. 
if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) @@ -114,33 +129,8 @@ def update_document(self, document: Document, index_name: str): finally: self._persist(index_name) - def get_document(self, doc_id: str, index_name: str): - """Retrieves a document's RefDocInfo by its ID.""" - if index_name not in self.index_map: - raise ValueError(f"No such index: '{index_name}' exists.") - - # Try to retrieve the RefDocInfo associated with the doc_id - ref_doc_info = self.index_map[index_name].ref_doc_info.get(doc_id) - - if ref_doc_info is None: - print(f"Document with ID {doc_id} not found in index '{index_name}'.") - return None - - return ref_doc_info - - def get_nodes_by_ref_doc_id(self, doc_id: str, index_name: str): - """Retrieve nodes associated with a given document's ref ID.""" - if index_name not in self.index_map: - raise ValueError(f"No such index: '{index_name}' exists.") - - ref_doc_info = self.get_document(doc_id, index_name) - if ref_doc_info is None: - return None - - return ref_doc_info.node_ids - def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: - """Updates existing documents and inserts new documents in the vector store.""" + # Updates existing documents and inserts new documents in the vector store. if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") @@ -156,6 +146,7 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ print(f"Unable to Refresh Documents in the VectorStoreIndex for index '{index_name}'. 
Error: {e}") finally: self._persist(index_name) + """ def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: """Lists all documents in the vector store.""" diff --git a/ragengine/vector_store/playground/__init__.py b/ragengine/vector_store/playground/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/vector_store/playground/chromadb_playground.py b/ragengine/vector_store/playground/chromadb_playground.py similarity index 100% rename from presets/rag_service/vector_store/playground/chromadb_playground.py rename to ragengine/vector_store/playground/chromadb_playground.py From aaaa21b3e695964f97e03212307ae2da83308c8c Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 2 Oct 2024 19:01:13 -0700 Subject: [PATCH 23/42] fix: Update to include rag unit tests Signed-off-by: ishaansehgal99 --- .github/workflows/tests.yml | 4 +++- Makefile | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a2c16bbc8..5015d742f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -45,8 +45,10 @@ jobs: - name: Run unit tests & Generate coverage run: | make unit-test + make rag-service-test + make tuning-metrics-server-test - - name: Run inference api unit tests + - name: Run inference api e2e tests run: | make inference-api-e2e diff --git a/Makefile b/Makefile index 59d09d7e3..025a0562f 100644 --- a/Makefile +++ b/Makefile @@ -96,13 +96,23 @@ unit-test: ## Run unit tests. 
-race -coverprofile=coverage.txt -covermode=atomic go tool cover -func=coverage.txt +.PHONY: rag-service-test +rag-service-test: + pip install -r presets/rag_service/requirements.txt + pytest -o log_cli=true -o log_cli_level=INFO presets/rag_service/tests + +.PHONY: tuning-metrics-server-test +tuning-metrics-server-test: + pytest -o log_cli=true -o log_cli_level=INFO presets/tuning/text-generation/metrics + ## -------------------------------------- ## E2E tests ## -------------------------------------- -inference-api-e2e: +.PHONY: inference-api-e2e +inference-api-e2e: pip install -r presets/inference/text-generation/requirements.txt - pytest -o log_cli=true -o log_cli_level=INFO . + pytest -o log_cli=true -o log_cli_level=INFO presets/inference/text-generation/tests # Ginkgo configurations GINKGO_FOCUS ?= From be9d6ed89376999f35f8145b5348a7e7c2e6f41a Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 3 Oct 2024 18:00:08 -0700 Subject: [PATCH 24/42] fix: Update persisting and loading logic Signed-off-by: ishaansehgal99 --- ragengine/tests/test_faiss_store.py | 17 +++++++--- ragengine/vector_store/faiss_store.py | 34 ++++++++++++++----- .../__init__.py | 0 .../manager.py} | 0 4 files changed, 39 insertions(+), 12 deletions(-) rename ragengine/{crud => vector_store_manager}/__init__.py (100%) rename ragengine/{crud/operations.py => vector_store_manager/manager.py} (100%) diff --git a/ragengine/tests/test_faiss_store.py b/ragengine/tests/test_faiss_store.py index eeaab1069..5d0a2f1fd 100644 --- a/ragengine/tests/test_faiss_store.py +++ b/ragengine/tests/test_faiss_store.py @@ -3,7 +3,7 @@ from unittest.mock import patch import pytest -from vector_store.faiss_store import FaissVectorStoreManager +from vector_store.faiss_store import FaissVectorStoreHandler from models import Document from embedding.huggingface_local import LocalHuggingFaceEmbedding from config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET @@ -18,7 +18,7 @@ def 
vector_store_manager(init_embed_manager): print(f"Saving Temporary Test Storage at: {temp_dir}") # Mock the persistence directory os.environ['PERSIST_DIR'] = temp_dir - yield FaissVectorStoreManager(init_embed_manager) + yield FaissVectorStoreHandler(init_embed_manager) def test_index_documents(vector_store_manager): @@ -203,12 +203,21 @@ def test_persist_and_load_index(vector_store_manager): documents = [Document(doc_id="1", text="Another Test document", metadata={"type": "text"})] vector_store_manager.index_documents(documents, index_name="another_test_index") - vector_store_manager._persist(index_name="test_index") + vector_store_manager._persist_all() # Simulate a fresh load of the index (clearing in-memory state) vector_store_manager.index_map = {} # Clear current in-memory index map - loaded_index = vector_store_manager._load_index(index_name="test_index") + loaded_indices = vector_store_manager._load_indices() # Verify that the index was reloaded and contains the expected document + assert loaded_indices is not None + assert vector_store_manager.document_exists("1", "test_index") + assert vector_store_manager.document_exists("1", "another_test_index") + + vector_store_manager.index_map = {} # Clear current in-memory index map + loaded_index = vector_store_manager._load_index(index_name="test_index") + assert loaded_index is not None assert vector_store_manager.document_exists("1", "test_index") + assert not vector_store_manager.document_exists("1", "another_test_index") # Since we didn't load this index + diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index 63f1f535c..13a362218 100644 --- a/ragengine/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -18,7 +18,7 @@ from .base import BaseVectorStore -class FaissVectorStoreManager(BaseVectorStore): +class FaissVectorStoreHandler(BaseVectorStore): def __init__(self, embedding_manager): self.embedding_manager = embedding_manager self.embed_model 
= self.embedding_manager.model @@ -157,7 +157,8 @@ def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: def document_exists(self, doc_id: str, index_name: str) -> bool: """Checks if a document exists in the vector store.""" if index_name not in self.index_map: - raise ValueError(f"No such index: '{index_name}' exists.") + print(f"No such index: '{index_name}' exists in vector store.") + return False return doc_id in self.index_map[index_name].ref_doc_info def _load_index_store(self): @@ -170,8 +171,22 @@ def _load_index_store(self): # Load the global index store from the persisted JSON self.index_store = SimpleIndexStore.from_persist_path(store_path) + def _load_indices(self): + """Loads the existing indices from disk.""" + # Load the global index store if it hasn't been loaded yet + if not self.index_store or not self.index_store.index_structs(): + self._load_index_store() + + if not os.path.exists(PERSIST_DIR): + raise ValueError(f"No persisted index found in '{PERSIST_DIR}'") + + for idx in self.index_store.index_structs(): + self._load_index(idx.index_id) + + return self.index_map + def _load_index(self, index_name: str): - """Loads the existing FAISS index from disk.""" + """Loads the existing index from disk.""" # Load the global index store if it hasn't been loaded yet if not self.index_store or not self.index_store.index_structs(): self._load_index_store() @@ -188,20 +203,21 @@ def _load_index(self, index_name: str): # Create a new StorageContext using the loaded vector store storage_context = StorageContext.from_defaults( vector_store=vector_store, - index_store = self.index_store, persist_dir=persist_dir, # Ensure it uses the correct directory for persistence ) # Load the VectorStoreIndex using the storage context loaded_index = load_index_from_storage(storage_context=storage_context, embed_model=self.embed_model) - # Set the index_id for the loaded index to the current index_name - loaded_index.set_index_id(index_name) - # Update the 
in-memory index map with the loaded index self.index_map[index_name] = loaded_index return self.index_map[index_name] + def _persist_all(self): + self.index_store.persist(os.path.join(PERSIST_DIR, "store.json")) # Persist global index store + for idx in self.index_store.index_structs(): + self._persist(idx.index_id) + def _persist(self, index_name: str): """Saves the existing FAISS index to disk.""" self.index_store.persist(os.path.join(PERSIST_DIR, "store.json")) # Persist global index store @@ -209,4 +225,6 @@ def _persist(self, index_name: str): # Persist each index's storage context separately storage_context = self.index_map[index_name].storage_context - storage_context.persist(persist_dir=os.path.join(PERSIST_DIR, index_name)) + storage_context.persist( + persist_dir=os.path.join(PERSIST_DIR, index_name) + ) diff --git a/ragengine/crud/__init__.py b/ragengine/vector_store_manager/__init__.py similarity index 100% rename from ragengine/crud/__init__.py rename to ragengine/vector_store_manager/__init__.py diff --git a/ragengine/crud/operations.py b/ragengine/vector_store_manager/manager.py similarity index 100% rename from ragengine/crud/operations.py rename to ragengine/vector_store_manager/manager.py From cf24953696f1eed37fa43a901b7d31ec91fb5c98 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 7 Oct 2024 16:14:37 -0700 Subject: [PATCH 25/42] feat: Custom params for llm --- ragengine/inference/custom_inference.py | 19 +++++++++++++++---- ragengine/models.py | 1 + ragengine/tests/test_faiss_store.py | 6 +++--- ragengine/vector_store/base.py | 2 +- ragengine/vector_store/faiss_store.py | 4 +++- ragengine/vector_store_manager/manager.py | 4 ++-- 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/ragengine/inference/custom_inference.py b/ragengine/inference/custom_inference.py index 11ed0ad25..5e49e04b6 100644 --- a/ragengine/inference/custom_inference.py +++ b/ragengine/inference/custom_inference.py @@ -6,6 +6,13 @@ from config import 
INFERENCE_URL, INFERENCE_ACCESS_SECRET, RESPONSE_FIELD class CustomInference(CustomLLM): + params: dict = {} + + def set_params(self, params: dict) -> None: + self.params = params + + def get_param(self, key, default=None): + return self.params.get(key, default) @llm_completion_callback() def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: @@ -13,10 +20,14 @@ def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: @llm_completion_callback() def complete(self, prompt: str, **kwargs) -> CompletionResponse: - if "openai" in INFERENCE_URL: - return self._openai_complete(prompt, **kwargs) - else: - return self._custom_api_complete(prompt, **kwargs) + try: + if "openai" in INFERENCE_URL: + return self._openai_complete(prompt, **kwargs, **self.params) + else: + return self._custom_api_complete(prompt, **kwargs, **self.params) + finally: + # Clear params after the completion is done + self.params = {} def _openai_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: llm = OpenAI( diff --git a/ragengine/models.py b/ragengine/models.py index a1d21537b..69f913712 100644 --- a/ragengine/models.py +++ b/ragengine/models.py @@ -14,6 +14,7 @@ class IndexRequest(BaseModel): class QueryRequest(BaseModel): query: str top_k: int = 10 + params: Optional[Dict] = None # Accept a dictionary for parameters class UpdateRequest(BaseModel): documents: List[Document] diff --git a/ragengine/tests/test_faiss_store.py b/ragengine/tests/test_faiss_store.py index 5d0a2f1fd..650ee03cc 100644 --- a/ragengine/tests/test_faiss_store.py +++ b/ragengine/tests/test_faiss_store.py @@ -20,7 +20,6 @@ def vector_store_manager(init_embed_manager): os.environ['PERSIST_DIR'] = temp_dir yield FaissVectorStoreHandler(init_embed_manager) - def test_index_documents(vector_store_manager): documents = [ Document(doc_id="1", text="First document", metadata={"type": "text"}), @@ -73,8 +72,9 @@ def test_query_documents(mock_post, vector_store_manager): ] 
vector_store_manager.index_documents(documents, index_name="test_index") + params = {"temperature": 0.7} # Mock query and results - query_result = vector_store_manager.query("First", top_k=1, index_name="test_index") + query_result = vector_store_manager.query("First", top_k=1, index_name="test_index", params=params) assert query_result is not None assert query_result.response == "This is the completion from the API" @@ -82,7 +82,7 @@ def test_query_documents(mock_post, vector_store_manager): mock_post.assert_called_once_with( INFERENCE_URL, # Auto-Generated by LlamaIndex - json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True}, + json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True, 'temperature': 0.7}, headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} ) diff --git a/ragengine/vector_store/base.py b/ragengine/vector_store/base.py index 789afe08a..74254f2e3 100644 --- a/ragengine/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -10,7 +10,7 @@ def index_documents(self, documents: List[Document], index_name: str) -> List[st pass @abstractmethod - def query(self, query: str, top_k: int, index_name: str): + def query(self, query: str, top_k: int, index_name: str, params: dict): pass @abstractmethod diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index 13a362218..ca9f50f31 100644 --- a/ragengine/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -76,10 +76,12 @@ def add_document(self, document: Document, index_name: str): self.index_map[index_name].insert(llama_doc) self._persist(index_name) - def query(self, 
query: str, top_k: int, index_name: str): + def query(self, query: str, top_k: int, index_name: str, params: dict): """Queries the FAISS vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") + self.llm.set_params(params) + query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) return query_engine.query(query) diff --git a/ragengine/vector_store_manager/manager.py b/ragengine/vector_store_manager/manager.py index de45974dc..d6a17229b 100644 --- a/ragengine/vector_store_manager/manager.py +++ b/ragengine/vector_store_manager/manager.py @@ -12,9 +12,9 @@ def create(self, documents: List[Document]) -> List[str]: """Index new documents.""" return self.vector_store.index_documents(documents) - def read(self, query: str, top_k: int): + def read(self, query: str, top_k: int, params: dict): """Query the indexed documents.""" - return self.vector_store.query(query, top_k) + return self.vector_store.query(query, top_k, params) """ def update(self, documents: List[Document]) -> Dict[str, List[str]]: From eff5b3778a5bb835d78b6c3610ef07847f5931c0 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 14:55:30 -0700 Subject: [PATCH 26/42] feat: massive update, improvements all across service and enhanced unit testing --- ragengine/main.py | 72 ++++++--- ragengine/models.py | 17 +-- ragengine/tests/api/__init__.py | 0 ragengine/tests/{ => api}/conftest.py | 2 +- ragengine/tests/api/test_main.py | 140 ++++++++++++++++++ ragengine/tests/vector_store/__init__.py | 0 ragengine/tests/vector_store/conftest.py | 6 + .../{ => vector_store}/test_faiss_store.py | 78 +++++----- ragengine/vector_store/base.py | 21 +-- ragengine/vector_store/faiss_store.py | 38 +++-- ragengine/vector_store_manager/manager.py | 18 +-- 11 files changed, 272 insertions(+), 120 deletions(-) create mode 100644 ragengine/tests/api/__init__.py rename ragengine/tests/{ => api}/conftest.py (94%) create mode 
100644 ragengine/tests/api/test_main.py create mode 100644 ragengine/tests/vector_store/__init__.py create mode 100644 ragengine/tests/vector_store/conftest.py rename ragengine/tests/{ => vector_store}/test_faiss_store.py (70%) diff --git a/ragengine/main.py b/ragengine/main.py index 350aa1a57..bcecfb966 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -1,42 +1,52 @@ from typing import Dict, List -from crud.operations import RAGOperations +from llama_index.core.schema import TextNode + +from vector_store_manager.manager import VectorStoreManager from embedding.huggingface_local import LocalHuggingFaceEmbedding from embedding.huggingface_remote import RemoteHuggingFaceEmbedding +from llama_index.core.storage.docstore.types import RefDocInfo from fastapi import FastAPI, HTTPException -from models import (DocumentResponse, IndexRequest, ListDocumentsResponse, - QueryRequest, RefreshRequest, UpdateRequest) -from vector_store.faiss_store import FaissVectorStoreManager +from models import (IndexRequest, ListDocumentsResponse, + QueryRequest, Document) +from vector_store.faiss_store import FaissVectorStoreHandler from config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID app = FastAPI() # Initialize embedding model -if EMBEDDING_TYPE == "local": +if EMBEDDING_TYPE.lower() == "local": embedding_manager = LocalHuggingFaceEmbedding(MODEL_ID) -elif EMBEDDING_TYPE == "remote": - embedding_manager = RemoteHuggingFaceEmbedding(MODEL_ID) +elif EMBEDDING_TYPE.lower() == "remote": + embedding_manager = RemoteHuggingFaceEmbedding(MODEL_ID, ACCESS_SECRET) +else: + raise ValueError("Invalid Embedding Type Specified (Must be Local or Remote)") # Initialize vector store # TODO: Dynamically set VectorStore from EnvVars (which ultimately comes from CRD StorageSpec) -vector_store = FaissVectorStoreManager(embedding_manager) +vector_store_handler = FaissVectorStoreHandler(embedding_manager) # Initialize RAG operations -rag_ops = RAGOperations(vector_store) +rag_ops = 
VectorStoreManager(vector_store_handler) -@app.post("/index", response_model=List[str]) -async def index_documents(request: IndexRequest): +@app.post("/index", response_model=List[Document]) +async def index_documents(request: IndexRequest): # TODO: Research async/sync what to use (inference is calling) try: - doc_ids = rag_ops.create(request.documents) - return doc_ids + doc_ids = rag_ops.create(request.index_name, request.documents) + documents = [ + Document(doc_id=doc_id, text=doc.text, metadata=doc.metadata) + for doc_id, doc in zip(doc_ids, request.documents) + ] + return documents except Exception as e: raise HTTPException(status_code=500, detail=str(e)) -@app.post("/query") -async def query_index(request: QueryRequest): # TODO: Research async/sync what to use (inference is calling) +@app.post("/query", response_model=Dict[str, str]) +async def query_index(request: QueryRequest): try: - response = rag_ops.read(request.query, request.top_k) + llm_params = request.llm_params or {} # Default to empty dict if no params provided + response = rag_ops.read(request.index_name, request.query, request.top_k, llm_params) return {"response": str(response)} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -67,19 +77,33 @@ async def delete_document(doc_id: str): raise HTTPException(status_code=500, detail=str(e)) """ -@app.get("/document/{doc_id}", response_model=DocumentResponse) -async def get_document(doc_id: str): +@app.get("/document/{index_name}/{doc_id}", response_model=RefDocInfo) +async def get_document(index_name: str, doc_id: str): try: - document = rag_ops.get(doc_id) - return DocumentResponse(doc_id=doc_id, document=document) + document = rag_ops.get(index_name, doc_id) + if document: + return document + else: + raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} " + f"not found in index '{index_name}'.") + except ValueError as ve: + raise HTTPException(status_code=404, detail=str(ve)) except Exception as e: 
raise HTTPException(status_code=500, detail=str(e)) -@app.get("/documents", response_model=ListDocumentsResponse) -async def list_documents(): +@app.get("/indexed-documents", response_model=ListDocumentsResponse) +async def list_all_indexed_documents(): try: - documents = rag_ops.list_all() - return ListDocumentsResponse(documents=documents) + documents = rag_ops.list_all_indexed_documents() + serialized_documents = { + index_name: { + doc_name: { + "text": doc_info.text, "hash": doc_info.hash + } for doc_name, doc_info in vector_store_index.docstore.docs.items() + } + for index_name, vector_store_index in documents.items() + } + return ListDocumentsResponse(documents=serialized_documents) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) diff --git a/ragengine/models.py b/ragengine/models.py index 69f913712..c6bc8f723 100644 --- a/ragengine/models.py +++ b/ragengine/models.py @@ -2,29 +2,20 @@ from pydantic import BaseModel - class Document(BaseModel): text: str metadata: Optional[dict] = {} doc_id: Optional[str] = None class IndexRequest(BaseModel): + index_name: str documents: List[Document] class QueryRequest(BaseModel): + index_name: str query: str top_k: int = 10 - params: Optional[Dict] = None # Accept a dictionary for parameters - -class UpdateRequest(BaseModel): - documents: List[Document] - -class RefreshRequest(BaseModel): - documents: List[Document] - -class DocumentResponse(BaseModel): - doc_id: str - document: Document + llm_params: Optional[Dict] = None # Accept a dictionary for parameters class ListDocumentsResponse(BaseModel): - documents: Dict[str, Document] \ No newline at end of file + documents:Dict[str, Dict[str, Dict[str, str]]] diff --git a/ragengine/tests/api/__init__.py b/ragengine/tests/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ragengine/tests/conftest.py b/ragengine/tests/api/conftest.py similarity index 94% rename from ragengine/tests/conftest.py rename to 
ragengine/tests/api/conftest.py index afb6c4713..08ad12a74 100644 --- a/ragengine/tests/conftest.py +++ b/ragengine/tests/api/conftest.py @@ -1,6 +1,6 @@ import sys import os -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Force CPU-only execution for testing os.environ["OMP_NUM_THREADS"] = "1" # Force single-threaded for testing to prevent segfault while loading embedding model os.environ["MKL_NUM_THREADS"] = "1" # Force MKL to use a single thread diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py new file mode 100644 index 000000000..7a5ff3b17 --- /dev/null +++ b/ragengine/tests/api/test_main.py @@ -0,0 +1,140 @@ +import os +from tempfile import TemporaryDirectory +from unittest.mock import patch + +import pytest +from vector_store.faiss_store import FaissVectorStoreHandler +from models import Document +from embedding.huggingface_local import LocalHuggingFaceEmbedding +from config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET + +from main import app, rag_ops +from fastapi.testclient import TestClient +from unittest.mock import MagicMock + +AUTO_GEN_DOC_ID_LEN = 36 + +client = TestClient(app) + +def test_index_documents_success(): + request_data = { + "index_name": "test_index", + "documents": [ + {"text": "This is a test document"}, + {"text": "Another test document"} + ] + } + + response = client.post("/index", json=request_data) + assert response.status_code == 200 + doc1, doc2 = response.json() + assert (doc1["text"] == "This is a test document") + assert len(doc1["doc_id"]) == AUTO_GEN_DOC_ID_LEN + assert not doc1["metadata"] + + assert (doc2["text"] == "Another test document") + assert len(doc2["doc_id"]) == AUTO_GEN_DOC_ID_LEN + assert not doc2["metadata"] + +@patch('requests.post') +def test_query_index_success(mock_post): + # Define Mock Response for 
Custom Inference API + mock_response = { + "result": "This is the completion from the API" + } + mock_post.return_value.json.return_value = mock_response + # Index + request_data = { + "index_name": "test_index", + "documents": [ + {"text": "This is a test document"}, + {"text": "Another test document"} + ] + } + + response = client.post("/index", json=request_data) + assert response.status_code == 200 + + # Query + request_data = { + "index_name": "test_index", + "query": "test query", + "top_k": 1, + "llm_params": {"temperature": 0.7} + } + + response = client.post("/query", json=request_data) + assert response.status_code == 200 + assert response.json() == {"response": "This is the completion from the API"} + assert mock_post.call_count == 1 + +def test_query_index_failure(): + # Prepare request data for querying. + request_data = { + "index_name": "non_existent_index", # Use an index name that doesn't exist + "query": "test query", + "top_k": 1, + "llm_params": {"temperature": 0.7} + } + + response = client.post("/query", json=request_data) + assert response.status_code == 500 + assert response.json()["detail"] == "No such index: 'non_existent_index' exists." + + +def test_get_document_success(): + request_data = { + "index_name": "test_index", + "documents": [ + # {"doc_id": "doc1", "text": "This is a test document"}, + {"doc_id": "doc1", "text": "This is a test document"}, + {"text": "Another test document"} + ] + } + + index_response = client.post("/index", json=request_data) + assert index_response.status_code == 200 + + # Call the GET document endpoint. + get_response = client.get("/document/test_index/doc1") + assert get_response.status_code == 200 + + response_json = get_response.json() + + assert response_json.keys() == {"node_ids", 'metadata'} + assert response_json['metadata'] == {} + + assert isinstance(response_json["node_ids"], list) and len(response_json["node_ids"]) == 1 + + +def test_get_document_failure(): + # Call the GET document endpoint. 
+ response = client.get("/document/test_index/doc1") + assert response.status_code == 404 + +def test_list_all_indexed_documents_success(): + response = client.get("/indexed-documents") + assert response.status_code == 200 + assert response.json() == {'documents': {}} + + request_data = { + "index_name": "test_index", + "documents": [ + {"text": "This is a test document"}, + {"text": "Another test document"} + ] + } + + response = client.post("/index", json=request_data) + assert response.status_code == 200 + + response = client.get("/indexed-documents") + assert response.status_code == 200 + assert "test_index" in response.json()["documents"] + response_idx = response.json()["documents"]["test_index"] + assert len(response_idx) == 2 # Two Documents Indexed + assert ({item["text"] for item in response_idx.values()} + == {item["text"] for item in request_data["documents"]}) + + + diff --git a/ragengine/tests/vector_store/__init__.py b/ragengine/tests/vector_store/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ragengine/tests/vector_store/conftest.py b/ragengine/tests/vector_store/conftest.py new file mode 100644 index 000000000..08ad12a74 --- /dev/null +++ b/ragengine/tests/vector_store/conftest.py @@ -0,0 +1,6 @@ +import sys +import os +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Force CPU-only execution for testing +os.environ["OMP_NUM_THREADS"] = "1" # Force single-threaded for testing to prevent segfault while loading embedding model +os.environ["MKL_NUM_THREADS"] = "1" # Force MKL to use a single thread diff --git a/ragengine/tests/test_faiss_store.py b/ragengine/tests/vector_store/test_faiss_store.py similarity index 70% rename from ragengine/tests/test_faiss_store.py rename to ragengine/tests/vector_store/test_faiss_store.py index 650ee03cc..029a864b3 100644 --- a/ragengine/tests/test_faiss_store.py +++ 
b/ragengine/tests/vector_store/test_faiss_store.py @@ -26,7 +26,7 @@ def test_index_documents(vector_store_manager): Document(doc_id="2", text="Second document", metadata={"type": "text"}) ] - doc_ids = vector_store_manager.index_documents(documents, index_name="test_index") + doc_ids = vector_store_manager.index_documents("test_index", documents) assert len(doc_ids) == 2 assert doc_ids == ["1", "2"] @@ -42,19 +42,19 @@ def test_index_documents_isolation(vector_store_manager): # Index documents in separate indices index_name_1, index_name_2 = "index1", "index2" - vector_store_manager.index_documents(documents1, index_name=index_name_1) - vector_store_manager.index_documents(documents2, index_name=index_name_2) + vector_store_manager.index_documents(index_name_1, documents1) + vector_store_manager.index_documents(index_name_2, documents2) # Ensure documents are correctly persisted and separated by index - doc_1 = vector_store_manager.get_document(doc_1_id, index_name=index_name_1) + doc_1 = vector_store_manager.get_document(index_name_1, doc_1_id) assert doc_1 and doc_1.node_ids # Ensure documents were created - doc_2 = vector_store_manager.get_document(doc_2_id, index_name=index_name_2) + doc_2 = vector_store_manager.get_document(index_name_2, doc_2_id) assert doc_2 and doc_2.node_ids # Ensure documents were created # Ensure that the documents do not mix between indices - assert vector_store_manager.get_document(doc_1_id, index_name=index_name_2) is None, f"Document {doc_1_id} should not exist in {index_name_2}" - assert vector_store_manager.get_document(doc_2_id, index_name=index_name_1) is None, f"Document {doc_2_id} should not exist in {index_name_1}" + assert vector_store_manager.get_document(index_name_2, doc_1_id) is None, f"Document {doc_1_id} should not exist in {index_name_2}" + assert vector_store_manager.get_document(index_name_1, doc_2_id) is None, f"Document {doc_2_id} should not exist in {index_name_1}" @patch('requests.post') def 
test_query_documents(mock_post, vector_store_manager): @@ -70,11 +70,11 @@ def test_query_documents(mock_post, vector_store_manager): Document(doc_id="1", text="First document", metadata={"type": "text"}), Document(doc_id="2", text="Second document", metadata={"type": "text"}) ] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) params = {"temperature": 0.7} # Mock query and results - query_result = vector_store_manager.query("First", top_k=1, index_name="test_index", params=params) + query_result = vector_store_manager.query("test_index", "First", top_k=1, params=params) assert query_result is not None assert query_result.response == "This is the completion from the API" @@ -86,19 +86,21 @@ def test_query_documents(mock_post, vector_store_manager): headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} ) +""" +Commented because Refresh, Update, and Delete functionality are commented def test_add_and_delete_document(vector_store_manager, capsys): documents = [Document(doc_id="3", text="Third document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) # Add a document to the existing index new_document = Document(doc_id="4", text="Fourth document", metadata={"type": "text"}) - vector_store_manager.add_document(new_document, index_name="test_index") + vector_store_manager.add_document("test_index", new_document) # Assert that the document exists - assert vector_store_manager.document_exists("4", "test_index") + assert vector_store_manager.document_exists("test_index", "4") # Delete the document - it should handle the NotImplementedError and not raise an exception - vector_store_manager.delete_document("4", "test_index") + vector_store_manager.delete_document("test_index", "4") # Capture the printed output (if any) captured = capsys.readouterr() @@ -107,18 
+109,18 @@ def test_add_and_delete_document(vector_store_manager, capsys): assert "Delete not yet implemented for Faiss index. Skipping document 4." in captured.out # Assert that the document still exists (since deletion wasn't implemented) - assert vector_store_manager.document_exists("4", "test_index") + assert vector_store_manager.document_exists("test_index", "4") def test_update_document_not_implemented(vector_store_manager, capsys): - """Test that updating a document raises a NotImplementedError and is handled properly.""" + # Test that updating a document raises a NotImplementedError and is handled properly. # Add a document to the index documents = [Document(doc_id="1", text="First document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) # Attempt to update the existing document updated_document = Document(doc_id="1", text="Updated first document", metadata={"type": "text"}) - vector_store_manager.update_document(updated_document, index_name="test_index") + vector_store_manager.update_document("test_index", updated_document) # Capture the printed output (if any) captured = capsys.readouterr() @@ -128,18 +130,17 @@ def test_update_document_not_implemented(vector_store_manager, capsys): assert f"Update not yet implemented for Faiss index. Skipping document {updated_document.doc_id}." in captured.out # Ensure the document remains unchanged - original_doc = vector_store_manager.get_document("1", index_name="test_index") + original_doc = vector_store_manager.get_document("test_index", "1") assert original_doc is not None - def test_refresh_unchanged_documents(vector_store_manager, capsys): - """Test that refreshing documents does nothing on unchanged documents.""" + # Test that refreshing documents does nothing on unchanged documents. 
# Add documents to the index documents = [Document(doc_id="1", text="First document", metadata={"type": "text"}), Document(doc_id="2", text="Second document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) - refresh_results = vector_store_manager.refresh_documents(documents, index_name="test_index") + refresh_results = vector_store_manager.refresh_documents("test_index", documents) # Capture the printed output (if any) captured = capsys.readouterr() @@ -147,43 +148,44 @@ def test_refresh_unchanged_documents(vector_store_manager, capsys): assert refresh_results == [False, False] def test_refresh_new_documents(vector_store_manager): - """Test that refreshing new documents creates them.""" - vector_store_manager.index_documents([], index_name="test_index") + # Test that refreshing new documents creates them. + vector_store_manager.index_documents("test_index", []) # Add a document to the index documents = [Document(doc_id="1", text="First document", metadata={"type": "text"}), Document(doc_id="2", text="Second document", metadata={"type": "text"})] - refresh_results = vector_store_manager.refresh_documents(documents, index_name="test_index") + refresh_results = vector_store_manager.refresh_documents("test_index", documents) - inserted_documents = vector_store_manager.list_documents(index_name="test_index") + inserted_documents = vector_store_manager.list_all_documents("test_index") assert len(inserted_documents) == len(documents) assert inserted_documents.keys() == {"1", "2"} assert refresh_results == [True, True] def test_refresh_existing_documents(vector_store_manager, capsys): - """Test that refreshing existing documents prints error.""" + # Test that refreshing existing documents prints error. 
original_documents = [Document(doc_id="1", text="First document", metadata={"type": "text"})] - vector_store_manager.index_documents(original_documents, index_name="test_index") + vector_store_manager.index_documents("test_index", original_documents) new_documents = [Document(doc_id="1", text="Updated document", metadata={"type": "text"}), Document(doc_id="2", text="Second document", metadata={"type": "text"})] - refresh_results = vector_store_manager.refresh_documents(new_documents, index_name="test_index") + refresh_results = vector_store_manager.refresh_documents("test_index", new_documents) captured = capsys.readouterr() # Check if the NotImplementedError message was printed assert "Refresh not yet fully implemented for index" in captured.out assert not refresh_results +""" def test_persist_and_load_index_store(vector_store_manager): """Test that the index store is persisted and loaded correctly.""" # Add a document and persist the index documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") - vector_store_manager._persist(index_name="test_index") + vector_store_manager.index_documents("test_index", documents) + vector_store_manager._persist("test_index") # Simulate a fresh load of the index store (clearing in-memory state) vector_store_manager.index_store = None # Clear current in-memory store @@ -198,10 +200,10 @@ def test_persist_and_load_index(vector_store_manager): """Test that an index is persisted and then loaded correctly.""" # Add a document and persist the index documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) documents = [Document(doc_id="1", text="Another Test document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, 
index_name="another_test_index") + vector_store_manager.index_documents("another_test_index", documents) vector_store_manager._persist_all() @@ -211,13 +213,13 @@ def test_persist_and_load_index(vector_store_manager): # Verify that the index was reloaded and contains the expected document assert loaded_indices is not None - assert vector_store_manager.document_exists("1", "test_index") - assert vector_store_manager.document_exists("1", "another_test_index") + assert vector_store_manager.document_exists("test_index", "1") + assert vector_store_manager.document_exists("another_test_index", "1") vector_store_manager.index_map = {} # Clear current in-memory index map - loaded_index = vector_store_manager._load_index(index_name="test_index") + loaded_index = vector_store_manager._load_index("test_index") assert loaded_index is not None - assert vector_store_manager.document_exists("1", "test_index") - assert not vector_store_manager.document_exists("1", "another_test_index") # Since we didn't load this index + assert vector_store_manager.document_exists("test_index", "1") + assert not vector_store_manager.document_exists("another_test_index", "1") # Since we didn't load this index diff --git a/ragengine/vector_store/base.py b/ragengine/vector_store/base.py index 74254f2e3..31142d71e 100644 --- a/ragengine/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -2,19 +2,20 @@ from typing import Dict, List from models import Document +from llama_index.core import VectorStoreIndex class BaseVectorStore(ABC): @abstractmethod - def index_documents(self, documents: List[Document], index_name: str) -> List[str]: + def index_documents(self, index_name: str, documents: List[Document]) -> List[str]: pass @abstractmethod - def query(self, query: str, top_k: int, index_name: str, params: dict): + def query(self, index_name: str, query: str, top_k: int, params: dict): pass @abstractmethod - def add_document(self, document: Document, index_name: str): + def add_document(self, 
index_name: str, document: Document): pass """ @@ -32,21 +33,13 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ """ @abstractmethod - def get_document(self, doc_id: str, index_name: str) -> Document: + def get_document(self, index_name: str, doc_id: str) -> Document: pass @abstractmethod - def list_documents(self, index_name: str) -> Dict[str, Document]: + def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: pass @abstractmethod - def document_exists(self, doc_id: str, index_name: str) -> bool: + def document_exists(self, index_name: str, doc_id: str) -> bool: pass - - @abstractmethod - def list_documents(self, index_name: str) -> Dict[str, Document]: - pass - - @abstractmethod - def document_exists(self, doc_id: str, index_name: str) -> bool: - pass \ No newline at end of file diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index ca9f50f31..28dae799b 100644 --- a/ragengine/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -3,9 +3,7 @@ import faiss from llama_index.core import Document as LlamaDocument -from llama_index.core import (StorageContext, VectorStoreIndex, - load_graph_from_storage, load_index_from_storage, - load_indices_from_storage) +from llama_index.core import (StorageContext, VectorStoreIndex, load_index_from_storage) from llama_index.core.storage.index_store import SimpleIndexStore from llama_index.core.storage.docstore.types import RefDocInfo from llama_index.vector_stores.faiss import FaissVectorStore @@ -42,7 +40,7 @@ def __init__(self, embedding_manager): self.index_store = SimpleIndexStore() # Use to store global index metadata self.llm = CustomInference() - def index_documents(self, documents: List[Document], index_name: str): + def index_documents(self, index_name: str, documents: List[Document]): """Recreates the entire FAISS index and vector store with new documents.""" if index_name in self.index_map: del 
self.index_map[index_name] @@ -53,22 +51,27 @@ def index_documents(self, documents: List[Document], index_name: str): vector_store = FaissVectorStore(faiss_index=faiss_index) # Specifies in-memory data structure for storing and retrieving document embeddings storage_context = StorageContext.from_defaults(vector_store=vector_store) # Used to persist the vector store and its underlying data across sessions - llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + llama_docs = [ + LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) + if doc.doc_id is not None + else LlamaDocument(text=doc.text, metadata=doc.metadata) + for doc in documents + ] # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above index = VectorStoreIndex.from_documents( llama_docs, storage_context=storage_context, embed_model=self.embed_model, - use_async=True # Indexing Process Performed Async + # use_async=True # TODO: Indexing Process Performed Async ) index.set_index_id(index_name) # https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/indices/base.py#L138-L154 self.index_map[index_name] = index self.index_store.add_index_struct(index.index_struct) self._persist(index_name) # Return the document IDs that were indexed - return [doc.doc_id for doc in documents] + return [doc.doc_id for doc in llama_docs] - def add_document(self, document: Document, index_name: str): + def add_document(self, index_name: str, document: Document): """Inserts a single document into the existing FAISS index.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") @@ -76,27 +79,22 @@ def add_document(self, document: Document, index_name: str): self.index_map[index_name].insert(llama_doc) self._persist(index_name) - def query(self, query: str, top_k: int, index_name: str, params: dict): + def query(self, 
index_name: str, query: str, top_k: int, llm_params: dict): """Queries the FAISS vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - self.llm.set_params(params) + self.llm.set_params(llm_params) query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) return query_engine.query(query) - def get_document(self, doc_id: str, index_name: str): + def get_document(self, index_name: str, doc_id: str) -> RefDocInfo: """Retrieves a document's RefDocInfo by its ID.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") # Try to retrieve the RefDocInfo associated with the doc_id ref_doc_info = self.index_map[index_name].ref_doc_info.get(doc_id) - - if ref_doc_info is None: - print(f"Document with ID {doc_id} not found in index '{index_name}'.") - return None - return ref_doc_info """ @@ -150,13 +148,11 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ self._persist(index_name) """ - def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: + def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: """Lists all documents in the vector store.""" - if index_name not in self.index_map: - raise ValueError(f"No such index: '{index_name}' exists.") - return self.index_map[index_name].ref_doc_info + return self.index_map - def document_exists(self, doc_id: str, index_name: str) -> bool: + def document_exists(self, index_name: str, doc_id: str) -> bool: """Checks if a document exists in the vector store.""" if index_name not in self.index_map: print(f"No such index: '{index_name}' exists in vector store.") diff --git a/ragengine/vector_store_manager/manager.py b/ragengine/vector_store_manager/manager.py index d6a17229b..0e47a8fe7 100644 --- a/ragengine/vector_store_manager/manager.py +++ b/ragengine/vector_store_manager/manager.py @@ -3,18 +3,19 @@ from models import Document from 
vector_store.base import BaseVectorStore +from llama_index.core import VectorStoreIndex class VectorStoreManager: def __init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store - def create(self, documents: List[Document]) -> List[str]: + def create(self, index_name: str, documents: List[Document]) -> List[str]: """Index new documents.""" - return self.vector_store.index_documents(documents) + return self.vector_store.index_documents(index_name, documents) - def read(self, query: str, top_k: int, params: dict): + def read(self, index_name: str, query: str, top_k: int, llm_params: dict): """Query the indexed documents.""" - return self.vector_store.query(query, top_k, params) + return self.vector_store.query(index_name, query, top_k, llm_params) """ def update(self, documents: List[Document]) -> Dict[str, List[str]]: @@ -39,11 +40,10 @@ def refresh(self, documents: List[Document]) -> List[bool]: return self.vector_store.refresh_documents(documents) """ - def get(self, doc_id: str) -> Document: + def get(self, index_name: str, doc_id: str) -> Document: """Retrieve a document by ID.""" - return self.vector_store.get_document(doc_id) + return self.vector_store.get_document(index_name, doc_id) - def list_all(self) -> Dict[str, Document]: + def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: """List all documents.""" - return self.vector_store.list_documents() - + return self.vector_store.list_all_indexed_documents() From 9f52ee87e0951a56875713d4c202a0e88b792511 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:04:51 -0700 Subject: [PATCH 27/42] fix: Slight fix no need to parse inference result --- ragengine/inference/custom_inference.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ragengine/inference/custom_inference.py b/ragengine/inference/custom_inference.py index 5e49e04b6..bdfb0a61d 100644 --- a/ragengine/inference/custom_inference.py +++ 
b/ragengine/inference/custom_inference.py @@ -3,7 +3,7 @@ from llama_index.llms.openai import OpenAI from llama_index.core.llms.callbacks import llm_completion_callback import requests -from config import INFERENCE_URL, INFERENCE_ACCESS_SECRET, RESPONSE_FIELD +from config import INFERENCE_URL, INFERENCE_ACCESS_SECRET #, RESPONSE_FIELD class CustomInference(CustomLLM): params: dict = {} @@ -44,9 +44,8 @@ def _custom_api_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse response_data = response.json() # Dynamically extract the field from the response based on the specified response_field - completion_text = response_data.get(RESPONSE_FIELD, "No response field found") - - return CompletionResponse(text=completion_text) + # completion_text = response_data.get(RESPONSE_FIELD, "No response field found") # not necessary for now + return CompletionResponse(text=str(response_data)) @property def metadata(self) -> LLMMetadata: From a232d67a7b3b0fdbabc93759c6fdcfc02f69a3a8 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:05:48 -0700 Subject: [PATCH 28/42] nit --- ragengine/tests/api/test_main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py index 7a5ff3b17..0f4e3396e 100644 --- a/ragengine/tests/api/test_main.py +++ b/ragengine/tests/api/test_main.py @@ -136,5 +136,3 @@ def test_list_all_indexed_documents_success(): assert ({item["text"] for item in response_idx.values()} == {item["text"] for item in request_data["documents"]}) - - From afb860661bf4ce10afae64f87d6a8d8e4991b0bc Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:13:48 -0700 Subject: [PATCH 29/42] nit --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index a29eb8776..4b72a9f68 100644 --- a/Makefile +++ b/Makefile @@ -99,12 +99,12 @@ unit-test: ## Run unit tests. 
.PHONY: rag-service-test rag-service-test: - pip install -r presets/rag_service/requirements.txt - pytest -o log_cli=true -o log_cli_level=INFO presets/rag_service/tests + pip install -r presets/rag_service/requirements.txt + pytest -o log_cli=true -o log_cli_level=INFO presets/rag_service/tests .PHONY: tuning-metrics-server-test tuning-metrics-server-test: - pytest -o log_cli=true -o log_cli_level=INFO presets/tuning/text-generation/metrics + pytest -o log_cli=true -o log_cli_level=INFO presets/tuning/text-generation/metrics ## -------------------------------------- ## E2E tests From 2455dfd653eaf61469f0b274303cd593d0fcd893 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:15:14 -0700 Subject: [PATCH 30/42] fix: remove unused files --- ragengine/vector_store/playground/__init__.py | 0 .../playground/chromadb_playground.py | 62 ------------------- 2 files changed, 62 deletions(-) delete mode 100644 ragengine/vector_store/playground/__init__.py delete mode 100644 ragengine/vector_store/playground/chromadb_playground.py diff --git a/ragengine/vector_store/playground/__init__.py b/ragengine/vector_store/playground/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/ragengine/vector_store/playground/chromadb_playground.py b/ragengine/vector_store/playground/chromadb_playground.py deleted file mode 100644 index 31a5af077..000000000 --- a/ragengine/vector_store/playground/chromadb_playground.py +++ /dev/null @@ -1,62 +0,0 @@ -from llama_index.core import Settings -from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI - -remote_llm_api = HuggingFaceInferenceAPI( - model_name="HuggingFaceH4/zephyr-7b-alpha" -) - -Settings.llm = remote_llm_api - -import logging - -import chromadb -from IPython.display import Markdown, display -from llama_index.core import (SimpleDirectoryReader, StorageContext, - VectorStoreIndex) -from llama_index.embeddings.huggingface import HuggingFaceEmbedding -from 
llama_index.vector_stores.chroma import ChromaVectorStore - -# Enable DEBUG logging for ChromaDB -logging.basicConfig(level=logging.DEBUG) - -# create ChromaDB client and a new collection -chroma_client = chromadb.EphemeralClient() -chroma_collection = chroma_client.create_collection("quickstart") - -# define embedding function -embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5") - -# load documents from directory -documents = SimpleDirectoryReader("./data/paul_graham/").load_data() - -# set up ChromaVectorStore and load in data -vector_store = ChromaVectorStore(chroma_collection=chroma_collection) -storage_context = StorageContext.from_defaults(vector_store=vector_store) -index = VectorStoreIndex.from_documents( - documents, storage_context=storage_context, embed_model=embed_model -) - -# Log collection contents before querying -logging.debug("Documents in ChromaDB collection before querying:") -all_documents = chroma_collection.get(include=["documents"]) -logging.debug(all_documents["documents"]) - -# Query Data -query_engine = index.as_query_engine() -response = query_engine.query("What did the author do growing up?") -display(Markdown(f"{response}")) - -# Log collection contents after querying -logging.debug("Documents in ChromaDB collection after querying:") -all_documents_after_query = chroma_collection.get(include=["documents"]) -logging.debug(all_documents_after_query["documents"]) - -# Log embeddings stored in ChromaDB -logging.debug("Embeddings stored in ChromaDB:") -all_embeddings = chroma_collection.get(include=["embeddings"]) -logging.debug(all_embeddings["embeddings"]) - -# Log metadata stored in ChromaDB -logging.debug("Metadata stored in ChromaDB:") -all_metadata = chroma_collection.get(include=["metadatas"]) -logging.debug(all_metadata["metadatas"]) From d32169f00b3c2f295ce6b6c82210a87715fc4900 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:22:27 -0700 Subject: [PATCH 31/42] fix: Example of live test --- 
ragengine/tests/api/test_main.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py index 0f4e3396e..cfa74716c 100644 --- a/ragengine/tests/api/test_main.py +++ b/ragengine/tests/api/test_main.py @@ -136,3 +136,35 @@ def test_list_all_indexed_documents_success(): assert ({item["text"] for item in response_idx.values()} == {item["text"] for item in request_data["documents"]}) + +""" +Example of a live query test. This test is currently commented out as it requires a valid +INFERENCE_URL in config.py. To run the test, ensure that a valid INFERENCE_URL is provided. +Upon execution, RAG results should be observed. + +def test_live_query_test(): + # Index + request_data = { + "index_name": "test_index", + "documents": [ + {"text": "Polar bear – can lift 450Kg (approximately 0.7 times their body weight) \ + Adult male polar bears can grow to be anywhere between 300 and 700kg"}, + {"text": "Giraffes are the tallest mammals and are well-adapted to living in trees. 
\ + They have few predators as adults."} + ] + } + + response = client.post("/index", json=request_data) + assert response.status_code == 200 + + # Query + request_data = { + "index_name": "test_index", + "query": "What is the strongest bear?", + "top_k": 1, + "llm_params": {"temperature": 0.7} + } + + response = client.post("/query", json=request_data) + assert response.status_code == 200 +""" \ No newline at end of file From 42f288b1409f30b410e90f61ed56955b4d978e8b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 21 Oct 2024 14:14:43 -0700 Subject: [PATCH 32/42] Update endpoints and remove old class --- ragengine/inference/custom_inference.py | 53 ------------------------- ragengine/main.py | 40 ------------------- 2 files changed, 93 deletions(-) delete mode 100644 ragengine/inference/custom_inference.py diff --git a/ragengine/inference/custom_inference.py b/ragengine/inference/custom_inference.py deleted file mode 100644 index bdfb0a61d..000000000 --- a/ragengine/inference/custom_inference.py +++ /dev/null @@ -1,53 +0,0 @@ -from typing import Any, Optional -from llama_index.core.llms import CustomLLM, CompletionResponse, LLMMetadata, CompletionResponseGen -from llama_index.llms.openai import OpenAI -from llama_index.core.llms.callbacks import llm_completion_callback -import requests -from config import INFERENCE_URL, INFERENCE_ACCESS_SECRET #, RESPONSE_FIELD - -class CustomInference(CustomLLM): - params: dict = {} - - def set_params(self, params: dict) -> None: - self.params = params - - def get_param(self, key, default=None): - return self.params.get(key, default) - - @llm_completion_callback() - def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: - pass - - @llm_completion_callback() - def complete(self, prompt: str, **kwargs) -> CompletionResponse: - try: - if "openai" in INFERENCE_URL: - return self._openai_complete(prompt, **kwargs, **self.params) - else: - return self._custom_api_complete(prompt, **kwargs, 
**self.params) - finally: - # Clear params after the completion is done - self.params = {} - - def _openai_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: - llm = OpenAI( - api_key=INFERENCE_ACCESS_SECRET, - **kwargs # Pass all kwargs directly; kwargs may include model, temperature, max_tokens, etc. - ) - return llm.complete(prompt) - - def _custom_api_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: - headers = {"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} - data = {"prompt": prompt, **kwargs} - - response = requests.post(INFERENCE_URL, json=data, headers=headers) - response_data = response.json() - - # Dynamically extract the field from the response based on the specified response_field - # completion_text = response_data.get(RESPONSE_FIELD, "No response field found") # not necessary for now - return CompletionResponse(text=str(response_data)) - - @property - def metadata(self) -> LLMMetadata: - """Get LLM metadata.""" - return LLMMetadata() diff --git a/ragengine/main.py b/ragengine/main.py index bcecfb966..3ca9cc6de 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -51,46 +51,6 @@ async def query_index(request: QueryRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) -""" -@app.put("/update", response_model=Dict[str, List[str]]) -async def update_documents(request: UpdateRequest): - try: - result = rag_ops.update(request.documents) - return result - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/refresh", response_model=List[bool]) -async def refresh_documents(request: RefreshRequest): - try: - result = rag_ops.refresh(request.documents) - return result - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.delete("/document/{doc_id}") -async def delete_document(doc_id: str): - try: - rag_ops.delete(doc_id) - return {"message": "Document deleted successfully"} - except Exception as e: - raise 
HTTPException(status_code=500, detail=str(e)) -""" - -@app.get("/document/{index_name}/{doc_id}", response_model=RefDocInfo) -async def get_document(index_name: str, doc_id: str): - try: - document = rag_ops.get(index_name, doc_id) - if document: - return document - else: - raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} " - f"not found in index '{index_name}'.") - except ValueError as ve: - raise HTTPException(status_code=404, detail=str(ve)) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - @app.get("/indexed-documents", response_model=ListDocumentsResponse) async def list_all_indexed_documents(): try: From e652935fcd908ab05e40fcf7d4926128ffeeedbe Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 21 Oct 2024 14:46:58 -0700 Subject: [PATCH 33/42] pytest fix target --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4b72a9f68..fa138ecd3 100644 --- a/Makefile +++ b/Makefile @@ -99,8 +99,8 @@ unit-test: ## Run unit tests. 
.PHONY: rag-service-test rag-service-test: - pip install -r presets/rag_service/requirements.txt - pytest -o log_cli=true -o log_cli_level=INFO presets/rag_service/tests + pip install -r ragengine/requirements.txt + pytest -o log_cli=true -o log_cli_level=INFO ragengine/tests .PHONY: tuning-metrics-server-test tuning-metrics-server-test: From 7748420e91d9bc7ab8c47af080ec6e65a1bbdb04 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 22 Oct 2024 18:43:11 -0700 Subject: [PATCH 34/42] feat: Updated UTs, models and API --- ragengine/main.py | 27 ++---- ragengine/models.py | 19 +++- ragengine/tests/api/test_main.py | 55 +++-------- .../tests/vector_store/test_faiss_store.py | 95 ++++++++----------- ragengine/vector_store/base.py | 4 +- ragengine/vector_store/faiss_store.py | 53 +++++++---- ragengine/vector_store_manager/manager.py | 4 +- 7 files changed, 116 insertions(+), 141 deletions(-) diff --git a/ragengine/main.py b/ragengine/main.py index 3ca9cc6de..eff696790 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -1,17 +1,13 @@ from typing import Dict, List - -from llama_index.core.schema import TextNode - from vector_store_manager.manager import VectorStoreManager from embedding.huggingface_local import LocalHuggingFaceEmbedding from embedding.huggingface_remote import RemoteHuggingFaceEmbedding -from llama_index.core.storage.docstore.types import RefDocInfo from fastapi import FastAPI, HTTPException from models import (IndexRequest, ListDocumentsResponse, - QueryRequest, Document) + QueryRequest, QueryResponse, DocumentResponse) from vector_store.faiss_store import FaissVectorStoreHandler -from config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID +from ragengine.config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID app = FastAPI() @@ -30,24 +26,23 @@ # Initialize RAG operations rag_ops = VectorStoreManager(vector_store_handler) -@app.post("/index", response_model=List[Document]) +@app.post("/index", response_model=List[DocumentResponse]) async def 
index_documents(request: IndexRequest): # TODO: Research async/sync what to use (inference is calling) try: doc_ids = rag_ops.create(request.index_name, request.documents) documents = [ - Document(doc_id=doc_id, text=doc.text, metadata=doc.metadata) + DocumentResponse(doc_id=doc_id, text=doc.text, metadata=doc.metadata) for doc_id, doc in zip(doc_ids, request.documents) ] return documents except Exception as e: raise HTTPException(status_code=500, detail=str(e)) -@app.post("/query", response_model=Dict[str, str]) +@app.post("/query", response_model=QueryResponse) async def query_index(request: QueryRequest): try: llm_params = request.llm_params or {} # Default to empty dict if no params provided - response = rag_ops.read(request.index_name, request.query, request.top_k, llm_params) - return {"response": str(response)} + return rag_ops.read(request.index_name, request.query, request.top_k, llm_params) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -55,15 +50,7 @@ async def query_index(request: QueryRequest): async def list_all_indexed_documents(): try: documents = rag_ops.list_all_indexed_documents() - serialized_documents = { - index_name: { - doc_name: { - "text": doc_info.text, "hash": doc_info.hash - } for doc_name, doc_info in vector_store_index.docstore.docs.items() - } - for index_name, vector_store_index in documents.items() - } - return ListDocumentsResponse(documents=serialized_documents) + return ListDocumentsResponse(documents=documents) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) diff --git a/ragengine/models.py b/ragengine/models.py index 941b845a4..a74d63608 100644 --- a/ragengine/models.py +++ b/ragengine/models.py @@ -6,6 +6,11 @@ class Document(BaseModel): text: str metadata: Optional[dict] = {} +class DocumentResponse(BaseModel): + doc_id: str + text: str + metadata: Optional[dict] = None + class IndexRequest(BaseModel): index_name: str documents: List[Document] @@ -17,4 +22,16 @@ class 
QueryRequest(BaseModel): llm_params: Optional[Dict] = None # Accept a dictionary for parameters class ListDocumentsResponse(BaseModel): - documents:Dict[str, Dict[str, Dict[str, str]]] + documents: Dict[str, Dict[str, Dict[str, str]]] + +# Define models for TextNode, NodeWithScore, and the main Response +class NodeWithScore(BaseModel): + node_id: str + text: str + score: float + metadata: Optional[dict] = None + +class QueryResponse(BaseModel): + response: str + source_nodes: List[NodeWithScore] + metadata: Optional[dict] = None \ No newline at end of file diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py index cfa74716c..1e7366c86 100644 --- a/ragengine/tests/api/test_main.py +++ b/ragengine/tests/api/test_main.py @@ -1,21 +1,20 @@ -import os -from tempfile import TemporaryDirectory from unittest.mock import patch -import pytest -from vector_store.faiss_store import FaissVectorStoreHandler -from models import Document -from embedding.huggingface_local import LocalHuggingFaceEmbedding -from config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET +from llama_index.core.storage.index_store import SimpleIndexStore -from main import app, rag_ops +from ragengine.main import app, vector_store_handler, rag_ops from fastapi.testclient import TestClient -from unittest.mock import MagicMock +import pytest -AUTO_GEN_DOC_ID_LEN = 36 +AUTO_GEN_DOC_ID_LEN = 64 client = TestClient(app) +@pytest.fixture(autouse=True) +def clear_index(): + vector_store_handler.index_map.clear() + vector_store_handler.index_store = SimpleIndexStore() + def test_index_documents_success(): request_data = { "index_name": "test_index", @@ -65,7 +64,11 @@ def test_query_index_success(mock_post): response = client.post("/query", json=request_data) assert response.status_code == 200 - assert response.json() == {"response": "This is the completion from the API"} + assert response.json()["response"] == "{'result': 'This is the completion from the API'}" + assert 
len(response.json()["source_nodes"]) == 1 + assert response.json()["source_nodes"][0]["text"] == "This is a test document" + assert response.json()["source_nodes"][0]["score"] == 0.5354418754577637 + assert response.json()["source_nodes"][0]["metadata"] == {} assert mock_post.call_count == 1 def test_query_index_failure(): @@ -82,36 +85,6 @@ def test_query_index_failure(): assert response.json()["detail"] == "No such index: 'non_existent_index' exists." -def test_get_document_success(): - request_data = { - "index_name": "test_index", - "documents": [ - # {"doc_id": "doc1", "text": "This is a test document"}, - {"doc_id": "doc1", "text": "This is a test document"}, - {"text": "Another test document"} - ] - } - - index_response = client.post("/index", json=request_data) - assert index_response.status_code == 200 - - # Call the GET document endpoint. - get_response = client.get("/document/test_index/doc1") - assert get_response.status_code == 200 - - response_json = get_response.json() - - assert response_json.keys() == {"node_ids", 'metadata'} - assert response_json['metadata'] == {} - - assert isinstance(response_json["node_ids"], list) and len(response_json["node_ids"]) == 1 - - -def test_get_document_failure(): - # Call the GET document endpoint. 
- response = client.get("/document/test_index/doc1") - assert response.status_code == 404 - def test_list_all_indexed_documents_success(): response = client.get("/indexed-documents") assert response.status_code == 200 diff --git a/ragengine/tests/vector_store/test_faiss_store.py b/ragengine/tests/vector_store/test_faiss_store.py index 452388aeb..116c89ba2 100644 --- a/ragengine/tests/vector_store/test_faiss_store.py +++ b/ragengine/tests/vector_store/test_faiss_store.py @@ -3,10 +3,13 @@ from unittest.mock import patch import pytest + +from ragengine.vector_store.base import BaseVectorStore from ragengine.vector_store.faiss_store import FaissVectorStoreHandler from ragengine.models import Document from ragengine.embedding.huggingface_local import LocalHuggingFaceEmbedding from ragengine.config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET +from ragengine.config import PERSIST_DIR @pytest.fixture(scope='session') def init_embed_manager(): @@ -21,23 +24,24 @@ def vector_store_manager(init_embed_manager): yield FaissVectorStoreHandler(init_embed_manager) def test_index_documents(vector_store_manager): + first_doc_text, second_doc_text = "First document", "Second document" documents = [ - Document(doc_id="1", text="First document", metadata={"type": "text"}), - Document(doc_id="2", text="Second document", metadata={"type": "text"}) + Document(text=first_doc_text, metadata={"type": "text"}), + Document(text=second_doc_text, metadata={"type": "text"}) ] doc_ids = vector_store_manager.index_documents("test_index", documents) assert len(doc_ids) == 2 - assert doc_ids == ["1", "2"] + assert set(doc_ids) == {BaseVectorStore.generate_doc_id(first_doc_text), + BaseVectorStore.generate_doc_id(second_doc_text)} def test_index_documents_isolation(vector_store_manager): - doc_1_id, doc_2_id = "1", "2" documents1 = [ - Document(doc_id=doc_1_id, text="First document in index1", metadata={"type": "text"}), + Document(text="First document in index1", metadata={"type": 
"text"}), ] documents2 = [ - Document(doc_id=doc_2_id, text="First document in index2", metadata={"type": "text"}), + Document(text="First document in index2", metadata={"type": "text"}), ] # Index documents in separate indices @@ -45,16 +49,14 @@ def test_index_documents_isolation(vector_store_manager): vector_store_manager.index_documents(index_name_1, documents1) vector_store_manager.index_documents(index_name_2, documents2) - # Ensure documents are correctly persisted and separated by index - doc_1 = vector_store_manager.get_document(index_name_1, doc_1_id) - assert doc_1 and doc_1.node_ids # Ensure documents were created - - doc_2 = vector_store_manager.get_document(index_name_2, doc_2_id) - assert doc_2 and doc_2.node_ids # Ensure documents were created - - # Ensure that the documents do not mix between indices - assert vector_store_manager.get_document(index_name_2, doc_1_id) is None, f"Document {doc_1_id} should not exist in {index_name_2}" - assert vector_store_manager.get_document(index_name_1, doc_2_id) is None, f"Document {doc_2_id} should not exist in {index_name_1}" + assert vector_store_manager.list_all_indexed_documents() == { + 'index1': {"87117028123498eb7d757b1507aa3e840c63294f94c27cb5ec83c939dedb32fd": + {'hash': '1e64a170be48c45efeaa8667ab35919106da0489ec99a11d0029f2842db133aa', + 'text': 'First document in index1'}}, + 'index2': {"49b198c0e126a99e1975f17b564756c25b4ad691a57eda583e232fd9bee6de91": + {'hash': 'a222f875b83ce8b6eb72b3cae278b620de9bcc7c6b73222424d3ce979d1a463b', + 'text': 'First document in index2'}} + } @patch('requests.post') def test_query_documents(mock_post, vector_store_manager): @@ -67,17 +69,19 @@ def test_query_documents(mock_post, vector_store_manager): # Add documents to index documents = [ - Document(doc_id="1", text="First document", metadata={"type": "text"}), - Document(doc_id="2", text="Second document", metadata={"type": "text"}) + Document(text="First document", metadata={"type": "text"}), + Document(text="Second 
document", metadata={"type": "text"}) ] vector_store_manager.index_documents("test_index", documents) params = {"temperature": 0.7} # Mock query and results - query_result = vector_store_manager.query("test_index", "First", top_k=1, params=params) + query_result = vector_store_manager.query("test_index", "First", top_k=1, llm_params=params) assert query_result is not None - assert query_result.response == "This is the completion from the API" + assert query_result["response"] == "{'result': 'This is the completion from the API'}" + assert query_result["source_nodes"][0]["text"] == "First document" + assert query_result["source_nodes"][0]["score"] == 0.5795239210128784 mock_post.assert_called_once_with( INFERENCE_URL, @@ -86,57 +90,34 @@ def test_query_documents(mock_post, vector_store_manager): headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} ) -def test_add_document(vector_store_manager, capsys): - documents = [Document(doc_id="3", text="Third document", metadata={"type": "text"})] +def test_add_document(vector_store_manager): + documents = [Document(text="Third document", metadata={"type": "text"})] vector_store_manager.index_documents("test_index", documents) # Add a document to the existing index - new_document = Document(doc_id="4", text="Fourth document", metadata={"type": "text"}) + new_document = [Document(text="Fourth document", metadata={"type": "text"})] vector_store_manager.index_documents("test_index", new_document) # Assert that the document exists - assert vector_store_manager.document_exists("test_index", "4") + assert vector_store_manager.document_exists("test_index", + BaseVectorStore.generate_doc_id("Fourth document")) -def test_persist_and_load_index_store(vector_store_manager): - """Test that the index store is persisted and loaded correctly.""" +def test_persist_index_1(vector_store_manager): + """Test that the index store is persisted.""" # Add a document and persist the index - documents = [Document(doc_id="1", text="Test 
document", metadata={"type": "text"})] + documents = [Document(text="Test document", metadata={"type": "text"})] vector_store_manager.index_documents("test_index", documents) vector_store_manager._persist("test_index") + assert os.path.exists(PERSIST_DIR) - # Simulate a fresh load of the index store (clearing in-memory state) - vector_store_manager.index_store = None # Clear current in-memory store - vector_store_manager._load_index_store() - - # Verify that the store was reloaded and contains the expected index structure - assert vector_store_manager.index_store is not None - assert len(vector_store_manager.index_store.index_structs()) > 0 - -# TODO: Prevent default re-indexing from load_index_from_storage -def test_persist_and_load_index(vector_store_manager): - """Test that an index is persisted and then loaded correctly.""" +def test_persist_index_2(vector_store_manager): + """Test that an index store is persisted.""" # Add a document and persist the index - documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] + documents = [Document(text="Test document", metadata={"type": "text"})] vector_store_manager.index_documents("test_index", documents) - documents = [Document(doc_id="1", text="Another Test document", metadata={"type": "text"})] + documents = [Document(text="Another Test document", metadata={"type": "text"})] vector_store_manager.index_documents("another_test_index", documents) vector_store_manager._persist_all() - - # Simulate a fresh load of the index (clearing in-memory state) - vector_store_manager.index_map = {} # Clear current in-memory index map - loaded_indices = vector_store_manager._load_indices() - - # Verify that the index was reloaded and contains the expected document - assert loaded_indices is not None - assert vector_store_manager.document_exists("test_index", "1") - assert vector_store_manager.document_exists("another_test_index", "1") - - vector_store_manager.index_map = {} # Clear current in-memory index 
map - loaded_index = vector_store_manager._load_index("test_index") - - assert loaded_index is not None - assert vector_store_manager.document_exists("test_index", "1") - assert not vector_store_manager.document_exists("another_test_index", "1") # Since we didn't load this index - + assert os.path.exists(PERSIST_DIR) diff --git a/ragengine/vector_store/base.py b/ragengine/vector_store/base.py index 938330fa2..062342b60 100644 --- a/ragengine/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -20,11 +20,11 @@ def query(self, index_name: str, query: str, top_k: int, params: dict): pass @abstractmethod - def add_document(self, index_name: str, document: Document): + def add_document_to_index(self, index_name: str, document: Document, doc_id: str): pass @abstractmethod - def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: + def list_all_indexed_documents(self) -> Dict[str, Dict[str, Dict[str, str]]]: pass @abstractmethod diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index 35472d004..5bf71e314 100644 --- a/ragengine/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -5,13 +5,12 @@ from llama_index.core import Document as LlamaDocument from llama_index.core import (StorageContext, VectorStoreIndex) from llama_index.core.storage.index_store import SimpleIndexStore -from llama_index.core.storage.docstore.types import RefDocInfo from llama_index.vector_stores.faiss import FaissVectorStore -from ragengine.models import Document +from ragengine.models import Document, NodeWithScore from ragengine.inference.inference import Inference -from config import PERSIST_DIR +from ragengine.config import PERSIST_DIR from .base import BaseVectorStore from ragengine.embedding.base import BaseEmbeddingModel @@ -75,12 +74,12 @@ def _append_documents_to_index(self, index_name: str, documents: List[Document]) indexed_doc_ids = set() for doc in documents: - doc.doc_id = 
self.generate_doc_id(doc.text) - if not self.document_exists(index_name, doc.doc_id): - self.add_document_to_index(index_name, doc) - indexed_doc_ids.add(doc.doc_id) + doc_id = BaseVectorStore.generate_doc_id(doc.text) + if not self.document_exists(index_name, doc_id): + self.add_document_to_index(index_name, doc, doc_id) + indexed_doc_ids.add(doc_id) else: - print(f"Document {doc.doc_id} already exists in index {index_name}. Skipping.") + print(f"Document {doc_id} already exists in index {index_name}. Skipping.") if indexed_doc_ids: self._persist(index_name) @@ -105,10 +104,10 @@ def _create_new_index(self, index_name: str, documents: List[Document]) -> List[ indexed_doc_ids = set() for doc in documents: - doc.doc_id = self.generate_doc_id(doc.text) - llama_doc = LlamaDocument(id_=doc.doc_id, text=doc.text, metadata=doc.metadata) + doc_id = BaseVectorStore.generate_doc_id(doc.text) + llama_doc = LlamaDocument(id_=doc_id, text=doc.text, metadata=doc.metadata) llama_docs.append(llama_doc) - indexed_doc_ids.add(doc.doc_id) + indexed_doc_ids.add(doc_id) if llama_docs: index = VectorStoreIndex.from_documents( @@ -123,11 +122,11 @@ def _create_new_index(self, index_name: str, documents: List[Document]) -> List[ self._persist(index_name) return list(indexed_doc_ids) - def add_document_to_index(self, index_name: str, document: Document): + def add_document_to_index(self, index_name: str, document: Document, doc_id: str): """Inserts a single document into the existing FAISS index.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=doc_id) self.index_map[index_name].insert(llama_doc) def query(self, index_name: str, query: str, top_k: int, llm_params: dict): @@ -137,11 +136,31 @@ def query(self, index_name: str, query: str, top_k: int, llm_params: 
dict): self.llm.set_params(llm_params) query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) - return query_engine.query(query) - - def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: + query_result = query_engine.query(query) + return { + "response": query_result.response, + "source_nodes": [ + { + "node_id": node.node_id, + "text": node.text, + "score": node.score, + "metadata": node.metadata + } + for node in query_result.source_nodes + ], + "metadata": query_result.metadata, + } + + def list_all_indexed_documents(self) -> Dict[str, Dict[str, Dict[str, str]]]: """Lists all documents in the vector store.""" - return self.index_map + return { + index_name: { + doc_info.ref_doc_id: { + "text": doc_info.text, "hash": doc_info.hash + } for doc_name, doc_info in vector_store_index.docstore.docs.items() + } + for index_name, vector_store_index in self.index_map.items() + } def document_exists(self, index_name: str, doc_id: str) -> bool: """Checks if a document exists in the vector store.""" diff --git a/ragengine/vector_store_manager/manager.py b/ragengine/vector_store_manager/manager.py index e69192f1d..6976b4f9c 100644 --- a/ragengine/vector_store_manager/manager.py +++ b/ragengine/vector_store_manager/manager.py @@ -3,8 +3,6 @@ from ragengine.models import Document from ragengine.vector_store.base import BaseVectorStore -from llama_index.core import VectorStoreIndex - class VectorStoreManager: def __init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store @@ -17,6 +15,6 @@ def read(self, index_name: str, query: str, top_k: int, llm_params: dict): """Query the indexed documents.""" return self.vector_store.query(index_name, query, top_k, llm_params) - def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: + def list_all_indexed_documents(self) -> Dict[str, Dict[str, Dict[str, str]]]: """List all documents.""" return self.vector_store.list_all_indexed_documents() From 
1b0a7a033d7e07c09b34b3166e0a364970e847d4 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 22 Oct 2024 18:44:38 -0700 Subject: [PATCH 35/42] feat: Updated UTs, models and API --- ragengine/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ragengine/models.py b/ragengine/models.py index a74d63608..982c1b1b9 100644 --- a/ragengine/models.py +++ b/ragengine/models.py @@ -24,7 +24,7 @@ class QueryRequest(BaseModel): class ListDocumentsResponse(BaseModel): documents: Dict[str, Dict[str, Dict[str, str]]] -# Define models for TextNode, NodeWithScore, and the main Response +# Define models for NodeWithScore, and QueryResponse class NodeWithScore(BaseModel): node_id: str text: str From 1c34fb041275954191ab69f6dd159367bead19d2 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 22 Oct 2024 18:46:34 -0700 Subject: [PATCH 36/42] feat: Updated UTs, models and API --- ragengine/main.py | 2 +- ragengine/vector_store/base.py | 1 - ragengine/vector_store/faiss_store.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ragengine/main.py b/ragengine/main.py index eff696790..a60b7a5a9 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import List from vector_store_manager.manager import VectorStoreManager from embedding.huggingface_local import LocalHuggingFaceEmbedding from embedding.huggingface_remote import RemoteHuggingFaceEmbedding diff --git a/ragengine/vector_store/base.py b/ragengine/vector_store/base.py index 062342b60..bf3be9624 100644 --- a/ragengine/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -2,7 +2,6 @@ from typing import Dict, List from ragengine.models import Document -from llama_index.core import VectorStoreIndex import hashlib diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index 5bf71e314..ddd5b670d 100644 --- a/ragengine/vector_store/faiss_store.py +++ 
b/ragengine/vector_store/faiss_store.py @@ -7,7 +7,7 @@ from llama_index.core.storage.index_store import SimpleIndexStore from llama_index.vector_stores.faiss import FaissVectorStore -from ragengine.models import Document, NodeWithScore +from ragengine.models import Document from ragengine.inference.inference import Inference from ragengine.config import PERSIST_DIR From bc946693110d3b7c3f06b7c9ba961b6e547111dc Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 11:33:01 -0700 Subject: [PATCH 37/42] feat: Updated UTs, models and API --- ragengine/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ragengine/requirements.txt b/ragengine/requirements.txt index 18ca062fc..7d8177c2a 100644 --- a/ragengine/requirements.txt +++ b/ragengine/requirements.txt @@ -5,3 +5,5 @@ fastapi faiss-cpu llama-index-vector-stores-faiss uvicorn +# For UTs +pytest \ No newline at end of file From bc076bd84e1d9af5c7aca84798275793ce310dad Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 11:45:23 -0700 Subject: [PATCH 38/42] feat: Updated UTs, models and API --- ragengine/requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ragengine/requirements.txt b/ragengine/requirements.txt index 7d8177c2a..7be7a8c38 100644 --- a/ragengine/requirements.txt +++ b/ragengine/requirements.txt @@ -1,6 +1,11 @@ # RAG Library Requirements llama-index +# HF Embeddings llama-index-embeddings-huggingface +llama-index-embeddings-huggingface-api +# HF LLMs +llama-index-llms-huggingface +llama-index-llms-huggingface-api fastapi faiss-cpu llama-index-vector-stores-faiss From f93669bc63084aae41df11e8447ec609ffcba079 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 11:45:44 -0700 Subject: [PATCH 39/42] feat: Updated UTs, models and API --- ragengine/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ragengine/requirements.txt b/ragengine/requirements.txt index 7be7a8c38..4a324766c 100644 --- 
a/ragengine/requirements.txt +++ b/ragengine/requirements.txt @@ -6,6 +6,7 @@ llama-index-embeddings-huggingface-api # HF LLMs llama-index-llms-huggingface llama-index-llms-huggingface-api + fastapi faiss-cpu llama-index-vector-stores-faiss From a5dd527d2b6a84ae80dfcb693d29c4c6cc12ca17 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 11:57:12 -0700 Subject: [PATCH 40/42] approx --- ragengine/tests/api/test_main.py | 2 +- ragengine/tests/vector_store/test_faiss_store.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py index 1e7366c86..c0b6ef13b 100644 --- a/ragengine/tests/api/test_main.py +++ b/ragengine/tests/api/test_main.py @@ -67,7 +67,7 @@ def test_query_index_success(mock_post): assert response.json()["response"] == "{'result': 'This is the completion from the API'}" assert len(response.json()["source_nodes"]) == 1 assert response.json()["source_nodes"][0]["text"] == "This is a test document" - assert response.json()["source_nodes"][0]["score"] == 0.5354418754577637 + assert response.json()["source_nodes"][0]["score"] == pytest.approx(0.5354418754577637, rel=1e-6) assert response.json()["source_nodes"][0]["metadata"] == {} assert mock_post.call_count == 1 diff --git a/ragengine/tests/vector_store/test_faiss_store.py b/ragengine/tests/vector_store/test_faiss_store.py index 116c89ba2..0fc17a912 100644 --- a/ragengine/tests/vector_store/test_faiss_store.py +++ b/ragengine/tests/vector_store/test_faiss_store.py @@ -81,7 +81,7 @@ def test_query_documents(mock_post, vector_store_manager): assert query_result is not None assert query_result["response"] == "{'result': 'This is the completion from the API'}" assert query_result["source_nodes"][0]["text"] == "First document" - assert query_result["source_nodes"][0]["score"] == 0.5795239210128784 + assert query_result["source_nodes"][0]["score"] == pytest.approx(0.5795239210128784, rel=1e-6) 
mock_post.assert_called_once_with( INFERENCE_URL, From 1000732209b9eec098314888b2f60b0f5c7c0e0b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 12:08:39 -0700 Subject: [PATCH 41/42] fix: add ut dependency --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index fa138ecd3..699e557b5 100644 --- a/Makefile +++ b/Makefile @@ -104,6 +104,7 @@ rag-service-test: .PHONY: tuning-metrics-server-test tuning-metrics-server-test: + pip install -r presets/inference/text-generation/requirements.txt pytest -o log_cli=true -o log_cli_level=INFO presets/tuning/text-generation/metrics ## -------------------------------------- From 3d6a623624ac37a6d3e91ca23ed1332567276a09 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 12:15:05 -0700 Subject: [PATCH 42/42] fix: renamed --- ragengine/main.py | 4 ++-- ragengine/vector_store_manager/manager.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ragengine/main.py b/ragengine/main.py index a60b7a5a9..53bdb1997 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -29,7 +29,7 @@ @app.post("/index", response_model=List[DocumentResponse]) async def index_documents(request: IndexRequest): # TODO: Research async/sync what to use (inference is calling) try: - doc_ids = rag_ops.create(request.index_name, request.documents) + doc_ids = rag_ops.index(request.index_name, request.documents) documents = [ DocumentResponse(doc_id=doc_id, text=doc.text, metadata=doc.metadata) for doc_id, doc in zip(doc_ids, request.documents) @@ -42,7 +42,7 @@ async def index_documents(request: IndexRequest): # TODO: Research async/sync wh async def query_index(request: QueryRequest): try: llm_params = request.llm_params or {} # Default to empty dict if no params provided - return rag_ops.read(request.index_name, request.query, request.top_k, llm_params) + return rag_ops.query(request.index_name, request.query, request.top_k, llm_params) except Exception as e: raise 
HTTPException(status_code=500, detail=str(e)) diff --git a/ragengine/vector_store_manager/manager.py b/ragengine/vector_store_manager/manager.py index 6976b4f9c..d8871b93a 100644 --- a/ragengine/vector_store_manager/manager.py +++ b/ragengine/vector_store_manager/manager.py @@ -7,11 +7,11 @@ class VectorStoreManager: def __init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store - def create(self, index_name: str, documents: List[Document]) -> List[str]: + def index(self, index_name: str, documents: List[Document]) -> List[str]: """Index new documents.""" return self.vector_store.index_documents(index_name, documents) - def read(self, index_name: str, query: str, top_k: int, llm_params: dict): + def query(self, index_name: str, query: str, top_k: int, llm_params: dict): """Query the indexed documents.""" return self.vector_store.query(index_name, query, top_k, llm_params)