From d369d85a160a75e83c1c98712e26346d90053256 Mon Sep 17 00:00:00 2001 From: Fei-Guo Date: Tue, 17 Sep 2024 10:41:21 -0700 Subject: [PATCH 01/42] feat: Add RAGEngine CRD --- api/v1alpha1/ragengine_types.go | 109 +++++++++ api/v1alpha1/zz_generated.deepcopy.go | 205 +++++++++++++++++ config/crd/bases/kaito.sh_ragengines.yaml | 269 ++++++++++++++++++++++ presets/models/falcon/model.go | 4 +- presets/models/mistral/model.go | 4 +- presets/models/phi2/model.go | 4 +- presets/models/phi3/model.go | 4 +- 7 files changed, 591 insertions(+), 8 deletions(-) create mode 100644 api/v1alpha1/ragengine_types.go create mode 100644 config/crd/bases/kaito.sh_ragengines.yaml diff --git a/api/v1alpha1/ragengine_types.go b/api/v1alpha1/ragengine_types.go new file mode 100644 index 000000000..a5d35205e --- /dev/null +++ b/api/v1alpha1/ragengine_types.go @@ -0,0 +1,109 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type StorageSpec struct { + //TODO: add vendor specific APIs for accessing vector DB services here. +} + +type RemoteEmbeddingSpec struct { + // URL points to a publicly available embedding service, such as OpenAI. + URL string `json:"url"` + // AccessSecret is the name of the secret that contains the service access token. + // +optional + AccessSecret string `json:"accessSecret,omitempty"` +} + +type LocalEmbeddingSpec struct { + // Image is the name of the containerized embedding model image. + // +optional + Image string `json:"image,omitempty"` + // +optional + ImagePullSecret string `json:"imagePullSecret,omitempty"` + // ModelID is the ID of the embedding model hosted by huggingface, e.g., BAAI/bge-small-en-v1.5. + // When this field is specified, the RAG engine will download the embedding model + // from huggingface repository during startup. The embedding model will not persist in local storage. 
+ // Note that if Image is specified, ModelID should not be specified and vice versa. + // +optional + ModelID string `json:"modelID,omitempty"` + // ModelAccessSecret is the name of the secret that contains the huggingface access token. + // +optional + ModelAccessSecret string `json:"modelAccessSecret,omitempty"` +} + +type EmbeddingSpec struct { + // Remote specifies how to generate embeddings for index data using a remote service. + // Note that either Remote or Local needs to be specified, not both. + // +optional + Remote *RemoteEmbeddingSpec `json:"remote,omitempty"` + // Local specifies how to generate embeddings for index data using a model run locally. + // +optional + Local *LocalEmbeddingSpec `json:"local,omitempty"` +} + +type InferenceServiceSpec struct { + // URL points to a running inference service endpoint which accepts http(s) payload. + URL string `json:"url"` + // AccessSecret is the name of the secret that contains the service access token. + // +optional + AccessSecret string `json:"accessSecret,omitempty"` +} + +type RAGEngineSpec struct { + // Compute specifies the dedicated GPU resource used by an embedding model running locally if required. + // +optional + Compute *ResourceSpec `json:"compute,omitempty"` + // Storage specifies how to access the vector database used to save the embedding vectors. + // If this field is not specified, by default, an in-memory vector DB will be used. + // The data will not be persisted. + // +optional + Storage *StorageSpec `json:"storage,omitempty"` + // Embedding specifies whether the RAG engine generates embedding vectors using a remote service + // or using a embedding model running locally. + Embedding *EmbeddingSpec `json:"embedding"` + InferenceService *InferenceServiceSpec `json:"inferenceService"` + // QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the + // inference service. 
If not specified, a default service name will be created by the RAG engine. + // +optional + QueryServiceName string `json:"queryServiceName,omitempty"` + // IndexServiceName is the name of the service which exposes the endpoint for user to input the index data + // to generate embeddings. If not specified, a default service name will be created by the RAG engine. + // +optional + IndexServiceName string `json:"indexServiceName,omitempty"` +} + +// RAGEngineStatus defines the observed state of RAGEngine +type RAGEngineStatus struct { + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// RAGEngine is the Schema for the ragengine API +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:path=ragengines,scope=Namespaced,categories=ragengine +// +kubebuilder:storageversion +type RAGEngine struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec *RAGEngineSpec `json:"spec,omitempty"` + + Status RAGEngineStatus `json:"status,omitempty"` +} + +// RAGEngineList contains a list of RAGEngine +// +kubebuilder:object:root=true +type RAGEngineList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []RAGEngine `json:"items"` +} + +func init() { + SchemeBuilder.Register(&RAGEngine{}, &RAGEngineList{}) +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 4a0517171..ef55fed6a 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -104,6 +104,31 @@ func (in *DataSource) DeepCopy() *DataSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *EmbeddingSpec) DeepCopyInto(out *EmbeddingSpec) { + *out = *in + if in.Remote != nil { + in, out := &in.Remote, &out.Remote + *out = new(RemoteEmbeddingSpec) + **out = **in + } + if in.Local != nil { + in, out := &in.Local, &out.Local + *out = new(LocalEmbeddingSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingSpec. +func (in *EmbeddingSpec) DeepCopy() *EmbeddingSpec { + if in == nil { + return nil + } + out := new(EmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPUConfig) DeepCopyInto(out *GPUConfig) { *out = *in @@ -124,6 +149,21 @@ func (in *GPUConfig) DeepCopy() *GPUConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceServiceSpec. +func (in *InferenceServiceSpec) DeepCopy() *InferenceServiceSpec { + if in == nil { + return nil + } + out := new(InferenceServiceSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferenceSpec) DeepCopyInto(out *InferenceSpec) { *out = *in @@ -156,6 +196,21 @@ func (in *InferenceSpec) DeepCopy() *InferenceSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LocalEmbeddingSpec) DeepCopyInto(out *LocalEmbeddingSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalEmbeddingSpec. 
+func (in *LocalEmbeddingSpec) DeepCopy() *LocalEmbeddingSpec { + if in == nil { + return nil + } + out := new(LocalEmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PresetMeta) DeepCopyInto(out *PresetMeta) { *out = *in @@ -208,6 +263,141 @@ func (in *PresetSpec) DeepCopy() *PresetSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngine) DeepCopyInto(out *RAGEngine) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.Spec != nil { + in, out := &in.Spec, &out.Spec + *out = new(RAGEngineSpec) + (*in).DeepCopyInto(*out) + } + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngine. +func (in *RAGEngine) DeepCopy() *RAGEngine { + if in == nil { + return nil + } + out := new(RAGEngine) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RAGEngine) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineList) DeepCopyInto(out *RAGEngineList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]RAGEngine, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineList. 
+func (in *RAGEngineList) DeepCopy() *RAGEngineList { + if in == nil { + return nil + } + out := new(RAGEngineList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *RAGEngineList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineSpec) DeepCopyInto(out *RAGEngineSpec) { + *out = *in + if in.Compute != nil { + in, out := &in.Compute, &out.Compute + *out = new(ResourceSpec) + (*in).DeepCopyInto(*out) + } + if in.Storage != nil { + in, out := &in.Storage, &out.Storage + *out = new(StorageSpec) + **out = **in + } + if in.Embedding != nil { + in, out := &in.Embedding, &out.Embedding + *out = new(EmbeddingSpec) + (*in).DeepCopyInto(*out) + } + if in.InferenceService != nil { + in, out := &in.InferenceService, &out.InferenceService + *out = new(InferenceServiceSpec) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineSpec. +func (in *RAGEngineSpec) DeepCopy() *RAGEngineSpec { + if in == nil { + return nil + } + out := new(RAGEngineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RAGEngineStatus) DeepCopyInto(out *RAGEngineStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RAGEngineStatus. 
+func (in *RAGEngineStatus) DeepCopy() *RAGEngineStatus { + if in == nil { + return nil + } + out := new(RAGEngineStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RemoteEmbeddingSpec) DeepCopyInto(out *RemoteEmbeddingSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoteEmbeddingSpec. +func (in *RemoteEmbeddingSpec) DeepCopy() *RemoteEmbeddingSpec { + if in == nil { + return nil + } + out := new(RemoteEmbeddingSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ResourceSpec) DeepCopyInto(out *ResourceSpec) { *out = *in @@ -238,6 +428,21 @@ func (in *ResourceSpec) DeepCopy() *ResourceSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StorageSpec) DeepCopyInto(out *StorageSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageSpec. +func (in *StorageSpec) DeepCopy() *StorageSpec { + if in == nil { + return nil + } + out := new(StorageSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *TrainingConfig) DeepCopyInto(out *TrainingConfig) { *out = *in diff --git a/config/crd/bases/kaito.sh_ragengines.yaml b/config/crd/bases/kaito.sh_ragengines.yaml new file mode 100644 index 000000000..7b1ec3f55 --- /dev/null +++ b/config/crd/bases/kaito.sh_ragengines.yaml @@ -0,0 +1,269 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: ragengines.kaito.sh +spec: + group: kaito.sh + names: + categories: + - ragengine + kind: RAGEngine + listKind: RAGEngineList + plural: ragengines + singular: ragengine + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: RAGEngine is the Schema for the ragengine API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + compute: + description: Compute specifies the dedicated GPU resource used by + an embedding model running locally if required. + properties: + count: + default: 1 + description: Count is the required number of GPU nodes. + type: integer + instanceType: + default: Standard_NC12s_v3 + description: |- + InstanceType specifies the GPU node SKU. + This field defaults to "Standard_NC12s_v3" if not specified. 
+ type: string + labelSelector: + description: LabelSelector specifies the required labels for the + GPU nodes. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + preferredNodes: + description: |- + PreferredNodes is an optional node list specified by the user. + If a node in the list does not have the required labels or + the required instanceType, it will be ignored. + items: + type: string + type: array + required: + - labelSelector + type: object + embedding: + description: |- + Embedding specifies whether the RAG engine generates embedding vectors using a remote service + or using a embedding model running locally. 
+ properties: + local: + description: Local specifies how to generate embeddings for index + data using a model run locally. + properties: + image: + description: Image is the name of the containerized embedding + model image. + type: string + imagePullSecret: + type: string + modelAccessSecret: + description: ModelAccessSecret is the name of the secret that + contains the huggingface access token. + type: string + modelID: + description: |- + ModelID is the ID of the embedding model hosted by huggingface. + When this field is specified, the RAG engine will download the embedding model + from huggingface repository during startup. The embedding model will not persist in local storage. + Note that if Image is specified, ModelID should not be specified and vice versa. + type: string + type: object + remote: + description: |- + Remote specifies how to generate embeddings for index data using a remote service. + Note that either Remote or Local needs to be specified, not both. + properties: + accessSecret: + description: AccessSecret is the name of the secret that contains + the service access token. + type: string + url: + description: URL points to a publicly available embedding + service, such as OpenAI. + type: string + required: + - url + type: object + type: object + indexServiceName: + description: |- + IndexServiceName is the name of the service which exposes the endpoint for user to input the index data + to generate embeddings. If not specified, a default service name will be created by the RAG engine. + type: string + inferencService: + properties: + accessSecret: + description: AccessSecret is the name of the secret that contains + the service access token. + type: string + url: + description: URL points to a running inference service endpoint + which accepts http(s) payload. 
+ type: string + required: + - url + type: object + queryServiceName: + description: |- + QueryServiceName is the name of the service which exposes the endpoint for accepting user queries to the + inference service. If not specified, a default service name will be created by the RAG engine. + type: string + storage: + description: |- + Storage specifies how to access the vector database used to save the embedding vectors. + If this field is not specified, by default, an in-memoty vector DB will be used. + The data will not be persisted. + type: object + required: + - embedding + - inferencService + type: object + status: + description: RAGEngineStatus defines the observed state of RAGEngine + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/presets/models/falcon/model.go b/presets/models/falcon/model.go index a94fa81f2..74c39995f 100644 --- a/presets/models/falcon/model.go +++ b/presets/models/falcon/model.go @@ -45,8 +45,8 @@ var ( } baseCommandPresetFalconInference = "accelerate launch" - baseCommandPresetFalconTuning = "python3 metrics_server.py & accelerate launch" - falconRunParams = map[string]string{ + baseCommandPresetFalconTuning = "python3 metrics_server.py & accelerate launch" + falconRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", } diff --git a/presets/models/mistral/model.go b/presets/models/mistral/model.go index ebab6fbe9..b4581d6f1 100644 --- a/presets/models/mistral/model.go +++ b/presets/models/mistral/model.go @@ -32,8 +32,8 @@ var ( } baseCommandPresetMistralInference = "accelerate launch" - baseCommandPresetMistralTuning = "python3 metrics_server.py & accelerate launch" - mistralRunParams = map[string]string{ + baseCommandPresetMistralTuning = "python3 metrics_server.py & accelerate launch" + mistralRunParams = map[string]string{ "torch_dtype": "bfloat16", "pipeline": "text-generation", } diff --git a/presets/models/phi2/model.go b/presets/models/phi2/model.go index 731043f11..07fb8e0d2 100644 --- a/presets/models/phi2/model.go +++ b/presets/models/phi2/model.go @@ -26,8 +26,8 @@ var ( } baseCommandPresetPhiInference = "accelerate launch" - baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" - phiRunParams = map[string]string{ + baseCommandPresetPhiTuning = "python3 metrics_server.py & 
accelerate launch" + phiRunParams = map[string]string{ "torch_dtype": "float16", "pipeline": "text-generation", } diff --git a/presets/models/phi3/model.go b/presets/models/phi3/model.go index c645b99e5..5656fc15a 100644 --- a/presets/models/phi3/model.go +++ b/presets/models/phi3/model.go @@ -44,8 +44,8 @@ var ( } baseCommandPresetPhiInference = "accelerate launch" - baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" - phiRunParams = map[string]string{ + baseCommandPresetPhiTuning = "python3 metrics_server.py & accelerate launch" + phiRunParams = map[string]string{ "torch_dtype": "auto", "pipeline": "text-generation", "trust_remote_code": "", From 47c1ce6a030ce6d5c95eb5e16c10977fa0af3c40 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 20 Sep 2024 01:44:31 -0500 Subject: [PATCH 02/42] feat: New RAG Service Signed-off-by: ishaansehgal99 --- presets/rag_service/__init__.py | 0 presets/rag_service/config.py | 10 +++ presets/rag_service/crud/__init__.py | 0 presets/rag_service/crud/operations.py | 39 +++++++++ presets/rag_service/embedding/__init__.py | 0 presets/rag_service/embedding/base.py | 7 ++ .../embedding/huggingface_local.py | 11 +++ .../embedding/huggingface_remote.py | 12 +++ presets/rag_service/main.py | 81 +++++++++++++++++++ presets/rag_service/models.py | 29 +++++++ presets/rag_service/vector_store/__init__.py | 0 presets/rag_service/vector_store/base.py | 42 ++++++++++ .../rag_service/vector_store/faiss_store.py | 61 ++++++++++++++ 13 files changed, 292 insertions(+) create mode 100644 presets/rag_service/__init__.py create mode 100644 presets/rag_service/config.py create mode 100644 presets/rag_service/crud/__init__.py create mode 100644 presets/rag_service/crud/operations.py create mode 100644 presets/rag_service/embedding/__init__.py create mode 100644 presets/rag_service/embedding/base.py create mode 100644 presets/rag_service/embedding/huggingface_local.py create mode 100644 
presets/rag_service/embedding/huggingface_remote.py create mode 100644 presets/rag_service/main.py create mode 100644 presets/rag_service/models.py create mode 100644 presets/rag_service/vector_store/__init__.py create mode 100644 presets/rag_service/vector_store/base.py create mode 100644 presets/rag_service/vector_store/faiss_store.py diff --git a/presets/rag_service/__init__.py b/presets/rag_service/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/config.py b/presets/rag_service/config.py new file mode 100644 index 000000000..e5086fed0 --- /dev/null +++ b/presets/rag_service/config.py @@ -0,0 +1,10 @@ +# config.py +import os + +EMBEDDING_TYPE = os.getenv("EMBEDDING_TYPE", "local") +EMBEDDING_URL = os.getenv("EMBEDDING_URL") +MODEL_ID = os.getenv("MODEL_ID", "BAAI/bge-small-en-v1.5") +VECTOR_DB_TYPE = os.getenv("VECTOR_DB_TYPE", "faiss") +INDEX_SERVICE_NAME = os.getenv("INDEX_SERVICE_NAME", "default-index-service") +ACCESS_SECRET = os.getenv("ACCESS_SECRET") +PERSIST_DIR = "./storage" \ No newline at end of file diff --git a/presets/rag_service/crud/__init__.py b/presets/rag_service/crud/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/crud/operations.py b/presets/rag_service/crud/operations.py new file mode 100644 index 000000000..9a5003de7 --- /dev/null +++ b/presets/rag_service/crud/operations.py @@ -0,0 +1,39 @@ +from typing import Dict, List + +from models import Document +from vector_store.base import BaseVectorStore + + +class RAGOperations: + def __init__(self, vector_store: BaseVectorStore): + self.vector_store = vector_store + + def create(self, documents: List[Document]) -> List[str]: + return self.vector_store.index_documents(documents) + + def read(self, query: str, top_k: int): + return self.vector_store.query(query, top_k) + + def update(self, documents: List[Document]) -> Dict[str, List[str]]: + updated_docs = [] + new_docs = [] + for doc in documents: + if 
doc.doc_id and self.vector_store.document_exists(doc.doc_id): + self.vector_store.update_document(doc) + updated_docs.append(doc.doc_id) + else: + self.vector_store.add_document(doc) + new_docs.extend(doc.doc_id) + return {"updated": updated_docs, "inserted": new_docs} + + def delete(self, doc_id: str): + return self.vector_store.delete_document(doc_id) + + def get(self, doc_id: str) -> Document: + return self.vector_store.get_document(doc_id) + + def list_all(self) -> Dict[str, Document]: + return self.vector_store.list_documents() + + def refresh(self, documents: List[Document]) -> List[bool]: + return self.vector_store.refresh_documents(documents) diff --git a/presets/rag_service/embedding/__init__.py b/presets/rag_service/embedding/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/embedding/base.py b/presets/rag_service/embedding/base.py new file mode 100644 index 000000000..ba5a8573e --- /dev/null +++ b/presets/rag_service/embedding/base.py @@ -0,0 +1,7 @@ +from abc import ABC, abstractmethod + + +class BaseEmbeddingModel(ABC): + @abstractmethod + def get_text_embedding(self, text: str): + pass \ No newline at end of file diff --git a/presets/rag_service/embedding/huggingface_local.py b/presets/rag_service/embedding/huggingface_local.py new file mode 100644 index 000000000..be380a8d5 --- /dev/null +++ b/presets/rag_service/embedding/huggingface_local.py @@ -0,0 +1,11 @@ +from llama_index.embeddings.huggingface import HuggingFaceEmbedding + +from .base import BaseEmbeddingModel + + +class LocalHuggingFaceEmbedding(BaseEmbeddingModel): + def __init__(self, model_name: str): + self.model = HuggingFaceEmbedding(model_name=model_name) + + def get_text_embedding(self, text: str): + return self.model.get_text_embedding(text) diff --git a/presets/rag_service/embedding/huggingface_remote.py b/presets/rag_service/embedding/huggingface_remote.py new file mode 100644 index 000000000..c3314ccb6 --- /dev/null +++ 
b/presets/rag_service/embedding/huggingface_remote.py @@ -0,0 +1,12 @@ +from llama_index.embeddings.huggingface_api import \ + HuggingFaceInferenceAPIEmbedding + +from .base import BaseEmbeddingModel + + +class RemoteHuggingFaceEmbedding(BaseEmbeddingModel): + def __init__(self, model_name: str, api_key: str): + self.model = HuggingFaceInferenceAPIEmbedding(model_name=model_name, api_key=api_key) + + def get_text_embedding(self, text: str): + return self.model.get_text_embedding(text) diff --git a/presets/rag_service/main.py b/presets/rag_service/main.py new file mode 100644 index 000000000..953926cdf --- /dev/null +++ b/presets/rag_service/main.py @@ -0,0 +1,81 @@ +from typing import Dict, List + +from crud.operations import RAGOperations +from embedding import get_embedding_model +from fastapi import FastAPI, HTTPException +from models import (DocumentResponse, IndexRequest, ListDocumentsResponse, + QueryRequest, RefreshRequest, UpdateRequest) +from vector_store.faiss_store import FaissVectorStoreManager + +from config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID + +app = FastAPI() + +# Initialize embedding model +embed_model = get_embedding_model(EMBEDDING_TYPE, MODEL_ID, ACCESS_SECRET) + +# Initialize vector store +vector_store = FaissVectorStoreManager(dimension=384, embed_model=embed_model) + +# Initialize RAG operations +rag_ops = RAGOperations(vector_store) + +@app.post("/index", response_model=List[str]) +async def index_documents(request: IndexRequest): + try: + doc_ids = rag_ops.create(request.documents) + return doc_ids + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/query") +async def query_index(request: QueryRequest): + try: + response = rag_ops.read(request.query, request.top_k) + return {"response": str(response)} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.put("/update", response_model=Dict[str, List[str]]) +async def update_documents(request: 
UpdateRequest): + try: + result = rag_ops.update(request.documents) + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/refresh", response_model=List[bool]) +async def refresh_documents(request: RefreshRequest): + try: + result = rag_ops.refresh(request.documents) + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.delete("/document/{doc_id}") +async def delete_document(doc_id: str): + try: + rag_ops.delete(doc_id) + return {"message": "Document deleted successfully"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/document/{doc_id}", response_model=DocumentResponse) +async def get_document(doc_id: str): + try: + document = rag_ops.get(doc_id) + return DocumentResponse(doc_id=doc_id, document=document) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/documents", response_model=ListDocumentsResponse) +async def list_documents(): + try: + documents = rag_ops.list_all() + return ListDocumentsResponse(documents=documents) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/presets/rag_service/models.py b/presets/rag_service/models.py new file mode 100644 index 000000000..a1d21537b --- /dev/null +++ b/presets/rag_service/models.py @@ -0,0 +1,29 @@ +from typing import Dict, List, Optional + +from pydantic import BaseModel + + +class Document(BaseModel): + text: str + metadata: Optional[dict] = {} + doc_id: Optional[str] = None + +class IndexRequest(BaseModel): + documents: List[Document] + +class QueryRequest(BaseModel): + query: str + top_k: int = 10 + +class UpdateRequest(BaseModel): + documents: List[Document] + +class RefreshRequest(BaseModel): + documents: List[Document] + +class DocumentResponse(BaseModel): + 
doc_id: str + document: Document + +class ListDocumentsResponse(BaseModel): + documents: Dict[str, Document] \ No newline at end of file diff --git a/presets/rag_service/vector_store/__init__.py b/presets/rag_service/vector_store/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/vector_store/base.py b/presets/rag_service/vector_store/base.py new file mode 100644 index 000000000..b448bc213 --- /dev/null +++ b/presets/rag_service/vector_store/base.py @@ -0,0 +1,42 @@ +from abc import ABC, abstractmethod +from typing import Dict, List + +from models import Document + + +class BaseVectorStore(ABC): + @abstractmethod + def index_documents(self, documents: List[Document]) -> List[str]: + pass + + @abstractmethod + def query(self, query: str, top_k: int): + pass + + @abstractmethod + def add_document(self, document: Document): + pass + + @abstractmethod + def delete_document(self, doc_id: str): + pass + + @abstractmethod + def update_document(self, document: Document) -> str: + pass + + @abstractmethod + def get_document(self, doc_id: str) -> Document: + pass + + @abstractmethod + def list_documents(self) -> Dict[str, Document]: + pass + + @abstractmethod + def document_exists(self, doc_id: str) -> bool: + pass + + @abstractmethod + def refresh_documents(self, documents: List[Document]) -> List[bool]: + pass \ No newline at end of file diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py new file mode 100644 index 000000000..6024f024d --- /dev/null +++ b/presets/rag_service/vector_store/faiss_store.py @@ -0,0 +1,61 @@ +import os + +import faiss +from llama_index.core import Document as LlamaDocument +from llama_index.core import StorageContext, VectorStoreIndex +from llama_index.vector_stores.faiss import FaissVectorStore +from models import Document + +from config import PERSIST_DIR + +from .base import BaseVectorStore + + +class FaissVectorStoreManager(BaseVectorStore): 
+ def __init__(self, dimension: int, embed_model): + self.dimension = dimension + self.embed_model = embed_model + self.faiss_index = faiss.IndexFlatL2(self.dimension) + self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) + self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + + if not os.path.exists(PERSIST_DIR): + os.makedirs(PERSIST_DIR) + + def index_documents(self, documents: List[Document]): + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + self.storage_context.persist(persist_dir=PERSIST_DIR) + return index + + def query(self, query: str, top_k: int): + index = self._load_index() + query_engine = index.as_query_engine(top_k=top_k) + return query_engine.query(query) + + def add_document(self, document: Document): + index = self._load_index() + index.insert(document) + + def delete_document(self, doc_id: str): + index = self._load_index() + index.delete_ref_doc(doc_id, delete_from_docstore=True) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def update_document(self, document: Document): + index = self._load_index() + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + index.update_ref_doc(llama_doc) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def get_document(self, doc_id: str): + index = self._load_index() + doc = index.docstore.get_document(doc_id) + if not doc: + raise ValueError(f"Document with ID {doc_id} not found.") + return doc + + def _load_index(self): + vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR) + storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir=PERSIST_DIR) + return VectorStoreIndex.from_storage(storage_context) From c8bfa185f1a26a588c24a56419834ecfd6ed9eb1 Mon Sep 17 00:00:00 2001 From: 
ishaansehgal99 Date: Fri, 20 Sep 2024 02:06:30 -0500 Subject: [PATCH 03/42] feat: New RAG Service Signed-off-by: ishaansehgal99 --- presets/rag_service/crud/operations.py | 11 ++++- presets/rag_service/vector_store/base.py | 8 ++++ .../rag_service/vector_store/faiss_store.py | 45 ++++++++++++++++--- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/presets/rag_service/crud/operations.py b/presets/rag_service/crud/operations.py index 9a5003de7..e5c670e0e 100644 --- a/presets/rag_service/crud/operations.py +++ b/presets/rag_service/crud/operations.py @@ -9,12 +9,15 @@ def __init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store def create(self, documents: List[Document]) -> List[str]: + """Index new documents.""" return self.vector_store.index_documents(documents) def read(self, query: str, top_k: int): + """Query the indexed documents.""" return self.vector_store.query(query, top_k) def update(self, documents: List[Document]) -> Dict[str, List[str]]: + """Update existing documents, or insert new ones if they don’t exist.""" updated_docs = [] new_docs = [] for doc in documents: @@ -22,18 +25,22 @@ def update(self, documents: List[Document]) -> Dict[str, List[str]]: self.vector_store.update_document(doc) updated_docs.append(doc.doc_id) else: - self.vector_store.add_document(doc) - new_docs.extend(doc.doc_id) + self.vector_store.add_document(doc) # Only inserts new document, no reindex + new_docs.append(doc.doc_id) return {"updated": updated_docs, "inserted": new_docs} def delete(self, doc_id: str): + """Delete a document by ID.""" return self.vector_store.delete_document(doc_id) def get(self, doc_id: str) -> Document: + """Retrieve a document by ID.""" return self.vector_store.get_document(doc_id) def list_all(self) -> Dict[str, Document]: + """List all documents.""" return self.vector_store.list_documents() def refresh(self, documents: List[Document]) -> List[bool]: + """Dummy method for refresh, if needed.""" return 
self.vector_store.refresh_documents(documents) diff --git a/presets/rag_service/vector_store/base.py b/presets/rag_service/vector_store/base.py index b448bc213..b791bb7e6 100644 --- a/presets/rag_service/vector_store/base.py +++ b/presets/rag_service/vector_store/base.py @@ -39,4 +39,12 @@ def document_exists(self, doc_id: str) -> bool: @abstractmethod def refresh_documents(self, documents: List[Document]) -> List[bool]: + pass + + @abstractmethod + def list_documents(self) -> Dict[str, Document]: + pass + + @abstractmethod + def document_exists(self, doc_id: str) -> bool: pass \ No newline at end of file diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index 6024f024d..b7c274345 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -1,4 +1,5 @@ import os +from typing import Dict, List import faiss from llama_index.core import Document as LlamaDocument @@ -18,44 +19,76 @@ def __init__(self, dimension: int, embed_model): self.faiss_index = faiss.IndexFlatL2(self.dimension) self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) - + if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) def index_documents(self, documents: List[Document]): + """Indexes new documents.""" llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) self.storage_context.persist(persist_dir=PERSIST_DIR) - return index + return [doc.doc_id for doc in documents] + + def add_document(self, document: Document): + """Inserts a single document into the existing FAISS index.""" + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + index = self._load_index() + 
index.insert(llama_doc) + self.storage_context.persist(persist_dir=PERSIST_DIR) def query(self, query: str, top_k: int): + """Queries the FAISS vector store.""" index = self._load_index() query_engine = index.as_query_engine(top_k=top_k) return query_engine.query(query) - - def add_document(self, document: Document): - index = self._load_index() - index.insert(document) def delete_document(self, doc_id: str): + """Deletes a document from the FAISS vector store.""" index = self._load_index() index.delete_ref_doc(doc_id, delete_from_docstore=True) self.storage_context.persist(persist_dir=PERSIST_DIR) def update_document(self, document: Document): + """Updates an existing document in the FAISS vector store.""" index = self._load_index() llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) index.update_ref_doc(llama_doc) self.storage_context.persist(persist_dir=PERSIST_DIR) def get_document(self, doc_id: str): + """Retrieves a document by its ID.""" index = self._load_index() doc = index.docstore.get_document(doc_id) if not doc: raise ValueError(f"Document with ID {doc_id} not found.") return doc + def refresh_documents(self, documents: List[Document]) -> List[bool]: + """Updates existing documents and inserts new documents in the vector store.""" + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + refresh_results = self.index.refresh_ref_docs(llama_docs) + self._persist() + # Returns a list of booleans indicating whether each document was successfully refreshed. 
+ return refresh_results + + def list_documents(self) -> Dict[str, Document]: + """Lists all documents in the vector store.""" + index = self._load_index() + return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) + for doc_id, doc in index.docstore.docs.items()} + + def document_exists(self, doc_id: str) -> bool: + """Checks if a document exists in the vector store.""" + index = self._load_index() + return doc_id in index.docstore.docs + def _load_index(self): + """Loads the existing FAISS index from disk.""" vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR) storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir=PERSIST_DIR) return VectorStoreIndex.from_storage(storage_context) + + def _persist(self): + """Saves the existing FAISS index to disk.""" + self.storage_context.persist(persist_dir=PERSIST_DIR) From a28a8d5ec0743dd3e466e7481e25facd39437693 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 20 Sep 2024 11:07:24 -0500 Subject: [PATCH 04/42] fix: Use local index object Signed-off-by: ishaansehgal99 --- .../rag_service/vector_store/faiss_store.py | 45 ++++++++++++------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index b7c274345..a75ce00b7 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -19,53 +19,62 @@ def __init__(self, dimension: int, embed_model): self.faiss_index = faiss.IndexFlatL2(self.dimension) self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + self.index = None # Use to store the in-memory index if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) def index_documents(self, documents: List[Document]): - """Indexes new documents.""" + """Recreates the entire FAISS index and vector store 
with new documents.""" llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) - self.storage_context.persist(persist_dir=PERSIST_DIR) + self.index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + self._persist() + # Return the document IDs that were indexed return [doc.doc_id for doc in documents] def add_document(self, document: Document): """Inserts a single document into the existing FAISS index.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - index = self._load_index() - index.insert(llama_doc) + self.index.insert(llama_doc) self.storage_context.persist(persist_dir=PERSIST_DIR) def query(self, query: str, top_k: int): """Queries the FAISS vector store.""" - index = self._load_index() - query_engine = index.as_query_engine(top_k=top_k) + if self.index is None: + self.index = self._load_index() # Load if not already in memory + query_engine = self.index.as_query_engine(top_k=top_k) return query_engine.query(query) def delete_document(self, doc_id: str): """Deletes a document from the FAISS vector store.""" - index = self._load_index() - index.delete_ref_doc(doc_id, delete_from_docstore=True) + if self.index is None: + self.index = self._load_index() # Load if not already in memory + self.index.delete_ref_doc(doc_id, delete_from_docstore=True) self.storage_context.persist(persist_dir=PERSIST_DIR) def update_document(self, document: Document): """Updates an existing document in the FAISS vector store.""" - index = self._load_index() + if self.index is None: + self.index = self._load_index() # Load if not already in memory llama_doc = LlamaDocument(text=document.text, 
metadata=document.metadata, id_=document.doc_id) - index.update_ref_doc(llama_doc) + self.index.update_ref_doc(llama_doc) self.storage_context.persist(persist_dir=PERSIST_DIR) def get_document(self, doc_id: str): """Retrieves a document by its ID.""" - index = self._load_index() - doc = index.docstore.get_document(doc_id) + if self.index is None: + self.index = self._load_index() # Load if not already in memory + doc = self.index.docstore.get_document(doc_id) if not doc: raise ValueError(f"Document with ID {doc_id} not found.") return doc def refresh_documents(self, documents: List[Document]) -> List[bool]: """Updates existing documents and inserts new documents in the vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] refresh_results = self.index.refresh_ref_docs(llama_docs) self._persist() @@ -74,14 +83,16 @@ def refresh_documents(self, documents: List[Document]) -> List[bool]: def list_documents(self) -> Dict[str, Document]: """Lists all documents in the vector store.""" - index = self._load_index() + if self.index is None: + self.index = self._load_index() # Load if not already in memory return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - for doc_id, doc in index.docstore.docs.items()} + for doc_id, doc in self.index.docstore.docs.items()} def document_exists(self, doc_id: str) -> bool: """Checks if a document exists in the vector store.""" - index = self._load_index() - return doc_id in index.docstore.docs + if self.index is None: + self.index = self._load_index() # Load if not already in memory + return doc_id in self.index.docstore.docs def _load_index(self): """Loads the existing FAISS index from disk.""" From 63ef83d8d925286efa978bc62687165436d4811f Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 20 Sep 2024 11:11:04 -0500 Subject: [PATCH 05/42] fix: Load Index 
Signed-off-by: ishaansehgal99 --- presets/rag_service/vector_store/faiss_store.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index a75ce00b7..ef6e5483d 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -3,7 +3,9 @@ import faiss from llama_index.core import Document as LlamaDocument -from llama_index.core import StorageContext, VectorStoreIndex +from llama_index.core import (StorageContext, VectorStoreIndex, + load_graph_from_storage, load_index_from_storage, + load_indices_from_storage) from llama_index.vector_stores.faiss import FaissVectorStore from models import Document @@ -97,8 +99,10 @@ def document_exists(self, doc_id: str) -> bool: def _load_index(self): """Loads the existing FAISS index from disk.""" vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR) - storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir=PERSIST_DIR) - return VectorStoreIndex.from_storage(storage_context) + storage_context = StorageContext.from_defaults( + vector_store=vector_store, persist_dir=PERSIST_DIR + ) + return load_index_from_storage(storage_context=storage_context) def _persist(self): """Saves the existing FAISS index to disk.""" From ff03456f422652ec4cf6caabb38c33d17e830db0 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 23 Sep 2024 19:07:36 -0700 Subject: [PATCH 06/42] fix: Add ChromaDB VectorStore Signed-off-by: ishaansehgal99 --- .../vector_store/chromadb_playground.py | 62 ++++++++++ .../vector_store/chromadb_store.py | 110 ++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 presets/rag_service/vector_store/chromadb_playground.py create mode 100644 presets/rag_service/vector_store/chromadb_store.py diff --git a/presets/rag_service/vector_store/chromadb_playground.py 
b/presets/rag_service/vector_store/chromadb_playground.py new file mode 100644 index 000000000..31a5af077 --- /dev/null +++ b/presets/rag_service/vector_store/chromadb_playground.py @@ -0,0 +1,62 @@ +from llama_index.core import Settings +from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI + +remote_llm_api = HuggingFaceInferenceAPI( + model_name="HuggingFaceH4/zephyr-7b-alpha" +) + +Settings.llm = remote_llm_api + +import logging + +import chromadb +from IPython.display import Markdown, display +from llama_index.core import (SimpleDirectoryReader, StorageContext, + VectorStoreIndex) +from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.vector_stores.chroma import ChromaVectorStore + +# Enable DEBUG logging for ChromaDB +logging.basicConfig(level=logging.DEBUG) + +# create ChromaDB client and a new collection +chroma_client = chromadb.EphemeralClient() +chroma_collection = chroma_client.create_collection("quickstart") + +# define embedding function +embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5") + +# load documents from directory +documents = SimpleDirectoryReader("./data/paul_graham/").load_data() + +# set up ChromaVectorStore and load in data +vector_store = ChromaVectorStore(chroma_collection=chroma_collection) +storage_context = StorageContext.from_defaults(vector_store=vector_store) +index = VectorStoreIndex.from_documents( + documents, storage_context=storage_context, embed_model=embed_model +) + +# Log collection contents before querying +logging.debug("Documents in ChromaDB collection before querying:") +all_documents = chroma_collection.get(include=["documents"]) +logging.debug(all_documents["documents"]) + +# Query Data +query_engine = index.as_query_engine() +response = query_engine.query("What did the author do growing up?") +display(Markdown(f"{response}")) + +# Log collection contents after querying +logging.debug("Documents in ChromaDB collection after querying:") 
+all_documents_after_query = chroma_collection.get(include=["documents"]) +logging.debug(all_documents_after_query["documents"]) + +# Log embeddings stored in ChromaDB +logging.debug("Embeddings stored in ChromaDB:") +all_embeddings = chroma_collection.get(include=["embeddings"]) +logging.debug(all_embeddings["embeddings"]) + +# Log metadata stored in ChromaDB +logging.debug("Metadata stored in ChromaDB:") +all_metadata = chroma_collection.get(include=["metadatas"]) +logging.debug(all_metadata["metadatas"]) diff --git a/presets/rag_service/vector_store/chromadb_store.py b/presets/rag_service/vector_store/chromadb_store.py new file mode 100644 index 000000000..acb940747 --- /dev/null +++ b/presets/rag_service/vector_store/chromadb_store.py @@ -0,0 +1,110 @@ +import os +from typing import Dict, List + +import chromadb +from llama_index.core import Document as LlamaDocument +from llama_index.core import (StorageContext, VectorStoreIndex, + load_index_from_storage) +from llama_index.vector_stores.chroma import ChromaVectorStore +from models import Document + +from config import PERSIST_DIR + +from .base import BaseVectorStore + + +class ChromaDBVectorStoreManager(BaseVectorStore): + def __init__(self, embed_model): + self.embed_model = embed_model + # Initialize ChromaDB client and collection + self.chroma_client = chromadb.EphemeralClient() + self.collection_name = "quickstart" + self.chroma_collection = self.chroma_client.create_collection(self.collection_name) + self.vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) + self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + self.index = None # Use to store the in-memory index # TODO: Multiple indexes via name (e.g. 
namespace) + + if not os.path.exists(PERSIST_DIR): + os.makedirs(PERSIST_DIR) + + def index_documents(self, documents: List[Document]): + """Recreates the entire ChromaDB index and vector store with new documents.""" + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + self.index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + self._persist() + # Return the document IDs that were indexed + return [doc.doc_id for doc in documents] + + def add_document(self, document: Document): + """Inserts a single document into the existing ChromaDB index.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + self.index.insert(llama_doc) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def query(self, query: str, top_k: int): + """Queries the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + query_engine = self.index.as_query_engine(top_k=top_k) + return query_engine.query(query) + + def delete_document(self, doc_id: str): + """Deletes a document from the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + self.index.delete_ref_doc(doc_id, delete_from_docstore=True) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def update_document(self, document: Document): + """Updates an existing document in the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + self.index.update_ref_doc(llama_doc) + self.storage_context.persist(persist_dir=PERSIST_DIR) + + def get_document(self, doc_id: str): + """Retrieves a document 
by its ID from ChromaDB.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + doc = self.index.docstore.get_document(doc_id) + if not doc: + raise ValueError(f"Document with ID {doc_id} not found.") + return doc + + def refresh_documents(self, documents: List[Document]) -> List[bool]: + """Updates existing documents and inserts new documents in the vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + refresh_results = self.index.refresh_ref_docs(llama_docs) + self._persist() + # Returns a list of booleans indicating whether each document was successfully refreshed. + return refresh_results + + def list_documents(self) -> Dict[str, Document]: + """Lists all documents in the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) + for doc_id, doc in self.index.docstore.docs.items()} + + def document_exists(self, doc_id: str) -> bool: + """Checks if a document exists in the ChromaDB vector store.""" + if self.index is None: + self.index = self._load_index() # Load if not already in memory + return doc_id in self.index.docstore.docs + + def _load_index(self): + """Loads the existing ChromaDB index from disk.""" + vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) + storage_context = StorageContext.from_defaults( + vector_store=vector_store, persist_dir=PERSIST_DIR + ) + return load_index_from_storage(storage_context=storage_context) + + def _persist(self): + """Saves the existing ChromaDB index to disk.""" + self.storage_context.persist(persist_dir=PERSIST_DIR) From d02391aa735fb0d6606930cc0f8f3ffcb3088a95 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 23 Sep 2024 19:08:30 -0700 Subject: [PATCH 
07/42] fix: Add TODOs and comments Signed-off-by: ishaansehgal99 --- presets/rag_service/crud/operations.py | 2 +- presets/rag_service/main.py | 2 +- presets/rag_service/vector_store/faiss_store.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/presets/rag_service/crud/operations.py b/presets/rag_service/crud/operations.py index e5c670e0e..5218e4508 100644 --- a/presets/rag_service/crud/operations.py +++ b/presets/rag_service/crud/operations.py @@ -42,5 +42,5 @@ def list_all(self) -> Dict[str, Document]: return self.vector_store.list_documents() def refresh(self, documents: List[Document]) -> List[bool]: - """Dummy method for refresh, if needed.""" + """Refresh Documents.""" return self.vector_store.refresh_documents(documents) diff --git a/presets/rag_service/main.py b/presets/rag_service/main.py index 953926cdf..80f6da87f 100644 --- a/presets/rag_service/main.py +++ b/presets/rag_service/main.py @@ -29,7 +29,7 @@ async def index_documents(request: IndexRequest): raise HTTPException(status_code=500, detail=str(e)) @app.post("/query") -async def query_index(request: QueryRequest): +async def query_index(request: QueryRequest): # TODO: Research async/sync what to use (inference is calling) try: response = rag_ops.read(request.query, request.top_k) return {"response": str(response)} diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index ef6e5483d..df44e6c8f 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -16,12 +16,12 @@ class FaissVectorStoreManager(BaseVectorStore): def __init__(self, dimension: int, embed_model): - self.dimension = dimension + self.dimension = dimension # TODO: Automatically needs to configure dim based on embed_model self.embed_model = embed_model self.faiss_index = faiss.IndexFlatL2(self.dimension) self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) self.storage_context = 
StorageContext.from_defaults(vector_store=self.vector_store) - self.index = None # Use to store the in-memory index + self.index = None # Use to store the in-memory index # TODO: Multiple indexes via name (e.g. namespace) if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) From d82897d8419ed3e9f2fcf9fdafc45867030a7fcf Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 24 Sep 2024 11:44:18 -0700 Subject: [PATCH 08/42] fix: Add function for getting embedding dim Signed-off-by: ishaansehgal99 --- presets/rag_service/embedding/base.py | 6 ++++++ presets/rag_service/embedding/huggingface_local.py | 9 +++++++++ presets/rag_service/embedding/huggingface_remote.py | 9 +++++++++ 3 files changed, 24 insertions(+) diff --git a/presets/rag_service/embedding/base.py b/presets/rag_service/embedding/base.py index ba5a8573e..a1c371937 100644 --- a/presets/rag_service/embedding/base.py +++ b/presets/rag_service/embedding/base.py @@ -4,4 +4,10 @@ class BaseEmbeddingModel(ABC): @abstractmethod def get_text_embedding(self, text: str): + """Returns the text embedding for a given input string.""" + pass + + @abstractmethod + def get_embedding_dimension(self) -> int: + """Returns the embedding dimension for the model.""" pass \ No newline at end of file diff --git a/presets/rag_service/embedding/huggingface_local.py b/presets/rag_service/embedding/huggingface_local.py index be380a8d5..a18798a2c 100644 --- a/presets/rag_service/embedding/huggingface_local.py +++ b/presets/rag_service/embedding/huggingface_local.py @@ -8,4 +8,13 @@ def __init__(self, model_name: str): self.model = HuggingFaceEmbedding(model_name=model_name) def get_text_embedding(self, text: str): + """Returns the text embedding for a given input string.""" return self.model.get_text_embedding(text) + + def get_embedding_dimension(self) -> int: + """Infers the embedding dimension by making a local call to get the embedding of a dummy text.""" + dummy_input = "This is a dummy sentence." 
+ embedding = self.get_text_embedding(dummy_input) + + # TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size) + return len(embedding) \ No newline at end of file diff --git a/presets/rag_service/embedding/huggingface_remote.py b/presets/rag_service/embedding/huggingface_remote.py index c3314ccb6..341a1d03b 100644 --- a/presets/rag_service/embedding/huggingface_remote.py +++ b/presets/rag_service/embedding/huggingface_remote.py @@ -9,4 +9,13 @@ def __init__(self, model_name: str, api_key: str): self.model = HuggingFaceInferenceAPIEmbedding(model_name=model_name, api_key=api_key) def get_text_embedding(self, text: str): + """Returns the text embedding for a given input string.""" return self.model.get_text_embedding(text) + + def get_embedding_dimension(self) -> int: + """Infers the embedding dimension by making a remote call to get the embedding of a dummy text.""" + dummy_input = "This is a dummy sentence." + embedding = self.get_text_embedding(dummy_input) + + # TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size) + return len(embedding) From cd9cbab69800322ce5fe3f917b4e86027b6a04d7 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 24 Sep 2024 11:46:04 -0700 Subject: [PATCH 09/42] fix: Updates faiss store to handle multiple indices and dynamically get embedding dim Signed-off-by: ishaansehgal99 --- presets/rag_service/vector_store/base.py | 22 ++-- .../vector_store/chromadb_store.py | 9 +- .../rag_service/vector_store/faiss_store.py | 119 ++++++++++-------- .../{ => playground}/chromadb_playground.py | 0 4 files changed, 84 insertions(+), 66 deletions(-) rename presets/rag_service/vector_store/{ => playground}/chromadb_playground.py (100%) diff --git a/presets/rag_service/vector_store/base.py b/presets/rag_service/vector_store/base.py index b791bb7e6..d9b92315c 100644 --- a/presets/rag_service/vector_store/base.py +++ b/presets/rag_service/vector_store/base.py @@ -6,45 
+6,45 @@ class BaseVectorStore(ABC): @abstractmethod - def index_documents(self, documents: List[Document]) -> List[str]: + def index_documents(self, documents: List[Document], index_name: str) -> List[str]: pass @abstractmethod - def query(self, query: str, top_k: int): + def query(self, query: str, top_k: int, index_name: str): pass @abstractmethod - def add_document(self, document: Document): + def add_document(self, document: Document, index_name: str): pass @abstractmethod - def delete_document(self, doc_id: str): + def delete_document(self, doc_id: str, index_name: str): pass @abstractmethod - def update_document(self, document: Document) -> str: + def update_document(self, document: Document, index_name: str) -> str: pass @abstractmethod - def get_document(self, doc_id: str) -> Document: + def get_document(self, doc_id: str, index_name: str) -> Document: pass @abstractmethod - def list_documents(self) -> Dict[str, Document]: + def list_documents(self, index_name: str) -> Dict[str, Document]: pass @abstractmethod - def document_exists(self, doc_id: str) -> bool: + def document_exists(self, doc_id: str, index_name: str) -> bool: pass @abstractmethod - def refresh_documents(self, documents: List[Document]) -> List[bool]: + def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: pass @abstractmethod - def list_documents(self) -> Dict[str, Document]: + def list_documents(self, index_name: str) -> Dict[str, Document]: pass @abstractmethod - def document_exists(self, doc_id: str) -> bool: + def document_exists(self, doc_id: str, index_name: str) -> bool: pass \ No newline at end of file diff --git a/presets/rag_service/vector_store/chromadb_store.py b/presets/rag_service/vector_store/chromadb_store.py index acb940747..927318202 100644 --- a/presets/rag_service/vector_store/chromadb_store.py +++ b/presets/rag_service/vector_store/chromadb_store.py @@ -22,15 +22,16 @@ def __init__(self, embed_model): self.chroma_collection = 
self.chroma_client.create_collection(self.collection_name) self.vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) - self.index = None # Use to store the in-memory index # TODO: Multiple indexes via name (e.g. namespace) + self.indices = {} # Use to store the in-memory index via namespace (e.g. namespace -> index) if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) - def index_documents(self, documents: List[Document]): - """Recreates the entire ChromaDB index and vector store with new documents.""" + def index_documents(self, documents: List[Document], index_name: str): + """Recreates the entire FAISS index and vector store with new documents.""" llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - self.index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above + self.indices[index_name] = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) self._persist() # Return the document IDs that were indexed return [doc.doc_id for doc in documents] diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index df44e6c8f..e33b3904a 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -15,95 +15,112 @@ class FaissVectorStoreManager(BaseVectorStore): - def __init__(self, dimension: int, embed_model): - self.dimension = dimension # TODO: Automatically needs to configure dim based on embed_model + def __init__(self, embed_model): self.embed_model = embed_model - self.faiss_index = faiss.IndexFlatL2(self.dimension) - self.vector_store = 
FaissVectorStore(faiss_index=self.faiss_index) - self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) - self.index = None # Use to store the in-memory index # TODO: Multiple indexes via name (e.g. namespace) + self.dimension = self.embed_model.get_embedding_dimension() + # TODO: Consider allowing user custom indexing method e.g. + """ + # Choose the FAISS index type based on the provided index_method + if index_method == 'FlatL2': + faiss_index = faiss.IndexFlatL2(self.dimension) # L2 (Euclidean distance) index + elif index_method == 'FlatIP': + faiss_index = faiss.IndexFlatIP(self.dimension) # Inner product (cosine similarity) index + elif index_method == 'IVFFlat': + quantizer = faiss.IndexFlatL2(self.dimension) # Quantizer for IVF + faiss_index = faiss.IndexIVFFlat(quantizer, self.dimension, 100) # IVF with flat quantization + elif index_method == 'HNSW': + faiss_index = faiss.IndexHNSWFlat(self.dimension, 32) # HNSW index with 32 neighbors + else: + raise ValueError(f"Unknown index method: {index_method}") + """ + # TODO: We need to test if sharing storage_context is viable/correct or if we should make a new one for each index + self.faiss_index = faiss.IndexFlatL2(self.dimension) # Specifies FAISS indexing method (https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) + self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) # Specifies in-memory data structure for storing and retrieving document embeddings + self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) # Used to persist the vector store and its underlying data across sessions + self.indices = {} # Use to store the in-memory index via namespace (e.g. 
namespace -> index) if not os.path.exists(PERSIST_DIR): os.makedirs(PERSIST_DIR) - def index_documents(self, documents: List[Document]): + def index_documents(self, documents: List[Document], index_name: str): """Recreates the entire FAISS index and vector store with new documents.""" + if index_name in self.indices: + print(f"Index {index_name} already exists. Overwriting.") llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - self.index = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) - self._persist() + # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above + self.indices[index_name] = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) + self._persist(index_name) # Return the document IDs that were indexed return [doc.doc_id for doc in documents] - def add_document(self, document: Document): + def add_document(self, document: Document, index_name: str): """Inserts a single document into the existing FAISS index.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory + assert index_name in self.indices, f"No such index: '{index_name}' exists." 
llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index.insert(llama_doc) - self.storage_context.persist(persist_dir=PERSIST_DIR) + self.indices[index_name].insert(llama_doc) + self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) - def query(self, query: str, top_k: int): + def query(self, query: str, top_k: int, index_name: str): """Queries the FAISS vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - query_engine = self.index.as_query_engine(top_k=top_k) + assert index_name in self.indices, f"No such index: '{index_name}' exists." + query_engine = self.indices[index_name].as_query_engine(top_k=top_k) return query_engine.query(query) - def delete_document(self, doc_id: str): + def delete_document(self, doc_id: str, index_name: str): """Deletes a document from the FAISS vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - self.index.delete_ref_doc(doc_id, delete_from_docstore=True) - self.storage_context.persist(persist_dir=PERSIST_DIR) + assert index_name in self.indices, f"No such index: '{index_name}' exists." + self.indices[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) + self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) - def update_document(self, document: Document): + def update_document(self, document: Document, index_name: str): """Updates an existing document in the FAISS vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory + assert index_name in self.indices, f"No such index: '{index_name}' exists." 
llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index.update_ref_doc(llama_doc) - self.storage_context.persist(persist_dir=PERSIST_DIR) + self.indices[index_name].update_ref_doc(llama_doc) + self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) - def get_document(self, doc_id: str): + def get_document(self, doc_id: str, index_name: str): """Retrieves a document by its ID.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - doc = self.index.docstore.get_document(doc_id) + assert index_name in self.indices, f"No such index: '{index_name}' exists." + doc = self.indices[index_name].docstore.get_document(doc_id) if not doc: raise ValueError(f"Document with ID {doc_id} not found.") return doc - def refresh_documents(self, documents: List[Document]) -> List[bool]: + def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: """Updates existing documents and inserts new documents in the vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory + assert index_name in self.indices, f"No such index: '{index_name}' exists." llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - refresh_results = self.index.refresh_ref_docs(llama_docs) - self._persist() + refresh_results = self.indices[index_name].refresh_ref_docs(llama_docs) + self._persist(index_name) # Returns a list of booleans indicating whether each document was successfully refreshed. return refresh_results - def list_documents(self) -> Dict[str, Document]: + def list_documents(self, index_name: str) -> Dict[str, Document]: """Lists all documents in the vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory + assert index_name in self.indices, f"No such index: '{index_name}' exists." 
return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - for doc_id, doc in self.index.docstore.docs.items()} + for doc_id, doc in self.indices[index_name].docstore.docs.items()} - def document_exists(self, doc_id: str) -> bool: + def document_exists(self, doc_id: str, index_name: str) -> bool: """Checks if a document exists in the vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - return doc_id in self.index.docstore.docs + assert index_name in self.indices, f"No such index: '{index_name}' exists." + return doc_id in self.indices[index_name].docstore.docs - def _load_index(self): + def _load_index(self, index_name: str): """Loads the existing FAISS index from disk.""" - vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR) + persist_dir = os.path.join(PERSIST_DIR, index_name) + if not os.path.exists(persist_dir): + raise ValueError(f"No persisted index found for '{index_name}'") + vector_store = FaissVectorStore.from_persist_dir(persist_dir) storage_context = StorageContext.from_defaults( - vector_store=vector_store, persist_dir=PERSIST_DIR + vector_store=vector_store, persist_dir=persist_dir ) - return load_index_from_storage(storage_context=storage_context) + self.indices[index_name] = load_index_from_storage(storage_context=storage_context) + return self.indices[index_name] - def _persist(self): + def _persist(self, index_name: str): """Saves the existing FAISS index to disk.""" - self.storage_context.persist(persist_dir=PERSIST_DIR) + assert index_name in self.indices, f"No such index: '{index_name}' exists." 
+ storage_context = self.indices[index_name].storage_context + storage_context.persist(persist_dir=os.path.join(PERSIST_DIR, index_name)) diff --git a/presets/rag_service/vector_store/chromadb_playground.py b/presets/rag_service/vector_store/playground/chromadb_playground.py similarity index 100% rename from presets/rag_service/vector_store/chromadb_playground.py rename to presets/rag_service/vector_store/playground/chromadb_playground.py From 33669fcbde2613ace53b05f681e7fa7788977cdc Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 27 Sep 2024 16:06:08 -0700 Subject: [PATCH 10/42] feat: Add requirements Signed-off-by: ishaansehgal99 --- presets/rag_service/requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 presets/rag_service/requirements.txt diff --git a/presets/rag_service/requirements.txt b/presets/rag_service/requirements.txt new file mode 100644 index 000000000..bd210b6c8 --- /dev/null +++ b/presets/rag_service/requirements.txt @@ -0,0 +1,6 @@ +llama-index +llama-index-embeddings-huggingface +fastapi +faiss-cpu +llama-index-vector-stores-faiss +uvicorn From 7165ccf50dbc6842edd26ccac282406643e66888 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 27 Sep 2024 16:06:43 -0700 Subject: [PATCH 11/42] feat: fix typos, syntax errors and bugs --- presets/rag_service/embedding/huggingface_remote.py | 2 +- presets/rag_service/main.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/presets/rag_service/embedding/huggingface_remote.py b/presets/rag_service/embedding/huggingface_remote.py index 341a1d03b..f45e08c2c 100644 --- a/presets/rag_service/embedding/huggingface_remote.py +++ b/presets/rag_service/embedding/huggingface_remote.py @@ -6,7 +6,7 @@ class RemoteHuggingFaceEmbedding(BaseEmbeddingModel): def __init__(self, model_name: str, api_key: str): - self.model = HuggingFaceInferenceAPIEmbedding(model_name=model_name, api_key=api_key) + self.model = 
HuggingFaceInferenceAPIEmbedding(model_name=model_name, token=api_key) def get_text_embedding(self, text: str): """Returns the text embedding for a given input string.""" diff --git a/presets/rag_service/main.py b/presets/rag_service/main.py index 80f6da87f..97fed9151 100644 --- a/presets/rag_service/main.py +++ b/presets/rag_service/main.py @@ -1,7 +1,7 @@ from typing import Dict, List from crud.operations import RAGOperations -from embedding import get_embedding_model +from embedding.huggingface_local import LocalHuggingFaceEmbedding from fastapi import FastAPI, HTTPException from models import (DocumentResponse, IndexRequest, ListDocumentsResponse, QueryRequest, RefreshRequest, UpdateRequest) @@ -12,10 +12,10 @@ app = FastAPI() # Initialize embedding model -embed_model = get_embedding_model(EMBEDDING_TYPE, MODEL_ID, ACCESS_SECRET) +embed_model = LocalHuggingFaceEmbedding(MODEL_ID) # Initialize vector store -vector_store = FaissVectorStoreManager(dimension=384, embed_model=embed_model) +vector_store = FaissVectorStoreManager(embed_model=embed_model) # Initialize RAG operations rag_ops = RAGOperations(vector_store) From 7f399399d525df7af67b2fb371e3fb2b3063bd8b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Fri, 27 Sep 2024 17:59:51 -0700 Subject: [PATCH 12/42] fix: Bugs fixed for managing embeddings Signed-off-by: ishaansehgal99 --- .../embedding/huggingface_local.py | 1 - .../embedding/huggingface_remote.py | 1 - presets/rag_service/tests/__init__.py | 0 presets/rag_service/tests/conftest.py | 5 ++ presets/rag_service/tests/test_faiss_store.py | 64 +++++++++++++++++++ .../rag_service/vector_store/faiss_store.py | 7 +- 6 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 presets/rag_service/tests/__init__.py create mode 100644 presets/rag_service/tests/conftest.py create mode 100644 presets/rag_service/tests/test_faiss_store.py diff --git a/presets/rag_service/embedding/huggingface_local.py b/presets/rag_service/embedding/huggingface_local.py 
index a18798a2c..cf58c7a3e 100644 --- a/presets/rag_service/embedding/huggingface_local.py +++ b/presets/rag_service/embedding/huggingface_local.py @@ -16,5 +16,4 @@ def get_embedding_dimension(self) -> int: dummy_input = "This is a dummy sentence." embedding = self.get_text_embedding(dummy_input) - # TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size) return len(embedding) \ No newline at end of file diff --git a/presets/rag_service/embedding/huggingface_remote.py b/presets/rag_service/embedding/huggingface_remote.py index f45e08c2c..0f8e79181 100644 --- a/presets/rag_service/embedding/huggingface_remote.py +++ b/presets/rag_service/embedding/huggingface_remote.py @@ -17,5 +17,4 @@ def get_embedding_dimension(self) -> int: dummy_input = "This is a dummy sentence." embedding = self.get_text_embedding(dummy_input) - # TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size) return len(embedding) diff --git a/presets/rag_service/tests/__init__.py b/presets/rag_service/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/tests/conftest.py b/presets/rag_service/tests/conftest.py new file mode 100644 index 000000000..3c7c9c6ab --- /dev/null +++ b/presets/rag_service/tests/conftest.py @@ -0,0 +1,5 @@ +import sys +import os +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Force CPU-only execution for testing +os.environ["OMP_NUM_THREADS"] = "1" # Force single-threaded for testing to prevent segfault while loading embedding model diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py new file mode 100644 index 000000000..49c33273f --- /dev/null +++ b/presets/rag_service/tests/test_faiss_store.py @@ -0,0 +1,64 @@ +import os +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import 
pytest +from vector_store.faiss_store import FaissVectorStoreManager +from models import Document +from embedding.huggingface_local import LocalHuggingFaceEmbedding +from config import MODEL_ID + +@pytest.fixture(scope='session') +def init_embed_manager(): + return LocalHuggingFaceEmbedding(MODEL_ID) + +@pytest.fixture +def vector_store_manager(init_embed_manager): + with TemporaryDirectory() as temp_dir: + # Mock the persistence directory + os.environ['PERSIST_DIR'] = temp_dir + yield FaissVectorStoreManager(init_embed_manager) + + +def test_index_documents(vector_store_manager): + documents = [ + Document(doc_id="1", text="First document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", metadata={"type": "text"}) + ] + + doc_ids = vector_store_manager.index_documents(documents, index_name="test_index") + + assert len(doc_ids) == 2 + assert doc_ids == ["1", "2"] + + +def test_query_documents(vector_store_manager): + # Add documents to index + documents = [ + Document(doc_id="1", text="First document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", metadata={"type": "text"}) + ] + vector_store_manager.index_documents(documents, index_name="test_index") + + # Mock query and results + query_result = vector_store_manager.query("First", top_k=1, index_name="test_index") + + assert query_result is not None + + +def test_add_and_delete_document(vector_store_manager): + document = Document(doc_id="3", text="Third document", metadata={"type": "text"}) + vector_store_manager.index_documents([document], index_name="test_index") + + # Add a document to the existing index + new_document = Document(doc_id="4", text="Fourth document", metadata={"type": "text"}) + vector_store_manager.add_document(new_document, index_name="test_index") + + # Assert that the document exists + assert vector_store_manager.document_exists("4", "test_index") + + # Delete the document + vector_store_manager.delete_document("4", "test_index") + + # 
Assert that the document no longer exists + assert not vector_store_manager.document_exists("4", "test_index") diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index e33b3904a..b6c79292b 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -15,9 +15,10 @@ class FaissVectorStoreManager(BaseVectorStore): - def __init__(self, embed_model): - self.embed_model = embed_model - self.dimension = self.embed_model.get_embedding_dimension() + def __init__(self, embedding_manager): + self.embedding_manager = embedding_manager + self.embed_model = self.embedding_manager.model + self.dimension = self.embedding_manager.get_embedding_dimension() # TODO: Consider allowing user custom indexing method e.g. """ # Choose the FAISS index type based on the provided index_method From 1e07beb034f2e8453ef16ba03010cc380214c45f Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 30 Sep 2024 21:38:39 -0700 Subject: [PATCH 13/42] feat: Use a global SimpleIndexStore and separate StorageContexts Signed-off-by: ishaansehgal99 --- presets/rag_service/tests/conftest.py | 1 + presets/rag_service/tests/test_faiss_store.py | 26 ++++ .../rag_service/vector_store/faiss_store.py | 117 ++++++++++++------ 3 files changed, 105 insertions(+), 39 deletions(-) diff --git a/presets/rag_service/tests/conftest.py b/presets/rag_service/tests/conftest.py index 3c7c9c6ab..afb6c4713 100644 --- a/presets/rag_service/tests/conftest.py +++ b/presets/rag_service/tests/conftest.py @@ -3,3 +3,4 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Force CPU-only execution for testing os.environ["OMP_NUM_THREADS"] = "1" # Force single-threaded for testing to prevent segfault while loading embedding model +os.environ["MKL_NUM_THREADS"] = "1" # Force MKL to use a single thread diff --git 
a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 49c33273f..1cf63bdcf 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -15,6 +15,7 @@ def init_embed_manager(): @pytest.fixture def vector_store_manager(init_embed_manager): with TemporaryDirectory() as temp_dir: + print(f"Saving Temporary Test Storage at: {temp_dir}") # Mock the persistence directory os.environ['PERSIST_DIR'] = temp_dir yield FaissVectorStoreManager(init_embed_manager) @@ -31,6 +32,31 @@ def test_index_documents(vector_store_manager): assert len(doc_ids) == 2 assert doc_ids == ["1", "2"] +def test_index_documents_isolation(vector_store_manager): + doc_1_id, doc_2_id = "1", "2" + documents1 = [ + Document(doc_id=doc_1_id, text="First document in index1", metadata={"type": "text"}), + ] + documents2 = [ + Document(doc_id=doc_2_id, text="First document in index2", metadata={"type": "text"}), + ] + + # Index documents in separate indices + index_name_1, index_name_2 = "index1", "index2" + vector_store_manager.index_documents(documents1, index_name=index_name_1) + vector_store_manager.index_documents(documents2, index_name=index_name_2) + + # Ensure documents are correctly persisted and separated by index + doc_1 = vector_store_manager.get_document(doc_1_id, index_name=index_name_1) + assert doc_1 and doc_1.node_ids # Ensure documents were created + + doc_2 = vector_store_manager.get_document(doc_2_id, index_name=index_name_2) + assert doc_2 and doc_2.node_ids # Ensure documents were created + + # Ensure that the documents do not mix between indices + assert vector_store_manager.get_document(doc_1_id, index_name=index_name_2) is None, f"Document {doc_1_id} should not exist in {index_name_2}" + assert vector_store_manager.get_document(doc_2_id, index_name=index_name_1) is None, f"Document {doc_2_id} should not exist in {index_name_1}" + def test_query_documents(vector_store_manager): # Add 
documents to index diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index b6c79292b..8a89977a9 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -6,7 +6,9 @@ from llama_index.core import (StorageContext, VectorStoreIndex, load_graph_from_storage, load_index_from_storage, load_indices_from_storage) +from llama_index.core.storage.index_store import SimpleIndexStore from llama_index.vector_stores.faiss import FaissVectorStore +from llama_index.core.data_structs.data_structs import IndexStruct from models import Document from config import PERSIST_DIR @@ -34,79 +36,113 @@ def __init__(self, embedding_manager): else: raise ValueError(f"Unknown index method: {index_method}") """ - # TODO: We need to test if sharing storage_context is viable/correct or if we should make a new one for each index - self.faiss_index = faiss.IndexFlatL2(self.dimension) # Specifies FAISS indexing method (https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) - self.vector_store = FaissVectorStore(faiss_index=self.faiss_index) # Specifies in-memory data structure for storing and retrieving document embeddings - self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store) # Used to persist the vector store and its underlying data across sessions - self.indices = {} # Use to store the in-memory index via namespace (e.g. namespace -> index) - - if not os.path.exists(PERSIST_DIR): - os.makedirs(PERSIST_DIR) + self.index_map = {} # Used to store the in-memory index via namespace (e.g. 
namespace -> index) + self.index_store = SimpleIndexStore() # Use to store global index metadata def index_documents(self, documents: List[Document], index_name: str): """Recreates the entire FAISS index and vector store with new documents.""" - if index_name in self.indices: + if index_name in self.index_map: + del self.index_map[index_name] + self.index_store.delete_index_struct(self.index_map[index_name]) print(f"Index {index_name} already exists. Overwriting.") + + faiss_index = faiss.IndexFlatL2(self.dimension) # Specifies FAISS indexing method (https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) + vector_store = FaissVectorStore(faiss_index=faiss_index) # Specifies in-memory data structure for storing and retrieving document embeddings + storage_context = StorageContext.from_defaults(vector_store=vector_store) # Used to persist the vector store and its underlying data across sessions + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above - self.indices[index_name] = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) - self._persist(index_name) + index = VectorStoreIndex.from_documents( + llama_docs, + storage_context=storage_context, + embed_model=self.embed_model, + use_async=True # Indexing Process Performed Async + ) + index.set_index_id(index_name) # https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/indices/base.py#L138-L154 + self.index_map[index_name] = index + self.index_store.add_index_struct(index.index_struct) + self._persist(index_name) # TODO: Consider just persisting the index as opposed to shared index_store # Return the document IDs that were indexed return [doc.doc_id for doc in documents] def add_document(self, document: Document, index_name: str): """Inserts a 
single document into the existing FAISS index.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.indices[index_name].insert(llama_doc) - self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) + self.index_map[index_name].insert(llama_doc) + self._persist(index_name) def query(self, query: str, top_k: int, index_name: str): """Queries the FAISS vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - query_engine = self.indices[index_name].as_query_engine(top_k=top_k) + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + query_engine = self.index_map[index_name].as_query_engine(top_k=top_k) return query_engine.query(query) def delete_document(self, doc_id: str, index_name: str): """Deletes a document from the FAISS vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - self.indices[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) - self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + self.index_map[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) + self._persist(index_name) def update_document(self, document: Document, index_name: str): """Updates an existing document in the FAISS vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." 
+ if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.indices[index_name].update_ref_doc(llama_doc) - self.indices[index_name].storage_context.persist(persist_dir=PERSIST_DIR) + self.index_map[index_name].update_ref_doc(llama_doc) + self._persist(index_name) def get_document(self, doc_id: str, index_name: str): - """Retrieves a document by its ID.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - doc = self.indices[index_name].docstore.get_document(doc_id) - if not doc: - raise ValueError(f"Document with ID {doc_id} not found.") - return doc + """Retrieves a document's RefDocInfo by its ID.""" + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + + # Try to retrieve the RefDocInfo associated with the doc_id + ref_doc_info = self.index_map[index_name].ref_doc_info.get(doc_id) + + if ref_doc_info is None: + print(f"Document with ID {doc_id} not found in index '{index_name}'.") + return None + + return ref_doc_info + + def get_nodes_by_ref_doc_id(self, doc_id: str, index_name: str): + """Retrieve nodes associated with a given document's ref ID.""" + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + + ref_doc_info = self.get_document(doc_id, index_name) + if ref_doc_info is None: + return None + + return ref_doc_info.node_ids def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: """Updates existing documents and inserts new documents in the vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." 
+ if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - refresh_results = self.indices[index_name].refresh_ref_docs(llama_docs) + refresh_results = self.index_map[index_name].refresh_ref_docs(llama_docs) self._persist(index_name) # Returns a list of booleans indicating whether each document was successfully refreshed. return refresh_results def list_documents(self, index_name: str) -> Dict[str, Document]: """Lists all documents in the vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - for doc_id, doc in self.indices[index_name].docstore.docs.items()} + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + pass + # return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) + # for doc_id, doc in self.index_map[index_name].docstore.docs.items()} def document_exists(self, doc_id: str, index_name: str) -> bool: """Checks if a document exists in the vector store.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." 
- return doc_id in self.indices[index_name].docstore.docs + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + return doc_id in self.index_map[index_name].ref_doc_info def _load_index(self, index_name: str): """Loads the existing FAISS index from disk.""" @@ -117,11 +153,14 @@ def _load_index(self, index_name: str): storage_context = StorageContext.from_defaults( vector_store=vector_store, persist_dir=persist_dir ) - self.indices[index_name] = load_index_from_storage(storage_context=storage_context) - return self.indices[index_name] + self.index_map[index_name] = load_index_from_storage(storage_context=storage_context) + return self.index_map[index_name] def _persist(self, index_name: str): """Saves the existing FAISS index to disk.""" - assert index_name in self.indices, f"No such index: '{index_name}' exists." - storage_context = self.indices[index_name].storage_context + self.index_store.persist(os.path.join(PERSIST_DIR, "store.json")) # Persist global index store + assert index_name in self.index_map, f"No such index: '{index_name}' exists." 
+ + # Persist each index's storage context separately + storage_context = self.index_map[index_name].storage_context storage_context.persist(persist_dir=os.path.join(PERSIST_DIR, index_name)) From 746c1564d3a4f134293a9cf8edbb1745ad72a835 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 30 Sep 2024 21:55:07 -0700 Subject: [PATCH 14/42] feat: Add the load and list indexing functions Signed-off-by: ishaansehgal99 --- .../rag_service/vector_store/faiss_store.py | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index 8a89977a9..a5cc0e9f6 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -7,6 +7,7 @@ load_graph_from_storage, load_index_from_storage, load_indices_from_storage) from llama_index.core.storage.index_store import SimpleIndexStore +from llama_index.core.storage.docstore import RefDocInfo from llama_index.vector_stores.faiss import FaissVectorStore from llama_index.core.data_structs.data_structs import IndexStruct from models import Document @@ -130,13 +131,11 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ # Returns a list of booleans indicating whether each document was successfully refreshed. 
return refresh_results - def list_documents(self, index_name: str) -> Dict[str, Document]: + def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: """Lists all documents in the vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - pass - # return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - # for doc_id, doc in self.index_map[index_name].docstore.docs.items()} + return self.index_map[index_name].ref_doc_info def document_exists(self, doc_id: str, index_name: str) -> bool: """Checks if a document exists in the vector store.""" @@ -144,16 +143,45 @@ def document_exists(self, doc_id: str, index_name: str) -> bool: raise ValueError(f"No such index: '{index_name}' exists.") return doc_id in self.index_map[index_name].ref_doc_info + def _load_index_store(self): + """Loads the global SimpleIndexStore from disk.""" + store_path = os.path.join(PERSIST_DIR, "store.json") + + if not os.path.exists(store_path): + raise ValueError("No persisted index store found.") + + # Load the global index store from the persisted JSON + self.index_store = SimpleIndexStore.from_persist_path(store_path) + def _load_index(self, index_name: str): """Loads the existing FAISS index from disk.""" + # Load the global index store if it hasn't been loaded yet + if not self.index_store or not self.index_store.index_structs(): + self._load_index_store() + + # Now load the specific index persist_dir = os.path.join(PERSIST_DIR, index_name) + if not os.path.exists(persist_dir): raise ValueError(f"No persisted index found for '{index_name}'") + + # Load the vector store from the persisted directory vector_store = FaissVectorStore.from_persist_dir(persist_dir) + + # Create a new StorageContext using the loaded vector store storage_context = StorageContext.from_defaults( - vector_store=vector_store, persist_dir=persist_dir + vector_store=vector_store, + persist_dir=persist_dir # Ensure it uses the correct 
directory for persistence ) - self.index_map[index_name] = load_index_from_storage(storage_context=storage_context) + + # Load the VectorStoreIndex using the storage context + loaded_index = load_index_from_storage(storage_context=storage_context) + + # Set the index_id for the loaded index to the current index_name + loaded_index.set_index_id(index_name) + + # Update the in-memory index map with the loaded index + self.index_map[index_name] = loaded_index return self.index_map[index_name] def _persist(self, index_name: str): From 3a83f26904f6b4478e17a3f0617b10ae798b64d4 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 20:09:11 -0700 Subject: [PATCH 15/42] feat: Remove chromadb from PR Signed-off-by: ishaansehgal99 --- .../vector_store/chromadb_store.py | 111 ------------------ 1 file changed, 111 deletions(-) delete mode 100644 presets/rag_service/vector_store/chromadb_store.py diff --git a/presets/rag_service/vector_store/chromadb_store.py b/presets/rag_service/vector_store/chromadb_store.py deleted file mode 100644 index 927318202..000000000 --- a/presets/rag_service/vector_store/chromadb_store.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -from typing import Dict, List - -import chromadb -from llama_index.core import Document as LlamaDocument -from llama_index.core import (StorageContext, VectorStoreIndex, - load_index_from_storage) -from llama_index.vector_stores.chroma import ChromaVectorStore -from models import Document - -from config import PERSIST_DIR - -from .base import BaseVectorStore - - -class ChromaDBVectorStoreManager(BaseVectorStore): - def __init__(self, embed_model): - self.embed_model = embed_model - # Initialize ChromaDB client and collection - self.chroma_client = chromadb.EphemeralClient() - self.collection_name = "quickstart" - self.chroma_collection = self.chroma_client.create_collection(self.collection_name) - self.vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) - self.storage_context = 
StorageContext.from_defaults(vector_store=self.vector_store) - self.indices = {} # Use to store the in-memory index via namespace (e.g. namespace -> index) - - if not os.path.exists(PERSIST_DIR): - os.makedirs(PERSIST_DIR) - - def index_documents(self, documents: List[Document], index_name: str): - """Recreates the entire FAISS index and vector store with new documents.""" - llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above - self.indices[index_name] = VectorStoreIndex.from_documents(llama_docs, storage_context=self.storage_context, embed_model=self.embed_model) - self._persist() - # Return the document IDs that were indexed - return [doc.doc_id for doc in documents] - - def add_document(self, document: Document): - """Inserts a single document into the existing ChromaDB index.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index.insert(llama_doc) - self.storage_context.persist(persist_dir=PERSIST_DIR) - - def query(self, query: str, top_k: int): - """Queries the ChromaDB vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - query_engine = self.index.as_query_engine(top_k=top_k) - return query_engine.query(query) - - def delete_document(self, doc_id: str): - """Deletes a document from the ChromaDB vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - self.index.delete_ref_doc(doc_id, delete_from_docstore=True) - self.storage_context.persist(persist_dir=PERSIST_DIR) - - def update_document(self, document: Document): - """Updates an existing document in the ChromaDB vector store.""" - if self.index is None: - self.index = 
self._load_index() # Load if not already in memory - llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index.update_ref_doc(llama_doc) - self.storage_context.persist(persist_dir=PERSIST_DIR) - - def get_document(self, doc_id: str): - """Retrieves a document by its ID from ChromaDB.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - doc = self.index.docstore.get_document(doc_id) - if not doc: - raise ValueError(f"Document with ID {doc_id} not found.") - return doc - - def refresh_documents(self, documents: List[Document]) -> List[bool]: - """Updates existing documents and inserts new documents in the vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - refresh_results = self.index.refresh_ref_docs(llama_docs) - self._persist() - # Returns a list of booleans indicating whether each document was successfully refreshed. 
- return refresh_results - - def list_documents(self) -> Dict[str, Document]: - """Lists all documents in the ChromaDB vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - return {doc_id: Document(text=doc.text, metadata=doc.metadata, doc_id=doc_id) - for doc_id, doc in self.index.docstore.docs.items()} - - def document_exists(self, doc_id: str) -> bool: - """Checks if a document exists in the ChromaDB vector store.""" - if self.index is None: - self.index = self._load_index() # Load if not already in memory - return doc_id in self.index.docstore.docs - - def _load_index(self): - """Loads the existing ChromaDB index from disk.""" - vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection) - storage_context = StorageContext.from_defaults( - vector_store=vector_store, persist_dir=PERSIST_DIR - ) - return load_index_from_storage(storage_context=storage_context) - - def _persist(self): - """Saves the existing ChromaDB index to disk.""" - self.storage_context.persist(persist_dir=PERSIST_DIR) From cb80f3e04fa64c170363b0feab1f6756091c4525 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 20:09:55 -0700 Subject: [PATCH 16/42] feat: Add CustomLLM Inference Signed-off-by: ishaansehgal99 --- presets/rag_service/inference/__init__.py | 0 .../rag_service/inference/custom_inference.py | 43 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 presets/rag_service/inference/__init__.py create mode 100644 presets/rag_service/inference/custom_inference.py diff --git a/presets/rag_service/inference/__init__.py b/presets/rag_service/inference/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/inference/custom_inference.py b/presets/rag_service/inference/custom_inference.py new file mode 100644 index 000000000..11ed0ad25 --- /dev/null +++ b/presets/rag_service/inference/custom_inference.py @@ -0,0 +1,43 @@ +from typing import Any, 
Optional +from llama_index.core.llms import CustomLLM, CompletionResponse, LLMMetadata, CompletionResponseGen +from llama_index.llms.openai import OpenAI +from llama_index.core.llms.callbacks import llm_completion_callback +import requests +from config import INFERENCE_URL, INFERENCE_ACCESS_SECRET, RESPONSE_FIELD + +class CustomInference(CustomLLM): + + @llm_completion_callback() + def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: + pass + + @llm_completion_callback() + def complete(self, prompt: str, **kwargs) -> CompletionResponse: + if "openai" in INFERENCE_URL: + return self._openai_complete(prompt, **kwargs) + else: + return self._custom_api_complete(prompt, **kwargs) + + def _openai_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: + llm = OpenAI( + api_key=INFERENCE_ACCESS_SECRET, + **kwargs # Pass all kwargs directly; kwargs may include model, temperature, max_tokens, etc. + ) + return llm.complete(prompt) + + def _custom_api_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: + headers = {"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} + data = {"prompt": prompt, **kwargs} + + response = requests.post(INFERENCE_URL, json=data, headers=headers) + response_data = response.json() + + # Dynamically extract the field from the response based on the specified response_field + completion_text = response_data.get(RESPONSE_FIELD, "No response field found") + + return CompletionResponse(text=completion_text) + + @property + def metadata(self) -> LLMMetadata: + """Get LLM metadata.""" + return LLMMetadata() From a0d1186ae4c857cec7300daaf15cbe04b9fd9a86 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 20:10:33 -0700 Subject: [PATCH 17/42] fix: Introduce Custom LLM class and top_k query Signed-off-by: ishaansehgal99 --- presets/rag_service/vector_store/faiss_store.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/presets/rag_service/vector_store/faiss_store.py 
b/presets/rag_service/vector_store/faiss_store.py index a5cc0e9f6..f82ece9a1 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -7,10 +7,11 @@ load_graph_from_storage, load_index_from_storage, load_indices_from_storage) from llama_index.core.storage.index_store import SimpleIndexStore -from llama_index.core.storage.docstore import RefDocInfo +from llama_index.core.storage.docstore.types import RefDocInfo from llama_index.vector_stores.faiss import FaissVectorStore -from llama_index.core.data_structs.data_structs import IndexStruct + from models import Document +from inference.custom_inference import CustomInference from config import PERSIST_DIR @@ -39,6 +40,7 @@ def __init__(self, embedding_manager): """ self.index_map = {} # Used to store the in-memory index via namespace (e.g. namespace -> index) self.index_store = SimpleIndexStore() # Use to store global index metadata + self.llm = CustomInference() def index_documents(self, documents: List[Document], index_name: str): """Recreates the entire FAISS index and vector store with new documents.""" @@ -78,7 +80,7 @@ def query(self, query: str, top_k: int, index_name: str): """Queries the FAISS vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - query_engine = self.index_map[index_name].as_query_engine(top_k=top_k) + query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) return query_engine.query(query) def delete_document(self, doc_id: str, index_name: str): From 4c663877ac5489572c5d80f9734b253fb0aab92c Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 20:44:46 -0700 Subject: [PATCH 18/42] fix: Update tests to handle faiss delete not implemented yet Signed-off-by: ishaansehgal99 --- presets/rag_service/config.py | 5 +++ presets/rag_service/tests/test_faiss_store.py | 39 ++++++++++++++----- 
.../rag_service/vector_store/faiss_store.py | 15 +++++-- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/presets/rag_service/config.py b/presets/rag_service/config.py index e5086fed0..0745084f3 100644 --- a/presets/rag_service/config.py +++ b/presets/rag_service/config.py @@ -3,6 +3,11 @@ EMBEDDING_TYPE = os.getenv("EMBEDDING_TYPE", "local") EMBEDDING_URL = os.getenv("EMBEDDING_URL") + +INFERENCE_URL = os.getenv("INFERENCE_URL", "https://api.test.com/v1") +INFERENCE_ACCESS_SECRET = os.getenv("AccessSecret") +RESPONSE_FIELD = os.getenv("RESPONSE_FIELD", "result") + MODEL_ID = os.getenv("MODEL_ID", "BAAI/bge-small-en-v1.5") VECTOR_DB_TYPE = os.getenv("VECTOR_DB_TYPE", "faiss") INDEX_SERVICE_NAME = os.getenv("INDEX_SERVICE_NAME", "default-index-service") diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 1cf63bdcf..72cb9524d 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -1,12 +1,12 @@ import os from tempfile import TemporaryDirectory -from unittest.mock import MagicMock +from unittest.mock import patch import pytest from vector_store.faiss_store import FaissVectorStoreManager from models import Document from embedding.huggingface_local import LocalHuggingFaceEmbedding -from config import MODEL_ID +from config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET @pytest.fixture(scope='session') def init_embed_manager(): @@ -57,8 +57,15 @@ def test_index_documents_isolation(vector_store_manager): assert vector_store_manager.get_document(doc_1_id, index_name=index_name_2) is None, f"Document {doc_1_id} should not exist in {index_name_2}" assert vector_store_manager.get_document(doc_2_id, index_name=index_name_1) is None, f"Document {doc_2_id} should not exist in {index_name_1}" +@patch('requests.post') +def test_query_documents(mock_post, vector_store_manager): + # Define Mock Response for Custom Inference API + 
mock_response = { + "result": "This is the completion from the API" + } + + mock_post.return_value.json.return_value = mock_response -def test_query_documents(vector_store_manager): # Add documents to index documents = [ Document(doc_id="1", text="First document", metadata={"type": "text"}), @@ -68,13 +75,19 @@ def test_query_documents(vector_store_manager): # Mock query and results query_result = vector_store_manager.query("First", top_k=1, index_name="test_index") - + assert query_result is not None + assert query_result.response == "This is the completion from the API" + mock_post.assert_called_once_with( + INFERENCE_URL, + json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True}, + headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} + ) -def test_add_and_delete_document(vector_store_manager): - document = Document(doc_id="3", text="Third document", metadata={"type": "text"}) - vector_store_manager.index_documents([document], index_name="test_index") +def test_add_and_delete_document(vector_store_manager, capsys): + documents = [Document(doc_id="3", text="Third document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") # Add a document to the existing index new_document = Document(doc_id="4", text="Fourth document", metadata={"type": "text"}) @@ -83,8 +96,14 @@ def test_add_and_delete_document(vector_store_manager): # Assert that the document exists assert vector_store_manager.document_exists("4", "test_index") - # Delete the document + # Delete the document - it should handle the NotImplementedError and not raise an exception vector_store_manager.delete_document("4", "test_index") - # Assert that the document no longer exists - assert not vector_store_manager.document_exists("4", "test_index") + # Capture the 
printed output (if any) + captured = capsys.readouterr() + + # Check if the expected message about NotImplementedError was printed + assert "Delete not yet implemented for Faiss index. Skipping document 4." in captured.out + + # Assert that the document still exists (since deletion wasn't implemented) + assert vector_store_manager.document_exists("4", "test_index") diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index f82ece9a1..13c925737 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -64,7 +64,7 @@ def index_documents(self, documents: List[Document], index_name: str): index.set_index_id(index_name) # https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/indices/base.py#L138-L154 self.index_map[index_name] = index self.index_store.add_index_struct(index.index_struct) - self._persist(index_name) # TODO: Consider just persisting the index as opposed to shared index_store + self._persist(index_name) # Return the document IDs that were indexed return [doc.doc_id for doc in documents] @@ -87,8 +87,17 @@ def delete_document(self, doc_id: str, index_name: str): """Deletes a document from the FAISS vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - self.index_map[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) - self._persist(index_name) + if not self.document_exists(doc_id, index_name): + print(f"Document with ID {doc_id} not found in index '{index_name}'. Skipping.") + return + try: + self.index_map[index_name].delete_ref_doc(doc_id, delete_from_docstore=True) + except NotImplementedError as e: + print(f"Delete not yet implemented for Faiss index. Skipping document {doc_id}.") + except Exception as e: + print(f"Unable to Delete Document from the VectorStoreIndex. Skipping. 
Error: {e}") + finally: + self._persist(index_name) def update_document(self, document: Document, index_name: str): """Updates an existing document in the FAISS vector store.""" From 35b51133aff568df173010d2fc5317c9ba95cb5a Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 21:31:43 -0700 Subject: [PATCH 19/42] fix: Update tests to handle refresh documents Signed-off-by: ishaansehgal99 --- presets/rag_service/tests/test_faiss_store.py | 69 +++++++++++++++++++ .../rag_service/vector_store/faiss_store.py | 27 ++++++-- 2 files changed, 90 insertions(+), 6 deletions(-) diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 72cb9524d..0963e1a94 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -107,3 +107,72 @@ def test_add_and_delete_document(vector_store_manager, capsys): # Assert that the document still exists (since deletion wasn't implemented) assert vector_store_manager.document_exists("4", "test_index") + + +def test_update_document_not_implemented(vector_store_manager, capsys): + """Test that updating a document raises a NotImplementedError and is handled properly.""" + # Add a document to the index + documents = [Document(doc_id="1", text="First document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") + + # Attempt to update the existing document + updated_document = Document(doc_id="1", text="Updated first document", metadata={"type": "text"}) + vector_store_manager.update_document(updated_document, index_name="test_index") + + # Capture the printed output (if any) + captured = capsys.readouterr() + + # Check if the NotImplementedError message was printed + assert "Update is equivalent to deleting the document and then inserting it again." in captured.out + assert f"Update not yet implemented for Faiss index. Skipping document {updated_document.doc_id}." 
in captured.out + + # Ensure the document remains unchanged + original_doc = vector_store_manager.get_document("1", index_name="test_index") + assert original_doc is not None + + +def test_refresh_unchanged_documents(vector_store_manager, capsys): + """Test that refreshing documents does nothing on unchanged documents.""" + # Add documents to the index + documents = [Document(doc_id="1", text="First document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") + + refresh_results = vector_store_manager.refresh_documents(documents, index_name="test_index") + + # Capture the printed output (if any) + captured = capsys.readouterr() + assert captured.out == "" + assert refresh_results == [False, False] + +def test_refresh_new_documents(vector_store_manager): + """Test that refreshing new documents creates them.""" + vector_store_manager.index_documents([], index_name="test_index") + + # Add a document to the index + documents = [Document(doc_id="1", text="First document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", metadata={"type": "text"})] + + refresh_results = vector_store_manager.refresh_documents(documents, index_name="test_index") + + inserted_documents = vector_store_manager.list_documents(index_name="test_index") + + assert len(inserted_documents) == len(documents) + assert inserted_documents.keys() == {"1", "2"} + assert refresh_results == [True, True] + +def test_refresh_existing_documents(vector_store_manager, capsys): + """Test that refreshing existing documents prints error.""" + original_documents = [Document(doc_id="1", text="First document", metadata={"type": "text"})] + vector_store_manager.index_documents(original_documents, index_name="test_index") + + new_documents = [Document(doc_id="1", text="Updated document", metadata={"type": "text"}), + Document(doc_id="2", text="Second document", 
metadata={"type": "text"})] + + refresh_results = vector_store_manager.refresh_documents(new_documents, index_name="test_index") + + captured = capsys.readouterr() + + # Check if the NotImplementedError message was printed + assert "Refresh not yet fully implemented for index" in captured.out + assert not refresh_results diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index 13c925737..9e0922a97 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -104,8 +104,15 @@ def update_document(self, document: Document, index_name: str): if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) - self.index_map[index_name].update_ref_doc(llama_doc) - self._persist(index_name) + try: + self.index_map[index_name].update_ref_doc(llama_doc) + except NotImplementedError as e: + print("Update is equivalent to deleting the document and then inserting it again.") + print(f"Update not yet fully implemented for index. Skipping document {document.doc_id}. Error: {e}") + except Exception as e: + print(f"Unable to Update Document in the VectorStoreIndex. Skipping. 
Error: {e}") + finally: + self._persist(index_name) def get_document(self, doc_id: str, index_name: str): """Retrieves a document's RefDocInfo by its ID.""" @@ -136,11 +143,19 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ """Updates existing documents and inserts new documents in the vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") + llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] - refresh_results = self.index_map[index_name].refresh_ref_docs(llama_docs) - self._persist(index_name) - # Returns a list of booleans indicating whether each document was successfully refreshed. - return refresh_results + try: + refresh_results = self.index_map[index_name].refresh_ref_docs(llama_docs) + # Returns a list of booleans indicating whether each document was successfully refreshed. + return refresh_results + except NotImplementedError as e: + print(f"Refresh is equivalent to insertion and update, which is equivalent to deletion and insertion.") + print(f"Refresh not yet fully implemented for index '{index_name}'. Error: {e}") + except Exception as e: + print(f"Unable to Refresh Documents in the VectorStoreIndex for index '{index_name}'. 
Error: {e}") + finally: + self._persist(index_name) def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: """Lists all documents in the vector store.""" From 742485e662d9e85e4fca2a141976d484157b2570 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 1 Oct 2024 21:47:17 -0700 Subject: [PATCH 20/42] fix: Update tests for loading and persisting data Signed-off-by: ishaansehgal99 --- presets/rag_service/main.py | 9 ++++-- presets/rag_service/tests/test_faiss_store.py | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/presets/rag_service/main.py b/presets/rag_service/main.py index 97fed9151..49c77919e 100644 --- a/presets/rag_service/main.py +++ b/presets/rag_service/main.py @@ -2,6 +2,7 @@ from crud.operations import RAGOperations from embedding.huggingface_local import LocalHuggingFaceEmbedding +from embedding.huggingface_remote import RemoteHuggingFaceEmbedding from fastapi import FastAPI, HTTPException from models import (DocumentResponse, IndexRequest, ListDocumentsResponse, QueryRequest, RefreshRequest, UpdateRequest) @@ -12,10 +13,14 @@ app = FastAPI() # Initialize embedding model -embed_model = LocalHuggingFaceEmbedding(MODEL_ID) +if EMBEDDING_TYPE == "local": + embedding_manager = LocalHuggingFaceEmbedding(MODEL_ID) +elif EMBEDDING_TYPE == "remote": + embedding_manager = RemoteHuggingFaceEmbedding(MODEL_ID) # Initialize vector store -vector_store = FaissVectorStoreManager(embed_model=embed_model) +# TODO: Dynamically set VectorStore from EnvVars (which ultimately comes from CRD StorageSpec) +vector_store = FaissVectorStoreManager(embedding_manager) # Initialize RAG operations rag_ops = RAGOperations(vector_store) diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 0963e1a94..0196d5966 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -176,3 +176,34 @@ def 
test_refresh_existing_documents(vector_store_manager, capsys): # Check if the NotImplementedError message was printed assert "Refresh not yet fully implemented for index" in captured.out assert not refresh_results + +def test_persist_and_load_index_store(vector_store_manager): + """Test that the index store is persisted and loaded correctly.""" + # Add a document and persist the index + documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager._persist(index_name="test_index") + + # Simulate a fresh load of the index store (clearing in-memory state) + vector_store_manager.index_store = None # Clear current in-memory store + vector_store_manager._load_index_store() + + # Verify that the store was reloaded and contains the expected index structure + assert vector_store_manager.index_store is not None + assert len(vector_store_manager.index_store.index_structs()) > 0 + +# TODO: Prevent default re-indexing from load_index_from_storage +# def test_persist_and_load_index(vector_store_manager): +# """Test that an index is persisted and then loaded correctly.""" +# # Add a document and persist the index +# documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] +# vector_store_manager.index_documents(documents, index_name="test_index") +# vector_store_manager._persist(index_name="test_index") +# +# # Simulate a fresh load of the index (clearing in-memory state) +# vector_store_manager.index_map = {} # Clear current in-memory index map +# loaded_index = vector_store_manager._load_index(index_name="test_index") +# +# # Verify that the index was reloaded and contains the expected document +# assert loaded_index is not None +# assert vector_store_manager.document_exists("1", "test_index") From 51c70353dd986bf4f34057c12da3f33730fb54e3 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 2 Oct 2024 13:17:14 -0700 Subject: 
[PATCH 21/42] fix: Update tests for loading index Signed-off-by: ishaansehgal99 --- presets/rag_service/tests/test_faiss_store.py | 32 +++++++++++-------- .../rag_service/vector_store/faiss_store.py | 5 +-- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/presets/rag_service/tests/test_faiss_store.py b/presets/rag_service/tests/test_faiss_store.py index 0196d5966..3bace3f01 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/presets/rag_service/tests/test_faiss_store.py @@ -193,17 +193,21 @@ def test_persist_and_load_index_store(vector_store_manager): assert len(vector_store_manager.index_store.index_structs()) > 0 # TODO: Prevent default re-indexing from load_index_from_storage -# def test_persist_and_load_index(vector_store_manager): -# """Test that an index is persisted and then loaded correctly.""" -# # Add a document and persist the index -# documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] -# vector_store_manager.index_documents(documents, index_name="test_index") -# vector_store_manager._persist(index_name="test_index") -# -# # Simulate a fresh load of the index (clearing in-memory state) -# vector_store_manager.index_map = {} # Clear current in-memory index map -# loaded_index = vector_store_manager._load_index(index_name="test_index") -# -# # Verify that the index was reloaded and contains the expected document -# assert loaded_index is not None -# assert vector_store_manager.document_exists("1", "test_index") +def test_persist_and_load_index(vector_store_manager): + """Test that an index is persisted and then loaded correctly.""" + # Add a document and persist the index + documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, index_name="test_index") + + documents = [Document(doc_id="1", text="Another Test document", metadata={"type": "text"})] + vector_store_manager.index_documents(documents, 
index_name="another_test_index") + + vector_store_manager._persist(index_name="test_index") + + # Simulate a fresh load of the index (clearing in-memory state) + vector_store_manager.index_map = {} # Clear current in-memory index map + loaded_index = vector_store_manager._load_index(index_name="test_index") + + # Verify that the index was reloaded and contains the expected document + assert loaded_index is not None + assert vector_store_manager.document_exists("1", "test_index") diff --git a/presets/rag_service/vector_store/faiss_store.py b/presets/rag_service/vector_store/faiss_store.py index 9e0922a97..4ed314454 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/presets/rag_service/vector_store/faiss_store.py @@ -197,11 +197,12 @@ def _load_index(self, index_name: str): # Create a new StorageContext using the loaded vector store storage_context = StorageContext.from_defaults( vector_store=vector_store, - persist_dir=persist_dir # Ensure it uses the correct directory for persistence + index_store = self.index_store, + persist_dir=persist_dir, # Ensure it uses the correct directory for persistence ) # Load the VectorStoreIndex using the storage context - loaded_index = load_index_from_storage(storage_context=storage_context) + loaded_index = load_index_from_storage(storage_context=storage_context, embed_model=self.embed_model) # Set the index_id for the loaded index to the current index_name loaded_index.set_index_id(index_name) From 6e7b82731827bc19a357be33bbf90682bdb96802 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 2 Oct 2024 18:57:00 -0700 Subject: [PATCH 22/42] feat: Move to ragengine folder and remove unneeded CRUD operations (refresh, update, delete) --- presets/rag_service/config.py | 15 ------ .../__init__.py => ragengine/README.md | 0 .../crud => ragengine}/__init__.py | 0 .../embedding => ragengine/crud}/__init__.py | 0 .../crud/operations.py | 15 +++--- .../embedding}/__init__.py | 0 .../embedding/base.py | 0 
.../embedding/huggingface_local.py | 2 +- .../embedding/huggingface_remote.py | 0 .../tests => ragengine/inference}/__init__.py | 0 .../inference/custom_inference.py | 0 {presets/rag_service => ragengine}/main.py | 4 +- {presets/rag_service => ragengine}/models.py | 0 .../requirements.txt | 0 .../tests}/__init__.py | 0 .../tests/conftest.py | 0 .../tests/test_faiss_store.py | 1 + ragengine/vector_store/__init__.py | 0 .../vector_store/base.py | 12 +++-- .../vector_store/faiss_store.py | 51 ++++++++----------- ragengine/vector_store/playground/__init__.py | 0 .../playground/chromadb_playground.py | 0 22 files changed, 42 insertions(+), 58 deletions(-) delete mode 100644 presets/rag_service/config.py rename presets/rag_service/__init__.py => ragengine/README.md (100%) rename {presets/rag_service/crud => ragengine}/__init__.py (100%) rename {presets/rag_service/embedding => ragengine/crud}/__init__.py (100%) rename {presets/rag_service => ragengine}/crud/operations.py (85%) rename {presets/rag_service/inference => ragengine/embedding}/__init__.py (100%) rename {presets/rag_service => ragengine}/embedding/base.py (100%) rename {presets/rag_service => ragengine}/embedding/huggingface_local.py (93%) rename {presets/rag_service => ragengine}/embedding/huggingface_remote.py (100%) rename {presets/rag_service/tests => ragengine/inference}/__init__.py (100%) rename {presets/rag_service => ragengine}/inference/custom_inference.py (100%) rename {presets/rag_service => ragengine}/main.py (99%) rename {presets/rag_service => ragengine}/models.py (100%) rename {presets/rag_service => ragengine}/requirements.txt (100%) rename {presets/rag_service/vector_store => ragengine/tests}/__init__.py (100%) rename {presets/rag_service => ragengine}/tests/conftest.py (100%) rename {presets/rag_service => ragengine}/tests/test_faiss_store.py (99%) create mode 100644 ragengine/vector_store/__init__.py rename {presets/rag_service => ragengine}/vector_store/base.py (98%) rename 
{presets/rag_service => ragengine}/vector_store/faiss_store.py (93%) create mode 100644 ragengine/vector_store/playground/__init__.py rename {presets/rag_service => ragengine}/vector_store/playground/chromadb_playground.py (100%) diff --git a/presets/rag_service/config.py b/presets/rag_service/config.py deleted file mode 100644 index 0745084f3..000000000 --- a/presets/rag_service/config.py +++ /dev/null @@ -1,15 +0,0 @@ -# config.py -import os - -EMBEDDING_TYPE = os.getenv("EMBEDDING_TYPE", "local") -EMBEDDING_URL = os.getenv("EMBEDDING_URL") - -INFERENCE_URL = os.getenv("INFERENCE_URL", "https://api.test.com/v1") -INFERENCE_ACCESS_SECRET = os.getenv("AccessSecret") -RESPONSE_FIELD = os.getenv("RESPONSE_FIELD", "result") - -MODEL_ID = os.getenv("MODEL_ID", "BAAI/bge-small-en-v1.5") -VECTOR_DB_TYPE = os.getenv("VECTOR_DB_TYPE", "faiss") -INDEX_SERVICE_NAME = os.getenv("INDEX_SERVICE_NAME", "default-index-service") -ACCESS_SECRET = os.getenv("ACCESS_SECRET") -PERSIST_DIR = "./storage" \ No newline at end of file diff --git a/presets/rag_service/__init__.py b/ragengine/README.md similarity index 100% rename from presets/rag_service/__init__.py rename to ragengine/README.md diff --git a/presets/rag_service/crud/__init__.py b/ragengine/__init__.py similarity index 100% rename from presets/rag_service/crud/__init__.py rename to ragengine/__init__.py diff --git a/presets/rag_service/embedding/__init__.py b/ragengine/crud/__init__.py similarity index 100% rename from presets/rag_service/embedding/__init__.py rename to ragengine/crud/__init__.py diff --git a/presets/rag_service/crud/operations.py b/ragengine/crud/operations.py similarity index 85% rename from presets/rag_service/crud/operations.py rename to ragengine/crud/operations.py index 5218e4508..de45974dc 100644 --- a/presets/rag_service/crud/operations.py +++ b/ragengine/crud/operations.py @@ -4,7 +4,7 @@ from vector_store.base import BaseVectorStore -class RAGOperations: +class VectorStoreManager: def 
__init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store @@ -16,8 +16,9 @@ def read(self, query: str, top_k: int): """Query the indexed documents.""" return self.vector_store.query(query, top_k) + """ def update(self, documents: List[Document]) -> Dict[str, List[str]]: - """Update existing documents, or insert new ones if they don’t exist.""" + # Update existing documents, or insert new ones if they don’t exist. updated_docs = [] new_docs = [] for doc in documents: @@ -30,8 +31,13 @@ def update(self, documents: List[Document]) -> Dict[str, List[str]]: return {"updated": updated_docs, "inserted": new_docs} def delete(self, doc_id: str): - """Delete a document by ID.""" + # Delete a document by ID. return self.vector_store.delete_document(doc_id) + + def refresh(self, documents: List[Document]) -> List[bool]: + # Refresh Documents. + return self.vector_store.refresh_documents(documents) + """ def get(self, doc_id: str) -> Document: """Retrieve a document by ID.""" @@ -41,6 +47,3 @@ def list_all(self) -> Dict[str, Document]: """List all documents.""" return self.vector_store.list_documents() - def refresh(self, documents: List[Document]) -> List[bool]: - """Refresh Documents.""" - return self.vector_store.refresh_documents(documents) diff --git a/presets/rag_service/inference/__init__.py b/ragengine/embedding/__init__.py similarity index 100% rename from presets/rag_service/inference/__init__.py rename to ragengine/embedding/__init__.py diff --git a/presets/rag_service/embedding/base.py b/ragengine/embedding/base.py similarity index 100% rename from presets/rag_service/embedding/base.py rename to ragengine/embedding/base.py diff --git a/presets/rag_service/embedding/huggingface_local.py b/ragengine/embedding/huggingface_local.py similarity index 93% rename from presets/rag_service/embedding/huggingface_local.py rename to ragengine/embedding/huggingface_local.py index cf58c7a3e..3dab51e9a 100644 --- 
a/presets/rag_service/embedding/huggingface_local.py +++ b/ragengine/embedding/huggingface_local.py @@ -5,7 +5,7 @@ class LocalHuggingFaceEmbedding(BaseEmbeddingModel): def __init__(self, model_name: str): - self.model = HuggingFaceEmbedding(model_name=model_name) + self.model = HuggingFaceEmbedding(model_name=model_name) # TODO: Ensure/test loads on GPU (when available) def get_text_embedding(self, text: str): """Returns the text embedding for a given input string.""" diff --git a/presets/rag_service/embedding/huggingface_remote.py b/ragengine/embedding/huggingface_remote.py similarity index 100% rename from presets/rag_service/embedding/huggingface_remote.py rename to ragengine/embedding/huggingface_remote.py diff --git a/presets/rag_service/tests/__init__.py b/ragengine/inference/__init__.py similarity index 100% rename from presets/rag_service/tests/__init__.py rename to ragengine/inference/__init__.py diff --git a/presets/rag_service/inference/custom_inference.py b/ragengine/inference/custom_inference.py similarity index 100% rename from presets/rag_service/inference/custom_inference.py rename to ragengine/inference/custom_inference.py diff --git a/presets/rag_service/main.py b/ragengine/main.py similarity index 99% rename from presets/rag_service/main.py rename to ragengine/main.py index 49c77919e..350aa1a57 100644 --- a/presets/rag_service/main.py +++ b/ragengine/main.py @@ -41,6 +41,7 @@ async def query_index(request: QueryRequest): # TODO: Research async/sync what t except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +""" @app.put("/update", response_model=Dict[str, List[str]]) async def update_documents(request: UpdateRequest): try: @@ -56,7 +57,7 @@ async def refresh_documents(request: RefreshRequest): return result except Exception as e: raise HTTPException(status_code=500, detail=str(e)) - + @app.delete("/document/{doc_id}") async def delete_document(doc_id: str): try: @@ -64,6 +65,7 @@ async def delete_document(doc_id: str): 
return {"message": "Document deleted successfully"} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +""" @app.get("/document/{doc_id}", response_model=DocumentResponse) async def get_document(doc_id: str): diff --git a/presets/rag_service/models.py b/ragengine/models.py similarity index 100% rename from presets/rag_service/models.py rename to ragengine/models.py diff --git a/presets/rag_service/requirements.txt b/ragengine/requirements.txt similarity index 100% rename from presets/rag_service/requirements.txt rename to ragengine/requirements.txt diff --git a/presets/rag_service/vector_store/__init__.py b/ragengine/tests/__init__.py similarity index 100% rename from presets/rag_service/vector_store/__init__.py rename to ragengine/tests/__init__.py diff --git a/presets/rag_service/tests/conftest.py b/ragengine/tests/conftest.py similarity index 100% rename from presets/rag_service/tests/conftest.py rename to ragengine/tests/conftest.py diff --git a/presets/rag_service/tests/test_faiss_store.py b/ragengine/tests/test_faiss_store.py similarity index 99% rename from presets/rag_service/tests/test_faiss_store.py rename to ragengine/tests/test_faiss_store.py index 3bace3f01..eeaab1069 100644 --- a/presets/rag_service/tests/test_faiss_store.py +++ b/ragengine/tests/test_faiss_store.py @@ -81,6 +81,7 @@ def test_query_documents(mock_post, vector_store_manager): mock_post.assert_called_once_with( INFERENCE_URL, + # Auto-Generated by LlamaIndex json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True}, headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} ) diff --git a/ragengine/vector_store/__init__.py b/ragengine/vector_store/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/vector_store/base.py 
b/ragengine/vector_store/base.py similarity index 98% rename from presets/rag_service/vector_store/base.py rename to ragengine/vector_store/base.py index d9b92315c..789afe08a 100644 --- a/presets/rag_service/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -17,28 +17,30 @@ def query(self, query: str, top_k: int, index_name: str): def add_document(self, document: Document, index_name: str): pass + """ @abstractmethod def delete_document(self, doc_id: str, index_name: str): pass - + @abstractmethod def update_document(self, document: Document, index_name: str) -> str: pass @abstractmethod - def get_document(self, doc_id: str, index_name: str) -> Document: + def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: pass + """ @abstractmethod - def list_documents(self, index_name: str) -> Dict[str, Document]: + def get_document(self, doc_id: str, index_name: str) -> Document: pass @abstractmethod - def document_exists(self, doc_id: str, index_name: str) -> bool: + def list_documents(self, index_name: str) -> Dict[str, Document]: pass @abstractmethod - def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: + def document_exists(self, doc_id: str, index_name: str) -> bool: pass @abstractmethod diff --git a/presets/rag_service/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py similarity index 93% rename from presets/rag_service/vector_store/faiss_store.py rename to ragengine/vector_store/faiss_store.py index 4ed314454..63f1f535c 100644 --- a/presets/rag_service/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -23,7 +23,7 @@ def __init__(self, embedding_manager): self.embedding_manager = embedding_manager self.embed_model = self.embedding_manager.model self.dimension = self.embedding_manager.get_embedding_dimension() - # TODO: Consider allowing user custom indexing method e.g. + # TODO: Consider allowing user custom indexing method (would require configmap?) 
e.g. """ # Choose the FAISS index type based on the provided index_method if index_method == 'FlatL2': @@ -38,7 +38,7 @@ def __init__(self, embedding_manager): else: raise ValueError(f"Unknown index method: {index_method}") """ - self.index_map = {} # Used to store the in-memory index via namespace (e.g. namespace -> index) + self.index_map = {} # Used to store the in-memory index via namespace (e.g. index_name -> VectorStoreIndex) self.index_store = SimpleIndexStore() # Use to store global index metadata self.llm = CustomInference() @@ -83,8 +83,23 @@ def query(self, query: str, top_k: int, index_name: str): query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) return query_engine.query(query) + def get_document(self, doc_id: str, index_name: str): + """Retrieves a document's RefDocInfo by its ID.""" + if index_name not in self.index_map: + raise ValueError(f"No such index: '{index_name}' exists.") + + # Try to retrieve the RefDocInfo associated with the doc_id + ref_doc_info = self.index_map[index_name].ref_doc_info.get(doc_id) + + if ref_doc_info is None: + print(f"Document with ID {doc_id} not found in index '{index_name}'.") + return None + + return ref_doc_info + + """ def delete_document(self, doc_id: str, index_name: str): - """Deletes a document from the FAISS vector store.""" + # Deletes a document from the FAISS vector store. if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") if not self.document_exists(doc_id, index_name): @@ -100,7 +115,7 @@ def delete_document(self, doc_id: str, index_name: str): self._persist(index_name) def update_document(self, document: Document, index_name: str): - """Updates an existing document in the FAISS vector store.""" + # Updates an existing document in the FAISS vector store. 
if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) @@ -114,33 +129,8 @@ def update_document(self, document: Document, index_name: str): finally: self._persist(index_name) - def get_document(self, doc_id: str, index_name: str): - """Retrieves a document's RefDocInfo by its ID.""" - if index_name not in self.index_map: - raise ValueError(f"No such index: '{index_name}' exists.") - - # Try to retrieve the RefDocInfo associated with the doc_id - ref_doc_info = self.index_map[index_name].ref_doc_info.get(doc_id) - - if ref_doc_info is None: - print(f"Document with ID {doc_id} not found in index '{index_name}'.") - return None - - return ref_doc_info - - def get_nodes_by_ref_doc_id(self, doc_id: str, index_name: str): - """Retrieve nodes associated with a given document's ref ID.""" - if index_name not in self.index_map: - raise ValueError(f"No such index: '{index_name}' exists.") - - ref_doc_info = self.get_document(doc_id, index_name) - if ref_doc_info is None: - return None - - return ref_doc_info.node_ids - def refresh_documents(self, documents: List[Document], index_name: str) -> List[bool]: - """Updates existing documents and inserts new documents in the vector store.""" + # Updates existing documents and inserts new documents in the vector store. if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") @@ -156,6 +146,7 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ print(f"Unable to Refresh Documents in the VectorStoreIndex for index '{index_name}'. 
Error: {e}") finally: self._persist(index_name) + """ def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: """Lists all documents in the vector store.""" diff --git a/ragengine/vector_store/playground/__init__.py b/ragengine/vector_store/playground/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/presets/rag_service/vector_store/playground/chromadb_playground.py b/ragengine/vector_store/playground/chromadb_playground.py similarity index 100% rename from presets/rag_service/vector_store/playground/chromadb_playground.py rename to ragengine/vector_store/playground/chromadb_playground.py From aaaa21b3e695964f97e03212307ae2da83308c8c Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 2 Oct 2024 19:01:13 -0700 Subject: [PATCH 23/42] fix: Update to include rag unit tests Signed-off-by: ishaansehgal99 --- .github/workflows/tests.yml | 4 +++- Makefile | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a2c16bbc8..5015d742f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -45,8 +45,10 @@ jobs: - name: Run unit tests & Generate coverage run: | make unit-test + make rag-service-test + make tuning-metrics-server-test - - name: Run inference api unit tests + - name: Run inference api e2e tests run: | make inference-api-e2e diff --git a/Makefile b/Makefile index 59d09d7e3..025a0562f 100644 --- a/Makefile +++ b/Makefile @@ -96,13 +96,23 @@ unit-test: ## Run unit tests. 
-race -coverprofile=coverage.txt -covermode=atomic go tool cover -func=coverage.txt +.PHONY: rag-service-test +rag-service-test: + pip install -r presets/rag_service/requirements.txt + pytest -o log_cli=true -o log_cli_level=INFO presets/rag_service/tests + +.PHONY: tuning-metrics-server-test +tuning-metrics-server-test: + pytest -o log_cli=true -o log_cli_level=INFO presets/tuning/text-generation/metrics + ## -------------------------------------- ## E2E tests ## -------------------------------------- -inference-api-e2e: +.PHONY: inference-api-e2e +inference-api-e2e: pip install -r presets/inference/text-generation/requirements.txt - pytest -o log_cli=true -o log_cli_level=INFO . + pytest -o log_cli=true -o log_cli_level=INFO presets/inference/text-generation/tests # Ginkgo configurations GINKGO_FOCUS ?= From be9d6ed89376999f35f8145b5348a7e7c2e6f41a Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 3 Oct 2024 18:00:08 -0700 Subject: [PATCH 24/42] fix: Update persisting and loading logic Signed-off-by: ishaansehgal99 --- ragengine/tests/test_faiss_store.py | 17 +++++++--- ragengine/vector_store/faiss_store.py | 34 ++++++++++++++----- .../__init__.py | 0 .../manager.py} | 0 4 files changed, 39 insertions(+), 12 deletions(-) rename ragengine/{crud => vector_store_manager}/__init__.py (100%) rename ragengine/{crud/operations.py => vector_store_manager/manager.py} (100%) diff --git a/ragengine/tests/test_faiss_store.py b/ragengine/tests/test_faiss_store.py index eeaab1069..5d0a2f1fd 100644 --- a/ragengine/tests/test_faiss_store.py +++ b/ragengine/tests/test_faiss_store.py @@ -3,7 +3,7 @@ from unittest.mock import patch import pytest -from vector_store.faiss_store import FaissVectorStoreManager +from vector_store.faiss_store import FaissVectorStoreHandler from models import Document from embedding.huggingface_local import LocalHuggingFaceEmbedding from config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET @@ -18,7 +18,7 @@ def 
vector_store_manager(init_embed_manager): print(f"Saving Temporary Test Storage at: {temp_dir}") # Mock the persistence directory os.environ['PERSIST_DIR'] = temp_dir - yield FaissVectorStoreManager(init_embed_manager) + yield FaissVectorStoreHandler(init_embed_manager) def test_index_documents(vector_store_manager): @@ -203,12 +203,21 @@ def test_persist_and_load_index(vector_store_manager): documents = [Document(doc_id="1", text="Another Test document", metadata={"type": "text"})] vector_store_manager.index_documents(documents, index_name="another_test_index") - vector_store_manager._persist(index_name="test_index") + vector_store_manager._persist_all() # Simulate a fresh load of the index (clearing in-memory state) vector_store_manager.index_map = {} # Clear current in-memory index map - loaded_index = vector_store_manager._load_index(index_name="test_index") + loaded_indices = vector_store_manager._load_indices() # Verify that the index was reloaded and contains the expected document + assert loaded_indices is not None + assert vector_store_manager.document_exists("1", "test_index") + assert vector_store_manager.document_exists("1", "another_test_index") + + vector_store_manager.index_map = {} # Clear current in-memory index map + loaded_index = vector_store_manager._load_index(index_name="test_index") + assert loaded_index is not None assert vector_store_manager.document_exists("1", "test_index") + assert not vector_store_manager.document_exists("1", "another_test_index") # Since we didn't load this index + diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index 63f1f535c..13a362218 100644 --- a/ragengine/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -18,7 +18,7 @@ from .base import BaseVectorStore -class FaissVectorStoreManager(BaseVectorStore): +class FaissVectorStoreHandler(BaseVectorStore): def __init__(self, embedding_manager): self.embedding_manager = embedding_manager self.embed_model 
= self.embedding_manager.model @@ -157,7 +157,8 @@ def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: def document_exists(self, doc_id: str, index_name: str) -> bool: """Checks if a document exists in the vector store.""" if index_name not in self.index_map: - raise ValueError(f"No such index: '{index_name}' exists.") + print(f"No such index: '{index_name}' exists in vector store.") + return False return doc_id in self.index_map[index_name].ref_doc_info def _load_index_store(self): @@ -170,8 +171,22 @@ def _load_index_store(self): # Load the global index store from the persisted JSON self.index_store = SimpleIndexStore.from_persist_path(store_path) + def _load_indices(self): + """Loads the existing indices from disk.""" + # Load the global index store if it hasn't been loaded yet + if not self.index_store or not self.index_store.index_structs(): + self._load_index_store() + + if not os.path.exists(PERSIST_DIR): + raise ValueError(f"No persisted index found in '{PERSIST_DIR}'") + + for idx in self.index_store.index_structs(): + self._load_index(idx.index_id) + + return self.index_map + def _load_index(self, index_name: str): - """Loads the existing FAISS index from disk.""" + """Loads the existing index from disk.""" # Load the global index store if it hasn't been loaded yet if not self.index_store or not self.index_store.index_structs(): self._load_index_store() @@ -188,20 +203,21 @@ def _load_index(self, index_name: str): # Create a new StorageContext using the loaded vector store storage_context = StorageContext.from_defaults( vector_store=vector_store, - index_store = self.index_store, persist_dir=persist_dir, # Ensure it uses the correct directory for persistence ) # Load the VectorStoreIndex using the storage context loaded_index = load_index_from_storage(storage_context=storage_context, embed_model=self.embed_model) - # Set the index_id for the loaded index to the current index_name - loaded_index.set_index_id(index_name) - # Update the 
in-memory index map with the loaded index self.index_map[index_name] = loaded_index return self.index_map[index_name] + def _persist_all(self): + self.index_store.persist(os.path.join(PERSIST_DIR, "store.json")) # Persist global index store + for idx in self.index_store.index_structs(): + self._persist(idx.index_id) + def _persist(self, index_name: str): """Saves the existing FAISS index to disk.""" self.index_store.persist(os.path.join(PERSIST_DIR, "store.json")) # Persist global index store @@ -209,4 +225,6 @@ def _persist(self, index_name: str): # Persist each index's storage context separately storage_context = self.index_map[index_name].storage_context - storage_context.persist(persist_dir=os.path.join(PERSIST_DIR, index_name)) + storage_context.persist( + persist_dir=os.path.join(PERSIST_DIR, index_name) + ) diff --git a/ragengine/crud/__init__.py b/ragengine/vector_store_manager/__init__.py similarity index 100% rename from ragengine/crud/__init__.py rename to ragengine/vector_store_manager/__init__.py diff --git a/ragengine/crud/operations.py b/ragengine/vector_store_manager/manager.py similarity index 100% rename from ragengine/crud/operations.py rename to ragengine/vector_store_manager/manager.py From cf24953696f1eed37fa43a901b7d31ec91fb5c98 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 7 Oct 2024 16:14:37 -0700 Subject: [PATCH 25/42] feat: Custom params for llm --- ragengine/inference/custom_inference.py | 19 +++++++++++++++---- ragengine/models.py | 1 + ragengine/tests/test_faiss_store.py | 6 +++--- ragengine/vector_store/base.py | 2 +- ragengine/vector_store/faiss_store.py | 4 +++- ragengine/vector_store_manager/manager.py | 4 ++-- 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/ragengine/inference/custom_inference.py b/ragengine/inference/custom_inference.py index 11ed0ad25..5e49e04b6 100644 --- a/ragengine/inference/custom_inference.py +++ b/ragengine/inference/custom_inference.py @@ -6,6 +6,13 @@ from config import 
INFERENCE_URL, INFERENCE_ACCESS_SECRET, RESPONSE_FIELD class CustomInference(CustomLLM): + params: dict = {} + + def set_params(self, params: dict) -> None: + self.params = params + + def get_param(self, key, default=None): + return self.params.get(key, default) @llm_completion_callback() def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: @@ -13,10 +20,14 @@ def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: @llm_completion_callback() def complete(self, prompt: str, **kwargs) -> CompletionResponse: - if "openai" in INFERENCE_URL: - return self._openai_complete(prompt, **kwargs) - else: - return self._custom_api_complete(prompt, **kwargs) + try: + if "openai" in INFERENCE_URL: + return self._openai_complete(prompt, **kwargs, **self.params) + else: + return self._custom_api_complete(prompt, **kwargs, **self.params) + finally: + # Clear params after the completion is done + self.params = {} def _openai_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: llm = OpenAI( diff --git a/ragengine/models.py b/ragengine/models.py index a1d21537b..69f913712 100644 --- a/ragengine/models.py +++ b/ragengine/models.py @@ -14,6 +14,7 @@ class IndexRequest(BaseModel): class QueryRequest(BaseModel): query: str top_k: int = 10 + params: Optional[Dict] = None # Accept a dictionary for parameters class UpdateRequest(BaseModel): documents: List[Document] diff --git a/ragengine/tests/test_faiss_store.py b/ragengine/tests/test_faiss_store.py index 5d0a2f1fd..650ee03cc 100644 --- a/ragengine/tests/test_faiss_store.py +++ b/ragengine/tests/test_faiss_store.py @@ -20,7 +20,6 @@ def vector_store_manager(init_embed_manager): os.environ['PERSIST_DIR'] = temp_dir yield FaissVectorStoreHandler(init_embed_manager) - def test_index_documents(vector_store_manager): documents = [ Document(doc_id="1", text="First document", metadata={"type": "text"}), @@ -73,8 +72,9 @@ def test_query_documents(mock_post, vector_store_manager): ] 
vector_store_manager.index_documents(documents, index_name="test_index") + params = {"temperature": 0.7} # Mock query and results - query_result = vector_store_manager.query("First", top_k=1, index_name="test_index") + query_result = vector_store_manager.query("First", top_k=1, index_name="test_index", params=params) assert query_result is not None assert query_result.response == "This is the completion from the API" @@ -82,7 +82,7 @@ def test_query_documents(mock_post, vector_store_manager): mock_post.assert_called_once_with( INFERENCE_URL, # Auto-Generated by LlamaIndex - json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True}, + json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True, 'temperature': 0.7}, headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} ) diff --git a/ragengine/vector_store/base.py b/ragengine/vector_store/base.py index 789afe08a..74254f2e3 100644 --- a/ragengine/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -10,7 +10,7 @@ def index_documents(self, documents: List[Document], index_name: str) -> List[st pass @abstractmethod - def query(self, query: str, top_k: int, index_name: str): + def query(self, query: str, top_k: int, index_name: str, params: dict): pass @abstractmethod diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index 13a362218..ca9f50f31 100644 --- a/ragengine/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -76,10 +76,12 @@ def add_document(self, document: Document, index_name: str): self.index_map[index_name].insert(llama_doc) self._persist(index_name) - def query(self, 
query: str, top_k: int, index_name: str): + def query(self, query: str, top_k: int, index_name: str, params: dict): """Queries the FAISS vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") + self.llm.set_params(params) + query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) return query_engine.query(query) diff --git a/ragengine/vector_store_manager/manager.py b/ragengine/vector_store_manager/manager.py index de45974dc..d6a17229b 100644 --- a/ragengine/vector_store_manager/manager.py +++ b/ragengine/vector_store_manager/manager.py @@ -12,9 +12,9 @@ def create(self, documents: List[Document]) -> List[str]: """Index new documents.""" return self.vector_store.index_documents(documents) - def read(self, query: str, top_k: int): + def read(self, query: str, top_k: int, params: dict): """Query the indexed documents.""" - return self.vector_store.query(query, top_k) + return self.vector_store.query(query, top_k, params) """ def update(self, documents: List[Document]) -> Dict[str, List[str]]: From eff5b3778a5bb835d78b6c3610ef07847f5931c0 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 14:55:30 -0700 Subject: [PATCH 26/42] feat: massive update, improvements all across service and enhanced unit testing --- ragengine/main.py | 72 ++++++--- ragengine/models.py | 17 +-- ragengine/tests/api/__init__.py | 0 ragengine/tests/{ => api}/conftest.py | 2 +- ragengine/tests/api/test_main.py | 140 ++++++++++++++++++ ragengine/tests/vector_store/__init__.py | 0 ragengine/tests/vector_store/conftest.py | 6 + .../{ => vector_store}/test_faiss_store.py | 78 +++++----- ragengine/vector_store/base.py | 21 +-- ragengine/vector_store/faiss_store.py | 38 +++-- ragengine/vector_store_manager/manager.py | 18 +-- 11 files changed, 272 insertions(+), 120 deletions(-) create mode 100644 ragengine/tests/api/__init__.py rename ragengine/tests/{ => api}/conftest.py (94%) create mode 
100644 ragengine/tests/api/test_main.py create mode 100644 ragengine/tests/vector_store/__init__.py create mode 100644 ragengine/tests/vector_store/conftest.py rename ragengine/tests/{ => vector_store}/test_faiss_store.py (70%) diff --git a/ragengine/main.py b/ragengine/main.py index 350aa1a57..bcecfb966 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -1,42 +1,52 @@ from typing import Dict, List -from crud.operations import RAGOperations +from llama_index.core.schema import TextNode + +from vector_store_manager.manager import VectorStoreManager from embedding.huggingface_local import LocalHuggingFaceEmbedding from embedding.huggingface_remote import RemoteHuggingFaceEmbedding +from llama_index.core.storage.docstore.types import RefDocInfo from fastapi import FastAPI, HTTPException -from models import (DocumentResponse, IndexRequest, ListDocumentsResponse, - QueryRequest, RefreshRequest, UpdateRequest) -from vector_store.faiss_store import FaissVectorStoreManager +from models import (IndexRequest, ListDocumentsResponse, + QueryRequest, Document) +from vector_store.faiss_store import FaissVectorStoreHandler from config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID app = FastAPI() # Initialize embedding model -if EMBEDDING_TYPE == "local": +if EMBEDDING_TYPE.lower() == "local": embedding_manager = LocalHuggingFaceEmbedding(MODEL_ID) -elif EMBEDDING_TYPE == "remote": - embedding_manager = RemoteHuggingFaceEmbedding(MODEL_ID) +elif EMBEDDING_TYPE.lower() == "remote": + embedding_manager = RemoteHuggingFaceEmbedding(MODEL_ID, ACCESS_SECRET) +else: + raise ValueError("Invalid Embedding Type Specified (Must be Local or Remote)") # Initialize vector store # TODO: Dynamically set VectorStore from EnvVars (which ultimately comes from CRD StorageSpec) -vector_store = FaissVectorStoreManager(embedding_manager) +vector_store_handler = FaissVectorStoreHandler(embedding_manager) # Initialize RAG operations -rag_ops = RAGOperations(vector_store) +rag_ops = 
VectorStoreManager(vector_store_handler) -@app.post("/index", response_model=List[str]) -async def index_documents(request: IndexRequest): +@app.post("/index", response_model=List[Document]) +async def index_documents(request: IndexRequest): # TODO: Research async/sync what to use (inference is calling) try: - doc_ids = rag_ops.create(request.documents) - return doc_ids + doc_ids = rag_ops.create(request.index_name, request.documents) + documents = [ + Document(doc_id=doc_id, text=doc.text, metadata=doc.metadata) + for doc_id, doc in zip(doc_ids, request.documents) + ] + return documents except Exception as e: raise HTTPException(status_code=500, detail=str(e)) -@app.post("/query") -async def query_index(request: QueryRequest): # TODO: Research async/sync what to use (inference is calling) +@app.post("/query", response_model=Dict[str, str]) +async def query_index(request: QueryRequest): try: - response = rag_ops.read(request.query, request.top_k) + llm_params = request.llm_params or {} # Default to empty dict if no params provided + response = rag_ops.read(request.index_name, request.query, request.top_k, llm_params) return {"response": str(response)} except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -67,19 +77,33 @@ async def delete_document(doc_id: str): raise HTTPException(status_code=500, detail=str(e)) """ -@app.get("/document/{doc_id}", response_model=DocumentResponse) -async def get_document(doc_id: str): +@app.get("/document/{index_name}/{doc_id}", response_model=RefDocInfo) +async def get_document(index_name: str, doc_id: str): try: - document = rag_ops.get(doc_id) - return DocumentResponse(doc_id=doc_id, document=document) + document = rag_ops.get(index_name, doc_id) + if document: + return document + else: + raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} " + f"not found in index '{index_name}'.") + except ValueError as ve: + raise HTTPException(status_code=404, detail=str(ve)) except Exception as e: 
raise HTTPException(status_code=500, detail=str(e)) -@app.get("/documents", response_model=ListDocumentsResponse) -async def list_documents(): +@app.get("/indexed-documents", response_model=ListDocumentsResponse) +async def list_all_indexed_documents(): try: - documents = rag_ops.list_all() - return ListDocumentsResponse(documents=documents) + documents = rag_ops.list_all_indexed_documents() + serialized_documents = { + index_name: { + doc_name: { + "text": doc_info.text, "hash": doc_info.hash + } for doc_name, doc_info in vector_store_index.docstore.docs.items() + } + for index_name, vector_store_index in documents.items() + } + return ListDocumentsResponse(documents=serialized_documents) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) diff --git a/ragengine/models.py b/ragengine/models.py index 69f913712..c6bc8f723 100644 --- a/ragengine/models.py +++ b/ragengine/models.py @@ -2,29 +2,20 @@ from pydantic import BaseModel - class Document(BaseModel): text: str metadata: Optional[dict] = {} doc_id: Optional[str] = None class IndexRequest(BaseModel): + index_name: str documents: List[Document] class QueryRequest(BaseModel): + index_name: str query: str top_k: int = 10 - params: Optional[Dict] = None # Accept a dictionary for parameters - -class UpdateRequest(BaseModel): - documents: List[Document] - -class RefreshRequest(BaseModel): - documents: List[Document] - -class DocumentResponse(BaseModel): - doc_id: str - document: Document + llm_params: Optional[Dict] = None # Accept a dictionary for parameters class ListDocumentsResponse(BaseModel): - documents: Dict[str, Document] \ No newline at end of file + documents:Dict[str, Dict[str, Dict[str, str]]] diff --git a/ragengine/tests/api/__init__.py b/ragengine/tests/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ragengine/tests/conftest.py b/ragengine/tests/api/conftest.py similarity index 94% rename from ragengine/tests/conftest.py rename to 
ragengine/tests/api/conftest.py index afb6c4713..08ad12a74 100644 --- a/ragengine/tests/conftest.py +++ b/ragengine/tests/api/conftest.py @@ -1,6 +1,6 @@ import sys import os -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Force CPU-only execution for testing os.environ["OMP_NUM_THREADS"] = "1" # Force single-threaded for testing to prevent segfault while loading embedding model os.environ["MKL_NUM_THREADS"] = "1" # Force MKL to use a single thread diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py new file mode 100644 index 000000000..7a5ff3b17 --- /dev/null +++ b/ragengine/tests/api/test_main.py @@ -0,0 +1,140 @@ +import os +from tempfile import TemporaryDirectory +from unittest.mock import patch + +import pytest +from vector_store.faiss_store import FaissVectorStoreHandler +from models import Document +from embedding.huggingface_local import LocalHuggingFaceEmbedding +from config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET + +from main import app, rag_ops +from fastapi.testclient import TestClient +from unittest.mock import MagicMock + +AUTO_GEN_DOC_ID_LEN = 36 + +client = TestClient(app) + +def test_index_documents_success(): + request_data = { + "index_name": "test_index", + "documents": [ + {"text": "This is a test document"}, + {"text": "Another test document"} + ] + } + + response = client.post("/index", json=request_data) + assert response.status_code == 200 + doc1, doc2 = response.json() + assert (doc1["text"] == "This is a test document") + assert len(doc1["doc_id"]) == AUTO_GEN_DOC_ID_LEN + assert not doc1["metadata"] + + assert (doc2["text"] == "Another test document") + assert len(doc2["doc_id"]) == AUTO_GEN_DOC_ID_LEN + assert not doc2["metadata"] + +@patch('requests.post') +def test_query_index_success(mock_post): + # Define Mock Response for 
Custom Inference API + mock_response = { + "result": "This is the completion from the API" + } + mock_post.return_value.json.return_value = mock_response + # Index + request_data = { + "index_name": "test_index", + "documents": [ + {"text": "This is a test document"}, + {"text": "Another test document"} + ] + } + + response = client.post("/index", json=request_data) + assert response.status_code == 200 + + # Query + request_data = { + "index_name": "test_index", + "query": "test query", + "top_k": 1, + "llm_params": {"temperature": 0.7} + } + + response = client.post("/query", json=request_data) + assert response.status_code == 200 + assert response.json() == {"response": "This is the completion from the API"} + assert mock_post.call_count == 1 + +def test_query_index_failure(): + # Prepare request data for querying. + request_data = { + "index_name": "non_existent_index", # Use an index name that doesn't exist + "query": "test query", + "top_k": 1, + "llm_params": {"temperature": 0.7} + } + + response = client.post("/query", json=request_data) + assert response.status_code == 500 + assert response.json()["detail"] == "No such index: 'non_existent_index' exists." + + +def test_get_document_success(): + request_data = { + "index_name": "test_index", + "documents": [ + # {"doc_id": "doc1", "text": "This is a test document"}, + {"doc_id": "doc1", "text": "This is a test document"}, + {"text": "Another test document"} + ] + } + + index_response = client.post("/index", json=request_data) + assert index_response.status_code == 200 + + # Call the GET document endpoint. + get_response = client.get("/document/test_index/doc1") + assert get_response.status_code == 200 + + response_json = get_response.json() + + assert response_json.keys() == {"node_ids", 'metadata'} + assert response_json['metadata'] == {} + + assert isinstance(response_json["node_ids"], list) and len(response_json["node_ids"]) == 1 + + +def test_get_document_failure(): + # Call the GET document endpoint. 
+ response = client.get("/document/test_index/doc1") + assert response.status_code == 404 + +def test_list_all_indexed_documents_success(): + response = client.get("/indexed-documents") + assert response.status_code == 200 + assert response.json() == {'documents': {}} + + request_data = { + "index_name": "test_index", + "documents": [ + {"text": "This is a test document"}, + {"text": "Another test document"} + ] + } + + response = client.post("/index", json=request_data) + assert response.status_code == 200 + + response = client.get("/indexed-documents") + assert response.status_code == 200 + assert "test_index" in response.json()["documents"] + response_idx = response.json()["documents"]["test_index"] + assert len(response_idx) == 2 # Two Documents Indexed + assert ({item["text"] for item in response_idx.values()} + == {item["text"] for item in request_data["documents"]}) + + + diff --git a/ragengine/tests/vector_store/__init__.py b/ragengine/tests/vector_store/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ragengine/tests/vector_store/conftest.py b/ragengine/tests/vector_store/conftest.py new file mode 100644 index 000000000..08ad12a74 --- /dev/null +++ b/ragengine/tests/vector_store/conftest.py @@ -0,0 +1,6 @@ +import sys +import os +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Force CPU-only execution for testing +os.environ["OMP_NUM_THREADS"] = "1" # Force single-threaded for testing to prevent segfault while loading embedding model +os.environ["MKL_NUM_THREADS"] = "1" # Force MKL to use a single thread diff --git a/ragengine/tests/test_faiss_store.py b/ragengine/tests/vector_store/test_faiss_store.py similarity index 70% rename from ragengine/tests/test_faiss_store.py rename to ragengine/tests/vector_store/test_faiss_store.py index 650ee03cc..029a864b3 100644 --- a/ragengine/tests/test_faiss_store.py +++ 
b/ragengine/tests/vector_store/test_faiss_store.py @@ -26,7 +26,7 @@ def test_index_documents(vector_store_manager): Document(doc_id="2", text="Second document", metadata={"type": "text"}) ] - doc_ids = vector_store_manager.index_documents(documents, index_name="test_index") + doc_ids = vector_store_manager.index_documents("test_index", documents) assert len(doc_ids) == 2 assert doc_ids == ["1", "2"] @@ -42,19 +42,19 @@ def test_index_documents_isolation(vector_store_manager): # Index documents in separate indices index_name_1, index_name_2 = "index1", "index2" - vector_store_manager.index_documents(documents1, index_name=index_name_1) - vector_store_manager.index_documents(documents2, index_name=index_name_2) + vector_store_manager.index_documents(index_name_1, documents1) + vector_store_manager.index_documents(index_name_2, documents2) # Ensure documents are correctly persisted and separated by index - doc_1 = vector_store_manager.get_document(doc_1_id, index_name=index_name_1) + doc_1 = vector_store_manager.get_document(index_name_1, doc_1_id) assert doc_1 and doc_1.node_ids # Ensure documents were created - doc_2 = vector_store_manager.get_document(doc_2_id, index_name=index_name_2) + doc_2 = vector_store_manager.get_document(index_name_2, doc_2_id) assert doc_2 and doc_2.node_ids # Ensure documents were created # Ensure that the documents do not mix between indices - assert vector_store_manager.get_document(doc_1_id, index_name=index_name_2) is None, f"Document {doc_1_id} should not exist in {index_name_2}" - assert vector_store_manager.get_document(doc_2_id, index_name=index_name_1) is None, f"Document {doc_2_id} should not exist in {index_name_1}" + assert vector_store_manager.get_document(index_name_2, doc_1_id) is None, f"Document {doc_1_id} should not exist in {index_name_2}" + assert vector_store_manager.get_document(index_name_1, doc_2_id) is None, f"Document {doc_2_id} should not exist in {index_name_1}" @patch('requests.post') def 
test_query_documents(mock_post, vector_store_manager): @@ -70,11 +70,11 @@ def test_query_documents(mock_post, vector_store_manager): Document(doc_id="1", text="First document", metadata={"type": "text"}), Document(doc_id="2", text="Second document", metadata={"type": "text"}) ] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) params = {"temperature": 0.7} # Mock query and results - query_result = vector_store_manager.query("First", top_k=1, index_name="test_index", params=params) + query_result = vector_store_manager.query("test_index", "First", top_k=1, params=params) assert query_result is not None assert query_result.response == "This is the completion from the API" @@ -86,19 +86,21 @@ def test_query_documents(mock_post, vector_store_manager): headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} ) +""" +Commented because Refresh, Update, and Delete functionality are commented def test_add_and_delete_document(vector_store_manager, capsys): documents = [Document(doc_id="3", text="Third document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) # Add a document to the existing index new_document = Document(doc_id="4", text="Fourth document", metadata={"type": "text"}) - vector_store_manager.add_document(new_document, index_name="test_index") + vector_store_manager.add_document("test_index", new_document) # Assert that the document exists - assert vector_store_manager.document_exists("4", "test_index") + assert vector_store_manager.document_exists("test_index", "4") # Delete the document - it should handle the NotImplementedError and not raise an exception - vector_store_manager.delete_document("4", "test_index") + vector_store_manager.delete_document("test_index", "4") # Capture the printed output (if any) captured = capsys.readouterr() @@ -107,18 
+109,18 @@ def test_add_and_delete_document(vector_store_manager, capsys): assert "Delete not yet implemented for Faiss index. Skipping document 4." in captured.out # Assert that the document still exists (since deletion wasn't implemented) - assert vector_store_manager.document_exists("4", "test_index") + assert vector_store_manager.document_exists("test_index", "4") def test_update_document_not_implemented(vector_store_manager, capsys): - """Test that updating a document raises a NotImplementedError and is handled properly.""" + # Test that updating a document raises a NotImplementedError and is handled properly. # Add a document to the index documents = [Document(doc_id="1", text="First document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) # Attempt to update the existing document updated_document = Document(doc_id="1", text="Updated first document", metadata={"type": "text"}) - vector_store_manager.update_document(updated_document, index_name="test_index") + vector_store_manager.update_document("test_index", updated_document) # Capture the printed output (if any) captured = capsys.readouterr() @@ -128,18 +130,17 @@ def test_update_document_not_implemented(vector_store_manager, capsys): assert f"Update not yet implemented for Faiss index. Skipping document {updated_document.doc_id}." in captured.out # Ensure the document remains unchanged - original_doc = vector_store_manager.get_document("1", index_name="test_index") + original_doc = vector_store_manager.get_document("test_index", "1") assert original_doc is not None - def test_refresh_unchanged_documents(vector_store_manager, capsys): - """Test that refreshing documents does nothing on unchanged documents.""" + # Test that refreshing documents does nothing on unchanged documents. 
# Add documents to the index documents = [Document(doc_id="1", text="First document", metadata={"type": "text"}), Document(doc_id="2", text="Second document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) - refresh_results = vector_store_manager.refresh_documents(documents, index_name="test_index") + refresh_results = vector_store_manager.refresh_documents("test_index", documents) # Capture the printed output (if any) captured = capsys.readouterr() @@ -147,43 +148,44 @@ def test_refresh_unchanged_documents(vector_store_manager, capsys): assert refresh_results == [False, False] def test_refresh_new_documents(vector_store_manager): - """Test that refreshing new documents creates them.""" - vector_store_manager.index_documents([], index_name="test_index") + # Test that refreshing new documents creates them. + vector_store_manager.index_documents("test_index", []) # Add a document to the index documents = [Document(doc_id="1", text="First document", metadata={"type": "text"}), Document(doc_id="2", text="Second document", metadata={"type": "text"})] - refresh_results = vector_store_manager.refresh_documents(documents, index_name="test_index") + refresh_results = vector_store_manager.refresh_documents("test_index", documents) - inserted_documents = vector_store_manager.list_documents(index_name="test_index") + inserted_documents = vector_store_manager.list_all_documents("test_index") assert len(inserted_documents) == len(documents) assert inserted_documents.keys() == {"1", "2"} assert refresh_results == [True, True] def test_refresh_existing_documents(vector_store_manager, capsys): - """Test that refreshing existing documents prints error.""" + # Test that refreshing existing documents prints error. 
original_documents = [Document(doc_id="1", text="First document", metadata={"type": "text"})] - vector_store_manager.index_documents(original_documents, index_name="test_index") + vector_store_manager.index_documents("test_index", original_documents) new_documents = [Document(doc_id="1", text="Updated document", metadata={"type": "text"}), Document(doc_id="2", text="Second document", metadata={"type": "text"})] - refresh_results = vector_store_manager.refresh_documents(new_documents, index_name="test_index") + refresh_results = vector_store_manager.refresh_documents("test_index", new_documents) captured = capsys.readouterr() # Check if the NotImplementedError message was printed assert "Refresh not yet fully implemented for index" in captured.out assert not refresh_results +""" def test_persist_and_load_index_store(vector_store_manager): """Test that the index store is persisted and loaded correctly.""" # Add a document and persist the index documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") - vector_store_manager._persist(index_name="test_index") + vector_store_manager.index_documents("test_index", documents) + vector_store_manager._persist("test_index") # Simulate a fresh load of the index store (clearing in-memory state) vector_store_manager.index_store = None # Clear current in-memory store @@ -198,10 +200,10 @@ def test_persist_and_load_index(vector_store_manager): """Test that an index is persisted and then loaded correctly.""" # Add a document and persist the index documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, index_name="test_index") + vector_store_manager.index_documents("test_index", documents) documents = [Document(doc_id="1", text="Another Test document", metadata={"type": "text"})] - vector_store_manager.index_documents(documents, 
index_name="another_test_index") + vector_store_manager.index_documents("another_test_index", documents) vector_store_manager._persist_all() @@ -211,13 +213,13 @@ def test_persist_and_load_index(vector_store_manager): # Verify that the index was reloaded and contains the expected document assert loaded_indices is not None - assert vector_store_manager.document_exists("1", "test_index") - assert vector_store_manager.document_exists("1", "another_test_index") + assert vector_store_manager.document_exists("test_index", "1") + assert vector_store_manager.document_exists("another_test_index", "1") vector_store_manager.index_map = {} # Clear current in-memory index map - loaded_index = vector_store_manager._load_index(index_name="test_index") + loaded_index = vector_store_manager._load_index("test_index") assert loaded_index is not None - assert vector_store_manager.document_exists("1", "test_index") - assert not vector_store_manager.document_exists("1", "another_test_index") # Since we didn't load this index + assert vector_store_manager.document_exists("test_index", "1") + assert not vector_store_manager.document_exists("another_test_index", "1") # Since we didn't load this index diff --git a/ragengine/vector_store/base.py b/ragengine/vector_store/base.py index 74254f2e3..31142d71e 100644 --- a/ragengine/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -2,19 +2,20 @@ from typing import Dict, List from models import Document +from llama_index.core import VectorStoreIndex class BaseVectorStore(ABC): @abstractmethod - def index_documents(self, documents: List[Document], index_name: str) -> List[str]: + def index_documents(self, index_name: str, documents: List[Document]) -> List[str]: pass @abstractmethod - def query(self, query: str, top_k: int, index_name: str, params: dict): + def query(self, index_name: str, query: str, top_k: int, params: dict): pass @abstractmethod - def add_document(self, document: Document, index_name: str): + def add_document(self, 
index_name: str, document: Document): pass """ @@ -32,21 +33,13 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ """ @abstractmethod - def get_document(self, doc_id: str, index_name: str) -> Document: + def get_document(self, index_name: str, doc_id: str) -> Document: pass @abstractmethod - def list_documents(self, index_name: str) -> Dict[str, Document]: + def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: pass @abstractmethod - def document_exists(self, doc_id: str, index_name: str) -> bool: + def document_exists(self, index_name: str, doc_id: str) -> bool: pass - - @abstractmethod - def list_documents(self, index_name: str) -> Dict[str, Document]: - pass - - @abstractmethod - def document_exists(self, doc_id: str, index_name: str) -> bool: - pass \ No newline at end of file diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index ca9f50f31..28dae799b 100644 --- a/ragengine/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -3,9 +3,7 @@ import faiss from llama_index.core import Document as LlamaDocument -from llama_index.core import (StorageContext, VectorStoreIndex, - load_graph_from_storage, load_index_from_storage, - load_indices_from_storage) +from llama_index.core import (StorageContext, VectorStoreIndex, load_index_from_storage) from llama_index.core.storage.index_store import SimpleIndexStore from llama_index.core.storage.docstore.types import RefDocInfo from llama_index.vector_stores.faiss import FaissVectorStore @@ -42,7 +40,7 @@ def __init__(self, embedding_manager): self.index_store = SimpleIndexStore() # Use to store global index metadata self.llm = CustomInference() - def index_documents(self, documents: List[Document], index_name: str): + def index_documents(self, index_name: str, documents: List[Document]): """Recreates the entire FAISS index and vector store with new documents.""" if index_name in self.index_map: del 
self.index_map[index_name] @@ -53,22 +51,27 @@ def index_documents(self, documents: List[Document], index_name: str): vector_store = FaissVectorStore(faiss_index=faiss_index) # Specifies in-memory data structure for storing and retrieving document embeddings storage_context = StorageContext.from_defaults(vector_store=vector_store) # Used to persist the vector store and its underlying data across sessions - llama_docs = [LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) for doc in documents] + llama_docs = [ + LlamaDocument(text=doc.text, metadata=doc.metadata, id_=doc.doc_id) + if doc.doc_id is not None + else LlamaDocument(text=doc.text, metadata=doc.metadata) + for doc in documents + ] # Creates the actual vector-based index using indexing method, vector store, storage method and embedding model specified above index = VectorStoreIndex.from_documents( llama_docs, storage_context=storage_context, embed_model=self.embed_model, - use_async=True # Indexing Process Performed Async + # use_async=True # TODO: Indexing Process Performed Async ) index.set_index_id(index_name) # https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/indices/base.py#L138-L154 self.index_map[index_name] = index self.index_store.add_index_struct(index.index_struct) self._persist(index_name) # Return the document IDs that were indexed - return [doc.doc_id for doc in documents] + return [doc.doc_id for doc in llama_docs] - def add_document(self, document: Document, index_name: str): + def add_document(self, index_name: str, document: Document): """Inserts a single document into the existing FAISS index.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") @@ -76,27 +79,22 @@ def add_document(self, document: Document, index_name: str): self.index_map[index_name].insert(llama_doc) self._persist(index_name) - def query(self, query: str, top_k: int, index_name: str, params: dict): + def query(self, 
index_name: str, query: str, top_k: int, llm_params: dict): """Queries the FAISS vector store.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - self.llm.set_params(params) + self.llm.set_params(llm_params) query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) return query_engine.query(query) - def get_document(self, doc_id: str, index_name: str): + def get_document(self, index_name: str, doc_id: str) -> RefDocInfo: """Retrieves a document's RefDocInfo by its ID.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") # Try to retrieve the RefDocInfo associated with the doc_id ref_doc_info = self.index_map[index_name].ref_doc_info.get(doc_id) - - if ref_doc_info is None: - print(f"Document with ID {doc_id} not found in index '{index_name}'.") - return None - return ref_doc_info """ @@ -150,13 +148,11 @@ def refresh_documents(self, documents: List[Document], index_name: str) -> List[ self._persist(index_name) """ - def list_documents(self, index_name: str) -> Dict[str, RefDocInfo]: + def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: """Lists all documents in the vector store.""" - if index_name not in self.index_map: - raise ValueError(f"No such index: '{index_name}' exists.") - return self.index_map[index_name].ref_doc_info + return self.index_map - def document_exists(self, doc_id: str, index_name: str) -> bool: + def document_exists(self, index_name: str, doc_id: str) -> bool: """Checks if a document exists in the vector store.""" if index_name not in self.index_map: print(f"No such index: '{index_name}' exists in vector store.") diff --git a/ragengine/vector_store_manager/manager.py b/ragengine/vector_store_manager/manager.py index d6a17229b..0e47a8fe7 100644 --- a/ragengine/vector_store_manager/manager.py +++ b/ragengine/vector_store_manager/manager.py @@ -3,18 +3,19 @@ from models import Document from 
vector_store.base import BaseVectorStore +from llama_index.core import VectorStoreIndex class VectorStoreManager: def __init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store - def create(self, documents: List[Document]) -> List[str]: + def create(self, index_name: str, documents: List[Document]) -> List[str]: """Index new documents.""" - return self.vector_store.index_documents(documents) + return self.vector_store.index_documents(index_name, documents) - def read(self, query: str, top_k: int, params: dict): + def read(self, index_name: str, query: str, top_k: int, llm_params: dict): """Query the indexed documents.""" - return self.vector_store.query(query, top_k, params) + return self.vector_store.query(index_name, query, top_k, llm_params) """ def update(self, documents: List[Document]) -> Dict[str, List[str]]: @@ -39,11 +40,10 @@ def refresh(self, documents: List[Document]) -> List[bool]: return self.vector_store.refresh_documents(documents) """ - def get(self, doc_id: str) -> Document: + def get(self, index_name: str, doc_id: str) -> Document: """Retrieve a document by ID.""" - return self.vector_store.get_document(doc_id) + return self.vector_store.get_document(index_name, doc_id) - def list_all(self) -> Dict[str, Document]: + def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: """List all documents.""" - return self.vector_store.list_documents() - + return self.vector_store.list_all_indexed_documents() From 9f52ee87e0951a56875713d4c202a0e88b792511 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:04:51 -0700 Subject: [PATCH 27/42] fix: Slight fix no need to parse inference result --- ragengine/inference/custom_inference.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ragengine/inference/custom_inference.py b/ragengine/inference/custom_inference.py index 5e49e04b6..bdfb0a61d 100644 --- a/ragengine/inference/custom_inference.py +++ 
b/ragengine/inference/custom_inference.py @@ -3,7 +3,7 @@ from llama_index.llms.openai import OpenAI from llama_index.core.llms.callbacks import llm_completion_callback import requests -from config import INFERENCE_URL, INFERENCE_ACCESS_SECRET, RESPONSE_FIELD +from config import INFERENCE_URL, INFERENCE_ACCESS_SECRET #, RESPONSE_FIELD class CustomInference(CustomLLM): params: dict = {} @@ -44,9 +44,8 @@ def _custom_api_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse response_data = response.json() # Dynamically extract the field from the response based on the specified response_field - completion_text = response_data.get(RESPONSE_FIELD, "No response field found") - - return CompletionResponse(text=completion_text) + # completion_text = response_data.get(RESPONSE_FIELD, "No response field found") # not necessary for now + return CompletionResponse(text=str(response_data)) @property def metadata(self) -> LLMMetadata: From a232d67a7b3b0fdbabc93759c6fdcfc02f69a3a8 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:05:48 -0700 Subject: [PATCH 28/42] nit --- ragengine/tests/api/test_main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py index 7a5ff3b17..0f4e3396e 100644 --- a/ragengine/tests/api/test_main.py +++ b/ragengine/tests/api/test_main.py @@ -136,5 +136,3 @@ def test_list_all_indexed_documents_success(): assert ({item["text"] for item in response_idx.values()} == {item["text"] for item in request_data["documents"]}) - - From afb860661bf4ce10afae64f87d6a8d8e4991b0bc Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:13:48 -0700 Subject: [PATCH 29/42] nit --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index a29eb8776..4b72a9f68 100644 --- a/Makefile +++ b/Makefile @@ -99,12 +99,12 @@ unit-test: ## Run unit tests. 
.PHONY: rag-service-test rag-service-test: - pip install -r presets/rag_service/requirements.txt - pytest -o log_cli=true -o log_cli_level=INFO presets/rag_service/tests + pip install -r presets/rag_service/requirements.txt + pytest -o log_cli=true -o log_cli_level=INFO presets/rag_service/tests .PHONY: tuning-metrics-server-test tuning-metrics-server-test: - pytest -o log_cli=true -o log_cli_level=INFO presets/tuning/text-generation/metrics + pytest -o log_cli=true -o log_cli_level=INFO presets/tuning/text-generation/metrics ## -------------------------------------- ## E2E tests From 2455dfd653eaf61469f0b274303cd593d0fcd893 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:15:14 -0700 Subject: [PATCH 30/42] fix: remove unused files --- ragengine/vector_store/playground/__init__.py | 0 .../playground/chromadb_playground.py | 62 ------------------- 2 files changed, 62 deletions(-) delete mode 100644 ragengine/vector_store/playground/__init__.py delete mode 100644 ragengine/vector_store/playground/chromadb_playground.py diff --git a/ragengine/vector_store/playground/__init__.py b/ragengine/vector_store/playground/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/ragengine/vector_store/playground/chromadb_playground.py b/ragengine/vector_store/playground/chromadb_playground.py deleted file mode 100644 index 31a5af077..000000000 --- a/ragengine/vector_store/playground/chromadb_playground.py +++ /dev/null @@ -1,62 +0,0 @@ -from llama_index.core import Settings -from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI - -remote_llm_api = HuggingFaceInferenceAPI( - model_name="HuggingFaceH4/zephyr-7b-alpha" -) - -Settings.llm = remote_llm_api - -import logging - -import chromadb -from IPython.display import Markdown, display -from llama_index.core import (SimpleDirectoryReader, StorageContext, - VectorStoreIndex) -from llama_index.embeddings.huggingface import HuggingFaceEmbedding -from 
llama_index.vector_stores.chroma import ChromaVectorStore - -# Enable DEBUG logging for ChromaDB -logging.basicConfig(level=logging.DEBUG) - -# create ChromaDB client and a new collection -chroma_client = chromadb.EphemeralClient() -chroma_collection = chroma_client.create_collection("quickstart") - -# define embedding function -embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5") - -# load documents from directory -documents = SimpleDirectoryReader("./data/paul_graham/").load_data() - -# set up ChromaVectorStore and load in data -vector_store = ChromaVectorStore(chroma_collection=chroma_collection) -storage_context = StorageContext.from_defaults(vector_store=vector_store) -index = VectorStoreIndex.from_documents( - documents, storage_context=storage_context, embed_model=embed_model -) - -# Log collection contents before querying -logging.debug("Documents in ChromaDB collection before querying:") -all_documents = chroma_collection.get(include=["documents"]) -logging.debug(all_documents["documents"]) - -# Query Data -query_engine = index.as_query_engine() -response = query_engine.query("What did the author do growing up?") -display(Markdown(f"{response}")) - -# Log collection contents after querying -logging.debug("Documents in ChromaDB collection after querying:") -all_documents_after_query = chroma_collection.get(include=["documents"]) -logging.debug(all_documents_after_query["documents"]) - -# Log embeddings stored in ChromaDB -logging.debug("Embeddings stored in ChromaDB:") -all_embeddings = chroma_collection.get(include=["embeddings"]) -logging.debug(all_embeddings["embeddings"]) - -# Log metadata stored in ChromaDB -logging.debug("Metadata stored in ChromaDB:") -all_metadata = chroma_collection.get(include=["metadatas"]) -logging.debug(all_metadata["metadatas"]) From d32169f00b3c2f295ce6b6c82210a87715fc4900 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 10 Oct 2024 18:22:27 -0700 Subject: [PATCH 31/42] fix: Example of live test --- 
ragengine/tests/api/test_main.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py index 0f4e3396e..cfa74716c 100644 --- a/ragengine/tests/api/test_main.py +++ b/ragengine/tests/api/test_main.py @@ -136,3 +136,35 @@ def test_list_all_indexed_documents_success(): assert ({item["text"] for item in response_idx.values()} == {item["text"] for item in request_data["documents"]}) + +""" +Example of a live query test. This test is currently commented out as it requires a valid +INFERENCE_URL in config.py. To run the test, ensure that a valid INFERENCE_URL is provided. +Upon execution, RAG results should be observed. + +def test_live_query_test(): + # Index + request_data = { + "index_name": "test_index", + "documents": [ + {"text": "Polar bear – can lift 450Kg (approximately 0.7 times their body weight) \ + Adult male polar bears can grow to be anywhere between 300 and 700kg"}, + {"text": "Giraffes are the tallest mammals and are well-adapted to living in trees. 
\ + They have few predators as adults."} + ] + } + + response = client.post("/index", json=request_data) + assert response.status_code == 200 + + # Query + request_data = { + "index_name": "test_index", + "query": "What is the strongest bear?", + "top_k": 1, + "llm_params": {"temperature": 0.7} + } + + response = client.post("/query", json=request_data) + assert response.status_code == 200 +""" \ No newline at end of file From 42f288b1409f30b410e90f61ed56955b4d978e8b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 21 Oct 2024 14:14:43 -0700 Subject: [PATCH 32/42] Update endpoints and remove old class --- ragengine/inference/custom_inference.py | 53 ------------------------- ragengine/main.py | 40 ------------------- 2 files changed, 93 deletions(-) delete mode 100644 ragengine/inference/custom_inference.py diff --git a/ragengine/inference/custom_inference.py b/ragengine/inference/custom_inference.py deleted file mode 100644 index bdfb0a61d..000000000 --- a/ragengine/inference/custom_inference.py +++ /dev/null @@ -1,53 +0,0 @@ -from typing import Any, Optional -from llama_index.core.llms import CustomLLM, CompletionResponse, LLMMetadata, CompletionResponseGen -from llama_index.llms.openai import OpenAI -from llama_index.core.llms.callbacks import llm_completion_callback -import requests -from config import INFERENCE_URL, INFERENCE_ACCESS_SECRET #, RESPONSE_FIELD - -class CustomInference(CustomLLM): - params: dict = {} - - def set_params(self, params: dict) -> None: - self.params = params - - def get_param(self, key, default=None): - return self.params.get(key, default) - - @llm_completion_callback() - def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen: - pass - - @llm_completion_callback() - def complete(self, prompt: str, **kwargs) -> CompletionResponse: - try: - if "openai" in INFERENCE_URL: - return self._openai_complete(prompt, **kwargs, **self.params) - else: - return self._custom_api_complete(prompt, **kwargs, 
**self.params) - finally: - # Clear params after the completion is done - self.params = {} - - def _openai_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: - llm = OpenAI( - api_key=INFERENCE_ACCESS_SECRET, - **kwargs # Pass all kwargs directly; kwargs may include model, temperature, max_tokens, etc. - ) - return llm.complete(prompt) - - def _custom_api_complete(self, prompt: str, **kwargs: Any) -> CompletionResponse: - headers = {"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} - data = {"prompt": prompt, **kwargs} - - response = requests.post(INFERENCE_URL, json=data, headers=headers) - response_data = response.json() - - # Dynamically extract the field from the response based on the specified response_field - # completion_text = response_data.get(RESPONSE_FIELD, "No response field found") # not necessary for now - return CompletionResponse(text=str(response_data)) - - @property - def metadata(self) -> LLMMetadata: - """Get LLM metadata.""" - return LLMMetadata() diff --git a/ragengine/main.py b/ragengine/main.py index bcecfb966..3ca9cc6de 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -51,46 +51,6 @@ async def query_index(request: QueryRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) -""" -@app.put("/update", response_model=Dict[str, List[str]]) -async def update_documents(request: UpdateRequest): - try: - result = rag_ops.update(request.documents) - return result - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.post("/refresh", response_model=List[bool]) -async def refresh_documents(request: RefreshRequest): - try: - result = rag_ops.refresh(request.documents) - return result - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - -@app.delete("/document/{doc_id}") -async def delete_document(doc_id: str): - try: - rag_ops.delete(doc_id) - return {"message": "Document deleted successfully"} - except Exception as e: - raise 
HTTPException(status_code=500, detail=str(e)) -""" - -@app.get("/document/{index_name}/{doc_id}", response_model=RefDocInfo) -async def get_document(index_name: str, doc_id: str): - try: - document = rag_ops.get(index_name, doc_id) - if document: - return document - else: - raise HTTPException(status_code=404, detail=f"Document with ID {doc_id} " - f"not found in index '{index_name}'.") - except ValueError as ve: - raise HTTPException(status_code=404, detail=str(ve)) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - @app.get("/indexed-documents", response_model=ListDocumentsResponse) async def list_all_indexed_documents(): try: From e652935fcd908ab05e40fcf7d4926128ffeeedbe Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 21 Oct 2024 14:46:58 -0700 Subject: [PATCH 33/42] pytest fix target --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4b72a9f68..fa138ecd3 100644 --- a/Makefile +++ b/Makefile @@ -99,8 +99,8 @@ unit-test: ## Run unit tests. 
.PHONY: rag-service-test rag-service-test: - pip install -r presets/rag_service/requirements.txt - pytest -o log_cli=true -o log_cli_level=INFO presets/rag_service/tests + pip install -r ragengine/requirements.txt + pytest -o log_cli=true -o log_cli_level=INFO ragengine/tests .PHONY: tuning-metrics-server-test tuning-metrics-server-test: From 7748420e91d9bc7ab8c47af080ec6e65a1bbdb04 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 22 Oct 2024 18:43:11 -0700 Subject: [PATCH 34/42] feat: Updated UTs, models and API --- ragengine/main.py | 27 ++---- ragengine/models.py | 19 +++- ragengine/tests/api/test_main.py | 55 +++-------- .../tests/vector_store/test_faiss_store.py | 95 ++++++++----------- ragengine/vector_store/base.py | 4 +- ragengine/vector_store/faiss_store.py | 53 +++++++---- ragengine/vector_store_manager/manager.py | 4 +- 7 files changed, 116 insertions(+), 141 deletions(-) diff --git a/ragengine/main.py b/ragengine/main.py index 3ca9cc6de..eff696790 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -1,17 +1,13 @@ from typing import Dict, List - -from llama_index.core.schema import TextNode - from vector_store_manager.manager import VectorStoreManager from embedding.huggingface_local import LocalHuggingFaceEmbedding from embedding.huggingface_remote import RemoteHuggingFaceEmbedding -from llama_index.core.storage.docstore.types import RefDocInfo from fastapi import FastAPI, HTTPException from models import (IndexRequest, ListDocumentsResponse, - QueryRequest, Document) + QueryRequest, QueryResponse, DocumentResponse) from vector_store.faiss_store import FaissVectorStoreHandler -from config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID +from ragengine.config import ACCESS_SECRET, EMBEDDING_TYPE, MODEL_ID app = FastAPI() @@ -30,24 +26,23 @@ # Initialize RAG operations rag_ops = VectorStoreManager(vector_store_handler) -@app.post("/index", response_model=List[Document]) +@app.post("/index", response_model=List[DocumentResponse]) async def 
index_documents(request: IndexRequest): # TODO: Research async/sync what to use (inference is calling) try: doc_ids = rag_ops.create(request.index_name, request.documents) documents = [ - Document(doc_id=doc_id, text=doc.text, metadata=doc.metadata) + DocumentResponse(doc_id=doc_id, text=doc.text, metadata=doc.metadata) for doc_id, doc in zip(doc_ids, request.documents) ] return documents except Exception as e: raise HTTPException(status_code=500, detail=str(e)) -@app.post("/query", response_model=Dict[str, str]) +@app.post("/query", response_model=QueryResponse) async def query_index(request: QueryRequest): try: llm_params = request.llm_params or {} # Default to empty dict if no params provided - response = rag_ops.read(request.index_name, request.query, request.top_k, llm_params) - return {"response": str(response)} + return rag_ops.read(request.index_name, request.query, request.top_k, llm_params) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -55,15 +50,7 @@ async def query_index(request: QueryRequest): async def list_all_indexed_documents(): try: documents = rag_ops.list_all_indexed_documents() - serialized_documents = { - index_name: { - doc_name: { - "text": doc_info.text, "hash": doc_info.hash - } for doc_name, doc_info in vector_store_index.docstore.docs.items() - } - for index_name, vector_store_index in documents.items() - } - return ListDocumentsResponse(documents=serialized_documents) + return ListDocumentsResponse(documents=documents) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) diff --git a/ragengine/models.py b/ragengine/models.py index 941b845a4..a74d63608 100644 --- a/ragengine/models.py +++ b/ragengine/models.py @@ -6,6 +6,11 @@ class Document(BaseModel): text: str metadata: Optional[dict] = {} +class DocumentResponse(BaseModel): + doc_id: str + text: str + metadata: Optional[dict] = None + class IndexRequest(BaseModel): index_name: str documents: List[Document] @@ -17,4 +22,16 @@ class 
QueryRequest(BaseModel): llm_params: Optional[Dict] = None # Accept a dictionary for parameters class ListDocumentsResponse(BaseModel): - documents:Dict[str, Dict[str, Dict[str, str]]] + documents: Dict[str, Dict[str, Dict[str, str]]] + +# Define models for TextNode, NodeWithScore, and the main Response +class NodeWithScore(BaseModel): + node_id: str + text: str + score: float + metadata: Optional[dict] = None + +class QueryResponse(BaseModel): + response: str + source_nodes: List[NodeWithScore] + metadata: Optional[dict] = None \ No newline at end of file diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py index cfa74716c..1e7366c86 100644 --- a/ragengine/tests/api/test_main.py +++ b/ragengine/tests/api/test_main.py @@ -1,21 +1,20 @@ -import os -from tempfile import TemporaryDirectory from unittest.mock import patch -import pytest -from vector_store.faiss_store import FaissVectorStoreHandler -from models import Document -from embedding.huggingface_local import LocalHuggingFaceEmbedding -from config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET +from llama_index.core.storage.index_store import SimpleIndexStore -from main import app, rag_ops +from ragengine.main import app, vector_store_handler, rag_ops from fastapi.testclient import TestClient -from unittest.mock import MagicMock +import pytest -AUTO_GEN_DOC_ID_LEN = 36 +AUTO_GEN_DOC_ID_LEN = 64 client = TestClient(app) +@pytest.fixture(autouse=True) +def clear_index(): + vector_store_handler.index_map.clear() + vector_store_handler.index_store = SimpleIndexStore() + def test_index_documents_success(): request_data = { "index_name": "test_index", @@ -65,7 +64,11 @@ def test_query_index_success(mock_post): response = client.post("/query", json=request_data) assert response.status_code == 200 - assert response.json() == {"response": "This is the completion from the API"} + assert response.json()["response"] == "{'result': 'This is the completion from the API'}" + assert 
len(response.json()["source_nodes"]) == 1 + assert response.json()["source_nodes"][0]["text"] == "This is a test document" + assert response.json()["source_nodes"][0]["score"] == 0.5354418754577637 + assert response.json()["source_nodes"][0]["metadata"] == {} assert mock_post.call_count == 1 def test_query_index_failure(): @@ -82,36 +85,6 @@ def test_query_index_failure(): assert response.json()["detail"] == "No such index: 'non_existent_index' exists." -def test_get_document_success(): - request_data = { - "index_name": "test_index", - "documents": [ - # {"doc_id": "doc1", "text": "This is a test document"}, - {"doc_id": "doc1", "text": "This is a test document"}, - {"text": "Another test document"} - ] - } - - index_response = client.post("/index", json=request_data) - assert index_response.status_code == 200 - - # Call the GET document endpoint. - get_response = client.get("/document/test_index/doc1") - assert get_response.status_code == 200 - - response_json = get_response.json() - - assert response_json.keys() == {"node_ids", 'metadata'} - assert response_json['metadata'] == {} - - assert isinstance(response_json["node_ids"], list) and len(response_json["node_ids"]) == 1 - - -def test_get_document_failure(): - # Call the GET document endpoint. 
- response = client.get("/document/test_index/doc1") - assert response.status_code == 404 - def test_list_all_indexed_documents_success(): response = client.get("/indexed-documents") assert response.status_code == 200 diff --git a/ragengine/tests/vector_store/test_faiss_store.py b/ragengine/tests/vector_store/test_faiss_store.py index 452388aeb..116c89ba2 100644 --- a/ragengine/tests/vector_store/test_faiss_store.py +++ b/ragengine/tests/vector_store/test_faiss_store.py @@ -3,10 +3,13 @@ from unittest.mock import patch import pytest + +from ragengine.vector_store.base import BaseVectorStore from ragengine.vector_store.faiss_store import FaissVectorStoreHandler from ragengine.models import Document from ragengine.embedding.huggingface_local import LocalHuggingFaceEmbedding from ragengine.config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET +from ragengine.config import PERSIST_DIR @pytest.fixture(scope='session') def init_embed_manager(): @@ -21,23 +24,24 @@ def vector_store_manager(init_embed_manager): yield FaissVectorStoreHandler(init_embed_manager) def test_index_documents(vector_store_manager): + first_doc_text, second_doc_text = "First document", "Second document" documents = [ - Document(doc_id="1", text="First document", metadata={"type": "text"}), - Document(doc_id="2", text="Second document", metadata={"type": "text"}) + Document(text=first_doc_text, metadata={"type": "text"}), + Document(text=second_doc_text, metadata={"type": "text"}) ] doc_ids = vector_store_manager.index_documents("test_index", documents) assert len(doc_ids) == 2 - assert doc_ids == ["1", "2"] + assert set(doc_ids) == {BaseVectorStore.generate_doc_id(first_doc_text), + BaseVectorStore.generate_doc_id(second_doc_text)} def test_index_documents_isolation(vector_store_manager): - doc_1_id, doc_2_id = "1", "2" documents1 = [ - Document(doc_id=doc_1_id, text="First document in index1", metadata={"type": "text"}), + Document(text="First document in index1", metadata={"type": 
"text"}), ] documents2 = [ - Document(doc_id=doc_2_id, text="First document in index2", metadata={"type": "text"}), + Document(text="First document in index2", metadata={"type": "text"}), ] # Index documents in separate indices @@ -45,16 +49,14 @@ def test_index_documents_isolation(vector_store_manager): vector_store_manager.index_documents(index_name_1, documents1) vector_store_manager.index_documents(index_name_2, documents2) - # Ensure documents are correctly persisted and separated by index - doc_1 = vector_store_manager.get_document(index_name_1, doc_1_id) - assert doc_1 and doc_1.node_ids # Ensure documents were created - - doc_2 = vector_store_manager.get_document(index_name_2, doc_2_id) - assert doc_2 and doc_2.node_ids # Ensure documents were created - - # Ensure that the documents do not mix between indices - assert vector_store_manager.get_document(index_name_2, doc_1_id) is None, f"Document {doc_1_id} should not exist in {index_name_2}" - assert vector_store_manager.get_document(index_name_1, doc_2_id) is None, f"Document {doc_2_id} should not exist in {index_name_1}" + assert vector_store_manager.list_all_indexed_documents() == { + 'index1': {"87117028123498eb7d757b1507aa3e840c63294f94c27cb5ec83c939dedb32fd": + {'hash': '1e64a170be48c45efeaa8667ab35919106da0489ec99a11d0029f2842db133aa', + 'text': 'First document in index1'}}, + 'index2': {"49b198c0e126a99e1975f17b564756c25b4ad691a57eda583e232fd9bee6de91": + {'hash': 'a222f875b83ce8b6eb72b3cae278b620de9bcc7c6b73222424d3ce979d1a463b', + 'text': 'First document in index2'}} + } @patch('requests.post') def test_query_documents(mock_post, vector_store_manager): @@ -67,17 +69,19 @@ def test_query_documents(mock_post, vector_store_manager): # Add documents to index documents = [ - Document(doc_id="1", text="First document", metadata={"type": "text"}), - Document(doc_id="2", text="Second document", metadata={"type": "text"}) + Document(text="First document", metadata={"type": "text"}), + Document(text="Second 
document", metadata={"type": "text"}) ] vector_store_manager.index_documents("test_index", documents) params = {"temperature": 0.7} # Mock query and results - query_result = vector_store_manager.query("test_index", "First", top_k=1, params=params) + query_result = vector_store_manager.query("test_index", "First", top_k=1, llm_params=params) assert query_result is not None - assert query_result.response == "This is the completion from the API" + assert query_result["response"] == "{'result': 'This is the completion from the API'}" + assert query_result["source_nodes"][0]["text"] == "First document" + assert query_result["source_nodes"][0]["score"] == 0.5795239210128784 mock_post.assert_called_once_with( INFERENCE_URL, @@ -86,57 +90,34 @@ def test_query_documents(mock_post, vector_store_manager): headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} ) -def test_add_document(vector_store_manager, capsys): - documents = [Document(doc_id="3", text="Third document", metadata={"type": "text"})] +def test_add_document(vector_store_manager): + documents = [Document(text="Third document", metadata={"type": "text"})] vector_store_manager.index_documents("test_index", documents) # Add a document to the existing index - new_document = Document(doc_id="4", text="Fourth document", metadata={"type": "text"}) + new_document = [Document(text="Fourth document", metadata={"type": "text"})] vector_store_manager.index_documents("test_index", new_document) # Assert that the document exists - assert vector_store_manager.document_exists("test_index", "4") + assert vector_store_manager.document_exists("test_index", + BaseVectorStore.generate_doc_id("Fourth document")) -def test_persist_and_load_index_store(vector_store_manager): - """Test that the index store is persisted and loaded correctly.""" +def test_persist_index_1(vector_store_manager): + """Test that the index store is persisted.""" # Add a document and persist the index - documents = [Document(doc_id="1", text="Test 
document", metadata={"type": "text"})] + documents = [Document(text="Test document", metadata={"type": "text"})] vector_store_manager.index_documents("test_index", documents) vector_store_manager._persist("test_index") + assert os.path.exists(PERSIST_DIR) - # Simulate a fresh load of the index store (clearing in-memory state) - vector_store_manager.index_store = None # Clear current in-memory store - vector_store_manager._load_index_store() - - # Verify that the store was reloaded and contains the expected index structure - assert vector_store_manager.index_store is not None - assert len(vector_store_manager.index_store.index_structs()) > 0 - -# TODO: Prevent default re-indexing from load_index_from_storage -def test_persist_and_load_index(vector_store_manager): - """Test that an index is persisted and then loaded correctly.""" +def test_persist_index_2(vector_store_manager): + """Test that an index store is persisted.""" # Add a document and persist the index - documents = [Document(doc_id="1", text="Test document", metadata={"type": "text"})] + documents = [Document(text="Test document", metadata={"type": "text"})] vector_store_manager.index_documents("test_index", documents) - documents = [Document(doc_id="1", text="Another Test document", metadata={"type": "text"})] + documents = [Document(text="Another Test document", metadata={"type": "text"})] vector_store_manager.index_documents("another_test_index", documents) vector_store_manager._persist_all() - - # Simulate a fresh load of the index (clearing in-memory state) - vector_store_manager.index_map = {} # Clear current in-memory index map - loaded_indices = vector_store_manager._load_indices() - - # Verify that the index was reloaded and contains the expected document - assert loaded_indices is not None - assert vector_store_manager.document_exists("test_index", "1") - assert vector_store_manager.document_exists("another_test_index", "1") - - vector_store_manager.index_map = {} # Clear current in-memory index 
map - loaded_index = vector_store_manager._load_index("test_index") - - assert loaded_index is not None - assert vector_store_manager.document_exists("test_index", "1") - assert not vector_store_manager.document_exists("another_test_index", "1") # Since we didn't load this index - + assert os.path.exists(PERSIST_DIR) diff --git a/ragengine/vector_store/base.py b/ragengine/vector_store/base.py index 938330fa2..062342b60 100644 --- a/ragengine/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -20,11 +20,11 @@ def query(self, index_name: str, query: str, top_k: int, params: dict): pass @abstractmethod - def add_document(self, index_name: str, document: Document): + def add_document_to_index(self, index_name: str, document: Document, doc_id: str): pass @abstractmethod - def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: + def list_all_indexed_documents(self) -> Dict[str, Dict[str, Dict[str, str]]]: pass @abstractmethod diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index 35472d004..5bf71e314 100644 --- a/ragengine/vector_store/faiss_store.py +++ b/ragengine/vector_store/faiss_store.py @@ -5,13 +5,12 @@ from llama_index.core import Document as LlamaDocument from llama_index.core import (StorageContext, VectorStoreIndex) from llama_index.core.storage.index_store import SimpleIndexStore -from llama_index.core.storage.docstore.types import RefDocInfo from llama_index.vector_stores.faiss import FaissVectorStore -from ragengine.models import Document +from ragengine.models import Document, NodeWithScore from ragengine.inference.inference import Inference -from config import PERSIST_DIR +from ragengine.config import PERSIST_DIR from .base import BaseVectorStore from ragengine.embedding.base import BaseEmbeddingModel @@ -75,12 +74,12 @@ def _append_documents_to_index(self, index_name: str, documents: List[Document]) indexed_doc_ids = set() for doc in documents: - doc.doc_id = 
self.generate_doc_id(doc.text) - if not self.document_exists(index_name, doc.doc_id): - self.add_document_to_index(index_name, doc) - indexed_doc_ids.add(doc.doc_id) + doc_id = BaseVectorStore.generate_doc_id(doc.text) + if not self.document_exists(index_name, doc_id): + self.add_document_to_index(index_name, doc, doc_id) + indexed_doc_ids.add(doc_id) else: - print(f"Document {doc.doc_id} already exists in index {index_name}. Skipping.") + print(f"Document {doc_id} already exists in index {index_name}. Skipping.") if indexed_doc_ids: self._persist(index_name) @@ -105,10 +104,10 @@ def _create_new_index(self, index_name: str, documents: List[Document]) -> List[ indexed_doc_ids = set() for doc in documents: - doc.doc_id = self.generate_doc_id(doc.text) - llama_doc = LlamaDocument(id_=doc.doc_id, text=doc.text, metadata=doc.metadata) + doc_id = BaseVectorStore.generate_doc_id(doc.text) + llama_doc = LlamaDocument(id_=doc_id, text=doc.text, metadata=doc.metadata) llama_docs.append(llama_doc) - indexed_doc_ids.add(doc.doc_id) + indexed_doc_ids.add(doc_id) if llama_docs: index = VectorStoreIndex.from_documents( @@ -123,11 +122,11 @@ def _create_new_index(self, index_name: str, documents: List[Document]) -> List[ self._persist(index_name) return list(indexed_doc_ids) - def add_document_to_index(self, index_name: str, document: Document): + def add_document_to_index(self, index_name: str, document: Document, doc_id: str): """Inserts a single document into the existing FAISS index.""" if index_name not in self.index_map: raise ValueError(f"No such index: '{index_name}' exists.") - llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=document.doc_id) + llama_doc = LlamaDocument(text=document.text, metadata=document.metadata, id_=doc_id) self.index_map[index_name].insert(llama_doc) def query(self, index_name: str, query: str, top_k: int, llm_params: dict): @@ -137,11 +136,31 @@ def query(self, index_name: str, query: str, top_k: int, llm_params: 
dict): self.llm.set_params(llm_params) query_engine = self.index_map[index_name].as_query_engine(llm=self.llm, similarity_top_k=top_k) - return query_engine.query(query) - - def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: + query_result = query_engine.query(query) + return { + "response": query_result.response, + "source_nodes": [ + { + "node_id": node.node_id, + "text": node.text, + "score": node.score, + "metadata": node.metadata + } + for node in query_result.source_nodes + ], + "metadata": query_result.metadata, + } + + def list_all_indexed_documents(self) -> Dict[str, Dict[str, Dict[str, str]]]: """Lists all documents in the vector store.""" - return self.index_map + return { + index_name: { + doc_info.ref_doc_id: { + "text": doc_info.text, "hash": doc_info.hash + } for doc_name, doc_info in vector_store_index.docstore.docs.items() + } + for index_name, vector_store_index in self.index_map.items() + } def document_exists(self, index_name: str, doc_id: str) -> bool: """Checks if a document exists in the vector store.""" diff --git a/ragengine/vector_store_manager/manager.py b/ragengine/vector_store_manager/manager.py index e69192f1d..6976b4f9c 100644 --- a/ragengine/vector_store_manager/manager.py +++ b/ragengine/vector_store_manager/manager.py @@ -3,8 +3,6 @@ from ragengine.models import Document from ragengine.vector_store.base import BaseVectorStore -from llama_index.core import VectorStoreIndex - class VectorStoreManager: def __init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store @@ -17,6 +15,6 @@ def read(self, index_name: str, query: str, top_k: int, llm_params: dict): """Query the indexed documents.""" return self.vector_store.query(index_name, query, top_k, llm_params) - def list_all_indexed_documents(self) -> Dict[str, VectorStoreIndex]: + def list_all_indexed_documents(self) -> Dict[str, Dict[str, Dict[str, str]]]: """List all documents.""" return self.vector_store.list_all_indexed_documents() From 
1b0a7a033d7e07c09b34b3166e0a364970e847d4 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 22 Oct 2024 18:44:38 -0700 Subject: [PATCH 35/42] feat: Updated UTs, models and API --- ragengine/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ragengine/models.py b/ragengine/models.py index a74d63608..982c1b1b9 100644 --- a/ragengine/models.py +++ b/ragengine/models.py @@ -24,7 +24,7 @@ class QueryRequest(BaseModel): class ListDocumentsResponse(BaseModel): documents: Dict[str, Dict[str, Dict[str, str]]] -# Define models for TextNode, NodeWithScore, and the main Response +# Define models for NodeWithScore, and QueryResponse class NodeWithScore(BaseModel): node_id: str text: str From 1c34fb041275954191ab69f6dd159367bead19d2 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 22 Oct 2024 18:46:34 -0700 Subject: [PATCH 36/42] feat: Updated UTs, models and API --- ragengine/main.py | 2 +- ragengine/vector_store/base.py | 1 - ragengine/vector_store/faiss_store.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ragengine/main.py b/ragengine/main.py index eff696790..a60b7a5a9 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import List from vector_store_manager.manager import VectorStoreManager from embedding.huggingface_local import LocalHuggingFaceEmbedding from embedding.huggingface_remote import RemoteHuggingFaceEmbedding diff --git a/ragengine/vector_store/base.py b/ragengine/vector_store/base.py index 062342b60..bf3be9624 100644 --- a/ragengine/vector_store/base.py +++ b/ragengine/vector_store/base.py @@ -2,7 +2,6 @@ from typing import Dict, List from ragengine.models import Document -from llama_index.core import VectorStoreIndex import hashlib diff --git a/ragengine/vector_store/faiss_store.py b/ragengine/vector_store/faiss_store.py index 5bf71e314..ddd5b670d 100644 --- a/ragengine/vector_store/faiss_store.py +++ 
b/ragengine/vector_store/faiss_store.py @@ -7,7 +7,7 @@ from llama_index.core.storage.index_store import SimpleIndexStore from llama_index.vector_stores.faiss import FaissVectorStore -from ragengine.models import Document, NodeWithScore +from ragengine.models import Document from ragengine.inference.inference import Inference from ragengine.config import PERSIST_DIR From bc946693110d3b7c3f06b7c9ba961b6e547111dc Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 11:33:01 -0700 Subject: [PATCH 37/42] feat: Updated UTs, models and API --- ragengine/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ragengine/requirements.txt b/ragengine/requirements.txt index 18ca062fc..7d8177c2a 100644 --- a/ragengine/requirements.txt +++ b/ragengine/requirements.txt @@ -5,3 +5,5 @@ fastapi faiss-cpu llama-index-vector-stores-faiss uvicorn +# For UTs +pytest \ No newline at end of file From bc076bd84e1d9af5c7aca84798275793ce310dad Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 11:45:23 -0700 Subject: [PATCH 38/42] feat: Updated UTs, models and API --- ragengine/requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ragengine/requirements.txt b/ragengine/requirements.txt index 7d8177c2a..7be7a8c38 100644 --- a/ragengine/requirements.txt +++ b/ragengine/requirements.txt @@ -1,6 +1,11 @@ # RAG Library Requirements llama-index +# HF Embeddings llama-index-embeddings-huggingface +llama-index-embeddings-huggingface-api +# HF LLMs +llama-index-llms-huggingface +llama-index-llms-huggingface-api fastapi faiss-cpu llama-index-vector-stores-faiss From f93669bc63084aae41df11e8447ec609ffcba079 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 11:45:44 -0700 Subject: [PATCH 39/42] feat: Updated UTs, models and API --- ragengine/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ragengine/requirements.txt b/ragengine/requirements.txt index 7be7a8c38..4a324766c 100644 --- 
a/ragengine/requirements.txt +++ b/ragengine/requirements.txt @@ -6,6 +6,7 @@ llama-index-embeddings-huggingface-api # HF LLMs llama-index-llms-huggingface llama-index-llms-huggingface-api + fastapi faiss-cpu llama-index-vector-stores-faiss From a5dd527d2b6a84ae80dfcb693d29c4c6cc12ca17 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 11:57:12 -0700 Subject: [PATCH 40/42] approx --- ragengine/tests/api/test_main.py | 2 +- ragengine/tests/vector_store/test_faiss_store.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ragengine/tests/api/test_main.py b/ragengine/tests/api/test_main.py index 1e7366c86..c0b6ef13b 100644 --- a/ragengine/tests/api/test_main.py +++ b/ragengine/tests/api/test_main.py @@ -67,7 +67,7 @@ def test_query_index_success(mock_post): assert response.json()["response"] == "{'result': 'This is the completion from the API'}" assert len(response.json()["source_nodes"]) == 1 assert response.json()["source_nodes"][0]["text"] == "This is a test document" - assert response.json()["source_nodes"][0]["score"] == 0.5354418754577637 + assert response.json()["source_nodes"][0]["score"] == pytest.approx(0.5354418754577637, rel=1e-6) assert response.json()["source_nodes"][0]["metadata"] == {} assert mock_post.call_count == 1 diff --git a/ragengine/tests/vector_store/test_faiss_store.py b/ragengine/tests/vector_store/test_faiss_store.py index 116c89ba2..0fc17a912 100644 --- a/ragengine/tests/vector_store/test_faiss_store.py +++ b/ragengine/tests/vector_store/test_faiss_store.py @@ -81,7 +81,7 @@ def test_query_documents(mock_post, vector_store_manager): assert query_result is not None assert query_result["response"] == "{'result': 'This is the completion from the API'}" assert query_result["source_nodes"][0]["text"] == "First document" - assert query_result["source_nodes"][0]["score"] == 0.5795239210128784 + assert query_result["source_nodes"][0]["score"] == pytest.approx(0.5795239210128784, rel=1e-6) 
mock_post.assert_called_once_with( INFERENCE_URL, From 1000732209b9eec098314888b2f60b0f5c7c0e0b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 12:08:39 -0700 Subject: [PATCH 41/42] fix: add ut dependency --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index fa138ecd3..699e557b5 100644 --- a/Makefile +++ b/Makefile @@ -104,6 +104,7 @@ rag-service-test: .PHONY: tuning-metrics-server-test tuning-metrics-server-test: + pip install -r presets/inference/text-generation/requirements.txt pytest -o log_cli=true -o log_cli_level=INFO presets/tuning/text-generation/metrics ## -------------------------------------- From 3d6a623624ac37a6d3e91ca23ed1332567276a09 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 23 Oct 2024 12:15:05 -0700 Subject: [PATCH 42/42] fix: renamed --- ragengine/main.py | 4 ++-- ragengine/vector_store_manager/manager.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ragengine/main.py b/ragengine/main.py index a60b7a5a9..53bdb1997 100644 --- a/ragengine/main.py +++ b/ragengine/main.py @@ -29,7 +29,7 @@ @app.post("/index", response_model=List[DocumentResponse]) async def index_documents(request: IndexRequest): # TODO: Research async/sync what to use (inference is calling) try: - doc_ids = rag_ops.create(request.index_name, request.documents) + doc_ids = rag_ops.index(request.index_name, request.documents) documents = [ DocumentResponse(doc_id=doc_id, text=doc.text, metadata=doc.metadata) for doc_id, doc in zip(doc_ids, request.documents) @@ -42,7 +42,7 @@ async def index_documents(request: IndexRequest): # TODO: Research async/sync wh async def query_index(request: QueryRequest): try: llm_params = request.llm_params or {} # Default to empty dict if no params provided - return rag_ops.read(request.index_name, request.query, request.top_k, llm_params) + return rag_ops.query(request.index_name, request.query, request.top_k, llm_params) except Exception as e: raise 
HTTPException(status_code=500, detail=str(e)) diff --git a/ragengine/vector_store_manager/manager.py b/ragengine/vector_store_manager/manager.py index 6976b4f9c..d8871b93a 100644 --- a/ragengine/vector_store_manager/manager.py +++ b/ragengine/vector_store_manager/manager.py @@ -7,11 +7,11 @@ class VectorStoreManager: def __init__(self, vector_store: BaseVectorStore): self.vector_store = vector_store - def create(self, index_name: str, documents: List[Document]) -> List[str]: + def index(self, index_name: str, documents: List[Document]) -> List[str]: """Index new documents.""" return self.vector_store.index_documents(index_name, documents) - def read(self, index_name: str, query: str, top_k: int, llm_params: dict): + def query(self, index_name: str, query: str, top_k: int, llm_params: dict): """Query the indexed documents.""" return self.vector_store.query(index_name, query, top_k, llm_params)