Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Maciekbanas/81/prepare mocks for testing vectordatabase #83

Merged
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GitAI
Title: Extracts Knowledge From Git Repositories
Version: 0.0.0.9012
Version: 0.0.0.9013
Authors@R: c(
person("Kamil", "Wais", , "[email protected]", role = c("aut", "cre")),
person("Krystian", "Igras", , "[email protected]", role = "aut"),
Expand Down Expand Up @@ -30,4 +30,3 @@ Suggests:
shiny,
withr
Config/testthat/edition: 3
Config/testthat/parallel: true
216 changes: 216 additions & 0 deletions R/test-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,219 @@ Mocker <- R6::R6Class(
}
)
)

# Test double for `Pinecone`.
#
# Every method below returns a canned payload instead of talking to a remote
# service: no HTTP request is built or sent, and no API key is read. Each
# payload is still serialised with `httr2::response_json()` and parsed back
# with `httr2::resp_body_json()`, so callers receive the structure a parsed
# JSON response would have (e.g. atomic vectors come back as lists) rather
# than the raw R fixture.
#
# The payloads are looked up in the `test_fixtures` list at call time, so
# `test_fixtures` only has to exist when a method is invoked, not when this
# class is defined.
PineconeMocked <- R6::R6Class(
  "PineconeMocked",
  inherit = Pinecone,
  public = list(
    # Return the canned index description
    # (`test_fixtures[["pinecone_index_response"]]`).
    get_index_metadata = function() {
      private$.mock_response_body(
        test_fixtures[["pinecone_index_response"]]
      )
    },

    # Pretend to upsert a single record.
    #
    # `id`, `text` and `metadata` are kept so the method signature is
    # unchanged; the mock ignores them and always reports one upserted record.
    write_record = function(id, text, metadata = list()) {
      private$.mock_response_body(list("upsertedCount" = 1))
    },

    # Return the `vectors` element of the canned fetch payload
    # (`test_fixtures[["read_record"]]`). `id` is ignored by the mock.
    read_record = function(id) {
      response_body <- private$.mock_response_body(
        test_fixtures[["read_record"]]
      )
      response_body$vectors
    },

    # Return the `matches` of the canned query payload
    # (`test_fixtures[["matched_records"]]`) with each match's `values`
    # element removed. `query` and `top_k` are ignored by the mock.
    find_records = function(query, top_k = 1) {
      response_body <- private$.mock_response_body(
        test_fixtures[["matched_records"]]
      )

      response_body$matches |>
        purrr::map(function(result) {
          result$values <- NULL
          result
        })
    }
  ),

  private = list(
    # Return the canned embedding (`test_fixtures[["embeddings"]]`) as a
    # flat vector. `text` is ignored by the mock.
    .get_embeddings = function(text) {
      response_body <- private$.mock_response_body(
        test_fixtures[["embeddings"]]
      )
      response_body$data[[1]]$values |> unlist()
    },

    # Round-trip `body` through a mocked JSON response and return the parsed
    # result, i.e. what a caller would see after parsing a real response.
    .mock_response_body = function(body) {
      httr2::response_json(body = body) |>
        httr2::resp_body_json()
    }
  )
)

# Canned payloads returned by the mocked database class in tests.
# Each entry is a plain R list that is later serialised to JSON.
test_fixtures <- list()

# Index description payload.
test_fixtures[["pinecone_index_response"]] <- list(
  "name" = "gitai",
  "metric" = "cosine",
  "dimension" = 1024L,
  "status" = list(
    "ready" = TRUE,
    "state" = "Ready"
  ),
  "host" = "gitai-test-host",
  "spec" = list(
    "serverless" = list(
      "region" = "us-east-1",
      "cloud" = "aws"
    )
  )
)

# Embedding payload: a single embedding of 1024 values.
# `values` is a flat list of numbers (one JSON array), not a list wrapped in
# another list. The numbers are random and differ between sessions, so tests
# should assert on shape and type, not on concrete values.
test_fixtures[["embeddings"]] <- list(
  "model" = "multilingual-e5-large",
  "data" = list(
    list(
      "values" = runif(1024L, -1, 1) |> as.list()
    )
  ),
  "usage" = list(
    "total_tokens" = 78L
  )
)

# Query payload with a single match.
test_fixtures[["matched_records"]] <- list(
  "results" = list(),
  "matches" = list(
    list(
      "id" = "id_2",
      "score" = 0.820673,
      "values" = list(),
      "metadata" = list(
        "files" = c("test_file1", "test_file2"),
        "repo_url" = "test_url",
        "text" = "This package will best suite you.",
        "timestamp" = Sys.Date()
      )
    )
  ),
  "namespace" = "gitai-tests",
  "usage" = list("readUnits" = 10L)
)

# Fetch payload: one stored vector keyed by its record id, reusing the
# embedding values and the match metadata defined above.
# `[["values"]]` (double bracket) extracts the values themselves; a single
# bracket would return a one-element list wrapper named "values" and nest
# the payload one level too deep.
test_fixtures[["read_record"]] <- list(
  "vectors" = list(
    "TestProject" = list(
      "values" = test_fixtures[["embeddings"]][["data"]][[1]][["values"]],
      "metadata" =
        test_fixtures[["matched_records"]][["matches"]][[1]][["metadata"]]
    )
  ),
  "namespace" = "gitai-tests",
  "usage" = list("readUnits" = 1L)
)
15 changes: 15 additions & 0 deletions inst/example_workflow.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Example workflow: configure a project, process its repositories, then
# query the database attached to the project.

# Project configuration, built as a single pipeline.
gitai_demo <-
  initialize_project("gitai-tests") |>
  set_database(
    index = "gitai-mb",
    namespace = "gitai-demo-2"
  ) |>
  set_github_repos(orgs = "r-world-devs") |>
  add_files(files = "\\.md") |>
  set_llm() |>
  set_prompt(
    "Provide a one-two sentence description of the product based on input."
  )

# Run the configured project over the selected repositories.
process_repos(gitai_demo)

# Look up records matching a free-text query.
gitai_demo$db$find_records("Find package with which I can plot data.")

# Read a single record by its id.
gitai_demo$db$read_record("GitStats")
18 changes: 5 additions & 13 deletions tests/testthat/test-Pinecone.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
test_that("getting index metadata", {

db <- Pinecone$new(
db <- PineconeMocked$new(
namespace = "test_project_id",
index = "gitai"
)
Expand All @@ -11,7 +11,7 @@ test_that("getting index metadata", {

test_that("getting embeddings", {

db <- Pinecone$new(
db <- PineconeMocked$new(
namespace = "test_project_id",
index = "gitai"
)
Expand All @@ -24,7 +24,7 @@ test_that("getting embeddings", {

test_that("writting records", {

db <- Pinecone$new(
db <- PineconeMocked$new(
namespace = "test_project_id",
index = "gitai"
)
Expand All @@ -51,9 +51,7 @@ test_that("writting records", {

test_that("finding records", {

Sys.sleep(3)

db <- Pinecone$new(
db <- PineconeMocked$new(
namespace = "test_project_id",
index = "gitai"
)
Expand All @@ -68,17 +66,11 @@ test_that("finding records", {
result[[1]]$metadata$text |> is.character() |> expect_true()
result[[1]]$score |> is.numeric() |> expect_true()

result_2 <- db$find_records(
query = "Tell me about apple fruit.",
top_k = 1
)

expect_false(result_2[[1]]$id == result[[1]]$id)
})

test_that("reading records", {

db <- Pinecone$new(
db <- PineconeMocked$new(
namespace = "test_project_id",
index = "gitai"
)
Expand Down
14 changes: 7 additions & 7 deletions tests/testthat/test-set_database.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
test_that("setting database provider with default namespace", {

gitai <- initialize_project("gitai-demo") |>
set_database(
provider = "Pinecone",
provider = "PineconeMocked",
index = "gitai"
)
)

gitai$db$index |> expect_equal("gitai")
gitai$db$namespace |> expect_equal("gitai-demo")
})
Expand All @@ -14,11 +14,11 @@ test_that("setting database provider with custom namepsace", {

gitai <- initialize_project("gitai-demo") |>
set_database(
provider = "Pinecone",
provider = "PineconeMocked",
index = "gitai",
namespace = "test_namespace"
)
)

gitai$db$index |> expect_equal("gitai")
gitai$db$namespace |> expect_equal("test_namespace")
})
Loading