From 7def8a46369e6a1a16647cff3874cde1cd38b588 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 23 Dec 2024 15:14:57 +0000 Subject: [PATCH 01/10] Add Mocked Pinecone object and mock 2 tests. --- DESCRIPTION | 3 +- tests/testthat/helper-fixtures.R | 32 ++++++++++++++++++++ tests/testthat/setup.R | 51 ++++++++++++++++++++++++++++++++ tests/testthat/test-Pinecone.R | 4 +-- 4 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/helper-fixtures.R diff --git a/DESCRIPTION b/DESCRIPTION index 5bee9d5..afe1d34 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitAI Title: Extracts Knowledge From Git Repositories -Version: 0.0.0.9011 +Version: 0.0.0.9012 Authors@R: c( person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")), person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"), @@ -30,4 +30,3 @@ Suggests: shiny, withr Config/testthat/edition: 3 -Config/testthat/parallel: true diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R new file mode 100644 index 0000000..0271fba --- /dev/null +++ b/tests/testthat/helper-fixtures.R @@ -0,0 +1,32 @@ +test_fixtures <- list() + +test_fixtures[["pinecone_index_response"]] <- list( + "name" = "gitai", + "metric" = "cosine", + "dimension" = 1024L, + "status" = list( + "ready" = TRUE, + "state" = "Ready" + ), + "host" = "gitai-test-host", + "spec" = list( + "serverless" = list( + "region" = "us-east-1", + "cloud" = "aws" + ) + ) +) + +test_fixtures[["embeddings"]] <- list( + "model" = "multilingual-e5-large", + "data" = list( + list( + "values" = list( + runif(1024L, -1, 1) |> as.list() + ) + ) + ), + "usage" = list( + "total_tokens" = 78L + ) +) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 1f8ac48..20b5ee4 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -84,3 +84,54 @@ chat_bedrock_mocked <- function(system_prompt = NULL, provider_class = elmer:::ProviderBedrock ) } + +PineconeMocked 
<- R6::R6Class( + "PineconeMocked", + inherit = Pinecone, + public = list( + get_index_metadata = function() { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://api.pinecone.io/indexes/", private$.index) + + httr2::request(url) |> + httr2::req_headers("Api-Key" = pinecone_api_key) |> + httr2::req_dry_run(quiet = TRUE) + test_fixtures[["pinecone_index_response"]] + } + ), + + private = list( + .get_embeddings = function(text) { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- "https://api.pinecone.io" + + body <- list( + model = "multilingual-e5-large", + parameters = list( + input_type = "passage", + truncate = "END" + ), + inputs = list( + list(text = text) + ) + ) + + request <- httr2::request(url) |> + httr2::req_url_path_append("embed") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) + + response <- request |> + httr2::req_dry_run(quiet = TRUE) + + response_body <- test_fixtures[["embeddings"]] + + response_body$data[[1]]$values |> unlist() + } + ) +) diff --git a/tests/testthat/test-Pinecone.R b/tests/testthat/test-Pinecone.R index f18b741..df4ba28 100644 --- a/tests/testthat/test-Pinecone.R +++ b/tests/testthat/test-Pinecone.R @@ -1,6 +1,6 @@ test_that("getting index metadata", { - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) @@ -11,7 +11,7 @@ test_that("getting index metadata", { test_that("getting embeddings", { - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) From bd892f6f12d1bbb4648d10d9b2554fd61285a915 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Mon, 23 Dec 2024 15:15:12 +0000 Subject: [PATCH 02/10] Add an example workflow. 
--- inst/example_workflow.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 inst/example_workflow.R diff --git a/inst/example_workflow.R b/inst/example_workflow.R new file mode 100644 index 0000000..5076610 --- /dev/null +++ b/inst/example_workflow.R @@ -0,0 +1,12 @@ +# remotes::install_github("r-world-devs/GitStats@devel") + +gitai_demo <- initialize_project("gitai-demo-2") |> + set_database(index = "gitai-mb") |> + set_github_repos( + orgs = "pharmaverse" + ) |> + add_files(files = "\\.md") |> + set_llm() |> + set_prompt("Provide a one-two sentence description of the product based on input.") + +process_repos(gitai_demo) From 8f41fcc13e67a9668346e3260a684f8ef6ed50ea Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 09:47:40 +0000 Subject: [PATCH 03/10] Move PineconeMocked class and fixtures to test helpers file, mock two tests. --- R/test-helpers.R | 212 +++++++++++++++++++++++++++++ tests/testthat/helper-fixtures.R | 31 ----- tests/testthat/setup.R | 51 ------- tests/testthat/test-Pinecone.R | 8 +- tests/testthat/test-set_database.R | 14 +- 5 files changed, 222 insertions(+), 94 deletions(-) diff --git a/R/test-helpers.R b/R/test-helpers.R index b1d3010..2153254 100644 --- a/R/test-helpers.R +++ b/R/test-helpers.R @@ -19,3 +19,215 @@ Mocker <- R6::R6Class( } ) ) + +PineconeMocked <- R6::R6Class( + "PineconeMocked", + inherit = Pinecone, + public = list( + get_index_metadata = function() { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://api.pinecone.io/indexes/", private$.index) + + httr2::request(url) |> + httr2::req_headers("Api-Key" = pinecone_api_key) |> + httr2::req_dry_run(quiet = TRUE) + test_fixtures[["pinecone_index_response"]] + }, + + write_record = function(id, text, metadata = list()) { + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + embeddings <- private$.get_embeddings(text = text) + + metadata$text <- text + + body 
<- list( + namespace = private$.namespace, + vectors = list( + id = id, + values = embeddings, + metadata = metadata + ) + ) + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors/upsert") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) + + response <- request |> + httr2::req_dry_run(quiet = TRUE) + + response_body <- list("upsertedCount" = 1) + response_body + }, + + read_record = function(id) { + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("fetch") |> + httr2::req_url_query( + ids = id, + namespace = private$.namespace + ) |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) + + response <- request |> + httr2::req_dry_run(quiet = TRUE) + + response_body <- test_fixtures[["read_record"]] + results <- response_body$vectors + + results + }, + + find_records = function(query, top_k = 1) { + + embeddings <- private$.get_embeddings(query) + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + body <- list( + namespace = private$.namespace, + vector = embeddings, + topK = top_k, + includeValues = FALSE, + includeMetadata = TRUE + ) + + request <- httr2::request(url) |> + httr2::req_url_path_append("query") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) + + response <- request |> + httr2::req_dry_run(quiet = TRUE) + + response_body <- test_fixtures[["matched_records"]] + results <- response_body$matches + + results |> + purrr::map(function(result) { + result$values <- NULL + result + }) + } + ), + + private = list( + .get_embeddings = function(text) { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- 
"https://api.pinecone.io" + + body <- list( + model = "multilingual-e5-large", + parameters = list( + input_type = "passage", + truncate = "END" + ), + inputs = list( + list(text = text) + ) + ) + + request <- httr2::request(url) |> + httr2::req_url_path_append("embed") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) + + response <- request |> + httr2::req_dry_run(quiet = TRUE) + + response_body <- test_fixtures[["embeddings"]] + + response_body$data[[1]]$values |> unlist() + } + ) +) + +test_fixtures <- list() + +test_fixtures[["pinecone_index_response"]] <- list( + "name" = "gitai", + "metric" = "cosine", + "dimension" = 1024L, + "status" = list( + "ready" = TRUE, + "state" = "Ready" + ), + "host" = "gitai-test-host", + "spec" = list( + "serverless" = list( + "region" = "us-east-1", + "cloud" = "aws" + ) + ) +) + +test_fixtures[["embeddings"]] <- list( + "model" = "multilingual-e5-large", + "data" = list( + list( + "values" = list( + runif(1024L, -1, 1) |> as.list() + ) + ) + ), + "usage" = list( + "total_tokens" = 78L + ) +) + +test_fixtures[["matched_records"]] <- list( + "results" = list(), + "matches" = list( + list( + "id" = "id_2", + "score" = 0.820673, + "values" = list(), + "metadata" = list( + "files" = c("test_file1", "test_file2"), + "repo_url" = "test_url", + "text" = "This package will best suite you.", + "timestamp" = Sys.Date() + ) + ) + ), + "namespace" = "gitai-tests", + "usage" = list("readUnits" = 10L) +) + +test_fixtures[["read_record"]] <- list( + "vectors" = list( + "TestProject" = list( + "values" = test_fixtures[["embeddings"]][["data"]][[1]]["values"], + "metadata" = test_fixtures[["matched_records"]][["matches"]][[1]][["metadata"]] + ) + ), + "namespace" = "gitai-tests", + "usage" = list("readUnits" = 1L) +) diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R index 0271fba..8b13789 100644 --- a/tests/testthat/helper-fixtures.R 
+++ b/tests/testthat/helper-fixtures.R @@ -1,32 +1 @@ -test_fixtures <- list() -test_fixtures[["pinecone_index_response"]] <- list( - "name" = "gitai", - "metric" = "cosine", - "dimension" = 1024L, - "status" = list( - "ready" = TRUE, - "state" = "Ready" - ), - "host" = "gitai-test-host", - "spec" = list( - "serverless" = list( - "region" = "us-east-1", - "cloud" = "aws" - ) - ) -) - -test_fixtures[["embeddings"]] <- list( - "model" = "multilingual-e5-large", - "data" = list( - list( - "values" = list( - runif(1024L, -1, 1) |> as.list() - ) - ) - ), - "usage" = list( - "total_tokens" = 78L - ) -) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 20b5ee4..1f8ac48 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -84,54 +84,3 @@ chat_bedrock_mocked <- function(system_prompt = NULL, provider_class = elmer:::ProviderBedrock ) } - -PineconeMocked <- R6::R6Class( - "PineconeMocked", - inherit = Pinecone, - public = list( - get_index_metadata = function() { - pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - - url <- paste0("https://api.pinecone.io/indexes/", private$.index) - - httr2::request(url) |> - httr2::req_headers("Api-Key" = pinecone_api_key) |> - httr2::req_dry_run(quiet = TRUE) - test_fixtures[["pinecone_index_response"]] - } - ), - - private = list( - .get_embeddings = function(text) { - pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - - url <- "https://api.pinecone.io" - - body <- list( - model = "multilingual-e5-large", - parameters = list( - input_type = "passage", - truncate = "END" - ), - inputs = list( - list(text = text) - ) - ) - - request <- httr2::request(url) |> - httr2::req_url_path_append("embed") |> - httr2::req_headers( - "Api-Key" = pinecone_api_key, - "X-Pinecone-API-Version" = "2024-10" - ) |> - httr2::req_body_json(body) - - response <- request |> - httr2::req_dry_run(quiet = TRUE) - - response_body <- test_fixtures[["embeddings"]] - - response_body$data[[1]]$values |> unlist() - } - ) -) diff --git 
a/tests/testthat/test-Pinecone.R b/tests/testthat/test-Pinecone.R index df4ba28..514e8dc 100644 --- a/tests/testthat/test-Pinecone.R +++ b/tests/testthat/test-Pinecone.R @@ -24,7 +24,7 @@ test_that("getting embeddings", { test_that("writting records", { - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) @@ -51,9 +51,7 @@ test_that("writting records", { test_that("finding records", { - Sys.sleep(3) - - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) @@ -78,7 +76,7 @@ test_that("finding records", { test_that("reading records", { - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) diff --git a/tests/testthat/test-set_database.R b/tests/testthat/test-set_database.R index f3dee7a..202c244 100644 --- a/tests/testthat/test-set_database.R +++ b/tests/testthat/test-set_database.R @@ -1,11 +1,11 @@ test_that("setting database provider with default namespace", { - + gitai <- initialize_project("gitai-demo") |> set_database( - provider = "Pinecone", + provider = "PineconeMocked", index = "gitai" - ) - + ) + gitai$db$index |> expect_equal("gitai") gitai$db$namespace |> expect_equal("gitai-demo") }) @@ -14,11 +14,11 @@ test_that("setting database provider with custom namepsace", { gitai <- initialize_project("gitai-demo") |> set_database( - provider = "Pinecone", + provider = "PineconeMocked", index = "gitai", namespace = "test_namespace" - ) - + ) + gitai$db$index |> expect_equal("gitai") gitai$db$namespace |> expect_equal("test_namespace") }) From 1e4103d235132a8258689c0beb9a5f96c132633b Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 10:35:21 +0000 Subject: [PATCH 04/10] Change the structure of the mock a bit: use response_json instead of req_perform, leave other code intact (notably still return the resp_body_json).
--- R/test-helpers.R | 36 ++++++++++++++++++-------------- tests/testthat/helper-fixtures.R | 1 - 2 files changed, 20 insertions(+), 17 deletions(-) delete mode 100644 tests/testthat/helper-fixtures.R diff --git a/R/test-helpers.R b/R/test-helpers.R index 2153254..59a94b8 100644 --- a/R/test-helpers.R +++ b/R/test-helpers.R @@ -29,10 +29,10 @@ PineconeMocked <- R6::R6Class( url <- paste0("https://api.pinecone.io/indexes/", private$.index) - httr2::request(url) |> - httr2::req_headers("Api-Key" = pinecone_api_key) |> - httr2::req_dry_run(quiet = TRUE) - test_fixtures[["pinecone_index_response"]] + response <- httr2::response_json( + body = test_fixtures[["pinecone_index_response"]] + ) + httr2::resp_body_json(response) }, write_record = function(id, text, metadata = list()) { @@ -62,10 +62,11 @@ PineconeMocked <- R6::R6Class( ) |> httr2::req_body_json(body) - response <- request |> - httr2::req_dry_run(quiet = TRUE) + response <- httr2::response_json( + body = list("upsertedCount" = 1) + ) - response_body <- list("upsertedCount" = 1) + response_body <- httr2::resp_body_json(response) response_body }, @@ -87,10 +88,11 @@ PineconeMocked <- R6::R6Class( "X-Pinecone-API-Version" = "2024-10" ) - response <- request |> - httr2::req_dry_run(quiet = TRUE) + response <- httr2::response_json( + body = test_fixtures[["read_record"]] + ) - response_body <- test_fixtures[["read_record"]] + response_body <- httr2::resp_body_json(response) results <- response_body$vectors results @@ -120,10 +122,11 @@ PineconeMocked <- R6::R6Class( ) |> httr2::req_body_json(body) - response <- request |> - httr2::req_dry_run(quiet = TRUE) + response <- httr2::response_json( + body = test_fixtures[["matched_records"]] + ) - response_body <- test_fixtures[["matched_records"]] + response_body <- httr2::resp_body_json(response) results <- response_body$matches results |> @@ -159,10 +162,11 @@ PineconeMocked <- R6::R6Class( ) |> httr2::req_body_json(body) - response <- request |> - 
httr2::req_dry_run(quiet = TRUE) + response <- httr2::response_json( + body = test_fixtures[["embeddings"]] + ) - response_body <- test_fixtures[["embeddings"]] + response_body <- httr2::resp_body_json(response) response_body$data[[1]]$values |> unlist() } diff --git a/tests/testthat/helper-fixtures.R b/tests/testthat/helper-fixtures.R deleted file mode 100644 index 8b13789..0000000 --- a/tests/testthat/helper-fixtures.R +++ /dev/null @@ -1 +0,0 @@ - From a48e62127f1f33379a8dbf36746c3e6409342449 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 10:35:50 +0000 Subject: [PATCH 05/10] Remove test that would need an actual connection to Pinecone. --- tests/testthat/test-Pinecone.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/testthat/test-Pinecone.R b/tests/testthat/test-Pinecone.R index 514e8dc..7350ff1 100644 --- a/tests/testthat/test-Pinecone.R +++ b/tests/testthat/test-Pinecone.R @@ -66,12 +66,6 @@ test_that("finding records", { result[[1]]$metadata$text |> is.character() |> expect_true() result[[1]]$score |> is.numeric() |> expect_true() - result_2 <- db$find_records( - query = "Tell me about apple fruit.", - top_k = 1 - ) - - expect_false(result_2[[1]]$id == result[[1]]$id) }) test_that("reading records", { From 4b35c55338912b8651c655b2f2cf8cd3cd6e8d35 Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 10:41:39 +0000 Subject: [PATCH 06/10] Bump version.
--- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index afe1d34..905d8ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitAI Title: Extracts Knowledge From Git Repositories -Version: 0.0.0.9012 +Version: 0.0.0.9013 Authors@R: c( person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")), person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"), From b6fdc4b7c411d59f3df471a905a5d01fe80c503a Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 10:42:06 +0000 Subject: [PATCH 07/10] Add an example workflow. --- inst/example_workflow.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/inst/example_workflow.R b/inst/example_workflow.R index 5076610..d90acec 100644 --- a/inst/example_workflow.R +++ b/inst/example_workflow.R @@ -1,12 +1,15 @@ -# remotes::install_github("r-world-devs/GitStats@devel") - -gitai_demo <- initialize_project("gitai-demo-2") |> - set_database(index = "gitai-mb") |> +gitai_demo <- initialize_project("gitai-tests") |> + set_database(index = "gitai-mb", + namespace = "gitai-demo-2") |> set_github_repos( - orgs = "pharmaverse" + orgs = "r-world-devs" ) |> add_files(files = "\\.md") |> set_llm() |> set_prompt("Provide a one-two sentence description of the product based on input.") process_repos(gitai_demo) + +gitai_demo$db$find_records("Find package with which I can plot data.") + +gitai_demo$db$read_record("GitStats") From 1532e95c5c320d3827c422f3a8b3436c06b943ac Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 14:38:26 +0000 Subject: [PATCH 08/10] Add new method to list record IDs. 
--- R/Pinecone.R | 154 ++++++++++++++++++++------------- R/test-helpers.R | 47 ++++++++++ tests/testthat/test-Pinecone.R | 13 +++ 3 files changed, 155 insertions(+), 59 deletions(-) diff --git a/R/Pinecone.R b/R/Pinecone.R index 13c86c4..003f369 100644 --- a/R/Pinecone.R +++ b/R/Pinecone.R @@ -8,23 +8,23 @@ Pinecone <- R6::R6Class( pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") url <- paste0("https://api.pinecone.io/indexes/", private$.index) - - httr2::request(url) |> - httr2::req_headers("Api-Key" = pinecone_api_key) |> - httr2::req_perform() |> + + httr2::request(url) |> + httr2::req_headers("Api-Key" = pinecone_api_key) |> + httr2::req_perform() |> httr2::resp_body_json() - }, - + }, + write_record = function(id, text, metadata = list()) { - - pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - - url <- paste0("https://", private$.index_host) - + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + embeddings <- private$.get_embeddings(text = text) - + metadata$text <- text - + body <- list( namespace = private$.namespace, vectors = list( @@ -33,18 +33,18 @@ Pinecone <- R6::R6Class( metadata = metadata ) ) - - request <- httr2::request(url) |> - httr2::req_url_path_append("vectors/upsert") |> + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors/upsert") |> httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) |> - httr2::req_body_json(body) - - response <- request |> + ) |> + httr2::req_body_json(body) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) response_body }, @@ -52,9 +52,9 @@ Pinecone <- R6::R6Class( read_record = function(id) { pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + url <- paste0("https://", private$.index_host) - + request <- httr2::request(url) |> httr2::req_url_path_append("vectors") |> httr2::req_url_path_append("fetch") |> @@ -65,26 +65,26 @@ Pinecone <- 
R6::R6Class( httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) - - response <- request |> + ) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) results <- response_body$vectors - - results + + results }, - + find_records = function(query, top_k = 1) { - + embeddings <- private$.get_embeddings(query) - + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + url <- paste0("https://", private$.index_host) - + body <- list( namespace = private$.namespace, vector = embeddings, @@ -92,7 +92,7 @@ Pinecone <- R6::R6Class( includeValues = FALSE, includeMetadata = TRUE ) - + request <- httr2::request(url) |> httr2::req_url_path_append("query") |> httr2::req_headers( @@ -100,23 +100,59 @@ Pinecone <- R6::R6Class( "X-Pinecone-API-Version" = "2024-10" ) |> httr2::req_body_json(body) - - response <- request |> + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) results <- response_body$matches - - results |> + + results |> purrr::map(function(result) { result$values <- NULL result }) + }, + + list_record_IDs = function() { + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + response_body <- NULL + has_next_page <- TRUE + record_ids <- c() + + while (has_next_page) { + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("list") |> + httr2::req_url_query( + namespace = private$.namespace, + paginationToken = response_body$pagination$`next` + ) |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) + + response <- request |> + httr2::req_perform() + + response_body <- httr2::resp_body_json(response) + record_ids <- c(record_ids, + purrr::map_vec(response_body$vectors, ~ .$id)) + has_next_page <- "pagination" %in% names(response_body) + } + + return(record_ids) } ), active = list( - + 
namespace = function(value) { if (missing(value)) return(private$.namespace) private$.namespace <- value @@ -127,14 +163,14 @@ Pinecone <- R6::R6Class( private$.index <- value } ), - + private = list( - + .project_id = NULL, .index = NULL, .namespace = NULL, .index_host = NULL, - + .initialize = function(index, namespace) { private$.index <- index @@ -143,37 +179,37 @@ Pinecone <- R6::R6Class( }, .get_embeddings = function(text) { - - pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + url <- "https://api.pinecone.io" - + body <- list( model = "multilingual-e5-large", parameters = list( input_type = "passage", truncate = "END" - ), + ), inputs = list( list(text = text) - ) + ) ) - request <- httr2::request(url) |> - httr2::req_url_path_append("embed") |> + request <- httr2::request(url) |> + httr2::req_url_path_append("embed") |> httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) |> - httr2::req_body_json(body) - - response <- request |> + ) |> + httr2::req_body_json(body) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) - + response_body$data[[1]]$values |> unlist() - + } ) ) diff --git a/R/test-helpers.R b/R/test-helpers.R index 59a94b8..3cd5c56 100644 --- a/R/test-helpers.R +++ b/R/test-helpers.R @@ -134,6 +134,31 @@ PineconeMocked <- R6::R6Class( result$values <- NULL result }) + }, + + list_record_IDs = function() { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("list") |> + httr2::req_url_query( + namespace = private$.namespace + ) |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) + + response <- httr2::response_json( + body = test_fixtures[["list_record_IDs"]] + ) + + response_body <- 
httr2::resp_body_json(response) + + purrr::map_vec(response_body$vectors, ~ .$id) } ), @@ -235,3 +260,25 @@ test_fixtures[["read_record"]] <- list( "namespace" = "gitai-tests", "usage" = list("readUnits" = 1L) ) + +test_fixtures[["list_record_IDs"]] <- list( + "vectors" = list( + list( + "id" = "project_1" + ), + list( + "id" = "project_2" + ), + list( + "id" = "project_3" + ), + list( + "id" = "project_4" + ), + list( + "id" = "project_5" + ) + ), + "namespace" = "gitai-tests", + "usage" = list("readUnits" = 1L) +) diff --git a/tests/testthat/test-Pinecone.R b/tests/testthat/test-Pinecone.R index 7350ff1..0fd4296 100644 --- a/tests/testthat/test-Pinecone.R +++ b/tests/testthat/test-Pinecone.R @@ -81,3 +81,16 @@ test_that("reading records", { is.character() |> expect_true() }) + +test_that("listing all records IDs", { + + db <- PineconeMocked$new( + namespace = "test_project_id", + index = "gitai" + ) + + result <- db$list_record_IDs() + + expect_type(result, "character") + expect_gt(length(result), 1) +}) From 0186226318838150b8c0fe358977f2027d0705ad Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 14:44:27 +0000 Subject: [PATCH 09/10] Bump version. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 905d8ba..1d98d80 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitAI Title: Extracts Knowledge From Git Repositories -Version: 0.0.0.9013 +Version: 0.0.0.9014 Authors@R: c( person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")), person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"), From ec757eba6d12bfe67c1fe36d5636edf139217b6e Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 15:21:17 +0000 Subject: [PATCH 10/10] Add method to purge records from namespace. 
--- DESCRIPTION | 2 +- R/Pinecone.R | 21 +++++++++++++++++++++ inst/example_workflow.R | 9 ++++++--- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1d98d80..c78c2b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitAI Title: Extracts Knowledge From Git Repositories -Version: 0.0.0.9014 +Version: 0.0.0.9015 Authors@R: c( person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")), person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"), diff --git a/R/Pinecone.R b/R/Pinecone.R index 003f369..e0e4fbc 100644 --- a/R/Pinecone.R +++ b/R/Pinecone.R @@ -148,6 +148,27 @@ Pinecone <- R6::R6Class( } return(record_ids) + }, + + purge_records = function(ids) { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + body <- list( + ids = ids, + namespace = private$.namespace + ) + + httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("delete") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) |> + httr2::req_perform() } ), diff --git a/inst/example_workflow.R b/inst/example_workflow.R index d90acec..bc5ae28 100644 --- a/inst/example_workflow.R +++ b/inst/example_workflow.R @@ -1,6 +1,5 @@ -gitai_demo <- initialize_project("gitai-tests") |> - set_database(index = "gitai-mb", - namespace = "gitai-demo-2") |> +gitai_demo <- initialize_project("gitai-demo-2") |> + set_database(index = "gitai-mb") |> set_github_repos( orgs = "r-world-devs" ) |> @@ -13,3 +12,7 @@ process_repos(gitai_demo) gitai_demo$db$find_records("Find package with which I can plot data.") gitai_demo$db$read_record("GitStats") + +record_ids <- gitai_demo$db$list_record_IDs() + +gitai_demo$db$purge_records(record_ids)