diff --git a/ee/tabby-db/src/lib.rs b/ee/tabby-db/src/lib.rs
index fc91d0775ec..b4e10f9ceea 100644
--- a/ee/tabby-db/src/lib.rs
+++ b/ee/tabby-db/src/lib.rs
@@ -21,6 +21,7 @@ use user_completions::UserCompletionDailyStatsDAO;
 pub use user_events::UserEventDAO;
 pub use users::UserDAO;
 pub use web_crawler::WebCrawlerUrlDAO;
+pub use web_documents::WebDocumentDAO;
 
 pub mod cache;
 mod email_setting;
diff --git a/ee/tabby-schema/graphql/schema.graphql b/ee/tabby-schema/graphql/schema.graphql
index 667c3e145b9..4c570ca87f3 100644
--- a/ee/tabby-schema/graphql/schema.graphql
+++ b/ee/tabby-schema/graphql/schema.graphql
@@ -99,6 +99,11 @@ input CodeSearchParamsOverrideInput {
   numToScore: Int
 }
 
+input CreateCustomDocumentInput {
+  name: String!
+  url: String!
+}
+
 input CreateIntegrationInput {
   displayName: String!
   accessToken: String!
@@ -182,6 +187,11 @@ input SecuritySettingInput {
   disableClientSideTelemetry: Boolean!
 }
 
+input SetPresetDocumentActiveInput {
+  name: String!
+  active: Boolean!
+}
+
 input ThreadRunDebugOptionsInput {
   codeSearchParamsOverride: CodeSearchParamsOverrideInput = null
 }
@@ -232,6 +242,24 @@ type CompletionStats {
   selects: Int!
 }
 
+type CustomDocumentConnection {
+  edges: [CustomDocumentEdge!]!
+  pageInfo: PageInfo!
+}
+
+type CustomDocumentEdge {
+  node: CustomWebDocument!
+  cursor: String!
+}
+
+type CustomWebDocument {
+  url: String!
+  name: String!
+  id: ID!
+  createdAt: DateTime!
+  jobInfo: JobInfo!
+}
+
 type DiskUsage {
   filepath: [String!]!
   "Size in kilobytes."
@@ -475,6 +503,9 @@ type Mutation {
   triggerJobRun(command: String!): ID!
   createWebCrawlerUrl(input: CreateWebCrawlerUrlInput!): ID!
   deleteWebCrawlerUrl(id: ID!): Boolean!
+  createCustomDocument(input: CreateCustomDocumentInput!): ID!
+  deleteCustomDocument(id: ID!): Boolean!
+  setPresetDocumentActive(input: SetPresetDocumentActiveInput!): ID!
 }
 
 type NetworkSetting {
@@ -495,6 +526,24 @@ type PageInfo {
   endCursor: String
 }
 
+type PresetDocumentConnection {
+  edges: [PresetDocumentEdge!]!
+  pageInfo: PageInfo!
+}
+
+type PresetDocumentEdge {
+  node: PresetWebDocument!
+  cursor: String!
+}
+
+type PresetWebDocument {
+  name: String!
+  id: ID!
+  active: Boolean!
+  updatedAt: DateTime
+  jobInfo: JobInfo
+}
+
 type ProvidedRepository {
   id: ID!
   integrationId: ID!
@@ -566,6 +615,8 @@ type Query {
  """
  Thread is public within an instance, so no need to check for ownership.
  """
  threadMessages(threadId: ID!, after: String, before: String, first: Int, last: Int): MessageConnection!
+  customWebDocuments(after: String, before: String, first: Int, last: Int): CustomDocumentConnection!
+  presetWebDocuments(after: String, before: String, first: Int, last: Int, active: Boolean!): PresetDocumentConnection!
 }
 
 type RefreshTokenResponse {
diff --git a/ee/tabby-schema/src/schema/constants.rs b/ee/tabby-schema/src/schema/constants.rs
index 66270f823e2..98c9f227edb 100644
--- a/ee/tabby-schema/src/schema/constants.rs
+++ b/ee/tabby-schema/src/schema/constants.rs
@@ -5,6 +5,7 @@ lazy_static! {
     pub static ref REPOSITORY_NAME_REGEX: Regex = Regex::new("^[a-zA-Z][\\w.-]+$").unwrap();
     pub static ref USERNAME_REGEX: Regex =
         Regex::new(r"^[^0-9±!@£$%^&*_+§¡€#¢¶•ªº«\\/<>?:;|=.,]{2,20}$").unwrap();
+    pub static ref WEB_DOCUMENT_NAME_REGEX: Regex = Regex::new(r"^[A-Za-z][A-Za-z0-9\ ]*$").unwrap();
 }
 
 #[cfg(test)]
@@ -40,4 +41,22 @@ mod tests {
             assert_eq!(result, expected, "Failed for name: {}", name);
         }
     }
+
+    #[test]
+    fn test_web_document_name_regex() {
+        let test_cases = vec![
+            ("John", true),    // English name
+            ("Müller", false), // German name
+            ("abc123", true),
+            ("Abc 123", true),
+            (" abc 123", false),
+            ("abc123*", false),
+            ("abc123_", false),
+        ];
+
+        for (name, expected) in test_cases {
+            let result = WEB_DOCUMENT_NAME_REGEX.is_match(name);
+            assert_eq!(result, expected, "Failed for name: {}", name);
+        }
+    }
 }
diff --git a/ee/tabby-schema/src/schema/mod.rs b/ee/tabby-schema/src/schema/mod.rs
index 6d396d14be8..fe39b351ec3 100644
--- a/ee/tabby-schema/src/schema/mod.rs
+++ b/ee/tabby-schema/src/schema/mod.rs
@@ -10,6 +10,7 @@ pub mod setting;
 pub mod thread;
 pub mod user_event;
 pub mod web_crawler;
+pub mod web_documents;
 pub mod worker;
 
 use std::sync::Arc;
@@ -51,7 +52,9 @@ use self::{
     },
     user_event::{UserEvent, UserEventService},
     web_crawler::{CreateWebCrawlerUrlInput, WebCrawlerService, WebCrawlerUrl},
+    web_documents::{CreateCustomDocumentInput, CustomWebDocument, WebDocumentService},
 };
+use crate::web_documents::{PresetWebDocument, SetPresetDocumentActiveInput};
 use crate::{
     env,
     juniper::relay::{self, query_async, Connection},
@@ -71,6 +74,7 @@ pub trait ServiceLocator: Send + Sync {
     fn analytic(&self) -> Arc<dyn AnalyticService>;
     fn user_event(&self) -> Arc<dyn UserEventService>;
     fn web_crawler(&self) -> Arc<dyn WebCrawlerService>;
+    fn web_documents(&self) -> Arc<dyn WebDocumentService>;
     fn thread(&self) -> Arc<dyn ThreadService>;
 }
 
@@ -564,6 +568,50 @@ impl Query {
         )
         .await
     }
+
+    async fn custom_web_documents(
+        ctx: &Context,
+        after: Option<String>,
+        before: Option<String>,
+        first: Option<i32>,
+        last: Option<i32>,
+    ) -> Result<Connection<CustomWebDocument>> {
+        query_async(
+            after,
+            before,
+            first,
+            last,
+            |after, before, first, last| async move {
+                ctx.locator
+                    .web_documents()
+                    .list_custom_web_documents(after, before, first, last)
+                    .await
+            },
+        )
+        .await
+    }
+    async fn preset_web_documents(
+        ctx: &Context,
+        after: Option<String>,
+        before: Option<String>,
+        first: Option<i32>,
+        last: Option<i32>,
+        active: bool,
+    ) -> Result<Connection<PresetWebDocument>> {
+        query_async(
+            after,
+            before,
+            first,
+            last,
+            |after, before, first, last| async move {
+                ctx.locator
+                    .web_documents()
+                    .list_preset_web_documents(after, before, first, last, active)
+                    .await
+            },
+        )
+        .await
+    }
 }
 
 #[derive(GraphQLObject)]
@@ -916,6 +964,37 @@ impl Mutation {
         ctx.locator.web_crawler().delete_web_crawler_url(id).await?;
         Ok(true)
     }
+
+    async fn create_custom_document(ctx: &Context, input: CreateCustomDocumentInput) -> Result<ID> {
+        input.validate()?;
+        let id = ctx
+            .locator
+            .web_documents()
+            .create_custom_web_document(input.name, input.url)
+            .await?;
+        Ok(id)
+    }
+
+    async fn delete_custom_document(ctx: &Context, id: ID) -> Result<bool> {
+        ctx.locator
+            .web_documents()
+            .delete_custom_web_document(id)
+            .await?;
+        Ok(true)
+    }
+
+    async fn set_preset_document_active(
+        ctx: &Context,
+        input: SetPresetDocumentActiveInput,
+    ) -> Result<ID> {
+        input.validate()?;
+        let id = ctx
+            .locator
+            .web_documents()
+            .set_preset_web_documents_active(input.name, input.active)
+            .await?;
+        Ok(id)
+    }
 }
 
 async fn check_analytic_access(ctx: &Context, users: &[ID]) -> Result<(), CoreError> {
diff --git a/ee/tabby-schema/src/schema/web_documents.rs b/ee/tabby-schema/src/schema/web_documents.rs
new file mode 100644
index 00000000000..523016595d7
--- /dev/null
+++ b/ee/tabby-schema/src/schema/web_documents.rs
@@ -0,0 +1,115 @@
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use juniper::{GraphQLInputObject, GraphQLObject, ID};
+use validator::Validate;
+
+use crate::{job::JobInfo, juniper::relay::NodeType, Context, Result};
+
+#[derive(GraphQLObject)]
+#[graphql(context = Context)]
+pub struct CustomWebDocument {
+    pub url: String,
+    pub name: String,
+    pub id: ID,
+    pub created_at: DateTime<Utc>,
+    pub job_info: JobInfo,
+}
+
+#[derive(GraphQLObject)]
+#[graphql(context = Context)]
+pub struct PresetWebDocument {
+    pub name: String,
+    pub id: ID,
+    pub active: bool,
+    /// `updated_at` is only filled when the preset is active.
+    pub updated_at: Option<DateTime<Utc>>,
+    pub job_info: Option<JobInfo>,
+}
+
+impl CustomWebDocument {
+    pub fn source_id(&self) -> String {
+        Self::format_source_id(&self.id)
+    }
+
+    pub fn format_source_id(id: &ID) -> String {
+        format!("web_document:{}", id)
+    }
+}
+
+#[derive(Validate, GraphQLInputObject)]
+pub struct CreateCustomDocumentInput {
+    #[validate(regex(
+        code = "name",
+        path = "*crate::schema::constants::WEB_DOCUMENT_NAME_REGEX",
+        message = "Invalid document name"
+    ))]
+    pub name: String,
+    #[validate(url(code = "url", message = "Invalid URL"))]
+    pub url: String,
+}
+
+#[derive(Validate, GraphQLInputObject)]
+pub struct SetPresetDocumentActiveInput {
+    #[validate(regex(
+        code = "name",
+        path = "*crate::schema::constants::WEB_DOCUMENT_NAME_REGEX",
+        message = "Invalid document name"
+    ))]
+    pub name: String,
+    pub active: bool,
+}
+
+impl NodeType for CustomWebDocument {
+    type Cursor = String;
+
+    fn cursor(&self) -> Self::Cursor {
+        self.id.to_string()
+    }
+
+    fn connection_type_name() -> &'static str {
+        "CustomDocumentConnection"
+    }
+
+    fn edge_type_name() -> &'static str {
+        "CustomDocumentEdge"
+    }
+}
+
+impl NodeType for PresetWebDocument {
+    type Cursor = String;
+
+    fn cursor(&self) -> Self::Cursor {
+        self.name.clone()
+    }
+
+    fn connection_type_name() -> &'static str {
+        "PresetDocumentConnection"
+    }
+
+    fn edge_type_name() -> &'static str {
+        "PresetDocumentEdge"
+    }
+}
+
+#[async_trait]
+pub trait WebDocumentService: Send + Sync {
+    async fn list_custom_web_documents(
+        &self,
+        after: Option<String>,
+        before: Option<String>,
+        first: Option<usize>,
+        last: Option<usize>,
+    ) -> Result<Vec<CustomWebDocument>>;
+
+    async fn create_custom_web_document(&self, name: String, url: String) -> Result<ID>;
+    async fn delete_custom_web_document(&self, id: ID) -> Result<()>;
+    async fn list_preset_web_documents(
+        &self,
+        after: Option<String>,
+        before: Option<String>,
+        first: Option<usize>,
+        last: Option<usize>,
+        active: bool,
+    ) -> Result<Vec<PresetWebDocument>>;
+    async fn set_preset_web_documents_active(&self, name: String, active: bool) -> Result<ID>;
+}
diff --git a/ee/tabby-webserver/src/service/mod.rs b/ee/tabby-webserver/src/service/mod.rs
index f9e5c10def3..e36a8c33ca0 100644
--- a/ee/tabby-webserver/src/service/mod.rs
+++ b/ee/tabby-webserver/src/service/mod.rs
@@ -12,6 +12,7 @@ mod setting;
 mod thread;
 mod user_event;
 pub mod web_crawler;
+pub mod web_documents;
 
 use std::sync::Arc;
 
@@ -43,6 +44,7 @@ use tabby_schema::{
     thread::ThreadService,
     user_event::UserEventService,
     web_crawler::WebCrawlerService,
+    web_documents::WebDocumentService,
     worker::WorkerService,
     AsID, AsRowid, CoreError, Result, ServiceLocator,
 };
@@ -60,6 +62,7 @@ struct ServerContext {
     user_event: Arc<dyn UserEventService>,
     job: Arc<dyn JobService>,
     web_crawler: Arc<dyn WebCrawlerService>,
+    web_documents: Arc<dyn WebDocumentService>,
     thread: Arc<dyn ThreadService>,
 
     logger: Arc<dyn EventLogger>,
@@ -77,6 +80,7 @@ impl ServerContext {
         repository: Arc<dyn RepositoryService>,
         integration: Arc<dyn IntegrationService>,
         web_crawler: Arc<dyn WebCrawlerService>,
+        web_documents: Arc<dyn WebDocumentService>,
         job: Arc<dyn JobService>,
         answer: Option<Arc<AnswerService>>,
         db_conn: DbConn,
@@ -105,6 +109,7 @@ impl ServerContext {
                 setting.clone(),
             )),
             web_crawler,
+            web_documents,
             thread,
             license,
             repository,
@@ -260,6 +265,10 @@ impl ServiceLocator for ArcServerContext {
         self.0.web_crawler.clone()
     }
 
+    fn web_documents(&self) -> Arc<dyn WebDocumentService> {
+        self.0.web_documents.clone()
+    }
+
     fn thread(&self) -> Arc<dyn ThreadService> {
         self.0.thread.clone()
     }
@@ -271,6 +280,7 @@ pub async fn create_service_locator(
    repository: Arc<dyn RepositoryService>,
    integration: Arc<dyn IntegrationService>,
    web_crawler: Arc<dyn WebCrawlerService>,
+    web_documents: Arc<dyn WebDocumentService>,
    job: Arc<dyn JobService>,
    answer: Option<Arc<AnswerService>>,
    db: DbConn,
@@ -283,6 +293,7 @@ pub async fn create_service_locator(
         repository,
         integration,
         web_crawler,
+        web_documents,
         job,
         answer,
         db,
diff --git a/ee/tabby-webserver/src/service/web_documents.rs b/ee/tabby-webserver/src/service/web_documents.rs
new file mode 100644
index 00000000000..8e1086e0f13
--- /dev/null
+++ b/ee/tabby-webserver/src/service/web_documents.rs
@@ -0,0 +1,57 @@
+use std::sync::Arc;
+
+use super::{background_job::BackgroundJobEvent, graphql_pagination_to_filter};
+use async_trait::async_trait;
+use juniper::ID;
+use tabby_db::{DbConn, WebDocumentDAO};
+use tabby_schema::web_documents::PresetWebDocument;
+use tabby_schema::{
+    job::{JobInfo, JobService},
+    web_documents::{CustomWebDocument, WebDocumentService},
+    AsID, AsRowid, Result,
+};
+
+pub fn create(db: DbConn, job_service: Arc<dyn JobService>) -> impl WebDocumentService {
+    WebDocumentServiceImpl { db, job_service }
+}
+
+struct WebDocumentServiceImpl {
+    db: DbConn,
+    job_service: Arc<dyn JobService>,
+}
+
+#[async_trait]
+impl WebDocumentService for WebDocumentServiceImpl {
+    async fn list_custom_web_documents(
+        &self,
+        after: Option<String>,
+        before: Option<String>,
+        first: Option<usize>,
+        last: Option<usize>,
+    ) -> Result<Vec<CustomWebDocument>> {
+        Ok(vec![])
+    }
+
+    async fn create_custom_web_document(&self, name: String, url: String) -> Result<ID> {
+        Ok(ID::new("0"))
+    }
+
+    async fn delete_custom_web_document(&self, id: ID) -> Result<()> {
+        Ok(())
+    }
+
+    async fn list_preset_web_documents(
+        &self,
+        after: Option<String>,
+        before: Option<String>,
+        first: Option<usize>,
+        last: Option<usize>,
+        active: bool,
+    ) -> Result<Vec<PresetWebDocument>> {
+        Ok(vec![])
+    }
+
+    async fn set_preset_web_documents_active(&self, name: String, active: bool) -> Result<ID> {
+        Ok(ID::new("0"))
+    }
+}
diff --git a/ee/tabby-webserver/src/webserver.rs b/ee/tabby-webserver/src/webserver.rs
index 8fd3ae3739d..3b1875f2b63 100644
--- a/ee/tabby-webserver/src/webserver.rs
+++ b/ee/tabby-webserver/src/webserver.rs
@@ -1,5 +1,13 @@
 use std::sync::Arc;
 
+use crate::{
+    path::db_file,
+    routes,
+    service::{
+        background_job, create_service_locator, event_logger::create_event_logger, integration,
+        job, repository, web_crawler, web_documents,
+    },
+};
 use axum::Router;
 use tabby_common::{
     api::{
@@ -11,26 +19,19 @@ use tabby_common::{
 };
 use tabby_db::DbConn;
 use tabby_inference::{ChatCompletionStream, Embedding};
+use tabby_schema::web_documents::WebDocumentService;
 use tabby_schema::{
     integration::IntegrationService, job::JobService, repository::RepositoryService,
     web_crawler::WebCrawlerService,
 };
 
-use crate::{
-    path::db_file,
-    routes,
-    service::{
-        background_job, create_service_locator, event_logger::create_event_logger, integration,
-        job, repository, web_crawler,
-    },
-};
-
 pub struct Webserver {
     db: DbConn,
     logger: Arc<dyn EventLogger>,
     repository: Arc<dyn RepositoryService>,
     integration: Arc<dyn IntegrationService>,
     web_crawler: Arc<dyn WebCrawlerService>,
+    web_documents: Arc<dyn WebDocumentService>,
     job: Arc<dyn JobService>,
 }
@@ -61,6 +62,7 @@ impl Webserver {
         let repository = repository::create(db.clone(), integration.clone(), job.clone());
 
         let web_crawler = Arc::new(web_crawler::create(db.clone(), job.clone()));
+        let web_documents = Arc::new(web_documents::create(db.clone(), job.clone()));
 
         let logger2 = create_event_logger(db.clone());
         let logger = Arc::new(ComposedLogger::new(logger1, logger2));
@@ -70,6 +72,7 @@ impl Webserver {
             repository: repository.clone(),
             integration: integration.clone(),
             web_crawler: web_crawler.clone(),
+            web_documents: web_documents.clone(),
             job: job.clone(),
         });
 
@@ -121,6 +124,7 @@ impl Webserver {
             self.repository.clone(),
             self.integration.clone(),
             self.web_crawler.clone(),
+            self.web_documents.clone(),
             self.job.clone(),
             answer.clone(),
             self.db.clone(),