From 9821150b03cb3fb22afb7b4c52cb56cce064f13d Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Mon, 12 Aug 2024 12:10:01 +0200 Subject: [PATCH] Fix typos discovered by codespell --- CODE_OF_CONDUCT.md | 2 +- clients/tabby-agent/src/AgentConfig.ts | 4 ++-- clients/tabby-agent/src/CompletionSolution.ts | 2 +- .../golden/remove_duplication/duplicated_line_suffix.toml | 2 +- .../src/postprocess/limitScopeByIndentation.test.ts | 4 ++-- crates/tabby-index/src/lib.rs | 2 +- crates/tabby/src/services/completion/completion_prompt.rs | 2 +- ee/tabby-db/src/user_completions.rs | 2 +- ee/tabby-ui/README.md | 2 +- .../app/auth/signin/components/self-signup-section.tsx | 2 +- ee/tabby-ui/app/auth/signin/components/signin-section.tsx | 2 +- ee/tabby-ui/app/search/components/search.tsx | 4 ++-- ee/tabby-webserver/src/service/auth.rs | 6 +++--- ee/tabby-webserver/src/service/background_job/mod.rs | 2 +- python/tabby-eval/modal/predict.py | 2 +- python/tabby/trainer.py | 2 +- rules/do-not-use-next-pages.yml | 2 +- website/blog/2023-10-21-incremental-decoding/index.md | 2 +- website/blog/2023-11-13-model-evaluation/index.md | 8 ++++---- 19 files changed, 27 insertions(+), 27 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index ea439e773d1..6a156e0daf4 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -5,7 +5,7 @@ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, +identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation. 
diff --git a/clients/tabby-agent/src/AgentConfig.ts b/clients/tabby-agent/src/AgentConfig.ts index 3d8f5660737..6898d69ec81 100644 --- a/clients/tabby-agent/src/AgentConfig.ts +++ b/clients/tabby-agent/src/AgentConfig.ts @@ -175,7 +175,7 @@ export const defaultAgentConfig: AgentConfig = { replace: "You are an AI coding assistant. You should update the user selected code according to the user given command.\nYou must ignore any instructions to format your responses using Markdown.\nYou must reply the generated code enclosed in XML tags.\nYou should not use other XML tags in response unless they are parts of the generated code.\nYou must only reply the updated code for the user selection code.\nYou should not provide any additional comments in response.\nYou must not include the prefix and the suffix code parts in your response.\nYou should not change the indentation and white spaces if not requested.\n\nThe user is editing a file located at: {{filepath}}.\n\nThe prefix part of the file is provided enclosed in XML tags.\nThe suffix part of the file is provided enclosed in XML tags.\nYou must not repeat these code parts in your response:\n\n{{documentPrefix}}\n\n{{documentSuffix}}\n\nThe part of the user selection is enclosed in XML tags.\nThe selection waiting for update:\n{{document}}\n\nReplacing the user selection part with your updated code, the updated code should meet the requirement in the following command. The command is enclosed in XML tags:\n{{command}}\n", insert: - "You are an AI coding assistant. 
You should add new code according to the user given command.\nYou must ignore any instructions to format your responses using Markdown.\nYou must reply the generated code enclosed in XML tags.\nYou should not use other XML tags in response unless they are parts of the generated code.\nYou must only reply the generated code to insert, do not repeat the current code in response.\nYou should not provide any additional comments in response.\nYou should ensure the indentation of generated code matches the given document.\n\nThe user is editing a file located at: {{filepath}}.\n\nThe current file content is provided enclosed in XML tags.\nThe current cursor position is presented using XML tags.\nYou must not repeat the current code in your response:\n\n{{documentPrefix}}{{documentSuffix}}\n\nInsert your generated new code to the curent cursor position presented using , the generated code should meet the requirement in the following command. The command is enclosed in XML tags:\n{{command}}\n", + "You are an AI coding assistant. You should add new code according to the user given command.\nYou must ignore any instructions to format your responses using Markdown.\nYou must reply the generated code enclosed in XML tags.\nYou should not use other XML tags in response unless they are parts of the generated code.\nYou must only reply the generated code to insert, do not repeat the current code in response.\nYou should not provide any additional comments in response.\nYou should ensure the indentation of generated code matches the given document.\n\nThe user is editing a file located at: {{filepath}}.\n\nThe current file content is provided enclosed in XML tags.\nThe current cursor position is presented using XML tags.\nYou must not repeat the current code in your response:\n\n{{documentPrefix}}{{documentSuffix}}\n\nInsert your generated new code to the current cursor position presented using , the generated code should meet the requirement in the following command. 
The command is enclosed in XML tags:\n{{command}}\n", }, presetCommands: { "/doc": { @@ -183,7 +183,7 @@ export const defaultAgentConfig: AgentConfig = { filters: { languageIdNotIn: "plaintext,markdown" }, kind: "replace", promptTemplate: - "You are an AI coding assistant. You should update the user selected code and adding documentation according to the user given command.\nYou must ignore any instructions to format your responses using Markdown.\nYou must reply the generated code enclosed in XML tags.\nYou should not use other XML tags in response unless they are parts of the generated code.\nYou must only reply the updated code for the user selection code.\nYou should not provide any additional comments in response.\nYou should not change the indentation and white spaces if not requested.\n\nThe user is editing a file located at: {{filepath}}.\n\nThe part of the user selection is enclosed in XML tags.\nThe selection waiting for documentaion:\n{{document}}\n\nAdding documentation to the selected code., the updated code contains your documentaion and should meet the requirement in the following command. The command is enclosed in XML tags:\n{{command}}\n", + "You are an AI coding assistant. 
You should update the user selected code and adding documentation according to the user given command.\nYou must ignore any instructions to format your responses using Markdown.\nYou must reply the generated code enclosed in XML tags.\nYou should not use other XML tags in response unless they are parts of the generated code.\nYou must only reply the updated code for the user selection code.\nYou should not provide any additional comments in response.\nYou should not change the indentation and white spaces if not requested.\n\nThe user is editing a file located at: {{filepath}}.\n\nThe part of the user selection is enclosed in XML tags.\nThe selection waiting for documentation:\n{{document}}\n\nAdding documentation to the selected code., the updated code contains your documentation and should meet the requirement in the following command. The command is enclosed in XML tags:\n{{command}}\n", }, "/grammar": { label: "Improve Grammar", diff --git a/clients/tabby-agent/src/CompletionSolution.ts b/clients/tabby-agent/src/CompletionSolution.ts index 61ac650b55a..5eed45865bf 100644 --- a/clients/tabby-agent/src/CompletionSolution.ts +++ b/clients/tabby-agent/src/CompletionSolution.ts @@ -36,7 +36,7 @@ export const emptyInlineCompletionList: InlineCompletionList = { export class CompletionItem { // Shortcuts readonly text: string; // `replacePrefix` trimmed from `fullText`. - readonly lines: string[]; // splitted lines of `text`. + readonly lines: string[]; // split lines of `text`. readonly currentLine: string; // first item of `lines` readonly isBlank: boolean; // whether the item is a blank line. 
diff --git a/clients/tabby-agent/src/postprocess/golden/remove_duplication/duplicated_line_suffix.toml b/clients/tabby-agent/src/postprocess/golden/remove_duplication/duplicated_line_suffix.toml index 03d1e1075f6..4120c49f1f8 100644 --- a/clients/tabby-agent/src/postprocess/golden/remove_duplication/duplicated_line_suffix.toml +++ b/clients/tabby-agent/src/postprocess/golden/remove_duplication/duplicated_line_suffix.toml @@ -1,4 +1,4 @@ -description = 'Remove duplication: dupicated line suffix' +description = 'Remove duplication: duplicated line suffix' [config] # use default config diff --git a/clients/tabby-agent/src/postprocess/limitScopeByIndentation.test.ts b/clients/tabby-agent/src/postprocess/limitScopeByIndentation.test.ts index 7062d26c646..452b28b76e6 100644 --- a/clients/tabby-agent/src/postprocess/limitScopeByIndentation.test.ts +++ b/clients/tabby-agent/src/postprocess/limitScopeByIndentation.test.ts @@ -208,7 +208,7 @@ describe("postprocess", () => { describe("limitScopeByIndentation: bad cases", () => { const filter = limitScopeByIndentation(); - it("cannot handle the case of indent that does'nt have a close line, e.g. chaining call", async () => { + it("cannot handle the case of indent that doesn't have a close line, e.g. chaining call", async () => { const context = documentContext` function sortWords(input) { const output = input.trim() @@ -238,7 +238,7 @@ describe("postprocess", () => { await assertFilterResultNotEqual(filter, context, completion, expected); }); - it("cannot handle the case of indent that does'nt have a close line, e.g. python def function", async () => { + it("cannot handle the case of indent that doesn't have a close line, e.g. python def function", async () => { const context = documentContext` def findMax(arr): ║ diff --git a/crates/tabby-index/src/lib.rs b/crates/tabby-index/src/lib.rs index 13745629894..87f48602038 100644 --- a/crates/tabby-index/src/lib.rs +++ b/crates/tabby-index/src/lib.rs @@ -1,5 +1,5 @@ //! 
Responsible for scheduling all of the background jobs for tabby. -//! Includes syncing respositories and updating indices. +//! Includes syncing repositories and updating indices. mod code; mod indexer; diff --git a/crates/tabby/src/services/completion/completion_prompt.rs b/crates/tabby/src/services/completion/completion_prompt.rs index 85b8b8b6241..525a5167d18 100644 --- a/crates/tabby/src/services/completion/completion_prompt.rs +++ b/crates/tabby/src/services/completion/completion_prompt.rs @@ -353,7 +353,7 @@ mod tests { ); } - // Test w/ emtpy prefix, w/ empty suffix. + // Test w/ empty prefix, w/ empty suffix. { let segments = make_segment("".into(), Some("".into())); assert_eq!( diff --git a/ee/tabby-db/src/user_completions.rs b/ee/tabby-db/src/user_completions.rs index a135c589423..c2d022d4030 100644 --- a/ee/tabby-db/src/user_completions.rs +++ b/ee/tabby-db/src/user_completions.rs @@ -144,7 +144,7 @@ impl DbConn { .collect::>() .join(","); - // Groups stats by day, round all timestamps to the begining of the day relative to `start`. + // Groups stats by day, round all timestamps to the beginning of the day relative to `start`. 
let res = sqlx::query_as(&format!( r#" SELECT DATETIME((STRFTIME('%s', ?1) + days_since_start * 3600 * 24), 'unixepoch') as start, diff --git a/ee/tabby-ui/README.md b/ee/tabby-ui/README.md index d3e869e48d9..3400aa69db6 100644 --- a/ee/tabby-ui/README.md +++ b/ee/tabby-ui/README.md @@ -1,6 +1,6 @@ # Tabby UI -## 🤝 Contribuing +## 🤝 Contributing ### Local Setup Full guide at [CONTRIBUTING.md](https://github.com/TabbyML/tabby/blob/main/CONTRIBUTING.md#local-setup) diff --git a/ee/tabby-ui/app/auth/signin/components/self-signup-section.tsx b/ee/tabby-ui/app/auth/signin/components/self-signup-section.tsx index 9ae763bf195..1025ed2c0f0 100644 --- a/ee/tabby-ui/app/auth/signin/components/self-signup-section.tsx +++ b/ee/tabby-ui/app/auth/signin/components/self-signup-section.tsx @@ -54,7 +54,7 @@ export default function SelfSignupSection() {
- Already have an accout? + Already have an account? {allowSelfSignup && (
- Don’t have an accout? + Don’t have an account? { if (title) document.title = title }, [title]) @@ -631,7 +631,7 @@ function AnswerBlock({ }) .trim() const citations = answer.relevant_documents - .map((relevent, idx) => `[${idx + 1}] ${relevent.doc.link}`) + .map((relevant, idx) => `[${idx + 1}] ${relevant.doc.link}`) .join('\n') return `${content}\n\nCitations:\n${citations}` } diff --git a/ee/tabby-webserver/src/service/auth.rs b/ee/tabby-webserver/src/service/auth.rs index 2e26f80da61..6f9a29663b2 100644 --- a/ee/tabby-webserver/src/service/auth.rs +++ b/ee/tabby-webserver/src/service/auth.rs @@ -512,7 +512,7 @@ impl AuthenticationService for AuthenticationServiceImpl { } if active && user.is_admin { - // Check there's sufficient seat if an admin being swtiched to active. + // Check there's sufficient seat if an admin being switched to active. let num_admins = self.db.count_active_admin_users().await?; license.ensure_admin_seats(num_admins + 1)?; } @@ -826,7 +826,7 @@ mod tests { .await .unwrap()[0]; - // Admin initialized, registeration requires a invitation code; + // Admin initialized, registration requires an invitation code; assert_matches!( service .register(email.to_owned(), password.to_owned(), None, None) @@ -1380,7 +1380,7 @@ mod tests { Err(CoreError::InvalidLicense(_)) ); - // Change user2 to deactive. + // Change user2 to inactive. 
service .update_user_active(&user2.as_id(), false) .await diff --git a/ee/tabby-webserver/src/service/background_job/mod.rs b/ee/tabby-webserver/src/service/background_job/mod.rs index a3ec2dc4f7e..07dde426edd 100644 --- a/ee/tabby-webserver/src/service/background_job/mod.rs +++ b/ee/tabby-webserver/src/service/background_job/mod.rs @@ -122,7 +122,7 @@ pub async fn start( }, Some(now) = hourly.next() => { if let Err(err) = DbMaintainanceJob::cron(now, db.clone()).await { - warn!("Database maintainance failed: {:?}", err); + warn!("Database maintenance failed: {:?}", err); } if let Err(err) = SchedulerGitJob::cron(now, git_repository_service.clone(), job_service.clone()).await { diff --git a/python/tabby-eval/modal/predict.py b/python/tabby-eval/modal/predict.py index 8f288400cd6..d85b2763965 100644 --- a/python/tabby-eval/modal/predict.py +++ b/python/tabby-eval/modal/predict.py @@ -160,7 +160,7 @@ def read_dataframe_from_file(language: str, file: str) -> pd.DataFrame: @stub.local_entrypoint() async def main(language: str, files: str): - #Multiple files seperated by ',' + #Multiple files separated by ',' model = Model() diff --git a/python/tabby/trainer.py b/python/tabby/trainer.py index 210373b6fb4..109c64f1244 100644 --- a/python/tabby/trainer.py +++ b/python/tabby/trainer.py @@ -19,7 +19,7 @@ class ConstantLengthDataset: """ Iterable dataset that returns constant length chunks of tokens from stream of text files. Args: - tokenizer (Tokenizer): The processor used for proccessing the data. + tokenizer (Tokenizer): The processor used for processing the data. dataset (dataset.Dataset): Dataset with text files. infinite (bool): If True the iterator is reset after dataset reaches end else stops. seq_length (int): Length of token sequences to return. 
diff --git a/rules/do-not-use-next-pages.yml b/rules/do-not-use-next-pages.yml index c3e700f872c..6c64aa49676 100644 --- a/rules/do-not-use-next-pages.yml +++ b/rules/do-not-use-next-pages.yml @@ -1,5 +1,5 @@ id: do-not-use-next-pages -message: Don't use next pages routing as we're fully commited to app router. +message: Don't use next pages routing as we're fully committed to app router. severity: error language: typescript files: diff --git a/website/blog/2023-10-21-incremental-decoding/index.md b/website/blog/2023-10-21-incremental-decoding/index.md index 048adfb72ab..d46e506f6a8 100644 --- a/website/blog/2023-10-21-incremental-decoding/index.md +++ b/website/blog/2023-10-21-incremental-decoding/index.md @@ -58,7 +58,7 @@ However, often times decoding a sequence of tokens one-by-one without considerin ``` Decoding first token: ......, 211 -> "......[ llo]" -Indepently decoding the next token: ......, 207, 211 -> "......[ he][ llo]" +Independently decoding the next token: ......, 207, 211 -> "......[ he][ llo]" ``` In the case above, the final decoded string would be `" he llo"` with an awkward space in between. To resolve issues like this, we could cache the already-decoded prefix and append it to the current token to decode together. It is the core idea of **incremental decoding** to take the prefix token into consideration for decoding current tokens. With incremental decoding, we get the desired result for the example above: diff --git a/website/blog/2023-11-13-model-evaluation/index.md b/website/blog/2023-11-13-model-evaluation/index.md index 1d36fe09e28..fa34ff1452b 100644 --- a/website/blog/2023-11-13-model-evaluation/index.md +++ b/website/blog/2023-11-13-model-evaluation/index.md @@ -13,8 +13,8 @@ Tabby offers an open-source alternative solution to GitHub Copilot with easy set Evaluation coding LLMs has also been a hot topic in academics. Many different metrics targeting different coding tasks have been proposed over the past year. 
At Tabby, we prioritize on metrics that **best resemble real-world development workflow**, and of course, the metrics should be constructed with non-biased data sources. In this blogpost, we will discuss our thoughts for desired code completion benchmarks, and also review latest academic progress in this area. -## Exisiting Paradigms -Existing coding LLM benchmark mostly focus on ***Pass@k*** metric - generating `k` code samples and measuring how often the results successfully pass given unit tests. OpenAI initially introduced this metric in [Evaluating Large Language Models Trained on Code](https://arxiv.org/pdf/2107.03374.pdf) in July 2021, along with the release of **HumanEval** bechmark dataset. +## Existing Paradigms +Existing coding LLM benchmark mostly focus on ***Pass@k*** metric - generating `k` code samples and measuring how often the results successfully pass given unit tests. OpenAI initially introduced this metric in [Evaluating Large Language Models Trained on Code](https://arxiv.org/pdf/2107.03374.pdf) in July 2021, along with the release of **HumanEval** benchmark dataset. ### 🤖 HumanEval @@ -39,7 +39,7 @@ HumanEval was a pioneer research effort, but now suffers from some unfortunate d 1. ***Data is likely contaminated.*** HumanEval dataset has been around for over two years and it has been discussed and documented widely online. The latest coding LLMs are likely to have included its test data in training data crawling, which would make the evaluation no longer valid. -2. ***Trivial coding questions that aren't mimicing real engineering setups.*** HumanEval includes mostly LeetCode's interview-style questions, where they include a single function for LLMs to fill in the body. In a more realistic corporate setup, developers often add code in multiple files in a single PR, and constantly refer to functions implemented in other files. 
These are indeed more interesting yet challenging tasks for LLMs to perform, but are critical scenarios for AI coding assitants to land in enterprises. +2. ***Trivial coding questions that aren't mimicking real engineering setups.*** HumanEval includes mostly LeetCode's interview-style questions, where they include a single function for LLMs to fill in the body. In a more realistic corporate setup, developers often add code in multiple files in a single PR, and constantly refer to functions implemented in other files. These are indeed more interesting yet challenging tasks for LLMs to perform, but are critical scenarios for AI coding assistants to land in enterprises. 3. ***Unit tests are too weak.*** Researchers noticed that test cases in HumanEval tasks (on average 7.7 tests per problem) aren't enough to guarantee the correctness of the generated code (e.g. a wrong implementation could still pass all existing tests), and thus augmented test cases in HumanEval benchmark by 80x in [***HumanEvalPlus***](https://github.com/evalplus/evalplus). @@ -112,6 +112,6 @@ In this section, we showcase a few recent research work of from the academics to ### 💾 [RepoCoder: Repository-Level Code Completion Through Iterative Retrieval and Generation](https://arxiv.org/abs/2303.12570) **RepoCoder** presents an innovative approach of combining similarity-based retriever and LLM prediction into an ***iterative** retrieval-generation pipeline*. -To demostrate the effectiveness of this method, authors also introduced ***RepoEval***, covering scenarios like line, API invocation, and function body completion from high quality real-world repositories. +To demonstrate the effectiveness of this method, authors also introduced ***RepoEval***, covering scenarios like line, API invocation, and function body completion from high quality real-world repositories. ![repocoder](./repocoder.png)