Commit d02eab1

Updated documentation and default CLI.

souradipp76 committed Nov 25, 2024
1 parent 01fb5e2 commit d02eab1

Showing 7 changed files with 81 additions and 19 deletions.
27 changes: 23 additions & 4 deletions README.md
@@ -80,10 +80,10 @@ from readme_ready.types import (
model = LLMModels.LLAMA2_7B_CHAT_GPTQ # Choose model from supported models

repo_config = AutodocRepoConfig (
name = "<NAME>", # Replace <NAME>
root = "<PROJECT_ROOT>", # Replace <PROJECT_ROOT>
repository_url = "<PROJECT_URL>", # Replace <PROJECT_URL>
output = "<OUTPUT_DIR>", # Replace <OUTPUT_DIR>
name = "<REPOSITORY_NAME>", # Replace <REPOSITORY_NAME>
root = "<REPOSITORY_ROOT_DIR_PATH>", # Replace <REPOSITORY_ROOT_DIR_PATH>
repository_url = "<REPOSITORY_URL>", # Replace <REPOSITORY_URL>
output = "<OUTPUT_DIR_PATH>", # Replace <OUTPUT_DIR_PATH>
llms = [model],
peft_model_path = "<PEFT_MODEL_NAME_OR_PATH>", # Replace <PEFT_MODEL_NAME_OR_PATH>
ignore = [
@@ -116,6 +116,7 @@ user_config = AutodocUserConfig(
)

readme_config = AutodocReadmeConfig(
+# Set comma-separated list of README headings
headings = "Description,Requirements,Installation,Usage,Contributing,License"
)

@@ -125,13 +126,31 @@ query.generate_readme(repo_config, user_config, readme_config)

Run the sample script `examples/example.py` to see typical usage.

+See detailed API references [here](https://souradipp76.github.io/ReadMeReady/reference/).

### Finetuning

For finetuning on custom datasets, follow the instructions below.

- Run the notebook file `scripts/data.ipynb` and follow the instructions in the file to generate a custom dataset from open-source repositories.
- Run the notebook file `scripts/fine-tuning-with-llama2-qlora.ipynb` and follow the instructions in the file to finetune custom LLMs.

+### Supported models
+- TINYLLAMA_1p1B_CHAT_GGUF (`TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF`)
+- GOOGLE_GEMMA_2B_INSTRUCT_GGUF (`bartowski/gemma-2-2b-it-GGUF`)
+- LLAMA2_7B_CHAT_GPTQ (`TheBloke/Llama-2-7B-Chat-GPTQ`)
+- LLAMA2_13B_CHAT_GPTQ (`TheBloke/Llama-2-13B-Chat-GPTQ`)
+- CODELLAMA_7B_INSTRUCT_GPTQ (`TheBloke/CodeLlama-7B-Instruct-GPTQ`)
+- CODELLAMA_13B_INSTRUCT_GPTQ (`TheBloke/CodeLlama-13B-Instruct-GPTQ`)
+- LLAMA2_7B_CHAT_HF (`meta-llama/Llama-2-7b-chat-hf`)
+- LLAMA2_13B_CHAT_HF (`meta-llama/Llama-2-13b-chat-hf`)
+- CODELLAMA_7B_INSTRUCT_HF (`meta-llama/CodeLlama-7b-Instruct-hf`)
+- CODELLAMA_13B_INSTRUCT_HF (`meta-llama/CodeLlama-13b-Instruct-hf`)
+- GOOGLE_GEMMA_2B_INSTRUCT (`google/gemma-2b-it`)
+- GOOGLE_GEMMA_7B_INSTRUCT (`google/gemma-7b-it`)
+- GOOGLE_CODEGEMMA_2B (`google/codegemma-2b`)
+- GOOGLE_CODEGEMMA_7B_INSTRUCT (`google/codegemma-7b-it`)

## Contributing

ReadmeReady is an open-source project supported by a community that will gratefully and humbly accept any contributions you make to the project.
27 changes: 23 additions & 4 deletions docs/index.md
@@ -77,10 +77,10 @@ from readme_ready.types import (
model = LLMModels.LLAMA2_7B_CHAT_GPTQ # Choose model from supported models

repo_config = AutodocRepoConfig (
name = "<NAME>", # Replace <NAME>
root = "<PROJECT_ROOT>", # Replace <PROJECT_ROOT>
repository_url = "<PROJECT_URL>", # Replace <PROJECT_URL>
output = "<OUTPUT_DIR>", # Replace <OUTPUT_DIR>
name = "<REPOSITORY_NAME>", # Replace <REPOSITORY_NAME>
root = "<REPOSITORY_ROOT_DIR_PATH>", # Replace <REPOSITORY_ROOT_DIR_PATH>
repository_url = "<REPOSITORY_URL>", # Replace <REPOSITORY_URL>
output = "<OUTPUT_DIR_PATH>", # Replace <OUTPUT_DIR_PATH>
llms = [model],
peft_model_path = "<PEFT_MODEL_NAME_OR_PATH>", # Replace <PEFT_MODEL_NAME_OR_PATH>
ignore = [
@@ -113,6 +113,7 @@ user_config = AutodocUserConfig(
)

readme_config = AutodocReadmeConfig(
+# Set comma-separated list of README headings
headings = "Description,Requirements,Installation,Usage,Contributing,License"
)

@@ -122,6 +123,24 @@ query.generate_readme(repo_config, user_config, readme_config)

Run the sample script `examples/example.py` to see typical usage.

+See detailed API references [here](https://souradipp76.github.io/ReadMeReady/reference/).

+### Supported models
+- TINYLLAMA_1p1B_CHAT_GGUF (`TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF`)
+- GOOGLE_GEMMA_2B_INSTRUCT_GGUF (`bartowski/gemma-2-2b-it-GGUF`)
+- LLAMA2_7B_CHAT_GPTQ (`TheBloke/Llama-2-7B-Chat-GPTQ`)
+- LLAMA2_13B_CHAT_GPTQ (`TheBloke/Llama-2-13B-Chat-GPTQ`)
+- CODELLAMA_7B_INSTRUCT_GPTQ (`TheBloke/CodeLlama-7B-Instruct-GPTQ`)
+- CODELLAMA_13B_INSTRUCT_GPTQ (`TheBloke/CodeLlama-13B-Instruct-GPTQ`)
+- LLAMA2_7B_CHAT_HF (`meta-llama/Llama-2-7b-chat-hf`)
+- LLAMA2_13B_CHAT_HF (`meta-llama/Llama-2-13b-chat-hf`)
+- CODELLAMA_7B_INSTRUCT_HF (`meta-llama/CodeLlama-7b-Instruct-hf`)
+- CODELLAMA_13B_INSTRUCT_HF (`meta-llama/CodeLlama-13b-Instruct-hf`)
+- GOOGLE_GEMMA_2B_INSTRUCT (`google/gemma-2b-it`)
+- GOOGLE_GEMMA_7B_INSTRUCT (`google/gemma-7b-it`)
+- GOOGLE_CODEGEMMA_2B (`google/codegemma-2b`)
+- GOOGLE_CODEGEMMA_7B_INSTRUCT (`google/codegemma-7b-it`)

## Contributing

ReadmeReady is an open-source project supported by a community that will gratefully and humbly accept any contributions you make to the project.
27 changes: 20 additions & 7 deletions examples/example.py
@@ -10,15 +10,27 @@
LLMModels,
)

-model = LLMModels.LLAMA2_7B_CHAT_GPTQ # Choose model from supported models
+# Choose model from supported models
+model = LLMModels.LLAMA2_7B_CHAT_GPTQ

+# Initialize the repository configuration. `root` refers to the path to the
+# code repository for which you want to generate a README. Download any code
+# repository from GitHub and use that, or use a repository of your own (say
+# 'MyRepo') that you have already downloaded.
+# Set `name` to 'MyRepo'.
+# Set `root` to <path to 'MyRepo'>.
+# Set `repository_url` to the GitHub URL of 'MyRepo' (if any), else leave blank.
+# Set `output` to the path of the directory where the README and other metadata
+# will be generated and saved.
+# Set other parameters accordingly (or leave as default).

repo_config = AutodocRepoConfig (
name = "readmy_ready",
root = "./readme_ready",
repository_url = "https://github.com/souradipp76/ReadMeReady",
output = "./output/readmy_ready",
name = "readmy_ready", # Set repository name
root = "./readme_ready", # Set path to root directory of the repository
repository_url = "https://github.com/souradipp76/ReadMeReady", # Set url
output = "./output/readmy_ready", # Set path to output directory to save
llms = [model],
peft_model_path = None,
peft_model_path = None, # Set path to PEFT model
ignore = [
".*",
"*package-lock.json",
@@ -41,14 +53,15 @@
priority = None,
max_concurrent_calls = 50,
add_questions = False,
device = "auto",
device = "auto", # Select device "cpu" or "auto"
)

user_config = AutodocUserConfig(
llms = [model]
)

readme_config = AutodocReadmeConfig(
+# Set comma-separated list of README headings
headings = "Description,Requirements,Installation,Usage,Contributing,License"
)

2 changes: 2 additions & 0 deletions paper/paper.md
@@ -58,6 +58,8 @@ The application prompts the user to enter the project's name, GitHub URL, and se
- `google/gemma-2b-it` [@gemma-2b-it]
- `google/codegemma-2b-it` [@codegemma-2b-it]

+For our experiments and tests, we used a single NVIDIA Tesla V100 with 16 GB of GPU memory, which is ideal for running the application.

**Document Retrieval:** Our application indexes the codebase through a depth-first traversal of all repository contents and utilizes an LLM to generate documentation. All files are converted into text, tokenized, and then chunked, with each chunk containing 1000 tokens. The application employs the `sentence-transformers/all-mpnet-base-v2` [@sentence-transformers-all-mpnet-base-v2] sentence encoder to convert each chunk into a 768-dimensional embedding vector, which is stored in an in-memory vector store. When a query is provided, it is converted into an embedding vector using the same sentence encoder. The nearest neighbors of the query embedding vector are then retrieved from the vector store using KNN (k=4), with cosine similarity as the distance metric. For the KNN search, we use the HNSWLib library, which implements an approximate nearest-neighbor search based on hierarchical navigable small-world graphs [@malkov2018efficient]. This retrieval provides the relevant sections of the source code, aiding in answering the prompted question. The entire methodology for Retrieval Augmented Generation (RAG) and fine-tuning is illustrated in the figure below.
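This retrieval pipeline can be sketched in a few lines. The snippet below is a minimal illustration, assuming the `sentence-transformers` and `hnswlib` packages are installed; the chunks, query, and variable names are hypothetical and not the application's actual code.

```python
import hnswlib
from sentence_transformers import SentenceTransformer

# Encode each text chunk (the application uses 1000-token chunks) into a
# 768-dimensional embedding with the sentence encoder named in the paper.
encoder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
chunks = [
    "def generate_readme(repo_config, user_config, readme_config): ...",
    "class AutodocRepoConfig: configuration for the target repository.",
    "Entry point that indexes the repository and answers queries.",
]
embeddings = encoder.encode(chunks)  # shape: (len(chunks), 768)

# Build an HNSW index (approximate nearest-neighbor search) over the chunk
# embeddings, using cosine similarity as the distance metric.
index = hnswlib.Index(space="cosine", dim=embeddings.shape[1])
index.init_index(max_elements=len(chunks), ef_construction=200, M=16)
index.add_items(embeddings, ids=list(range(len(chunks))))

# Embed the query with the same encoder and fetch the k nearest chunks
# (the paper uses k=4; capped here by the size of the toy corpus).
query_vec = encoder.encode(["How is the README generated?"])
labels, distances = index.knn_query(query_vec, k=min(4, len(chunks)))
for i, d in zip(labels[0], distances[0]):
    print(f"chunk {i} (cosine distance {d:.3f}): {chunks[i][:50]}")
```

The retrieved chunks are what get passed to the LLM as context for answering the prompted question.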

<img src="figures/rag_workflow.jpg" alt="rag_workflow">
2 changes: 1 addition & 1 deletion readme_ready/VERSION
@@ -1 +1 @@
-1.1.3
+1.1.4
13 changes: 11 additions & 2 deletions readme_ready/__main__.py
@@ -1,6 +1,15 @@
"""Entry point for readme_ready."""

-from .main import main # pragma: no cover
+import os
+
+
+def entry_point():
+    os.environ["OPENAI_API_KEY"] = "dummy"
+    os.environ["HF_TOKEN"] = "dummy"
+    from .main import main
+
+    main()
+

if __name__ == "__main__": # pragma: no cover
-    main()
+    entry_point()
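The `"dummy"` values are placeholders: they appear to be set so that code paths expecting `OPENAI_API_KEY` and `HF_TOKEN` to be present at import time do not fail when those backends are unused; real credentials are still required to actually use OpenAI models or gated Hugging Face models.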
2 changes: 1 addition & 1 deletion setup.py
@@ -40,7 +40,7 @@ def read_requirements(path):
packages=find_packages(exclude=["tests", ".github"]),
install_requires=read_requirements("requirements.txt"),
entry_points={
"console_scripts": ["readme_ready = readme_ready.__main__:main"]
"console_scripts": ["readme_ready = readme_ready.__main__:entry_point"]
},
extras_require={"test": read_requirements("requirements-test.txt")},
)
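Because the console script now targets `entry_point` instead of `main`, the installed `readme_ready` command sets the placeholder environment variables above before dispatching to the CLI.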
