-
-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #222 from Cloud-Code-AI/10-add-rag-support
[WIP ] : 10 add rag support
- Loading branch information
Showing
20 changed files
with
2,280 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -164,4 +164,5 @@ cython_debug/ | |
node_modules | ||
.next | ||
|
||
.cloudcode | ||
.cloudcode | ||
tree_sitter_languages/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# PostgreSQL 16 image with the pgvector extension compiled from source.
FROM postgres:16-bullseye

# Git ref (branch or tag) of pgvector to build. Defaults to the upstream
# default branch; pass --build-arg PGVECTOR_REF=<tag> for a reproducible build.
ARG PGVECTOR_REF=master

# Install the toolchain, build pgvector and remove the toolchain again inside
# a single RUN instruction: files deleted in a later layer still occupy space
# in the earlier one, so a separate clean-up step would not shrink the image.
# ca-certificates is listed explicitly because --no-install-recommends skips
# it and git needs it for the HTTPS clone.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        postgresql-server-dev-16 \
    && git clone --depth 1 --branch "${PGVECTOR_REF}" \
        https://github.com/pgvector/pgvector.git /tmp/pgvector \
    && make -C /tmp/pgvector \
    && make -C /tmp/pgvector install \
    && apt-get remove -y build-essential git postgresql-server-dev-16 \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/* /tmp/pgvector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
-- Enable the pgvector extension; it provides the VECTOR type used by the
-- function_embeddings table.
CREATE EXTENSION IF NOT EXISTS vector;

-- Table to store repositories: one row per (repo_name, repo_owner) pair.
-- IF NOT EXISTS keeps the script re-runnable, matching the extension
-- statement above.
CREATE TABLE IF NOT EXISTS repositories (
    repo_id SERIAL PRIMARY KEY,
    repo_name TEXT NOT NULL,
    repo_owner TEXT NOT NULL,
    repo_url TEXT NOT NULL,
    repo_description TEXT,  -- optional
    CONSTRAINT unique_repo UNIQUE (repo_name, repo_owner)
);
|
||
-- Table to store file information. Each file belongs to one repository and
-- its path is unique within that repository.
-- IF NOT EXISTS makes re-running the script a no-op for this table.
CREATE TABLE IF NOT EXISTS files (
    file_id SERIAL PRIMARY KEY,
    repo_id INTEGER NOT NULL REFERENCES repositories(repo_id),
    file_path TEXT NOT NULL,
    file_name TEXT NOT NULL,
    file_ext TEXT NOT NULL,
    programming_language TEXT,  -- optional
    CONSTRAINT unique_repo_file UNIQUE (repo_id, file_path)
);
|
||
-- Table to store function abstractions: one row per function found in a
-- file, with its signature, a textual description and its line range.
-- IF NOT EXISTS makes re-running the script a no-op for this table.
CREATE TABLE IF NOT EXISTS function_abstractions (
    function_id SERIAL PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(file_id),
    function_name TEXT NOT NULL,
    function_signature TEXT NOT NULL,
    abstract_functionality TEXT NOT NULL,
    complexity_score FLOAT,         -- optional
    input_output_description TEXT,  -- optional
    start_line INTEGER NOT NULL,
    end_line INTEGER NOT NULL
);
|
||
-- Table to store vector embeddings for function abstractions.
-- At most one embedding per function (unique_function_embedding).
-- IF NOT EXISTS makes re-running the script a no-op for this table.
CREATE TABLE IF NOT EXISTS function_embeddings (
    embedding_id SERIAL PRIMARY KEY,
    function_id INTEGER NOT NULL REFERENCES function_abstractions(function_id),
    -- Fixed at 1536 dimensions; presumably this must match the output size of
    -- the embedding model used by the application -- TODO confirm.
    vector VECTOR(1536) NOT NULL,
    CONSTRAINT unique_function_embedding UNIQUE (function_id)
);
|
||
-- Table to store syntax nodes: one row per node of a file, with its type,
-- line range and (optionally) its source text.
-- IF NOT EXISTS makes re-running the script a no-op for this table.
CREATE TABLE IF NOT EXISTS syntax_nodes (
    node_id SERIAL PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(file_id),
    node_type TEXT NOT NULL,
    start_line INTEGER NOT NULL,
    end_line INTEGER NOT NULL,
    node_content TEXT,  -- optional
    language TEXT NOT NULL
);
|
||
-- Table to store node relationships: a typed, directed link from a parent
-- syntax node to a child syntax node.
-- IF NOT EXISTS makes re-running the script a no-op for this table.
CREATE TABLE IF NOT EXISTS node_relationships (
    relationship_id SERIAL PRIMARY KEY,
    parent_node_id INTEGER NOT NULL REFERENCES syntax_nodes(node_id),
    child_node_id INTEGER NOT NULL REFERENCES syntax_nodes(node_id),
    relationship_type TEXT NOT NULL
);
|
||
-- Table to store node properties: name/value pairs attached to a syntax node.
-- IF NOT EXISTS makes re-running the script a no-op for this table.
CREATE TABLE IF NOT EXISTS node_properties (
    property_id SERIAL PRIMARY KEY,
    node_id INTEGER NOT NULL REFERENCES syntax_nodes(node_id),
    property_name TEXT NOT NULL,
    property_value TEXT NOT NULL
);
|
||
-- Secondary b-tree indexes for lookups by a single column.
-- IF NOT EXISTS lets the script be re-run without failing on indexes that
-- were already created.

-- Create an index on the file_path column for faster lookups
CREATE INDEX IF NOT EXISTS idx_file_path ON files(file_path);

-- Create an index on the function_name column for faster lookups
CREATE INDEX IF NOT EXISTS idx_function_name ON function_abstractions(function_name);

-- Create an index on the node_type column for faster lookups
CREATE INDEX IF NOT EXISTS idx_node_type ON syntax_nodes(node_type);
|
||
-- Create an index on the vector column for faster similarity searches.
-- HNSW is used rather than IVFFlat because this script builds the index right
-- after creating the (still empty) table: IVFFlat chooses its list centroids
-- from the rows present at build time, whereas HNSW has no training step.
-- vector_l2_ops serves queries ordered by the L2 distance operator (<->).
CREATE INDEX IF NOT EXISTS idx_function_embeddings_vector
    ON function_embeddings USING hnsw (vector vector_l2_ops);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from kaizen.retriever.llama_index_retriever import RepositoryAnalyzer | ||
|
||
# Build the analyzer once; every query below reuses this instance.
analyzer = RepositoryAnalyzer()

# Indexing step -- run it the first time a repository is analysed, or again
# whenever you want the stored analysis refreshed:
# analyzer.setup_repository("./github_app/")


def _print_result(result):
    """Print one query hit: file path, abstraction, the raw record and its score."""
    print(f"File: {result['file_path']}")
    print(f"Abstraction: {result['abstraction']}")
    print(f"result:\n{result}")
    print(f"Relevance Score: {result['relevance_score']}")
    print("---")


# Queries can be repeated freely without calling setup_repository again.
results = analyzer.query("Find functions that handle authentication")
for result in results:
    _print_result(result)

# After changing the repository, re-run the indexing step to update the analysis:
# analyzer.setup_repository("/path/to/your/repo")

# ...and query again against the updated data.
results = analyzer.query("authentication")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/bin/bash | ||
|
||
# Grammar checkouts are kept under this directory (relative to the current
# working directory).
LANGUAGE_DIR="tree_sitter_languages"

# Languages whose Tree-sitter parsers get installed
LANGUAGES=("python" "javascript" "typescript" "rust")

# Make sure the target directory is present; -p makes this a no-op when it
# already exists
mkdir -p "$LANGUAGE_DIR"
|
||
# Install the Tree-sitter parser for a single language.
#
# Arguments:
#   $1 - language name; the grammar is cloned from
#        https://github.com/tree-sitter/tree-sitter-<name>
# Globals:
#   LANGUAGE_DIR - directory that holds the cloned grammar repositories
# Returns:
#   0 on success; non-zero if cloning, entering the checkout, updating
#   submodules or generating the parser fails.
install_language() {
    # local keeps these from leaking into (or clobbering) the caller's variables
    local lang=$1
    local repo_dir="$LANGUAGE_DIR/tree-sitter-$lang"

    echo "Installing Tree-sitter parser for $lang..."

    # Clone the repository if it doesn't exist
    if [ ! -d "$repo_dir" ]; then
        git clone "https://github.com/tree-sitter/tree-sitter-$lang" "$repo_dir" || return 1
    fi

    # Build inside a subshell: the caller's working directory is left
    # untouched even when a step fails, and a failed cd can no longer cause
    # the following commands to run in the wrong directory.
    (
        cd "$repo_dir" || exit 1

        # Update submodules
        git submodule update --init || exit 1

        # Build the parser using tree-sitter CLI
        tree-sitter generate
    ) || {
        echo "Failed to install Tree-sitter parser for $lang." >&2
        return 1
    }

    echo "Tree-sitter parser for $lang installed successfully."
}
|
||
# Install each language, counting the install_language calls that return a
# non-zero status so the closing message and the exit code reflect what
# actually happened.
failed=0
for lang in "${LANGUAGES[@]}"; do
    # Quoted so a name containing whitespace or glob characters is passed as
    # a single argument.
    install_language "$lang" || failed=$((failed + 1))
done

if [ "$failed" -gt 0 ]; then
    echo "$failed Tree-sitter parser(s) failed to install." >&2
    exit 1
fi

echo "All Tree-sitter parsers have been installed."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.