From ad403d955b0f8f67f0b4cb842577cb83e1a3457d Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Fri, 16 Aug 2024 01:34:59 -0700 Subject: [PATCH] feat: final update to RAG generation --- .gitignore | 3 +- .../kaizen/helpers/test_create_folder.py | 76 ++- .../helpers/test_create_pr_description.py | 127 +++-- .../helpers/test_create_pr_review_text.py | 28 +- .../kaizen/helpers/test_create_test_files.py | 218 +++++---- .../kaizen/helpers/test_get_parent_folder.py | 24 +- .../kaizen/helpers/test_get_web_html.py | 59 ++- Dockerfile | 12 + config.json | 21 + db_setup/init.sql | 57 +-- examples/ragify_codebase/main.py | 25 +- install_tree_sitter_languages.sh | 51 ++ kaizen/generator/unit_test.py | 12 +- kaizen/llms/provider.py | 9 + kaizen/retriever/code_chunker.py | 272 ++++++----- kaizen/retriever/feedback_system.py | 18 + kaizen/retriever/llama_index_retriever.py | 354 +++++++++----- kaizen/retriever/query_processor.py | 0 kaizen/retriever/result_processor.py | 0 kaizen/retriever/tree_sitter_utils.py | 107 +++++ poetry.lock | 453 ++++++++++++------ pyproject.toml | 2 + 22 files changed, 1298 insertions(+), 630 deletions(-) create mode 100644 install_tree_sitter_languages.sh create mode 100644 kaizen/retriever/feedback_system.py delete mode 100644 kaizen/retriever/query_processor.py delete mode 100644 kaizen/retriever/result_processor.py create mode 100644 kaizen/retriever/tree_sitter_utils.py diff --git a/.gitignore b/.gitignore index 79fb5dac..16d3c87f 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,5 @@ cython_debug/ node_modules .next -.cloudcode \ No newline at end of file +.cloudcode +tree_sitter_languages/ \ No newline at end of file diff --git a/.kaizen/unit_test/kaizen/helpers/test_create_folder.py b/.kaizen/unit_test/kaizen/helpers/test_create_folder.py index 0348f3c0..1ef355e3 100644 --- a/.kaizen/unit_test/kaizen/helpers/test_create_folder.py +++ b/.kaizen/unit_test/kaizen/helpers/test_create_folder.py @@ -6,33 +6,44 @@ # Mock logger logger = mock.Mock() + @pytest.fixture def mock_os_path_exists(): - with mock.patch('os.path.exists') as mock_exists: + with mock.patch("os.path.exists") as mock_exists: yield mock_exists + @pytest.fixture def mock_os_makedirs(): - with mock.patch('os.makedirs') as mock_makedirs: + with mock.patch("os.makedirs") as mock_makedirs: yield mock_makedirs + @pytest.fixture def mock_logger_debug(): - with mock.patch('kaizen.helpers.output.logger.debug') as mock_debug: + with mock.patch("kaizen.helpers.output.logger.debug") as mock_debug: yield mock_debug -def test_create_new_folder_when_not_exists(mock_os_path_exists, mock_os_makedirs, mock_logger_debug): - folder_path = 'new_folder' + +def test_create_new_folder_when_not_exists( + mock_os_path_exists, mock_os_makedirs, mock_logger_debug +): + folder_path = "new_folder" mock_os_path_exists.return_value = False create_folder(folder_path) mock_os_path_exists.assert_called_once_with(folder_path) mock_os_makedirs.assert_called_once_with(folder_path) - mock_logger_debug.assert_called_once_with(f"Folder '{folder_path}' created successfully.") + mock_logger_debug.assert_called_once_with( + f"Folder '{folder_path}' created successfully." 
+ ) + -def test_do_nothing_when_folder_already_exists(mock_os_path_exists, mock_os_makedirs, mock_logger_debug): - folder_path = 'existing_folder' +def test_do_nothing_when_folder_already_exists( + mock_os_path_exists, mock_os_makedirs, mock_logger_debug +): + folder_path = "existing_folder" mock_os_path_exists.return_value = True create_folder(folder_path) @@ -41,47 +52,66 @@ def test_do_nothing_when_folder_already_exists(mock_os_path_exists, mock_os_make mock_os_makedirs.assert_not_called() mock_logger_debug.assert_called_once_with(f"Folder '{folder_path}' already exists.") + def test_raise_value_error_when_folder_path_is_empty(): with pytest.raises(ValueError, match="Folder path cannot be empty"): - create_folder('') + create_folder("") + -def test_create_deeply_nested_folder(mock_os_path_exists, mock_os_makedirs, mock_logger_debug): - folder_path = 'a/b/c/d/e/f/g' +def test_create_deeply_nested_folder( + mock_os_path_exists, mock_os_makedirs, mock_logger_debug +): + folder_path = "a/b/c/d/e/f/g" mock_os_path_exists.return_value = False create_folder(folder_path) mock_os_path_exists.assert_called_once_with(folder_path) mock_os_makedirs.assert_called_once_with(folder_path) - mock_logger_debug.assert_called_once_with(f"Folder '{folder_path}' created successfully.") + mock_logger_debug.assert_called_once_with( + f"Folder '{folder_path}' created successfully." + ) + -def test_create_folder_with_special_characters(mock_os_path_exists, mock_os_makedirs, mock_logger_debug): - folder_path = 'folder_with_special_!@#$%^&*()' +def test_create_folder_with_special_characters( + mock_os_path_exists, mock_os_makedirs, mock_logger_debug +): + folder_path = "folder_with_special_!@#$%^&*()" mock_os_path_exists.return_value = False create_folder(folder_path) mock_os_path_exists.assert_called_once_with(folder_path) mock_os_makedirs.assert_called_once_with(folder_path) - mock_logger_debug.assert_called_once_with(f"Folder '{folder_path}' created successfully.") + mock_logger_debug.assert_called_once_with( + f"Folder '{folder_path}' created successfully." + ) -def test_create_folder_with_max_path_length(mock_os_path_exists, mock_os_makedirs, mock_logger_debug): + +def test_create_folder_with_max_path_length( + mock_os_path_exists, mock_os_makedirs, mock_logger_debug +): # Adjusting the max path length to a more typical value for modern filesystems - max_path_length = os.pathconf('/', 'PC_PATH_MAX') - folder_path = 'a' * max_path_length + max_path_length = os.pathconf("/", "PC_PATH_MAX") + folder_path = "a" * max_path_length mock_os_path_exists.return_value = False create_folder(folder_path) mock_os_path_exists.assert_called_once_with(folder_path) mock_os_makedirs.assert_called_once_with(folder_path) - mock_logger_debug.assert_called_once_with(f"Folder '{folder_path}' created successfully.") + mock_logger_debug.assert_called_once_with( + f"Folder '{folder_path}' created successfully." 
+ ) + -def test_create_folder_with_invalid_characters(mock_os_path_exists, mock_os_makedirs, mock_logger_debug): +def test_create_folder_with_invalid_characters( + mock_os_path_exists, mock_os_makedirs, mock_logger_debug +): # Assuming the filesystem does not allow characters like ':', '*', '?', '<', '>', '|' - invalid_characters = [':', '*', '?', '<', '>', '|'] + invalid_characters = [":", "*", "?", "<", ">", "|"] for char in invalid_characters: - folder_path = f'invalid{char}folder' + folder_path = f"invalid{char}folder" mock_os_path_exists.return_value = False with pytest.raises(OSError): @@ -89,4 +119,4 @@ def test_create_folder_with_invalid_characters(mock_os_path_exists, mock_os_make mock_os_path_exists.assert_called_once_with(folder_path) mock_os_makedirs.assert_not_called() - mock_logger_debug.assert_not_called() \ No newline at end of file + mock_logger_debug.assert_not_called() diff --git a/.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py b/.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py index 4280b502..e006cabf 100644 --- a/.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py +++ b/.kaizen/unit_test/kaizen/helpers/test_create_pr_description.py @@ -2,51 +2,91 @@ import time from kaizen.helpers.output import create_pr_description -DESC_COLLAPSIBLE_TEMPLATE = "
<details>Original Description\n\n{desc}\n\n</details>
" +DESC_COLLAPSIBLE_TEMPLATE = ( + "
<details>Original Description\n\n{desc}\n\n</details>
" +) -@pytest.mark.parametrize("desc, original_desc, expected", [ - # Normal Cases - ("This is a PR description", "This is the original detailed description", - "This is a PR description\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\nThis is the original detailed description\n\n</details>
"), - ("Fixes a bug", "This fixes a bug in the system", - "Fixes a bug\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\nThis fixes a bug in the system\n\n</details>
"), - # Edge Cases - ("", "This is the original detailed description", - "\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\nThis is the original detailed description\n\n</details>
"), - ("This is a PR description", "", - "This is a PR description\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n\n\n</details>
"), - ("", "", - "\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n\n\n</details>
"), - ("# Heading\n* Bullet", "**Bold**\n_Italic_", - "# Heading\n* Bullet\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n**Bold**\n_Italic_\n\n</details>
"), - # Special Characters and HTML Tags - ("

Title

", "

This is a bold statement

", - "

Title

\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n

This is a bold statement

\n\n</details>
"), - ("Special characters: !@#$%^&*()", "More special characters: ~`<>?", - "Special characters: !@#$%^&*()\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\nMore special characters: ~`<>?\n\n</details>
"), -]) + +@pytest.mark.parametrize( + "desc, original_desc, expected", + [ + # Normal Cases + ( + "This is a PR description", + "This is the original detailed description", + "This is a PR description\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\nThis is the original detailed description\n\n</details>
", + ), + ( + "Fixes a bug", + "This fixes a bug in the system", + "Fixes a bug\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\nThis fixes a bug in the system\n\n</details>
", + ), + # Edge Cases + ( + "", + "This is the original detailed description", + "\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\nThis is the original detailed description\n\n</details>
", + ), + ( + "This is a PR description", + "", + "This is a PR description\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n\n\n</details>
", + ), + ( + "", + "", + "\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n\n\n</details>
", + ), + ( + "# Heading\n* Bullet", + "**Bold**\n_Italic_", + "# Heading\n* Bullet\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n**Bold**\n_Italic_\n\n</details>
", + ), + # Special Characters and HTML Tags + ( + "

Title

", + "

This is a bold statement

", + "

Title

\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n

This is a bold statement

\n\n</details>
", + ), + ( + "Special characters: !@#$%^&*()", + "More special characters: ~`<>?", + "Special characters: !@#$%^&*()\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\nMore special characters: ~`<>?\n\n</details>
", + ), + ], +) def test_create_pr_description_normal_and_edge_cases(desc, original_desc, expected): assert create_pr_description(desc, original_desc) == expected -@pytest.mark.parametrize("desc, original_desc, expected_error_message", [ - # Error Handling - (None, "This is the original detailed description", "desc must be a string"), - (123, "This is the original detailed description", "desc must be a string"), - ([], "This is the original detailed description", "desc must be a string"), - ("This is a PR description", None, "original_desc must be a string"), - ("This is a PR description", 123, "original_desc must be a string"), - ("This is a PR description", [], "original_desc must be a string"), -]) -def test_create_pr_description_error_handling(desc, original_desc, expected_error_message): + +@pytest.mark.parametrize( + "desc, original_desc, expected_error_message", + [ + # Error Handling + (None, "This is the original detailed description", "desc must be a string"), + (123, "This is the original detailed description", "desc must be a string"), + ([], "This is the original detailed description", "desc must be a string"), + ("This is a PR description", None, "original_desc must be a string"), + ("This is a PR description", 123, "original_desc must be a string"), + ("This is a PR description", [], "original_desc must be a string"), + ], +) +def test_create_pr_description_error_handling( + desc, original_desc, expected_error_message +): with pytest.raises(TypeError) as exc_info: create_pr_description(desc, original_desc) assert str(exc_info.value) == expected_error_message -@pytest.mark.parametrize("desc, original_desc", [ - # Boundary Conditions - ("a" * 10000, "b" * 10000), - ("a" * 100000, "b" * 100000), -]) + +@pytest.mark.parametrize( + "desc, original_desc", + [ + # Boundary Conditions + ("a" * 10000, "b" * 10000), + ("a" * 100000, "b" * 100000), + ], +) def test_create_pr_description_boundary_conditions(desc, original_desc): start_time = time.time() result = create_pr_description(desc, original_desc) @@ -56,9 +96,18 @@ def test_create_pr_description_boundary_conditions(desc, original_desc): assert result.startswith(desc) assert result.endswith(DESC_COLLAPSIBLE_TEMPLATE.format(desc=original_desc)) assert "> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️" in result - assert len(result) == len(desc) + len(original_desc) + len("\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n\n\n</details>
") - 2 + assert ( + len(result) + == len(desc) + + len(original_desc) + + len( + "\n\n> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️\n\n
<details>Original Description\n\n\n\n</details>
" + ) + - 2 + ) # Removed the arbitrary 1-second boundary condition print(f"Execution time: {execution_time} seconds") + if __name__ == "__main__": - pytest.main() \ No newline at end of file + pytest.main() diff --git a/.kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py b/.kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py index f6761b9b..ef4d8de7 100644 --- a/.kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py +++ b/.kaizen/unit_test/kaizen/helpers/test_create_pr_review_text.py @@ -15,6 +15,7 @@ """ + @pytest.fixture def setup_single_topic_single_review(): return { @@ -32,6 +33,7 @@ def setup_single_topic_single_review(): ] } + @pytest.fixture def setup_multiple_topics_multiple_reviews(): return { @@ -55,7 +57,7 @@ def setup_multiple_topics_multiple_reviews(): "end_line": 40, "file_name": "file2.py", "severity_level": 7, - } + }, ], "topic2": [ { @@ -68,14 +70,18 @@ def setup_multiple_topics_multiple_reviews(): "file_name": "file3.py", "severity_level": 5, } - ] + ], } + def test_empty_topics(): topics = {} - expected_output = "## Code Review\n\n✅ **All Clear:** This PR is ready to merge! 👍\n\n" + expected_output = ( + "## Code Review\n\n✅ **All Clear:** This PR is ready to merge! 👍\n\n" + ) assert create_pr_review_text(topics) == expected_output + def test_single_topic_single_review(setup_single_topic_single_review): topics = setup_single_topic_single_review expected_output = ( @@ -96,6 +102,7 @@ def test_single_topic_single_review(setup_single_topic_single_review): ) assert create_pr_review_text(topics) == expected_output + def test_multiple_topics_multiple_reviews(setup_multiple_topics_multiple_reviews): topics = setup_multiple_topics_multiple_reviews expected_output = ( @@ -139,6 +146,7 @@ def test_multiple_topics_multiple_reviews(setup_multiple_topics_multiple_reviews ) assert create_pr_review_text(topics) == expected_output + def test_reviews_with_missing_fields(): topics = { "topic1": [ @@ -181,7 +189,7 @@ def test_reviews_with_missing_fields(): "end_line": 80, "file_name": "final_test_file.py", # Missing severity_level - } + }, ] } expected_output = ( @@ -235,6 +243,7 @@ def test_reviews_with_missing_fields(): ) assert create_pr_review_text(topics) == expected_output + def test_reviews_with_missing_comment(): topics = { "topic1": [ @@ -268,9 +277,10 @@ def test_reviews_with_missing_comment(): ) assert create_pr_review_text(topics) == expected_output + def test_empty_list_in_topics(): - topics = { - "topic1": [] - } - expected_output = "## Code Review\n\n✅ **All Clear:** This PR is ready to merge! 👍\n\n" - assert create_pr_review_text(topics) == expected_output \ No newline at end of file + topics = {"topic1": []} + expected_output = ( + "## Code Review\n\n✅ **All Clear:** This PR is ready to merge! 
👍\n\n" + ) + assert create_pr_review_text(topics) == expected_output diff --git a/.kaizen/unit_test/kaizen/helpers/test_create_test_files.py b/.kaizen/unit_test/kaizen/helpers/test_create_test_files.py index f318d808..0cf4ad3b 100644 --- a/.kaizen/unit_test/kaizen/helpers/test_create_test_files.py +++ b/.kaizen/unit_test/kaizen/helpers/test_create_test_files.py @@ -4,45 +4,60 @@ from unittest import mock from kaizen.helpers.output import create_test_files + # Mocking dependencies @pytest.fixture def mock_dependencies(): - with mock.patch('kaizen.helpers.output.create_folder') as mock_create_folder, \ - mock.patch('kaizen.helpers.output.general.clean_python_code') as mock_clean_python_code, \ - mock.patch('kaizen.helpers.output.logger') as mock_logger: + with mock.patch( + "kaizen.helpers.output.create_folder" + ) as mock_create_folder, mock.patch( + "kaizen.helpers.output.general.clean_python_code" + ) as mock_clean_python_code, mock.patch( + "kaizen.helpers.output.logger" + ) as mock_logger: yield mock_create_folder, mock_clean_python_code, mock_logger + # Utility function to read file content def read_file_content(file_path): - with open(file_path, 'r') as f: + with open(file_path, "r") as f: return f.read() + # Utility function to sanitize file names def sanitize_filename(filename): - return "".join(c if c.isalnum() or c in (' ', '.', '_') else '_' for c in filename) + return "".join(c if c.isalnum() or c in (" ", ".", "_") else "_" for c in filename) + # Test single module with a single test def test_single_module_single_test(tmp_path, mock_dependencies): mock_create_folder, mock_clean_python_code, mock_logger = mock_dependencies mock_clean_python_code.return_value = "def test_example():\n assert True" - json_tests = [{ - "folder_name": "module1", - "module_title": "Module 1", - "importance": "High", - "tests": [{ - "test_name": "Test Example", - "test_description": "This is a test example.", - "code": "def test_example():\n assert True" - }] - }] + json_tests = [ + { + "folder_name": "module1", + "module_title": "Module 1", + "importance": "High", + "tests": [ + { + "test_name": "Test Example", + "test_description": "This is a test example.", + "code": "def test_example():\n assert True", + } + ], + } + ] create_test_files(json_tests, tmp_path) # Assertions assert os.path.exists(os.path.join(tmp_path, "tests.json")) assert os.path.exists(os.path.join(tmp_path, "module1", "test_test_example.py")) - assert "Importance: High" in read_file_content(os.path.join(tmp_path, "module1", "test_test_example.py")) + assert "Importance: High" in read_file_content( + os.path.join(tmp_path, "module1", "test_test_example.py") + ) + # Test multiple modules with multiple tests def test_multiple_modules_multiple_tests(tmp_path, mock_dependencies): @@ -58,14 +73,14 @@ def test_multiple_modules_multiple_tests(tmp_path, mock_dependencies): { "test_name": "Test Example 1", "test_description": "This is test example 1.", - "code": "def test_example_1():\n assert True" + "code": "def test_example_1():\n assert True", }, { "test_name": "Test Example 2", "test_description": "This is test example 2.", - "code": "def test_example_2():\n assert True" - } - ] + "code": "def test_example_2():\n assert True", + }, + ], }, { "folder_name": "module2", @@ -75,10 +90,10 @@ def test_multiple_modules_multiple_tests(tmp_path, mock_dependencies): { "test_name": "Test Example 3", "test_description": "This is test example 3.", - "code": "def test_example_3():\n assert True" + "code": "def test_example_3():\n assert True", } - ] 
- } + ], + }, ] create_test_files(json_tests, tmp_path) @@ -89,6 +104,7 @@ def test_multiple_modules_multiple_tests(tmp_path, mock_dependencies): assert os.path.exists(os.path.join(tmp_path, "module1", "test_test_example_2.py")) assert os.path.exists(os.path.join(tmp_path, "module2", "test_test_example_3.py")) + # Test empty json_tests list def test_empty_json_tests(tmp_path, mock_dependencies): mock_create_folder, mock_clean_python_code, mock_logger = mock_dependencies @@ -101,21 +117,26 @@ def test_empty_json_tests(tmp_path, mock_dependencies): assert os.path.exists(os.path.join(tmp_path, "tests.json")) assert os.path.getsize(os.path.join(tmp_path, "tests.json")) == 0 + # Test names with special characters def test_special_characters_in_test_names(tmp_path, mock_dependencies): mock_create_folder, mock_clean_python_code, mock_logger = mock_dependencies mock_clean_python_code.return_value = "def test_example():\n assert True" - json_tests = [{ - "folder_name": "module1", - "module_title": "Module 1", - "importance": "High", - "tests": [{ - "test_name": "Test Example!@#", - "test_description": "This is a test example with special characters.", - "code": "def test_example():\n assert True" - }] - }] + json_tests = [ + { + "folder_name": "module1", + "module_title": "Module 1", + "importance": "High", + "tests": [ + { + "test_name": "Test Example!@#", + "test_description": "This is a test example with special characters.", + "code": "def test_example():\n assert True", + } + ], + } + ] create_test_files(json_tests, tmp_path) @@ -123,22 +144,27 @@ def test_special_characters_in_test_names(tmp_path, mock_dependencies): sanitized_name = sanitize_filename("test_test_example!@#.py") assert os.path.exists(os.path.join(tmp_path, "module1", sanitized_name)) + # Test very long test names def test_very_long_test_names(tmp_path, mock_dependencies): mock_create_folder, mock_clean_python_code, mock_logger = mock_dependencies mock_clean_python_code.return_value = "def test_example():\n assert True" long_test_name = "Test " + "Example " * 50 - json_tests = [{ - "folder_name": "module1", - "module_title": "Module 1", - "importance": "High", - "tests": [{ - "test_name": long_test_name, - "test_description": "This is a very long test name.", - "code": "def test_example():\n assert True" - }] - }] + json_tests = [ + { + "folder_name": "module1", + "module_title": "Module 1", + "importance": "High", + "tests": [ + { + "test_name": long_test_name, + "test_description": "This is a very long test name.", + "code": "def test_example():\n assert True", + } + ], + } + ] create_test_files(json_tests, tmp_path) @@ -146,23 +172,30 @@ def test_very_long_test_names(tmp_path, mock_dependencies): file_name = "test_" + "_".join(long_test_name.lower().split(" ")) + ".py" assert os.path.exists(os.path.join(tmp_path, "module1", file_name)) assert len(file_name) <= 255 # Assuming a common file system limit - assert "def test_example():\n assert True" in read_file_content(os.path.join(tmp_path, "module1", file_name)) + assert "def test_example():\n assert True" in read_file_content( + os.path.join(tmp_path, "module1", file_name) + ) + # Test clean code function returns empty string def test_clean_code_returns_empty_string(tmp_path, mock_dependencies): mock_create_folder, mock_clean_python_code, mock_logger = mock_dependencies mock_clean_python_code.return_value = "" - json_tests = [{ - "folder_name": "module1", - "module_title": "Module 1", - "importance": "High", - "tests": [{ - "test_name": "Test Example", - "test_description": 
"This is a test example.", - "code": "def test_example():\n assert True" - }] - }] + json_tests = [ + { + "folder_name": "module1", + "module_title": "Module 1", + "importance": "High", + "tests": [ + { + "test_name": "Test Example", + "test_description": "This is a test example.", + "code": "def test_example():\n assert True", + } + ], + } + ] create_test_files(json_tests, tmp_path) @@ -171,21 +204,26 @@ def test_clean_code_returns_empty_string(tmp_path, mock_dependencies): assert not os.path.exists(os.path.join(tmp_path, "module1", "test_test_example.py")) assert not os.path.exists(os.path.join(tmp_path, "tests.json")) + # Test file writing permission issues def test_file_writing_permission_issues(tmp_path, mock_dependencies): mock_create_folder, mock_clean_python_code, mock_logger = mock_dependencies mock_clean_python_code.return_value = "def test_example():\n assert True" - json_tests = [{ - "folder_name": "module1", - "module_title": "Module 1", - "importance": "High", - "tests": [{ - "test_name": "Test Example", - "test_description": "This is a test example.", - "code": "def test_example():\n assert True" - }] - }] + json_tests = [ + { + "folder_name": "module1", + "module_title": "Module 1", + "importance": "High", + "tests": [ + { + "test_name": "Test Example", + "test_description": "This is a test example.", + "code": "def test_example():\n assert True", + } + ], + } + ] # Simulate permission error with mock.patch("builtins.open", mock.mock_open()) as mock_file: @@ -197,6 +235,7 @@ def test_file_writing_permission_issues(tmp_path, mock_dependencies): # Assertions assert not os.path.exists(os.path.join(tmp_path, "tests.json")) + # Test maximum number of modules def test_maximum_number_of_modules(tmp_path, mock_dependencies): mock_create_folder, mock_clean_python_code, mock_logger = mock_dependencies @@ -207,40 +246,51 @@ def test_maximum_number_of_modules(tmp_path, mock_dependencies): "folder_name": f"module{i}", "module_title": f"Module {i}", "importance": "High", - "tests": [{ - "test_name": f"Test Example {i}", - "test_description": f"This is test example {i}.", - "code": "def test_example():\n assert True" - }] - } for i in range(100) + "tests": [ + { + "test_name": f"Test Example {i}", + "test_description": f"This is test example {i}.", + "code": "def test_example():\n assert True", + } + ], + } + for i in range(100) ] create_test_files(json_tests, tmp_path) # Assertions for i in range(100): - assert os.path.exists(os.path.join(tmp_path, f"module{i}", f"test_test_example_{i}.py")) + assert os.path.exists( + os.path.join(tmp_path, f"module{i}", f"test_test_example_{i}.py") + ) + # Test maximum number of tests per module def test_maximum_number_of_tests_per_module(tmp_path, mock_dependencies): mock_create_folder, mock_clean_python_code, mock_logger = mock_dependencies mock_clean_python_code.return_value = "def test_example():\n assert True" - json_tests = [{ - "folder_name": "module1", - "module_title": "Module 1", - "importance": "High", - "tests": [ - { - "test_name": f"Test Example {i}", - "test_description": f"This is test example {i}.", - "code": "def test_example():\n assert True" - } for i in range(100) - ] - }] + json_tests = [ + { + "folder_name": "module1", + "module_title": "Module 1", + "importance": "High", + "tests": [ + { + "test_name": f"Test Example {i}", + "test_description": f"This is test example {i}.", + "code": "def test_example():\n assert True", + } + for i in range(100) + ], + } + ] create_test_files(json_tests, tmp_path) # Assertions for i in 
range(100): - assert os.path.exists(os.path.join(tmp_path, "module1", f"test_test_example_{i}.py")) \ No newline at end of file + assert os.path.exists( + os.path.join(tmp_path, "module1", f"test_test_example_{i}.py") + ) diff --git a/.kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py b/.kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py index cfc86496..974c699c 100644 --- a/.kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py +++ b/.kaizen/unit_test/kaizen/helpers/test_get_parent_folder.py @@ -5,32 +5,42 @@ from unittest import mock from kaizen.helpers.output import get_parent_folder + # Correct implementation of get_parent_folder() def get_parent_folder(): return os.path.dirname(os.getcwd()) + # Test function for normal case def test_get_parent_folder_normal(): expected = os.path.dirname(os.getcwd()) result = get_parent_folder() assert result == expected, f"Expected {expected}, but got {result}" + # Test function for error handling case def test_get_parent_folder_error_handling(): - with mock.patch('os.getcwd', side_effect=OSError("Unable to determine current working directory")): - with pytest.raises(OSError, match="Unable to determine current working directory"): + with mock.patch( + "os.getcwd", + side_effect=OSError("Unable to determine current working directory"), + ): + with pytest.raises( + OSError, match="Unable to determine current working directory" + ): get_parent_folder() - - with mock.patch('os.getcwd', side_effect=Exception("Unknown error")): + + with mock.patch("os.getcwd", side_effect=Exception("Unknown error")): with pytest.raises(Exception, match="Unknown error"): get_parent_folder() + # Test function for nested directory structure def test_get_parent_folder_nested(): - with mock.patch('os.getcwd', return_value='/home/user/project/subfolder'): - expected = '/home/user/project' + with mock.patch("os.getcwd", return_value="/home/user/project/subfolder"): + expected = "/home/user/project" result = get_parent_folder() assert result == expected, f"Expected {expected}, but got {result}" + if __name__ == "__main__": - pytest.main() \ No newline at end of file + pytest.main() diff --git a/.kaizen/unit_test/kaizen/helpers/test_get_web_html.py b/.kaizen/unit_test/kaizen/helpers/test_get_web_html.py index 969948da..c54e9d24 100644 --- a/.kaizen/unit_test/kaizen/helpers/test_get_web_html.py +++ b/.kaizen/unit_test/kaizen/helpers/test_get_web_html.py @@ -7,19 +7,24 @@ # Assuming the get_web_html function is defined in kaizen/helpers/output.py from kaizen.helpers.output import get_web_html + @pytest.fixture def mock_get_html(): - with patch('kaizen.helpers.output.get_html', new_callable=AsyncMock) as mock: + with patch("kaizen.helpers.output.get_html", new_callable=AsyncMock) as mock: yield mock + @pytest.fixture def mock_nest_asyncio(): - with patch('kaizen.helpers.output.nest_asyncio.apply') as mock: + with patch("kaizen.helpers.output.nest_asyncio.apply") as mock: yield mock -@pytest.mark.parametrize("html_content, expected_output", [ - ( - """ + +@pytest.mark.parametrize( + "html_content, expected_output", + [ + ( + """ Test @@ -33,7 +38,7 @@ def mock_nest_asyncio(): """, - """ + """
@@ -43,18 +48,15 @@ def mock_nest_asyncio():

- """ - ), - ( - "", # Empty HTML content - "" - ), - ( - "

Nothing to remove here!

", # No removable elements - "\n \n

\n Nothing to remove here!\n

\n \n" - ), - ( - """ + """, + ), + ("", ""), # Empty HTML content + ( + "

Nothing to remove here!

", # No removable elements + "\n \n

\n Nothing to remove here!\n

\n \n", + ), + ( + """ Test @@ -62,7 +64,7 @@ def mock_nest_asyncio(): """, - """ + """

@@ -70,10 +72,13 @@ def mock_nest_asyncio():

- """ - ) -]) -async def test_get_web_html_normal_cases(mock_get_html, mock_nest_asyncio, html_content, expected_output): + """, + ), + ], +) +async def test_get_web_html_normal_cases( + mock_get_html, mock_nest_asyncio, html_content, expected_output +): mock_get_html.return_value = html_content url = "https://cloudcode.ai" @@ -82,6 +87,7 @@ async def test_get_web_html_normal_cases(mock_get_html, mock_nest_asyncio, html_ assert result.strip() == expected_output.strip() mock_nest_asyncio.assert_called_once() + async def test_get_web_html_invalid_url(mock_get_html, mock_nest_asyncio): mock_get_html.side_effect = Exception("Network error") @@ -90,13 +96,16 @@ async def test_get_web_html_invalid_url(mock_get_html, mock_nest_asyncio): await get_web_html(url) mock_nest_asyncio.assert_called_once() + async def test_get_web_html_large_content(mock_get_html, mock_nest_asyncio): large_html_content = "" + "

Test

" * 10000 + "" - expected_output = "\n \n" + "

\n Test\n

\n" * 10000 + " \n" + expected_output = ( + "\n \n" + "

\n Test\n

\n" * 10000 + " \n" + ) mock_get_html.return_value = large_html_content url = "https://cloudcode.ai" result = await get_web_html(url) assert result.strip() == expected_output.strip() - mock_nest_asyncio.assert_called_once() \ No newline at end of file + mock_nest_asyncio.assert_called_once() diff --git a/Dockerfile b/Dockerfile index 0d900049..e56f0e4f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,12 @@ FROM python:3.12-slim # Set the working directory in the container WORKDIR /app +# Install system dependencies +RUN apt-get update && apt-get install -y \ + git \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + # Install Poetry RUN pip install --no-cache-dir poetry @@ -16,6 +22,12 @@ RUN poetry install --no-dev --no-root # Copy the application code into the container COPY . . +# Make the installation script executable +RUN chmod +x install_tree_sitter_languages.sh + +# Run the Tree-sitter language installation script +RUN ./install_tree_sitter_languages.sh + # Expose the port on which the application will run EXPOSE 8000 diff --git a/config.json b/config.json index 6c0dbe29..8647f8cb 100644 --- a/config.json +++ b/config.json @@ -4,6 +4,27 @@ "enable_observability_logging": false, "redis_enabled": true, "models": [ + { + "model_name": "embedding", + "litellm_params": { + "model": "azure/text-embedding-small", + "input_cost_per_token": 0.000000015, + "output_cost_per_token": 0.0000006, + "api_key": "os.environ/AZURE_API_KEY", + "api_base": "os.environ/AZURE_API_BASE" + } + }, + { + "model_name": "small", + "litellm_params": { + "model": "azure/gpt-4o-mini", + "input_cost_per_token": 0.000000015, + "output_cost_per_token": 0.0000006, + "api_key": "os.environ/AZURE_API_KEY", + "api_base": "os.environ/AZURE_API_BASE", + "base_model": "azure/gpt-4o-mini" + } + }, { "model_name": "default", "litellm_params": { diff --git a/db_setup/init.sql b/db_setup/init.sql index 5f8720a1..223492fa 100644 --- a/db_setup/init.sql +++ b/db_setup/init.sql @@ -20,54 +20,49 @@ CREATE TABLE files ( programming_language TEXT ); --- Table to store code snippets -CREATE TABLE code_snippets ( - snippet_id SERIAL PRIMARY KEY, +-- Table to store function abstractions +CREATE TABLE function_abstractions ( + function_id SERIAL PRIMARY KEY, file_id INTEGER NOT NULL REFERENCES files(file_id), - snippet_text TEXT NOT NULL, + function_name TEXT NOT NULL, + function_signature TEXT NOT NULL, + abstract_functionality TEXT NOT NULL, + complexity_score FLOAT, + input_output_description TEXT, start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL, - functionality TEXT, - context TEXT + end_line INTEGER NOT NULL ); --- Table to store vector embeddings for code snippets -CREATE TABLE embeddings ( +-- Table to store vector embeddings for function abstractions +CREATE TABLE function_embeddings ( embedding_id SERIAL PRIMARY KEY, - snippet_id INTEGER NOT NULL REFERENCES code_snippets(snippet_id), - vector VECTOR NOT NULL + function_id INTEGER NOT NULL REFERENCES function_abstractions(function_id), + vector VECTOR(1536) NOT NULL ); --- Table to store AI-generated summaries for code snippets -CREATE TABLE snippet_summaries ( - summary_id SERIAL PRIMARY KEY, - snippet_id INTEGER NOT NULL REFERENCES code_snippets(snippet_id), - summary TEXT NOT NULL, - summary_quality_score FLOAT -); --- Node level data for AST -CREATE TABLE ast_nodes ( +CREATE TABLE syntax_nodes ( node_id SERIAL PRIMARY KEY, file_id INTEGER NOT NULL REFERENCES files(file_id), node_type TEXT NOT NULL, start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL - -- 
Add other common node properties here + end_line INTEGER NOT NULL, + node_content TEXT, + language TEXT NOT NULL +); + +-- Table to store node relationships +CREATE TABLE node_relationships ( + relationship_id SERIAL PRIMARY KEY, + parent_node_id INTEGER NOT NULL REFERENCES syntax_nodes(node_id), + child_node_id INTEGER NOT NULL REFERENCES syntax_nodes(node_id), + relationship_type TEXT NOT NULL ); -- Table to store node properties CREATE TABLE node_properties ( property_id SERIAL PRIMARY KEY, - node_id INTEGER NOT NULL REFERENCES ast_nodes(node_id), + node_id INTEGER NOT NULL REFERENCES syntax_nodes(node_id), property_name TEXT NOT NULL, property_value TEXT NOT NULL -); - --- Table to store node relationships -CREATE TABLE node_relationships ( - relationship_id SERIAL PRIMARY KEY, - parent_node_id INTEGER NOT NULL REFERENCES ast_nodes(node_id), - child_node_id INTEGER NOT NULL REFERENCES ast_nodes(node_id), - relationship_type TEXT NOT NULL ); \ No newline at end of file diff --git a/examples/ragify_codebase/main.py b/examples/ragify_codebase/main.py index 13bab95b..411dadbf 100644 --- a/examples/ragify_codebase/main.py +++ b/examples/ragify_codebase/main.py @@ -1,9 +1,22 @@ from kaizen.retriever.llama_index_retriever import RepositoryAnalyzer -# Usage -analyzer = RepositoryAnalyzer(database_config={}) -analyzer.analyze_repository("./kaizen") +# Initialize the analyzer +analyzer = RepositoryAnalyzer() -# Query example -result = analyzer.query("How is function X related to function Y?") -print(result) +# Set up the repository (do this when you first analyze a repo or when you want to update it) +analyzer.setup_repository("./github_app/") + +# Perform queries (you can do this as many times as you want without calling setup_repository again) +results = analyzer.query("Find functions that handle authentication") +for result in results: + print(f"File: {result['file_path']}") + print(f"Abstraction: {result['abstraction']}") + print(f"Code:\n{result['code']}") + print(f"Relevance Score: {result['relevance_score']}") + print("---") + +# # If you make changes to the repository and want to update the analysis: +# analyzer.setup_repository("/path/to/your/repo") + +# Then you can query again with the updated data +results = analyzer.query("authentication") diff --git a/install_tree_sitter_languages.sh b/install_tree_sitter_languages.sh new file mode 100644 index 00000000..cb247dd3 --- /dev/null +++ b/install_tree_sitter_languages.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# Directory to store the language libraries +LANGUAGE_DIR="tree_sitter_languages" + +# List of languages to install +LANGUAGES=( + "python" + "javascript" + "typescript" + "rust" +) + +# Create the language directory if it doesn't exist +mkdir -p "$LANGUAGE_DIR" + +# Function to install a language +install_language() { + lang=$1 + echo "Installing Tree-sitter parser for $lang..." + + # Clone the repository if it doesn't exist + if [ ! -d "$LANGUAGE_DIR/tree-sitter-$lang" ]; then + git clone "https://github.com/tree-sitter/tree-sitter-$lang" "$LANGUAGE_DIR/tree-sitter-$lang" + fi + + # Navigate to the repository directory + cd "$LANGUAGE_DIR/tree-sitter-$lang" + + # Update submodules + git submodule update --init + + # Compile the parser + cc -fPIC -c -I./src src/parser.c + cc -shared *.o -o "../$lang.so" + + # Clean up object files + rm *.o + + # Navigate back to the original directory + cd ../.. + + echo "Tree-sitter parser for $lang installed successfully." 
+} + +# Install each language +for lang in "${LANGUAGES[@]}"; do + install_language $lang +done + +echo "All Tree-sitter parsers have been installed." \ No newline at end of file diff --git a/kaizen/generator/unit_test.py b/kaizen/generator/unit_test.py index 66b8623e..7c85ded5 100644 --- a/kaizen/generator/unit_test.py +++ b/kaizen/generator/unit_test.py @@ -59,7 +59,11 @@ def _setup_directories(self): self._create_output_folder(self.output_folder) def generate_tests_from_dir( - self, dir_path: str, output_path: str = None, max_critique: int = 3, verbose: bool = False, + self, + dir_path: str, + output_path: str = None, + max_critique: int = 3, + verbose: bool = False, enable_critique: bool = False, ): """ @@ -69,14 +73,14 @@ def generate_tests_from_dir( self.enable_critique = enable_critique self.verbose = verbose if verbose else self.verbose self.output_folder = output_path if output_path else self.output_folder - for file_path in Path(dir_path).rglob('*.*'): + for file_path in Path(dir_path).rglob("*.*"): try: self.generate_tests(file_path=str(file_path), output_path=output_path) except Exception as e: print(f"Error: Could not generate tests for {file_path}: {e}") - + return {}, self.total_usage - + def generate_tests( self, file_path: str, diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py index 5d575bf9..e58836fa 100644 --- a/kaizen/llms/provider.py +++ b/kaizen/llms/provider.py @@ -232,3 +232,12 @@ def get_usage_cost(self, total_usage: Dict[str, int], model: str = None) -> floa return litellm.cost_per_token( model, total_usage["prompt_tokens"], total_usage["completion_tokens"] ) + + def get_text_embedding(self, text): + # for model in self.config["language_model"]["models"]: + # if model["model_name"] == "embedding": + # break + response = self.provider.embedding( + model="embedding", input=[text], dimensions=1536 + ) + return response["data"], response["usage"] diff --git a/kaizen/retriever/code_chunker.py b/kaizen/retriever/code_chunker.py index b7749652..ebd0d15f 100644 --- a/kaizen/retriever/code_chunker.py +++ b/kaizen/retriever/code_chunker.py @@ -1,140 +1,156 @@ -import ast -import esprima -import escodegen -import json - - -ParsedBody = { - "functions": {}, - "classes": {}, - "hooks": {}, - "components": {}, - "other_blocks": [], +import os +import subprocess +from tree_sitter import Language, Parser +from typing import Dict, List, Any + +ParsedBody = Dict[str, Dict[str, Any]] + +# Define the languages and their GitHub repositories +LANGUAGES = { + "python": "https://github.com/tree-sitter/tree-sitter-python", + "javascript": "https://github.com/tree-sitter/tree-sitter-javascript", + "typescript": "https://github.com/tree-sitter/tree-sitter-typescript", + "rust": "https://github.com/tree-sitter/tree-sitter-rust", } - -def chunk_python_code(code): - tree = ast.parse(code) - functions = {} - classes = {} - other_blocks = [] - current_block = [] - - for node in ast.iter_child_nodes(tree): - if isinstance(node, ast.FunctionDef): - functions[node.name] = ast.unparse(node) - elif isinstance(node, ast.ClassDef): - methods = {} - for item in node.body: - if isinstance(item, ast.FunctionDef): - methods[item.name] = ast.unparse(item) - classes[node.name] = {"definition": ast.unparse(node), "methods": methods} - elif isinstance(node, (ast.If, ast.For, ast.While)): - other_blocks.append(ast.unparse(node)) - else: - current_block.append(ast.unparse(node)) - - if current_block: - other_blocks.append("\n".join(current_block)) - - body = ParsedBody - body["functions"] = 
functions - body["classes"] = classes - body["other_blocks"] = other_blocks - return body +# Directory to store the language libraries +LANGUAGE_DIR = os.path.join(os.path.dirname(__file__), "tree_sitter_languages") + + +def ensure_language_installed(language: str) -> None: + if not os.path.exists(LANGUAGE_DIR): + os.makedirs(LANGUAGE_DIR) + + lang_file = os.path.join(LANGUAGE_DIR, f"{language}.so") + if not os.path.exists(lang_file): + repo_url = LANGUAGES[language] + repo_dir = os.path.join(LANGUAGE_DIR, f"tree-sitter-{language}") + + if not os.path.exists(repo_dir): + subprocess.run(["git", "clone", repo_url, repo_dir], check=True) + + subprocess.run( + ["bash", "-c", f"cd {repo_dir} && git submodule update --init"], check=True + ) + Language.build_library(lang_file, [repo_dir]) + + +def get_parser(language: str) -> Parser: + ensure_language_installed(language) + parser = Parser() + lang_file = os.path.join(LANGUAGE_DIR, f"{language}.so") + lang = Language(lang_file, language) + parser.set_language(lang) + return parser + + +def traverse_tree(node, code_bytes: bytes) -> Dict[str, Any]: + if node.type in [ + "function_definition", + "function_declaration", + "arrow_function", + "method_definition", + ]: + return { + "type": "function", + "name": ( + node.child_by_field_name("name").text.decode("utf8") + if node.child_by_field_name("name") + else "anonymous" + ), + "code": code_bytes[node.start_byte : node.end_byte].decode("utf8"), + } + elif node.type in ["class_definition", "class_declaration"]: + return { + "type": "class", + "name": node.child_by_field_name("name").text.decode("utf8"), + "code": code_bytes[node.start_byte : node.end_byte].decode("utf8"), + } + elif node.type in ["jsx_element", "jsx_self_closing_element"]: + return { + "type": "component", + "name": ( + node.child_by_field_name("opening_element") + .child_by_field_name("name") + .text.decode("utf8") + if node.type == "jsx_element" + else node.child_by_field_name("name").text.decode("utf8") + ), + "code": code_bytes[node.start_byte : node.end_byte].decode("utf8"), + } + elif node.type == "impl_item": + return { + "type": "impl", + "name": node.child_by_field_name("type").text.decode("utf8"), + "code": code_bytes[node.start_byte : node.end_byte].decode("utf8"), + } + else: + return None -def chunk_javascript_code(code): - tree = esprima.parseModule(code, jsx=True, tolerant=True) - functions = {} - classes = {} - components = {} - hooks = {} - other_blocks = [] +def chunk_code(code: str, language: str) -> ParsedBody: + parser = get_parser(language) + tree = parser.parse(code.encode("utf8")) - def ast_to_source(node): - try: - return escodegen.generate(node) - except Exception: - return f"// Unable to generate code for {node.type}" + body: ParsedBody = { + "functions": {}, + "classes": {}, + "hooks": {}, + "components": {}, + "other_blocks": [], + } + code_bytes = code.encode("utf8") def process_node(node): - if node.type == "FunctionDeclaration": - if is_react_component(node): - components[node.id.name] = ast_to_source(node) - else: - functions[node.id.name] = ast_to_source(node) - elif node.type == "ClassDeclaration": - if is_react_component(node): - components[node.id.name] = ast_to_source(node) - else: - methods = {} - for item in node.body.body: - if item.type == "MethodDefinition": - methods[item.key.name] = ast_to_source(item) - classes[node.id.name] = { - "definition": ast_to_source(node), - "methods": methods, - } - elif node.type == "VariableDeclaration": - for decl in node.declarations: - if decl.init and 
decl.init.type == "ArrowFunctionExpression": - if is_react_component(decl.init): - components[decl.id.name] = ast_to_source(node) - elif is_react_hook(decl.id.name): - hooks[decl.id.name] = ast_to_source(node) - else: - functions[decl.id.name] = ast_to_source(node) + result = traverse_tree(node, code_bytes) + if result: + if result["type"] == "function": + if is_react_hook(result["name"]): + body["hooks"][result["name"]] = result["code"] + elif is_react_component(result["code"]): + body["components"][result["name"]] = result["code"] else: - other_blocks.append(ast_to_source(node)) - elif node.type in [ - "ImportDeclaration", - "ExportDefaultDeclaration", - "ExportNamedDeclaration", - ]: - other_blocks.append(ast_to_source(node)) + body["functions"][result["name"]] = result["code"] + elif result["type"] == "class": + if is_react_component(result["code"]): + body["components"][result["name"]] = result["code"] + else: + body["classes"][result["name"]] = result["code"] + elif result["type"] == "component": + body["components"][result["name"]] = result["code"] + elif result["type"] == "impl": + body["classes"][result["name"]] = result["code"] else: - other_blocks.append(ast_to_source(node)) - - def is_react_component(node): - # Check if the function/class is likely a React component - if node.type == "FunctionDeclaration" or node.type == "ArrowFunctionExpression": - body = node.body.body if node.body.type == "BlockStatement" else [node.body] - return any( - stmt.type == "ReturnStatement" - and stmt.argument - and stmt.argument.type == "JSXElement" - for stmt in body - ) - elif node.type == "ClassDeclaration": - return any( - method.key.name == "render" - for method in node.body.body - if method.type == "MethodDefinition" - ) - return False - - def is_react_hook(name): - # Check if the function name starts with 'use' - return name.startswith("use") and name[3].isupper() - - for node in tree.body: - process_node(node) - - # return functions, classes, components, hooks, other_blocks - body = ParsedBody - body["functions"] = functions - body["classes"] = classes - body["other_blocks"] = other_blocks - body["components"] = components - body["hooks"] = hooks + for child in node.children: + process_node(child) + + process_node(tree.root_node) + + # Collect remaining code as other_blocks + collected_ranges = [] + for section in body.values(): + if isinstance(section, dict): + for code_block in section.values(): + start = code.index(code_block) + collected_ranges.append((start, start + len(code_block))) + + collected_ranges.sort() + last_end = 0 + for start, end in collected_ranges: + if start > last_end: + body["other_blocks"].append(code[last_end:start].strip()) + last_end = end + if last_end < len(code): + body["other_blocks"].append(code[last_end:].strip()) + return body -def chunk_code(code, language): - if language.lower() == "python": - return chunk_python_code(code) - elif language.lower() in ["javascript", "js"]: - return chunk_javascript_code(code) - else: - raise ValueError("Unsupported language. 
Please use 'python' or 'javascript'.") +def is_react_hook(name: str) -> bool: + return name.startswith("use") and len(name) > 3 and name[3].isupper() + + +def is_react_component(code: str) -> bool: + return ( + "React" in code or "jsx" in code.lower() or "tsx" in code.lower() or "<" in code + ) diff --git a/kaizen/retriever/feedback_system.py b/kaizen/retriever/feedback_system.py new file mode 100644 index 00000000..8c47a1ec --- /dev/null +++ b/kaizen/retriever/feedback_system.py @@ -0,0 +1,18 @@ +from typing import Dict, Any + + +class AbstractionFeedback: + def __init__(self): + self.feedback_store: Dict[str, Dict[str, Any]] = {} + + def add_feedback( + self, code_id: str, abstraction: str, rating: int, correction: str = None + ) -> None: + self.feedback_store[code_id] = { + "abstraction": abstraction, + "rating": rating, + "correction": correction, + } + + def get_feedback(self, code_id: str) -> Dict[str, Any]: + return self.feedback_store.get(code_id, None) diff --git a/kaizen/retriever/llama_index_retriever.py b/kaizen/retriever/llama_index_retriever.py index 10c09206..86abec50 100644 --- a/kaizen/retriever/llama_index_retriever.py +++ b/kaizen/retriever/llama_index_retriever.py @@ -1,71 +1,182 @@ import os -from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex +import logging +import openai +from llama_index.core import ( + SimpleDirectoryReader, + StorageContext, + VectorStoreIndex, + Document, +) from llama_index.vector_stores.postgres import PGVectorStore from sqlalchemy import create_engine, text import ast +from llama_index.core import VectorStoreIndex + import networkx as nx +from typing import List, Dict, Any +from concurrent.futures import ThreadPoolExecutor, as_completed +import tiktoken +from kaizen.llms.provider import LLMProvider +from kaizen.retriever.code_chunker import chunk_code + +# Set up logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# Set up OpenAI API key +openai.api_key = os.environ.get("OPENAI_API_KEY") + +# Initialize tokenizer +tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo") class RepositoryAnalyzer: def __init__(self): - self.index = None + logger.info("Initializing RepositoryAnalyzer") self.engine = create_engine( - f"postgresql://{os.environ['PG_USER']}:{os.environ['PG_PASSWORD']}@{os.environ['PG_HOST']}:{os.environ['PG_PORT']}/{os.environ['db_name']}" + f"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}", + pool_size=10, + max_overflow=20, ) self.graph = nx.DiGraph() - - def load_index(self, folder_path): - documents = SimpleDirectoryReader(folder_path).load_data() - - vector_store = PGVectorStore.from_params( - database=os.environ["db_name"], - host=os.environ["PG_HOST"], - password=os.environ["PG_PASSWORD"], - port=os.environ["PG_PORT"], - user=os.environ["PG_USER"], + self.vector_store = PGVectorStore.from_params( + database=os.environ["POSTGRES_DB"], + host=os.environ["POSTGRES_HOST"], + password=os.environ["POSTGRES_PASSWORD"], + port=os.environ["POSTGRES_PORT"], + user=os.environ["POSTGRES_USER"], table_name="embeddings", - embed_dim=512, # openai embedding dimension + embed_dim=1536, # OpenAI's text-embedding-ada-002 dimension ) - - storage_context = StorageContext.from_defaults(vector_store=vector_store) - self.index = VectorStoreIndex.from_documents( - documents, storage_context=storage_context, 
show_progress=True + self.provider = LLMProvider() + self.storage_context = StorageContext.from_defaults( + vector_store=self.vector_store ) + logger.info("RepositoryAnalyzer initialized successfully") - def parse_repository(self, repo_path): - for root, dirs, files in os.walk(repo_path): - for file in files: - if file.endswith(".py"): - file_path = os.path.join(root, file) - self.parse_file(file_path) - - def parse_file(self, file_path): - with open(file_path, "r") as file: - content = file.read() - - tree = ast.parse(content) - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef): - self.process_function(node, file_path) - - def process_function(self, node, file_path): - function_name = node.name - start_line = node.lineno - end_line = node.end_lineno - - # Store function information in the database - self.store_function_in_db(function_name, file_path, start_line, end_line) - - # Analyze function calls within the function - for sub_node in ast.walk(node): - if isinstance(sub_node, ast.Call): - if isinstance(sub_node.func, ast.Name): - called_function = sub_node.func.id - self.graph.add_edge(function_name, called_function) - - def store_function_in_db(self, function_name, file_path, start_line, end_line): - with self.engine.connect() as connection: - # Insert into files table if not exists + def setup_repository(self, repo_path: str): + self.total_usage = self.provider.DEFAULT_USAGE + logger.info(f"Starting repository setup for: {repo_path}") + self.parse_repository(repo_path) + self.store_function_relationships() + logger.info("Repository setup completed successfully") + + def parse_repository(self, repo_path: str): + logger.info(f"Parsing repository: {repo_path}") + with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor: + futures = [] + for root, _, files in os.walk(repo_path): + for file in files: + if file.endswith( + (".py", ".js", ".ts", ".rs") + ): # Add more extensions as needed + file_path = os.path.join(root, file) + futures.append(executor.submit(self.parse_file, file_path)) + + for future in as_completed(futures): + try: + future.result() + except Exception as e: + logger.error(f"Error in parsing file: {str(e)}") + logger.info("Repository parsing completed") + + def parse_file(self, file_path: str): + logger.debug(f"Parsing file: {file_path}") + try: + with open(file_path, "r", encoding="utf-8") as file: + content = file.read() + + language = self.get_language_from_extension(file_path) + chunked_code = chunk_code(content, language) + + for section, items in chunked_code.items(): + if isinstance(items, dict): + for name, code_info in items.items(): + self.process_code_block(code_info, file_path, section, name) + elif isinstance(items, list): + for i, code_info in enumerate(items): + self.process_code_block( + code_info, file_path, section, f"{section}_{i}" + ) + logger.debug(f"Successfully parsed file: {file_path}") + except Exception as e: + logger.error(f"Error processing file {file_path}: {str(e)}") + + @staticmethod + def get_language_from_extension(file_path: str) -> str: + ext = os.path.splitext(file_path)[1].lower() + return { + ".py": "python", + ".js": "javascript", + ".jsx": "javascript", + ".ts": "typescript", + ".tsx": "typescript", + ".rs": "rust", + }.get(ext, "unknown") + + def process_code_block( + self, code_info: Dict[str, Any], file_path: str, section: str, name: str + ): + logger.debug(f"Processing code block: {section} - {name}") + code = code_info["code"] + language = self.get_language_from_extension(file_path) + abstraction, usage 
= self.generate_abstraction(code, language) + total_usage = self.provider.update_usage() + + snippet_id = self.store_code_in_db(code, abstraction, file_path, section, name) + self.store_abstraction_and_embedding(snippet_id, abstraction) + + if section == "functions": + self.analyze_function_calls(name, code) + logger.debug(f"Finished processing code block: {section} - {name}") + + def generate_abstraction( + self, code_block: str, language: str, max_tokens: int = 300 + ) -> str: + prompt = f"""Generate a concise yet comprehensive abstract description of the following {language} code block. + Include information about: + 1. The purpose or functionality of the code + 2. Input parameters and return values (if applicable) + 3. Any important algorithms or data structures used + 4. Key dependencies or external libraries used + 5. Any notable design patterns or architectural choices + 6. Potential edge cases or error handling + + Code: + ```{language} + {code_block} + ``` + """ + + estimated_prompt_tokens = len(tokenizer.encode(prompt)) + adjusted_max_tokens = min(max(150, estimated_prompt_tokens), 1000) + + try: + abstraction, usage = self.provider.chat_completion( + model="small", + messages=[ + { + "role": "system", + "content": "You are an expert programmer tasked with generating comprehensive and accurate abstractions of code snippets.", + }, + {"role": "user", "content": prompt}, + ], + max_tokens=adjusted_max_tokens, + n=1, + temperature=0.5, + ) + return abstraction + + except Exception as e: + raise e + + def store_code_in_db( + self, code: str, abstraction: str, file_path: str, section: str, name: str + ) -> int: + logger.debug(f"Storing code in DB: {file_path} - {section} - {name}") + with self.engine.begin() as connection: file_query = text( """ INSERT INTO files (repo_id, file_path, file_name, file_ext, programming_language) @@ -74,98 +185,107 @@ def store_function_in_db(self, function_name, file_path, start_line, end_line): RETURNING file_id """ ) - file_result = connection.execute( + file_id = connection.execute( file_query, { "repo_id": 1, # Assuming repo_id is 1, adjust as needed "file_path": file_path, "file_name": os.path.basename(file_path), - "file_ext": ".py", - "programming_language": "Python", + "file_ext": os.path.splitext(file_path)[1], + "programming_language": self.get_language_from_extension(file_path), }, - ) - file_id = file_result.fetchone()[0] + ).scalar_one() - # Insert into code_snippets table snippet_query = text( """ - INSERT INTO code_snippets (file_id, snippet_text, start_line, end_line, functionality) - VALUES (:file_id, :snippet_text, :start_line, :end_line, :functionality) + INSERT INTO code_snippets (file_id, snippet_text, functionality, context) + VALUES (:file_id, :snippet_text, :functionality, :context) RETURNING snippet_id """ ) - snippet_result = connection.execute( + snippet_id = connection.execute( snippet_query, { "file_id": file_id, - "snippet_text": function_name, # This should be the actual function code - "start_line": start_line, - "end_line": end_line, - "functionality": f"Function: {function_name}", + "snippet_text": code, + "functionality": abstraction, + "context": f"{section}: {name}", }, - ) - snippet_id = snippet_result.fetchone()[0] + ).scalar_one() - # Insert into ast_nodes table - node_query = text( - """ - INSERT INTO ast_nodes (file_id, node_type, start_line, end_line) - VALUES (:file_id, :node_type, :start_line, :end_line) - RETURNING node_id - """ - ) - node_result = connection.execute( - node_query, - { - "file_id": file_id, - 
"node_type": "FunctionDef", - "start_line": start_line, - "end_line": end_line, - }, - ) - node_id = node_result.fetchone()[0] + logger.debug(f"Code stored in DB with snippet_id: {snippet_id}") + return snippet_id - # Insert function name as a property - prop_query = text( - """ - INSERT INTO node_properties (node_id, property_name, property_value) - VALUES (:node_id, :property_name, :property_value) - """ - ) - connection.execute( - prop_query, - { - "node_id": node_id, - "property_name": "function_name", - "property_value": function_name, - }, - ) + def store_abstraction_and_embedding(self, snippet_id: int, abstraction: str): + logger.debug(f"Storing abstraction and embedding for snippet_id: {snippet_id}") + + embedding = self.provider.get_text_embedding(abstraction) + doc = Document(text=abstraction, metadata={"snippet_id": snippet_id}) + self.vector_store.add_documents([doc], embedding_vectors=[embedding]) + + logger.debug(f"Abstraction and embedding stored for snippet_id: {snippet_id}") + + def analyze_function_calls(self, function_name: str, code: str): + logger.debug(f"Analyzing function calls for: {function_name}") + try: + tree = ast.parse(code) + for node in ast.walk(tree): + if isinstance(node, ast.Call) and isinstance(node.func, ast.Name): + self.graph.add_edge(function_name, node.func.id) + logger.debug(f"Added edge: {function_name} -> {node.func.id}") + except SyntaxError: + logger.error(f"Syntax error in function {function_name}") def store_function_relationships(self): - for caller, callee in self.graph.edges(): - with self.engine.connect() as connection: + logger.info("Storing function relationships") + with self.engine.begin() as connection: + for caller, callee in self.graph.edges(): query = text( """ INSERT INTO node_relationships (parent_node_id, child_node_id, relationship_type) VALUES ( - (SELECT node_id FROM ast_nodes WHERE node_type = 'FunctionDef' AND node_id IN - (SELECT node_id FROM node_properties WHERE property_name = 'function_name' AND property_value = :caller) - ), - (SELECT node_id FROM ast_nodes WHERE node_type = 'FunctionDef' AND node_id IN - (SELECT node_id FROM node_properties WHERE property_name = 'function_name' AND property_value = :callee) - ), + (SELECT snippet_id FROM code_snippets WHERE context LIKE :caller), + (SELECT snippet_id FROM code_snippets WHERE context LIKE :callee), 'calls' ) + ON CONFLICT DO NOTHING """ ) - connection.execute(query, {"caller": caller, "callee": callee}) + connection.execute( + query, {"caller": f"%{caller}", "callee": f"%{callee}"} + ) + logger.info("Function relationships stored successfully") - def query(self, query_text): - # Perform retrieval using the index - response = self.index.query(query_text) - return response + def query(self, query_text: str, num_results: int = 5) -> List[Dict[str, Any]]: + logger.info(f"Performing query: '{query_text}'") - def analyze_repository(self, repo_path): - self.parse_repository(repo_path) - self.store_function_relationships() - self.load_index(repo_path) + index = VectorStoreIndex.from_vector_store(self.vector_store) + + # Create a query engine + query_engine = index.as_query_engine(similarity_top_k=num_results) + + # Perform the query + response = query_engine.query(query_text) + + results = [] + with self.engine.connect() as connection: + for node in response.source_nodes: + snippet_id = node.metadata["snippet_id"] + query = text(""" + SELECT cs.snippet_text, cs.functionality, f.file_path + FROM code_snippets cs + JOIN files f ON cs.file_id = f.file_id + WHERE cs.snippet_id = 
:snippet_id + """) + result = connection.execute(query, {"snippet_id": snippet_id}).fetchone() + if result: + results.append({ + "code": result[0], + "abstraction": result[1], + "file_path": result[2], + "relevance_score": node.score if hasattr(node, 'score') else 1.0 + }) + + sorted_results = sorted(results, key=lambda x: x["relevance_score"], reverse=True) + logger.info(f"Query completed. Found {len(sorted_results)} results.") + return sorted_results diff --git a/kaizen/retriever/query_processor.py b/kaizen/retriever/query_processor.py deleted file mode 100644 index e69de29b..00000000 diff --git a/kaizen/retriever/result_processor.py b/kaizen/retriever/result_processor.py deleted file mode 100644 index e69de29b..00000000 diff --git a/kaizen/retriever/tree_sitter_utils.py b/kaizen/retriever/tree_sitter_utils.py new file mode 100644 index 00000000..356e39a5 --- /dev/null +++ b/kaizen/retriever/tree_sitter_utils.py @@ -0,0 +1,107 @@ +import os +from functools import lru_cache +from tree_sitter import Language, Parser +from typing import Dict, Any +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Directory where the language libraries are stored +LANGUAGE_DIR = "/app/tree_sitter_languages" + +class LanguageLoader: + @staticmethod + @lru_cache(maxsize=None) + def load_language(language: str) -> Language: + try: + lang_file = os.path.join(LANGUAGE_DIR, f"{language}.so") + if not os.path.exists(lang_file): + raise FileNotFoundError(f"Language file for {language} not found.") + return Language(lang_file, language) + except Exception as e: + logger.error(f"Failed to load language {language}: {str(e)}") + raise + +class ParserFactory: + @staticmethod + @lru_cache(maxsize=None) + def get_parser(language: str) -> Parser: + try: + parser = Parser() + lang = LanguageLoader.load_language(language) + parser.set_language(lang) + return parser + except Exception as e: + logger.error(f"Failed to create parser for {language}: {str(e)}") + raise + +def traverse_tree(node, code_bytes: bytes) -> Dict[str, Any]: + if node.type in [ + "function_definition", + "function_declaration", + "arrow_function", + "method_definition", + ]: + return { + "type": "function", + "name": ( + node.child_by_field_name("name").text.decode("utf8") + if node.child_by_field_name("name") + else "anonymous" + ), + "code": code_bytes[node.start_byte : node.end_byte].decode("utf8"), + } + elif node.type in ["class_definition", "class_declaration"]: + return { + "type": "class", + "name": node.child_by_field_name("name").text.decode("utf8"), + "code": code_bytes[node.start_byte : node.end_byte].decode("utf8"), + } + elif node.type in ["jsx_element", "jsx_self_closing_element"]: + return { + "type": "component", + "name": ( + node.child_by_field_name("opening_element") + .child_by_field_name("name") + .text.decode("utf8") + if node.type == "jsx_element" + else node.child_by_field_name("name").text.decode("utf8") + ), + "code": code_bytes[node.start_byte : node.end_byte].decode("utf8"), + } + elif node.type == "impl_item": + return { + "type": "impl", + "name": node.child_by_field_name("type").text.decode("utf8"), + "code": code_bytes[node.start_byte : node.end_byte].decode("utf8"), + } + else: + return None + +def parse_code(code: str, language: str) -> Dict[str, Any]: + try: + parser = ParserFactory.get_parser(language) + tree = parser.parse(bytes(code, "utf8")) + return traverse_tree(tree.root_node, code.encode("utf8")) + except Exception as e: + logger.error(f"Failed 
to parse {language} code: {str(e)}") + raise + +def check_language_files(): + required_languages = ["python", "javascript", "typescript", "rust"] + missing_languages = [] + for lang in required_languages: + try: + LanguageLoader.load_language(lang) + except FileNotFoundError: + missing_languages.append(lang) + + if missing_languages: + logger.warning(f"Missing language files for: {', '.join(missing_languages)}") + else: + logger.info("All required language files are present.") + +# Call this function at the start of your application +check_language_files() \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 1adae5ba..a5bab61b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,3 +1,5 @@ +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. + [[package]] name = "aiohappyeyeballs" version = "2.3.4" @@ -1037,109 +1039,6 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] -[[package]] -name = "ijson" -version = "3.3.0" -description = "Iterative JSON parser with standard Python iterator interfaces" -optional = false -python-versions = "*" -files = [ - {file = "ijson-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7f7a5250599c366369fbf3bc4e176f5daa28eb6bc7d6130d02462ed335361675"}, - {file = "ijson-3.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f87a7e52f79059f9c58f6886c262061065eb6f7554a587be7ed3aa63e6b71b34"}, - {file = "ijson-3.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b73b493af9e947caed75d329676b1b801d673b17481962823a3e55fe529c8b8b"}, - {file = "ijson-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5576415f3d76290b160aa093ff968f8bf6de7d681e16e463a0134106b506f49"}, - {file = "ijson-3.3.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e9ffe358d5fdd6b878a8a364e96e15ca7ca57b92a48f588378cef315a8b019e"}, - {file = "ijson-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8643c255a25824ddd0895c59f2319c019e13e949dc37162f876c41a283361527"}, - {file = "ijson-3.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:df3ab5e078cab19f7eaeef1d5f063103e1ebf8c26d059767b26a6a0ad8b250a3"}, - {file = "ijson-3.3.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3dc1fb02c6ed0bae1b4bf96971258bf88aea72051b6e4cebae97cff7090c0607"}, - {file = "ijson-3.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e9afd97339fc5a20f0542c971f90f3ca97e73d3050cdc488d540b63fae45329a"}, - {file = "ijson-3.3.0-cp310-cp310-win32.whl", hash = "sha256:844c0d1c04c40fd1b60f148dc829d3f69b2de789d0ba239c35136efe9a386529"}, - {file = "ijson-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:d654d045adafdcc6c100e8e911508a2eedbd2a1b5f93f930ba13ea67d7704ee9"}, - {file = "ijson-3.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:501dce8eaa537e728aa35810656aa00460a2547dcb60937c8139f36ec344d7fc"}, - {file = "ijson-3.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:658ba9cad0374d37b38c9893f4864f284cdcc7d32041f9808fba8c7bcaadf134"}, - {file = "ijson-3.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2636cb8c0f1023ef16173f4b9a233bcdb1df11c400c603d5f299fac143ca8d70"}, - {file = "ijson-3.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd174b90db68c3bcca273e9391934a25d76929d727dc75224bf244446b28b03b"}, - {file = "ijson-3.3.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:97a9aea46e2a8371c4cf5386d881de833ed782901ac9f67ebcb63bb3b7d115af"}, - {file = "ijson-3.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c594c0abe69d9d6099f4ece17763d53072f65ba60b372d8ba6de8695ce6ee39e"}, - {file = "ijson-3.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8e0ff16c224d9bfe4e9e6bd0395826096cda4a3ef51e6c301e1b61007ee2bd24"}, - {file = "ijson-3.3.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0015354011303175eae7e2ef5136414e91de2298e5a2e9580ed100b728c07e51"}, - {file = "ijson-3.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:034642558afa57351a0ffe6de89e63907c4cf6849070cc10a3b2542dccda1afe"}, - {file = "ijson-3.3.0-cp311-cp311-win32.whl", hash = "sha256:192e4b65495978b0bce0c78e859d14772e841724d3269fc1667dc6d2f53cc0ea"}, - {file = "ijson-3.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:72e3488453754bdb45c878e31ce557ea87e1eb0f8b4fc610373da35e8074ce42"}, - {file = "ijson-3.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:988e959f2f3d59ebd9c2962ae71b97c0df58323910d0b368cc190ad07429d1bb"}, - {file = "ijson-3.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b2f73f0d0fce5300f23a1383d19b44d103bb113b57a69c36fd95b7c03099b181"}, - {file = "ijson-3.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ee57a28c6bf523d7cb0513096e4eb4dac16cd935695049de7608ec110c2b751"}, - {file = "ijson-3.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0155a8f079c688c2ccaea05de1ad69877995c547ba3d3612c1c336edc12a3a5"}, - {file = "ijson-3.3.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ab00721304af1ae1afa4313ecfa1bf16b07f55ef91e4a5b93aeaa3e2bd7917c"}, - {file = "ijson-3.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40ee3821ee90be0f0e95dcf9862d786a7439bd1113e370736bfdf197e9765bfb"}, - {file = "ijson-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:da3b6987a0bc3e6d0f721b42c7a0198ef897ae50579547b0345f7f02486898f5"}, - {file = "ijson-3.3.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:63afea5f2d50d931feb20dcc50954e23cef4127606cc0ecf7a27128ed9f9a9e6"}, - {file = "ijson-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b5c3e285e0735fd8c5a26d177eca8b52512cdd8687ca86ec77a0c66e9c510182"}, - {file = "ijson-3.3.0-cp312-cp312-win32.whl", hash = "sha256:907f3a8674e489abdcb0206723e5560a5cb1fa42470dcc637942d7b10f28b695"}, - {file = "ijson-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:8f890d04ad33262d0c77ead53c85f13abfb82f2c8f078dfbf24b78f59534dfdd"}, - {file = "ijson-3.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b9d85a02e77ee8ea6d9e3fd5d515bcc3d798d9c1ea54817e5feb97a9bc5d52fe"}, - {file = "ijson-3.3.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6576cdc36d5a09b0c1a3d81e13a45d41a6763188f9eaae2da2839e8a4240bce"}, - {file = "ijson-3.3.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5589225c2da4bb732c9c370c5961c39a6db72cf69fb2a28868a5413ed7f39e6"}, - {file = "ijson-3.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad04cf38164d983e85f9cba2804566c0160b47086dcca4cf059f7e26c5ace8ca"}, - {file = "ijson-3.3.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:a3b730ef664b2ef0e99dec01b6573b9b085c766400af363833e08ebc1e38eb2f"}, - {file = "ijson-3.3.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:4690e3af7b134298055993fcbea161598d23b6d3ede11b12dca6815d82d101d5"}, - {file = 
"ijson-3.3.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:aaa6bfc2180c31a45fac35d40e3312a3d09954638ce0b2e9424a88e24d262a13"}, - {file = "ijson-3.3.0-cp36-cp36m-win32.whl", hash = "sha256:44367090a5a876809eb24943f31e470ba372aaa0d7396b92b953dda953a95d14"}, - {file = "ijson-3.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7e2b3e9ca957153557d06c50a26abaf0d0d6c0ddf462271854c968277a6b5372"}, - {file = "ijson-3.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:47c144117e5c0e2babb559bc8f3f76153863b8dd90b2d550c51dab5f4b84a87f"}, - {file = "ijson-3.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ce02af5fbf9ba6abb70765e66930aedf73311c7d840478f1ccecac53fefbf3"}, - {file = "ijson-3.3.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ac6c3eeed25e3e2cb9b379b48196413e40ac4e2239d910bb33e4e7f6c137745"}, - {file = "ijson-3.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d92e339c69b585e7b1d857308ad3ca1636b899e4557897ccd91bb9e4a56c965b"}, - {file = "ijson-3.3.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:8c85447569041939111b8c7dbf6f8fa7a0eb5b2c4aebb3c3bec0fb50d7025121"}, - {file = "ijson-3.3.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:542c1e8fddf082159a5d759ee1412c73e944a9a2412077ed00b303ff796907dc"}, - {file = "ijson-3.3.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:30cfea40936afb33b57d24ceaf60d0a2e3d5c1f2335ba2623f21d560737cc730"}, - {file = "ijson-3.3.0-cp37-cp37m-win32.whl", hash = "sha256:6b661a959226ad0d255e49b77dba1d13782f028589a42dc3172398dd3814c797"}, - {file = "ijson-3.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0b003501ee0301dbf07d1597482009295e16d647bb177ce52076c2d5e64113e0"}, - {file = "ijson-3.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3e8d8de44effe2dbd0d8f3eb9840344b2d5b4cc284a14eb8678aec31d1b6bea8"}, - {file = "ijson-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9cd5c03c63ae06d4f876b9844c5898d0044c7940ff7460db9f4cd984ac7862b5"}, - {file = "ijson-3.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04366e7e4a4078d410845e58a2987fd9c45e63df70773d7b6e87ceef771b51ee"}, - {file = "ijson-3.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de7c1ddb80fa7a3ab045266dca169004b93f284756ad198306533b792774f10a"}, - {file = "ijson-3.3.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8851584fb931cffc0caa395f6980525fd5116eab8f73ece9d95e6f9c2c326c4c"}, - {file = "ijson-3.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdcfc88347fd981e53c33d832ce4d3e981a0d696b712fbcb45dcc1a43fe65c65"}, - {file = "ijson-3.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3917b2b3d0dbbe3296505da52b3cb0befbaf76119b2edaff30bd448af20b5400"}, - {file = "ijson-3.3.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:e10c14535abc7ddf3fd024aa36563cd8ab5d2bb6234a5d22c77c30e30fa4fb2b"}, - {file = "ijson-3.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3aba5c4f97f4e2ce854b5591a8b0711ca3b0c64d1b253b04ea7b004b0a197ef6"}, - {file = "ijson-3.3.0-cp38-cp38-win32.whl", hash = "sha256:b325f42e26659df1a0de66fdb5cde8dd48613da9c99c07d04e9fb9e254b7ee1c"}, - {file = "ijson-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:ff835906f84451e143f31c4ce8ad73d83ef4476b944c2a2da91aec8b649570e1"}, - {file = "ijson-3.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3c556f5553368dff690c11d0a1fb435d4ff1f84382d904ccc2dc53beb27ba62e"}, - {file = "ijson-3.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:e4396b55a364a03ff7e71a34828c3ed0c506814dd1f50e16ebed3fc447d5188e"}, - {file = "ijson-3.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e6850ae33529d1e43791b30575070670070d5fe007c37f5d06aebc1dd152ab3f"}, - {file = "ijson-3.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36aa56d68ea8def26778eb21576ae13f27b4a47263a7a2581ab2ef58b8de4451"}, - {file = "ijson-3.3.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7ec759c4a0fc820ad5dc6a58e9c391e7b16edcb618056baedbedbb9ea3b1524"}, - {file = "ijson-3.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b51bab2c4e545dde93cb6d6bb34bf63300b7cd06716f195dd92d9255df728331"}, - {file = "ijson-3.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:92355f95a0e4da96d4c404aa3cff2ff033f9180a9515f813255e1526551298c1"}, - {file = "ijson-3.3.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:8795e88adff5aa3c248c1edce932db003d37a623b5787669ccf205c422b91e4a"}, - {file = "ijson-3.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8f83f553f4cde6d3d4eaf58ec11c939c94a0ec545c5b287461cafb184f4b3a14"}, - {file = "ijson-3.3.0-cp39-cp39-win32.whl", hash = "sha256:ead50635fb56577c07eff3e557dac39533e0fe603000684eea2af3ed1ad8f941"}, - {file = "ijson-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:c8a9befb0c0369f0cf5c1b94178d0d78f66d9cebb9265b36be6e4f66236076b8"}, - {file = "ijson-3.3.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2af323a8aec8a50fa9effa6d640691a30a9f8c4925bd5364a1ca97f1ac6b9b5c"}, - {file = "ijson-3.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f64f01795119880023ba3ce43072283a393f0b90f52b66cc0ea1a89aa64a9ccb"}, - {file = "ijson-3.3.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a716e05547a39b788deaf22725490855337fc36613288aa8ae1601dc8c525553"}, - {file = "ijson-3.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473f5d921fadc135d1ad698e2697025045cd8ed7e5e842258295012d8a3bc702"}, - {file = "ijson-3.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd26b396bc3a1e85f4acebeadbf627fa6117b97f4c10b177d5779577c6607744"}, - {file = "ijson-3.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:25fd49031cdf5fd5f1fd21cb45259a64dad30b67e64f745cc8926af1c8c243d3"}, - {file = "ijson-3.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b72178b1e565d06ab19319965022b36ef41bcea7ea153b32ec31194bec032a2"}, - {file = "ijson-3.3.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d0b6b637d05dbdb29d0bfac2ed8425bb369e7af5271b0cc7cf8b801cb7360c2"}, - {file = "ijson-3.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5378d0baa59ae422905c5f182ea0fd74fe7e52a23e3821067a7d58c8306b2191"}, - {file = "ijson-3.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:99f5c8ab048ee4233cc4f2b461b205cbe01194f6201018174ac269bf09995749"}, - {file = "ijson-3.3.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:45ff05de889f3dc3d37a59d02096948ce470699f2368b32113954818b21aa74a"}, - {file = "ijson-3.3.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1efb521090dd6cefa7aafd120581947b29af1713c902ff54336b7c7130f04c47"}, - {file = "ijson-3.3.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87c727691858fd3a1c085d9980d12395517fcbbf02c69fbb22dede8ee03422da"}, - {file = 
"ijson-3.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0420c24e50389bc251b43c8ed379ab3e3ba065ac8262d98beb6735ab14844460"}, - {file = "ijson-3.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:8fdf3721a2aa7d96577970f5604bd81f426969c1822d467f07b3d844fa2fecc7"}, - {file = "ijson-3.3.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:891f95c036df1bc95309951940f8eea8537f102fa65715cdc5aae20b8523813b"}, - {file = "ijson-3.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed1336a2a6e5c427f419da0154e775834abcbc8ddd703004108121c6dd9eba9d"}, - {file = "ijson-3.3.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0c819f83e4f7b7f7463b2dc10d626a8be0c85fbc7b3db0edc098c2b16ac968e"}, - {file = "ijson-3.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33afc25057377a6a43c892de34d229a86f89ea6c4ca3dd3db0dcd17becae0dbb"}, - {file = "ijson-3.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7914d0cf083471856e9bc2001102a20f08e82311dfc8cf1a91aa422f9414a0d6"}, - {file = "ijson-3.3.0.tar.gz", hash = "sha256:7f172e6ba1bee0d4c8f8ebd639577bfe429dee0f3f96775a067b8bae4492d8a0"}, -] - [[package]] name = "importlib-metadata" version = "8.2.0" @@ -1159,24 +1058,6 @@ doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linke perf = ["ipython"] test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] -[[package]] -name = "importlib-resources" -version = "6.4.0" -description = "Read resources from Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, - {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, -] - -[package.dependencies] -zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] - [[package]] name = "iniconfig" version = "2.0.0" @@ -1205,6 +1086,17 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + [[package]] name = "jsonschema" version = "4.23.0" @@ -1218,9 +1110,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} jsonschema-specifications = ">=2023.03.6" -pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} referencing = ">=0.28.4" rpds-py = ">=0.7.1" @@ -1240,7 +1130,6 @@ files = [ ] [package.dependencies] -importlib-resources = 
{version = ">=1.4.0", markers = "python_version < \"3.9\""} referencing = ">=0.31.0" [[package]] @@ -1257,7 +1146,6 @@ files = [ [package.dependencies] aiohttp = "*" click = "*" -ijson = "*" importlib-metadata = ">=6.8.0" jinja2 = ">=3.1.2,<4.0.0" jsonschema = ">=4.22.0,<5.0.0" @@ -1272,15 +1160,87 @@ tokenizers = "*" extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "pynacl (>=1.5.0,<2.0.0)", "resend (>=0.8.0,<0.9.0)"] proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=42.0.5,<43.0.0)", "fastapi (>=0.111.0,<0.112.0)", "fastapi-sso (>=0.10.0,<0.11.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "python-multipart (>=0.0.9,<0.0.10)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"] +[[package]] +name = "llama-cloud" +version = "0.0.13" +description = "" +optional = false +python-versions = "<4,>=3.8" +files = [ + {file = "llama_cloud-0.0.13-py3-none-any.whl", hash = "sha256:b641450308b80c85eeae7ef9cb5a3b4a3b1823d5cde05b626ce33f7494ec6229"}, + {file = "llama_cloud-0.0.13.tar.gz", hash = "sha256:0e3165a22f8df34a00d13f1f5739438ba4d620f2d8a9289df830078a39fe6f1f"}, +] + +[package.dependencies] +httpx = ">=0.20.0" +pydantic = ">=1.10" + +[[package]] +name = "llama-index" +version = "0.10.65" +description = "Interface between LLMs and your data" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index-0.10.65-py3-none-any.whl", hash = "sha256:3e5c447fa2dc8a5da95dce47a5dfe2e1c6a3b4f40ff4be8688b38ee321ee425c"}, + {file = "llama_index-0.10.65.tar.gz", hash = "sha256:1607c6d5f7ebe6cd016891796eff553c9fe85fde9cf8d211f6fd0f4cdbc7a88e"}, +] + +[package.dependencies] +llama-index-agent-openai = ">=0.1.4,<0.3.0" +llama-index-cli = ">=0.1.2,<0.2.0" +llama-index-core = ">=0.10.65,<0.11.0" +llama-index-embeddings-openai = ">=0.1.5,<0.2.0" +llama-index-indices-managed-llama-cloud = ">=0.2.0" +llama-index-legacy = ">=0.9.48,<0.10.0" +llama-index-llms-openai = ">=0.1.27,<0.2.0" +llama-index-multi-modal-llms-openai = ">=0.1.3,<0.2.0" +llama-index-program-openai = ">=0.1.3,<0.2.0" +llama-index-question-gen-openai = ">=0.1.2,<0.2.0" +llama-index-readers-file = ">=0.1.4,<0.2.0" +llama-index-readers-llama-parse = ">=0.1.2" + +[[package]] +name = "llama-index-agent-openai" +version = "0.2.9" +description = "llama-index agent openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_agent_openai-0.2.9-py3-none-any.whl", hash = "sha256:d7f0fd4c87124781acd783be603871f8808b1a3969e876a9c96e2ed0844d46ac"}, + {file = "llama_index_agent_openai-0.2.9.tar.gz", hash = "sha256:debe86da6d9d983db32b445ddca7c798ac140fe59573bafded73595b3995f3d5"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.41,<0.11.0" +llama-index-llms-openai = ">=0.1.5,<0.2.0" +openai = ">=1.14.0" + +[[package]] +name = "llama-index-cli" +version = "0.1.13" +description = "llama-index cli" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_cli-0.1.13-py3-none-any.whl", hash = "sha256:5e05bc3ce55ee1bf6e5af7e87631a71d6b6cf8fc2af10cd3947b09b1bac6788d"}, + {file = "llama_index_cli-0.1.13.tar.gz", hash = "sha256:86147ded4439fbab1d6c7c0d72e8f231d2935da9fdf5c9d3f0dde4f35d44aa59"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.11.post1,<0.11.0" +llama-index-embeddings-openai = ">=0.1.1,<0.2.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + [[package]] name = 
"llama-index-core" -version = "0.10.47" +version = "0.10.66" description = "Interface between LLMs and your data" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_core-0.10.47-py3-none-any.whl", hash = "sha256:9d9f7d0f4861832386bb21326e67277c13aa7472af1dfdef53e8910309c9f569"}, - {file = "llama_index_core-0.10.47.tar.gz", hash = "sha256:a76b6029552e281c2cbc67bcbb2639e4ee9e864d30df78bde24945ddb8ab6422"}, + {file = "llama_index_core-0.10.66-py3-none-any.whl", hash = "sha256:0d4ffaea4a5f0bdc2243d7e71d5f6926a508737088aa5c0af658ea2deac98b4d"}, + {file = "llama_index_core-0.10.66.tar.gz", hash = "sha256:70f5cc9da6ee1c550dfde0bd8ab12e77128cc308714958e2cafb7affbc3f5c87"}, ] [package.dependencies] @@ -1290,10 +1250,9 @@ deprecated = ">=1.2.9.3" dirtyjson = ">=1.0.8,<2.0.0" fsspec = ">=2023.5.0" httpx = "*" -llamaindex-py-client = ">=0.1.18,<0.2.0" nest-asyncio = ">=1.5.8,<2.0.0" networkx = ">=3.0" -nltk = ">=3.8.1,<4.0.0" +nltk = ">=3.8.1" numpy = "<2.0.0" openai = ">=1.1.0" pandas = "*" @@ -1322,19 +1281,120 @@ files = [ [package.dependencies] llama-index-core = ">=0.10.1,<0.11.0" +[[package]] +name = "llama-index-indices-managed-llama-cloud" +version = "0.2.7" +description = "llama-index indices llama-cloud integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_indices_managed_llama_cloud-0.2.7-py3-none-any.whl", hash = "sha256:94335504eab2a6baf7361bbd8bda3ae20a68c7d0111587c9a0793440e9edff21"}, + {file = "llama_index_indices_managed_llama_cloud-0.2.7.tar.gz", hash = "sha256:d7e9b4cc50214b3cfcd75ea63cacce4ee36092cb672c003f15fd23ba31c49ec0"}, +] + +[package.dependencies] +llama-cloud = ">=0.0.11" +llama-index-core = ">=0.10.48.post1,<0.11.0" + +[[package]] +name = "llama-index-legacy" +version = "0.9.48.post2" +description = "Interface between LLMs and your data" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_legacy-0.9.48.post2-py3-none-any.whl", hash = "sha256:2581af680a4e577d4f0accd76e8286c5f1054f28a2fb0e8e5758f09ce5da0176"}, + {file = "llama_index_legacy-0.9.48.post2.tar.gz", hash = "sha256:a4c1f10b4d19d005674195c449f4e859022c65c816dcba1a619ef5df922aa212"}, +] + +[package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" +dataclasses-json = "*" +deprecated = ">=1.2.9.3" +dirtyjson = ">=1.0.8,<2.0.0" +fsspec = ">=2023.5.0" +httpx = "*" +nest-asyncio = ">=1.5.8,<2.0.0" +networkx = ">=3.0" +nltk = ">=3.8.1" +numpy = "*" +openai = ">=1.1.0" +pandas = "*" +requests = ">=2.31.0" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<9.0.0" +tiktoken = ">=0.3.3" +typing-extensions = ">=4.5.0" +typing-inspect = ">=0.8.0" + +[package.extras] +gradientai = ["gradientai (>=1.4.0)"] +html = ["beautifulsoup4 (>=4.12.2,<5.0.0)"] +langchain = ["langchain (>=0.0.303)"] +local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.33.1,<5.0.0)"] +postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg2-binary (>=2.9.9,<3.0.0)"] +query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] + [[package]] name = "llama-index-llms-openai" -version = "0.1.22" +version = "0.1.27" description = "llama-index llms openai integration" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_llms_openai-0.1.22-py3-none-any.whl", hash = 
"sha256:84a8c910671460ad724ed818192f209f7481e71bcc6528553ba7e66db2e14bcd"}, - {file = "llama_index_llms_openai-0.1.22.tar.gz", hash = "sha256:729bf2ea7043517465e1d585089512b77d8b3ce92233a67c138d5d621061ed56"}, + {file = "llama_index_llms_openai-0.1.27-py3-none-any.whl", hash = "sha256:8da0e90d4a558667d2b9cf1b3f577a4cb7723b7680ed6d22027b0baf9cd5999e"}, + {file = "llama_index_llms_openai-0.1.27.tar.gz", hash = "sha256:37c2d1159b56607d3a807d90260ee25b4f002086d6251c7272afbc53f2514603"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.57,<0.11.0" + +[[package]] +name = "llama-index-multi-modal-llms-openai" +version = "0.1.9" +description = "llama-index multi-modal-llms openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_multi_modal_llms_openai-0.1.9-py3-none-any.whl", hash = "sha256:614f40427a4671e72742780be8fda77297dbf2942519bffcb2c9de8696a9edff"}, + {file = "llama_index_multi_modal_llms_openai-0.1.9.tar.gz", hash = "sha256:dbacf44d5c2cca07ca424eacd1337583002d70387a3c1868cf8ae743b1dbec4a"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-program-openai" +version = "0.1.7" +description = "llama-index program openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_program_openai-0.1.7-py3-none-any.whl", hash = "sha256:33489b573c1050a3f583ff68fcbc4bcbd49f29e74f3e5baea08ab0d5f363403c"}, + {file = "llama_index_program_openai-0.1.7.tar.gz", hash = "sha256:bf7eb61a073381714be5a049d93b40044dfe51bd4333bee539d1532b7407621f"}, ] [package.dependencies] -llama-index-core = ">=0.10.24,<0.11.0" +llama-index-agent-openai = ">=0.1.1,<0.3.0" +llama-index-core = ">=0.10.57,<0.11.0" +llama-index-llms-openai = ">=0.1.1" + +[[package]] +name = "llama-index-question-gen-openai" +version = "0.1.3" +description = "llama-index question_gen openai integration" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_question_gen_openai-0.1.3-py3-none-any.whl", hash = "sha256:1f83b49e8b2e665030d1ec8c54687d6985d9fa8426147b64e46628a9e489b302"}, + {file = "llama_index_question_gen_openai-0.1.3.tar.gz", hash = "sha256:4486198117a45457d2e036ae60b93af58052893cc7d78fa9b6f47dd47b81e2e1"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" +llama-index-program-openai = ">=0.1.1,<0.2.0" [[package]] name = "llama-index-readers-file" @@ -1356,6 +1416,21 @@ striprtf = ">=0.0.26,<0.0.27" [package.extras] pymupdf = ["pymupdf (>=1.23.21,<2.0.0)"] +[[package]] +name = "llama-index-readers-llama-parse" +version = "0.1.6" +description = "llama-index readers llama-parse integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_readers_llama_parse-0.1.6-py3-none-any.whl", hash = "sha256:71d445a2357ce4c632e0fada7c913ac62790e77c062f12d916dd86378380ff1f"}, + {file = "llama_index_readers_llama_parse-0.1.6.tar.gz", hash = "sha256:04f2dcfbb0fb87ce70890f5a2f4f89941d79be6a818b43738f053560e4b451cf"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.7,<0.11.0" +llama-parse = ">=0.4.0" + [[package]] name = "llama-index-vector-stores-postgres" version = "0.1.11" @@ -1375,19 +1450,18 @@ psycopg2-binary = ">=2.9.9,<3.0.0" sqlalchemy = {version = ">=1.4.49,<2.1", extras = ["asyncio"]} [[package]] -name = "llamaindex-py-client" -version = "0.1.19" -description = "" +name = "llama-parse" +version = "0.4.9" 
+description = "Parse files into RAG-Optimized formats." optional = false -python-versions = "<4,>=3.8" +python-versions = "<4.0,>=3.8.1" files = [ - {file = "llamaindex_py_client-0.1.19-py3-none-any.whl", hash = "sha256:fd9416fd78b97209bf323bc3c7fab314499778563e7274f10853ad560563d10e"}, - {file = "llamaindex_py_client-0.1.19.tar.gz", hash = "sha256:73f74792bb8c092bae6dc626627a09ac13a099fa8d10f8fcc83e17a2b332cca7"}, + {file = "llama_parse-0.4.9-py3-none-any.whl", hash = "sha256:71974a57a73d642608cc406942bee4e7fc1a713fa410f51df67da509479ba544"}, + {file = "llama_parse-0.4.9.tar.gz", hash = "sha256:657f8fa5f7d399f14c0454fc05cae6034da0373f191df6cfca17a1b4a704ef87"}, ] [package.dependencies] -httpx = ">=0.20.0" -pydantic = ">=1.10" +llama-index-core = ">=0.10.29" [[package]] name = "markupsafe" @@ -1771,10 +1845,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, - ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -1926,17 +1999,6 @@ files = [ {file = "pip-24.2.tar.gz", hash = "sha256:5b5e490b5e9cb275c879595064adce9ebd31b854e3e803740b72f9ccf34a45b8"}, ] -[[package]] -name = "pkgutil-resolve-name" -version = "1.3.10" -description = "Resolve a name to an object." -optional = false -python-versions = ">=3.6" -files = [ - {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, - {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, -] - [[package]] name = "platformdirs" version = "4.2.2" @@ -2707,6 +2769,17 @@ files = [ {file = "rpds_py-0.19.1.tar.gz", hash = "sha256:31dd5794837f00b46f4096aa8ccaa5972f73a938982e32ed817bb520c465e520"}, ] +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -3071,6 +3144,48 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "tree-sitter" +version = "0.22.3" +description = "Python bindings to the Tree-sitter parsing library" +optional = false +python-versions = ">=3.9" +files = [ + {file = "tree-sitter-0.22.3.tar.gz", hash = "sha256:6516bcef5d36e0365670b97c91a169c8b1aa82ea4b60946b879020820718ce3d"}, + {file = "tree_sitter-0.22.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d9a26dd80cf10763527483b02ba35a0b8d9168f324dbbce3f07860256c29bf15"}, + {file = "tree_sitter-0.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4bcbe0a7358628629d9ec8e5687477e12f7c6aae6943b0872afb7170db039b86"}, + {file = "tree_sitter-0.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfa45e6bf2542862ce987482fe212ef3153bd331d5bba5873b9f485f8923f65a"}, + {file = "tree_sitter-0.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4545b142da82f9668007180e0081583054682d0154cd6349796ac77dc8520d63"}, + {file = 
"tree_sitter-0.22.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4978d22fe2868ab9a91125f49bd576ce5f954cc887c19471e0c33e104f37ba71"}, + {file = "tree_sitter-0.22.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0ec593a69f8c4f1c81494147814d11b7fc6c903e5299e084ae7b89caf95cef84"}, + {file = "tree_sitter-0.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:0f66b88b8e9993630613d594e845f3cf2695fef87d0ca1475437cb17eeb72dc5"}, + {file = "tree_sitter-0.22.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e627eb129421f63378e936b5d0e13b8befa6e7c5267a8a7621a397a84e8f1f7"}, + {file = "tree_sitter-0.22.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3cfa2a9860bfb0404ae28a9cf056dab8f2eb7f1673d8cc9b3f7e21452daad0e0"}, + {file = "tree_sitter-0.22.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9a66cc5f19635119a9d8325bcb00a58ed48427e3c3d307caf7c00d745ac83a5"}, + {file = "tree_sitter-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de16468ea22c910e67caa91c99be9d6eb73e97e5164480a890f678b22d32faca"}, + {file = "tree_sitter-0.22.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:98c697427f82abab6b39cfe2ade6547d844dd419fa8cfc89031bcdf7c10579b6"}, + {file = "tree_sitter-0.22.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:548aa34f15a29aef1fc8e85507f13e0678a54f1de16461f844d86179b19bb5f6"}, + {file = "tree_sitter-0.22.3-cp311-cp311-win_amd64.whl", hash = "sha256:2fc0e1097fb86623b340141e80a0f2b7668b09d953501d91adc715a577e32c61"}, + {file = "tree_sitter-0.22.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7cb5c145fbd4bcc0cd4851dc4d0a6079a8e2f61257f8c0effc92434f6fb19b14"}, + {file = "tree_sitter-0.22.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4a592080db6b9472a886f4593b4705d02630721fdbe4a700085fe775fcab20e"}, + {file = "tree_sitter-0.22.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f36bf523763f05edf924126583ea997f905162046c0f184d6fd040cc1ccbf2c5"}, + {file = "tree_sitter-0.22.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e1193f27c25aab299f4fc154664122c7bfe80633b726bb457356d371479a5b"}, + {file = "tree_sitter-0.22.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:156df7e71a6c6b542ff29526cad6886a41115e42dc768c55101398d68325db54"}, + {file = "tree_sitter-0.22.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:82e1d467ce23dd2ecc37d4fb83965e891fc37b943639c517cd5acf54a2df0ff7"}, + {file = "tree_sitter-0.22.3-cp312-cp312-win_amd64.whl", hash = "sha256:e541a0c08a04f229ba9479a8c441dd267fdaa3e5842ae70a744c178bcaf53fa3"}, + {file = "tree_sitter-0.22.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a85a1d0fdff21cc524a959b3277c311941a9b5b91a862e462c1b55470893884a"}, + {file = "tree_sitter-0.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f96c6acd2799bafa28543a267937eec6a3d9ccbdeb6e1d05858114d4cd882da9"}, + {file = "tree_sitter-0.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed2708aecd3a4c8d20a89350d3c89ac2f964985ee9117c39357cee3098a9498a"}, + {file = "tree_sitter-0.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b2f99535aa4195b20fef18559defaabd9e12fe8ed8806c101d51820f240ca64"}, + {file = "tree_sitter-0.22.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:459a0f3bf8d6dbb9e9f651d67cee3a60f0b799fefd4a33f49a7e9501ada98e35"}, + {file = "tree_sitter-0.22.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:4a51bfe99dcd8bbfb0fe95113f0197e6e540db3077abce77a058235beec747a3"}, + {file = "tree_sitter-0.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:8d54ef562492493bf091cb3fd605cb7e60bf1d56634a94ab48075741d823e3a5"}, +] + +[package.extras] +docs = ["sphinx (>=7.3,<8.0)", "sphinx-book-theme"] +tests = ["tree-sitter-html", "tree-sitter-javascript", "tree-sitter-json", "tree-sitter-python", "tree-sitter-rust"] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -3082,6 +3197,32 @@ files = [ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "tzdata" +version = "2024.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + [[package]] name = "urllib3" version = "2.2.2" @@ -3318,4 +3459,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.9.0" -content-hash = "27a73a66a7f9b6dbdf4dfe56a5e5c2409edc833a97def321b304d6b171997d9c" +content-hash = "3047839778c62f1db3f6405778595f938edc258aca93d573dd6dd42011965adc" diff --git a/pyproject.toml b/pyproject.toml index 6a6d3b5b..be13cacc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ esprima = "^4.0.1" escodegen = "^1.0.11" redis = "^5.0.7" tqdm = "^4.66.5" +tree-sitter = "^0.22.3" +llama-index = "^0.10.65" [build-system]