-
-
Notifications
You must be signed in to change notification settings - Fork 353
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Framework for adding context to LLM prompt (#993)
* context provider * split base and base command context providers + replacing prompt * comment * only replace prompt if context variable in template * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Run mypy on CI, fix or ignore typing issues (#987) * Run mypy on CI * Rename, add mypy to test deps * Fix typing jupyter-ai codebase (mostly) * Three more cases * update deepmerge version specifier --------- Co-authored-by: David L. Qiu <[email protected]> * context provider * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * mypy * black * modify backtick logic * allow for spaces in filepath * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactor * fixes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix test * refactor autocomplete to remove hardcoded '/' and '@' prefix * modify context prompt template Co-authored-by: david qiu <[email protected]> * refactor * docstrings + refactor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * mypy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add context providers to help * remove _examples.py and remove @learned from defaults * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * make find_commands unoverridable --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michał Krassowski <[email protected]> Co-authored-by: David L. Qiu <[email protected]>
- Loading branch information
1 parent
fcb2d71
commit 6e426ab
Showing
14 changed files
with
942 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from .base import ( | ||
BaseCommandContextProvider, | ||
ContextCommand, | ||
ContextProviderException, | ||
find_commands, | ||
) | ||
from .file import FileContextProvider |
53 changes: 53 additions & 0 deletions
53
packages/jupyter-ai/jupyter_ai/context_providers/_learned.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Currently unused because it duplicates the functionality of the /ask command.
# TODO: Rename "learned" to something more descriptive.
from typing import List | ||
|
||
from jupyter_ai.chat_handlers.learn import Retriever | ||
from jupyter_ai.models import HumanChatMessage | ||
|
||
from .base import BaseCommandContextProvider, ContextCommand | ||
from .file import FileContextProvider | ||
|
||
# Template used to render a single retrieved document chunk. `{filepath}` is
# the path of the file the chunk came from and `{content}` is the chunk text,
# which is wrapped in a fenced code block. `.strip()` removes the leading and
# trailing newlines that exist only to keep the literal readable.
FILE_CHUNK_TEMPLATE = """
Snippet from file: {filepath}
```
{content}
```
""".strip()
|
||
|
||
class LearnedContextProvider(BaseCommandContextProvider):
    """Context provider that adds snippets retrieved from the `/learn` index.

    The user's message is used as a retrieval query; every returned document
    chunk is rendered with `FILE_CHUNK_TEMPLATE` and the rendered chunks are
    placed under `header`. Chunks from files that the file context provider
    already supplies for the same message are skipped.
    """

    id = "learned"
    help = "Include content indexed from `/learn`"
    remove_from_prompt = True
    header = "Following are snippets from potentially relevant files:"

    def __init__(self, **kwargs):
        """Initialize the provider and build a retriever over `/learn`.

        Args:
            **kwargs: Forwarded unchanged to the base class constructor.

        Raises:
            KeyError: If `self.chat_handlers` has no `"/learn"` entry.
        """
        super().__init__(**kwargs)
        # NOTE(review): `chat_handlers` is presumably populated by the base
        # class constructor from `kwargs` -- confirm against `.base`.
        self.retriever = Retriever(learn_chat_handler=self.chat_handlers["/learn"])

    async def _make_context_prompt(
        self, message: HumanChatMessage, commands: List[ContextCommand]
    ) -> str:
        """Build the context prompt for `message` from retrieved snippets.

        Args:
            message: The human chat message; its body is used as the query.
            commands: Context commands found in the message (not used here).

        Returns:
            `header`, a newline, and the rendered snippets separated by blank
            lines; or an empty string when there is no retriever or when no
            snippet is left after filtering.
        """
        if not self.retriever:
            return ""

        # `_clean_prompt` is inherited; presumably it strips context commands
        # from the message body before it is used as a query -- confirm.
        query = self._clean_prompt(message.body)
        docs = await self.retriever.ainvoke(query)

        # Build the exclusion set once so each membership test is O(1).
        excluded = set(self._get_repeated_files(message))

        snippets = [
            FILE_CHUNK_TEMPLATE.format(
                filepath=doc.metadata["path"], content=doc.page_content
            )
            for doc in docs
            if doc.metadata["path"] not in excluded and doc.page_content
        ]

        # Avoid emitting a dangling header with nothing underneath it.
        if not snippets:
            return ""

        return self.header + "\n" + "\n\n".join(snippets)

    def _get_repeated_files(self, message: HumanChatMessage) -> List[str]:
        """Return file paths already provided by the file context provider.

        Args:
            message: The human chat message to inspect.

        Returns:
            The paths reported by the `"file"` context provider for `message`,
            or an empty list when that provider is missing or is not a
            `FileContextProvider`.
        """
        # Don't include files that are already provided by the file context
        # provider.
        file_context_provider = self.context_providers.get("file")
        if isinstance(file_context_provider, FileContextProvider):
            return file_context_provider.get_filepaths(message)
        return []
Oops, something went wrong.