diff --git a/README.md b/README.md index 6069f82..7e8febf 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,26 @@ -# MemOnto 🧠 +# memonto 🧠

- logo + logo

-`memonto` (_memory + ontology_) adds memory to AI agents based on custom defined ontology. Define your own [RDF](https://www.w3.org/RDF/) ontology with [`rdflib`](https://github.com/RDFLib/rdflib) then have `memonto` automatically extract information that maps onto that ontology into a memory graph. The memories in the memory graph can be queried directly with `SPARQL` queries or contextually summarized. +

+ + memonto-pypi + + + memonto-downloads + + + memonto-license + +

+ +`memonto` (_memory + ontology_) adds memory to AI agents based on your custom defined ontology. +- Define your own [RDF](https://www.w3.org/RDF/) ontology with [`rdflib`](https://github.com/RDFLib/rdflib). +- `memonto` automatically extracts information that maps onto that ontology into a memory graph (triple store). +- Memory data can be queried directly with `SPARQL` returning a list of matching triples (subject > predicate > object). +- Memories can also be contextually summarized with the addition of a vector store. ``` ┌─────────────────────────────┐ ┌──────────────────────┐ ┌─────────────────────────────────┐ @@ -91,7 +107,7 @@ memonto.configure(config) ### Triple Store Mode -A triple store enables the persistent storage of memory data. Currently supports Apache Jena Fuseki as a triple store. +A triple store enables the persistent storage of memory data. Currently supports Apache Jena Fuseki as a triple store. To configure a triple store, add `triple_store` to the top level of your `config` dictionary. **Install Apache Jena Fuseki** 1. Download Apache Jena Fuseki [here](https://jena.apache.org/download/index.cgi#apache-jena-fuseki). @@ -113,54 +129,54 @@ config = { "connection_url": "http://localhost:8080/dataset_name", }, }, - "model": { - "provider": "openai", - "config": { - "model": "gpt-4o", - "api_key": "api-key", - }, - } } - -memonto = Memonto( - ontology=g, - namespaces={"hist": HIST}, -) -memonto.configure(config) ``` ### Triple + Vector Stores Mode -A vector store enables contextual retrieval of memory data, it must be used in conjunction with a triple store. Currently supports Chroma as a vector store. +A vector store enables contextual retrieval of memory data, it must be used in conjunction with a triple store. Currently supports Chroma as a vector store. To configure a vector store, add `vector_store` to the top level of your `config` dictionary. 
+ +**Configure Local Vector Store** ```python config = { - "triple_store": { - "provider": "apache_jena", + "vector_store": { + "provider": "chroma", "config": { - "connection_url": "http://localhost:8080/dataset_name", + "mode": "local", + "path": ".local", }, }, +} +``` +**Configure Remote Vector Store** +```python +config = { "vector_store": { "provider": "chroma", "config": { - "mode": "local", - "path": ".local", + "mode": "remote", + "auth": "token", + "host": "localhost", + "port": "8080", + "token": "bearer_token", }, }, - "model": { - "provider": "openai", +} +``` +```python +config = { + "vector_store": { + "provider": "chroma", "config": { - "model": "gpt-4o", - "api_key": "api-key", + "mode": "remote", + "auth": "basic", + "host": "localhost", + "port": "8080", + "username": "admin", + "password": "admin1", }, - } + }, } - -memonto = Memonto( - ontology=g, - namespaces={"hist": HIST}, -) -memonto.configure(config) ``` ## 🧰 Usage diff --git a/memonto/core/render.py b/memonto/core/render.py deleted file mode 100644 index 8fc8dcc..0000000 --- a/memonto/core/render.py +++ /dev/null @@ -1,51 +0,0 @@ -import graphviz -from rdflib import Graph - -from memonto.utils.rdf import is_rdf_schema, sanitize_label - - -def generate_image(g: Graph, path: str) -> None: - dot = graphviz.Digraph() - - for s, p, o in g: - if is_rdf_schema(p): - continue - - s_label = sanitize_label(str(s)) - p_label = sanitize_label(str(p)) - o_label = sanitize_label(str(o)) - - dot.node(s_label, s_label) - dot.node(o_label, o_label) - dot.edge(s_label, o_label, label=p_label) - - dot.render(path, format="png") - - return f"{path}.png" - - -def generate_text(g: Graph) -> str: - text_g = "" - - for s, p, o in g: - if is_rdf_schema(p): - continue - - text_g += f"({str(s)}) -> [{str(p)}] -> ({str(o)})" - - return text_g - - -def _render(g: Graph, format: str, path: str) -> str: - if format == "turtle": - return g.serialize(format="turtle") - elif format == "json": - return 
g.serialize(format="json-ld") - elif format == "triples": - return g.serialize(format="nt") - elif format == "text": - return generate_text(g) - elif format == "image": - return generate_image(g=g, path=path) - else: - raise ValueError(f"Unsupported type '{type}'.") diff --git a/memonto/core/retain.py b/memonto/core/retain.py index 9a72008..2d5e8a6 100644 --- a/memonto/core/retain.py +++ b/memonto/core/retain.py @@ -4,6 +4,7 @@ from memonto.stores.triple.base_store import TripleStoreModel from memonto.stores.vector.base_store import VectorStoreModel from memonto.utils.logger import logger +from memonto.utils.rdf import _render def run_script( @@ -110,3 +111,6 @@ def _retain( triple_store.save(ontology=ontology, data=data, id=id) if vector_store: vector_store.save(g=data, id=id) + + # _render(g=data, format="image") + data.remove((None, None, None)) diff --git a/memonto/memonto.py b/memonto/memonto.py index e840a14..dc78718 100644 --- a/memonto/memonto.py +++ b/memonto/memonto.py @@ -1,7 +1,7 @@ import asyncio from pydantic import BaseModel, ConfigDict, Field, model_validator from rdflib import Graph, Namespace, URIRef -from typing import Optional, Union +from typing import Optional from memonto.core.configure import _configure from memonto.core.init import init @@ -9,7 +9,6 @@ from memonto.core.retrieve import _retrieve from memonto.core.recall import _recall from memonto.core.remember import _remember -from memonto.core.render import _render from memonto.core.retain import _retain from memonto.llms.base_llm import LLMModel from memonto.stores.triple.base_store import TripleStoreModel @@ -183,26 +182,3 @@ def remember(self) -> None: triple_store=self.triple_store, id=self.id, ) - - def _render( - self, - format: str = "turtle", - path: str = None, - ) -> Union[str, dict]: - """ - Return a text representation of the entire currently stored memory. - - :param format: The format in which to render the graph. 
Supported formats are: - - "turtle": Return the graph in Turtle format. - - "json": Return the graph in JSON-LD format. - - "text": Return the graph in text format. - - "image": Return the graph as a png image. - :param path: The path to save the image if format is "image". - - :return: A text representation of the memory. - - "turtle" format returns a string in Turtle format. - - "json" format returns a dictionary in JSON-LD format. - - "text" format returns a string in text format. - - "image" format returns a string with the path to the png image. - """ - return _render(g=self.data, format=format, path=path) diff --git a/memonto/prompts/commit_to_memory.prompt b/memonto/prompts/commit_to_memory.prompt index 2a218b5..ee4513c 100644 --- a/memonto/prompts/commit_to_memory.prompt +++ b/memonto/prompts/commit_to_memory.prompt @@ -12,9 +12,10 @@ ${user_message} Analyze the user message to find AS MUCH relevant information AS POSSIBLE that could fit onto the above ontology then generate the Python code while adhering to these rules: - First find all the information in the user message that maps onto the above ontology. -- Then create the script that will add them to graph `data` and which namespaces they best fit under. -- NEVER generate code that initializes rdflib graph, namespaces, classes, properties, etc. +- Then apply only the existing namespaces to the new information. +- Finally create the script that will add them to graph `data`. +- NEVER generate code that initializes new graphs, namespaces, classes, properties, etc. - GENERATE Python code to add the triples with the relevant information assuming rdflib Graph `data` and the newly added namespaces already exists. +- GENERATE all necessary rdflib and rdflib.namespace imports for the code to run. - If there are no relevant information then RETURN a print an empty string and nothing else. -- Your response should include all necessary rdflib and rdflib.namespace imports. 
- Please generate the code without using ``` or any other code formatting symbols. Return only plain text. \ No newline at end of file diff --git a/memonto/prompts/commit_to_memory_error_handling.prompt b/memonto/prompts/commit_to_memory_error_handling.prompt index 0e28fb7..52ced28 100644 --- a/memonto/prompts/commit_to_memory_error_handling.prompt +++ b/memonto/prompts/commit_to_memory_error_handling.prompt @@ -21,8 +21,8 @@ ${ontology} ``` Fix the error and return a new Python script adhering to these rules: -- NEVER generate code that initializes rdflib graph, namespaces, classes, properties, etc. +- NEVER generate code that initializes new graphs, namespaces, classes, properties, etc. - GENERATE Python code to add the triples with the relevant information assuming rdflib Graph `data` and the newly added namespaces already exists. +- GENERATE all necessary rdflib and rdflib.namespace imports for the code to run. - If there are no relevant information then RETURN a print an empty string and nothing else. -- Your response should include all necessary rdflib and rdflib.namespace imports. - Please generate the code without using ``` or any other code formatting symbols. Return only plain text. 
\ No newline at end of file diff --git a/memonto/stores/vector/chroma.py b/memonto/stores/vector/chroma.py index c906981..c57f7d7 100644 --- a/memonto/stores/vector/chroma.py +++ b/memonto/stores/vector/chroma.py @@ -6,6 +7,7 @@ from typing import Literal from memonto.stores.vector.base_store import VectorStoreModel +from memonto.utils.logger import logger from memonto.utils.rdf import is_rdf_schema, remove_namespace @@ -51,6 +52,10 @@ def init(self) -> "Chroma": def save(self, g: Graph, id: str = None) -> None: collection = self.client.get_or_create_collection(id or "default") + documents = [] + metadatas = [] + ids = [] + for s, p, o in g: if is_rdf_schema(p): continue @@ -59,25 +64,35 @@ def save(self, g: Graph, id: str = None) -> None: _p = remove_namespace(str(p)) _o = remove_namespace(str(o)) - edge = f"{_s} {_p} {_o}" - - collection.add( - documents=edge, - metadatas={ - "triple": json.dumps({"s": str(s), "p": str(p), "o": str(o)}) - }, - ids=f"{s}-{p}-{o}", + documents.append(f"{_s} {_p} {_o}") + metadatas.append( + {"triple": json.dumps({"s": str(s), "p": str(p), "o": str(o)})} ) + ids.append(f"{s}-{p}-{o}") + + if documents: + try: + collection.add(documents=documents, metadatas=metadatas, ids=ids) + except Exception as e: + logger.error(f"Chroma Save\n{e}\n") def search(self, message: str, id: str = None, k: int = 3) -> list[dict]: collection = self.client.get_collection(id or "default") - matched = collection.query( - query_texts=[message], - n_results=k, - ) + try: + matched = collection.query( + query_texts=[message], + n_results=k, + ) + except Exception as e: + logger.error(f"Chroma Search\n{e}\n") + # `matched` is unbound when the query fails; return no matches instead of raising UnboundLocalError below. + return [] return [json.loads(t.get("triple", "{}")) for t in matched["metadatas"][0]] def delete(self, id: str) -> None: - self.client.delete_collection(id) + try: + self.client.delete_collection(id) + except Exception as e: + logger.error(f"Chroma Delete\n{e}\n") diff --git a/memonto/utils/rdf.py b/memonto/utils/rdf.py index 6519939..8d51758 100644 --- 
a/memonto/utils/rdf.py +++ b/memonto/utils/rdf.py @@ -1,5 +1,9 @@ +import datetime +import graphviz +import os from rdflib import Graph from rdflib.namespace import RDF, RDFS, OWL +from typing import Union def is_rdf_schema(p) -> Graph: @@ -12,3 +16,83 @@ def sanitize_label(label: str) -> str: def remove_namespace(c: str) -> str: return c.split("/")[-1].split("#")[-1].split(":")[-1] + + +def generate_image(g: Graph, path: str = None) -> str: + """Draw the triples of g (skipping predicates flagged by is_rdf_schema) with graphviz and return the path of the rendered png.""" + if not path: + current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + current_directory = os.getcwd() + local_directory = os.path.join(current_directory, ".local") + path = os.path.join(local_directory, f"data-graph-{current_time}") + + dot = graphviz.Digraph() + + for s, p, o in g: + if is_rdf_schema(p): + continue + + s_label = sanitize_label(str(s)) + p_label = sanitize_label(str(p)) + o_label = sanitize_label(str(o)) + + dot.node(s_label, s_label) + dot.node(o_label, o_label) + dot.edge(s_label, o_label, label=p_label) + + dot.render(path, format="png") + + return f"{path}.png" + + +def generate_text(g: Graph) -> str: + """Return the triples of g (skipping predicates flagged by is_rdf_schema) as "(s) -> [p] -> (o)" entries joined with no separator.""" + text_g = "" + + for s, p, o in g: + if is_rdf_schema(p): + continue + + text_g += f"({str(s)}) -> [{str(p)}] -> ({str(o)})" + + return text_g + + +def _render( + g: Graph, + format: str = "turtle", + path: str = None, +) -> Union[str, dict]: + """ + Return a text representation of the entire currently stored memory. + + :param g: The rdflib graph to render. + :param format: The format in which to render the graph. Supported formats are: + - "turtle": Return the graph in Turtle format. + - "json": Return the graph in JSON-LD format. + - "triples": Return the graph in N-Triples format. + - "text": Return the graph in text format. + - "image": Return the graph as a png image. + :param path: The path to save the image if format is "image"; when omitted, a timestamped file under `.local` in the current working directory is used. + + :return: A text representation of the memory. + - "turtle" format returns a string in Turtle format. + - "json" format returns a string in JSON-LD format. + - "triples" format returns a string in N-Triples format. + - "text" format returns a string in text format. 
+ - "image" format returns a string with the path to the png image. + + :raises ValueError: If format is not one of the supported formats. + """ + if format == "turtle": + return g.serialize(format="turtle") + elif format == "json": + return g.serialize(format="json-ld") + elif format == "triples": + return g.serialize(format="nt") + elif format == "text": + return generate_text(g) + elif format == "image": + return generate_image(g=g, path=path) + else: + raise ValueError(f"Unsupported type '{format}'.")