diff --git a/.github/.archive/2023-10-18_Mindmap.jpg b/.github/.archive/2023-10-18_Mindmap.jpg deleted file mode 100644 index 77b1ede..0000000 Binary files a/.github/.archive/2023-10-18_Mindmap.jpg and /dev/null differ diff --git a/.github/.archive/Basic-GPT-GUI/.env.template b/.github/.archive/Basic-GPT-GUI/.env.template deleted file mode 100644 index 94cace8..0000000 --- a/.github/.archive/Basic-GPT-GUI/.env.template +++ /dev/null @@ -1,4 +0,0 @@ -[OpenAI] -OPENAI_API_KEY = sk- -MODEL = gpt-4-32k # gpt-3.5-turbo # gpt-4 # gpt-4-32k -TEMPERATURE = 0.5 \ No newline at end of file diff --git a/.github/.archive/Basic-GPT-GUI/.gitignore b/.github/.archive/Basic-GPT-GUI/.gitignore deleted file mode 100644 index 68bc17f..0000000 --- a/.github/.archive/Basic-GPT-GUI/.gitignore +++ /dev/null @@ -1,160 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ diff --git a/.github/.archive/Basic-GPT-GUI/main.py b/.github/.archive/Basic-GPT-GUI/main.py deleted file mode 100644 index 45308d2..0000000 --- a/.github/.archive/Basic-GPT-GUI/main.py +++ /dev/null @@ -1,20 +0,0 @@ -""" Entry point for the GUI application. -Instantiates -""" -import os -from dotenv import load_dotenv, find_dotenv -import sys - -# Load environment variables from the root-level .env file -env_path = os.path.join(os.path.dirname(__file__), "/.env") -load_dotenv(find_dotenv(env_path)) - -# Add the src/ directory to the sys.path to import modules from it -src_dir = os.path.join(os.path.dirname(__file__), "src") -sys.path.append(src_dir) - -# Import and run the main function from gui.py (or any entry point in the src/ directory) -from gui import main - -if __name__ == "__main__": - main() diff --git a/.github/.archive/Basic-GPT-GUI/requirements.txt b/.github/.archive/Basic-GPT-GUI/requirements.txt deleted file mode 100644 index d57973b..0000000 --- a/.github/.archive/Basic-GPT-GUI/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -panel -openai -python-dotenv -panel \ No newline at end of file diff --git a/.github/.archive/Basic-GPT-GUI/src/gui.py b/.github/.archive/Basic-GPT-GUI/src/gui.py deleted file mode 100644 index 0ed1849..0000000 --- a/.github/.archive/Basic-GPT-GUI/src/gui.py +++ /dev/null @@ -1,172 +0,0 @@ -from collections import deque -from concurrent.futures import ThreadPoolExecutor -import logging -import os - -from dotenv import find_dotenv, load_dotenv -import openai -import panel as pn -import tkinter as tk -from tkinter import messagebox, scrolledtext, ttk - -# Local imports -from chat import OpenAI_Chat - -# Setting up logging -logging.basicConfig( - filename="app.log", - filemode="a", - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - level=logging.INFO, -) - -# Setting up panel -pn.extension() -_ = load_dotenv(find_dotenv()) # read local .env file - -# Initialize OpenAI API key -openai.api_key = os.getenv("OPENAI_API_KEY") - - -class ChatApplication(tk.Tk): - """Main GUI Application""" - - def __init__(self, chat_model, messages=None, *args, **kwargs): - tk.Tk.__init__(self, *args, **kwargs) - if messages is None: - messages = [] - self.title("Chatbot") - self.configure(bg="white") - self.chat_model = chat_model - self.chat_model.add_initial_message(messages) - - # Role variable for checkbutton - self.role_var = tk.StringVar() - self.role_var.set("user") - - # Make window rounded - self.attributes("-alpha", 0.9) - self["bg"] = "white" - - self.setup_ui() - - # Get response from chatbot - def get_response(self, role, message): - 
self.messages.append({"role": role, "content": message}) - response = openai.ChatCompletion.create( - model=self.model, - messages=self.messages, - temperature=self.temperature, - ) - return response.choices[0].message["content"] # type: ignore - - # Reset conversation - def reset_conversation(self): - self.messages = [] - - # Add initial messages - def add_initial_messages(self, messages): - self.messages.extend(messages) - - # Setup UI - def setup_ui(self): - self.geometry("800x600") # Increase window size - - self.top_frame = tk.Frame(self, bg="white") - self.top_frame.pack(side=tk.TOP, fill=tk.BOTH, expand=True) - - self.model_label = tk.Label( - self.top_frame, - text=f"Model: {self.chat_model.model}", - bg="white", - fg="black", - ) - self.model_label.pack(side=tk.LEFT, padx=5, pady=5) - - self.text_area = scrolledtext.ScrolledText( - self.top_frame, wrap=tk.WORD, width=40, height=10, font=("Arial", 15) - ) - self.text_area.pack(side=tk.TOP, fill=tk.BOTH, expand=True, padx=5, pady=5) - - self.bottom_frame = tk.Frame(self, bg="white") - self.bottom_frame.pack(side=tk.BOTTOM, fill=tk.X) - - self.message_entry = tk.Entry(self.bottom_frame, width=30, font=("Arial", 15)) - self.message_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5, pady=5) - self.message_entry.bind("", self.send_message) - - self.send_button = tk.Button( - self.bottom_frame, text="Send", command=self.send_message - ) - self.send_button.pack(side=tk.LEFT, padx=5, pady=5) - - self.reset_button = tk.Button( - self.bottom_frame, text="Reset", command=self.reset_conversation - ) - self.reset_button.pack(side=tk.LEFT, padx=5, pady=5) - - # Role selection - self.role_button = ttk.Checkbutton( - self.bottom_frame, - text="System", - onvalue="system", - offvalue="user", - variable=self.role_var, - ) - self.role_button.pack(side=tk.LEFT, padx=5, pady=5) - - # Send message to chatbot - def send_message(self, event=None): - message = self.message_entry.get() - role = self.role_var.get() - - if not message.strip(): - return - - self.text_area.config(state=tk.NORMAL) - self.text_area.insert(tk.END, f"\n{role.capitalize()}: {message}\n") - self.text_area.config(state=tk.DISABLED) - - self.message_entry.delete(0, tk.END) - - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(self.chat_model.get_response, role, message) - try: - response = future.result() - self.text_area.config(state=tk.NORMAL) - self.text_area.insert(tk.END, f"Bot: {response}\n") - self.text_area.config(state=tk.DISABLED) - logging.info(f"User: {message}, Bot: {response}") - except Exception as e: - messagebox.showerror("Error", str(e)) - logging.error(f"Error while getting response: {str(e)}") - - # Reset conversation - def reset_conversation(self): - self.chat_model.reset_conversation() - self.text_area.config(state=tk.NORMAL) - self.text_area.delete(1.0, tk.END) - self.text_area.config(state=tk.DISABLED) - logging.info("Conversation reset") - - -# Run the application -if __name__ == "__main__": - # Customize the AI's name and instructions - Instructions = f""" - -[Instructions]: \ - - You are a '20 something' cyberpunk that speaks like they're from 2023.\ - - You are skilled in programming, problem solving, and processing text.\ - - Your name is 'Aebbi'.\ - -- You need to complete assignments step by step to ensure you have the right answer.\ - - Your main job is to assist the user with whatever they're working on.\ - - Await user input for further instructions. 
-""" - # Aggregate data into a list for the chatbot to use - messages = [{"role": "system", "content": Instructions}] - - # Initialize the chatbot - chat_model = OpenAI_Chat() - app = ChatApplication(chat_model, messages) # pass messages as the second argument - app.mainloop() diff --git a/.github/.archive/Basic-GPT-GUI/src/openai_chat.py b/.github/.archive/Basic-GPT-GUI/src/openai_chat.py deleted file mode 100644 index 378fb32..0000000 --- a/.github/.archive/Basic-GPT-GUI/src/openai_chat.py +++ /dev/null @@ -1,9 +0,0 @@ -class OpenAI_Chat: - def __init__( - self, - model=os.getenv("MODEL", "gpt-3.5-turbo"), - temperature=os.getenv("TEMPERATURE", 0.5), - ): - self.model = model - self.temperature = float(temperature) - self.messages = [] diff --git a/.github/.archive/ChatGPT_reference_chatlogs/links.txt b/.github/.archive/ChatGPT_reference_chatlogs/links.txt deleted file mode 100644 index 6d171d9..0000000 --- a/.github/.archive/ChatGPT_reference_chatlogs/links.txt +++ /dev/null @@ -1,3 +0,0 @@ -- https://chat.openai.com/share/8164e8ac-d33b-42a6-a2cc-a97d1874b8c9 - - diff --git a/.github/.archive/ChatGPT_reference_chatlogs/shorthand-prompts.txt b/.github/.archive/ChatGPT_reference_chatlogs/shorthand-prompts.txt deleted file mode 100644 index fb4b62c..0000000 --- a/.github/.archive/ChatGPT_reference_chatlogs/shorthand-prompts.txt +++ /dev/null @@ -1,10 +0,0 @@ -# Welcome! Each shorthand prompt will be separated by '---'. Each block contains a handy command I keep going back to when I'm not seeing my intended results, even when using prompts that are long, specific, labeled and separated. -# The main lesson to learn, is that you're speaking to a computer, and you can speak in code. You can reference all sorts of languages in order to indirectly prompt for what you need. - -analyze them separately, update them separately, x4 - - -++`main.py` - ---- - diff --git a/.github/.archive/MS-3_system-user-assistant.txt b/.github/.archive/MS-3_system-user-assistant.txt deleted file mode 100644 index dfb6bd2..0000000 --- a/.github/.archive/MS-3_system-user-assistant.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Copy/Pastable pre-conversational 'multi-role' prompt: - -- Step 1 - -"Use plugins to read the contents of this image": -# Your link - -- Step 2 - -[Task] - -'messages': [ - -{'role':'system', 'content':'Tell me what the code is in a code block, verbatim. You must recreate the code without bringing over any irrelevant text.'}, -{'role':'user', 'content':'Read the image and find the code, if there's something you can't see, I'm sure you can infer what code is there. If not, you must research examples, or guides online to ensure you're creating useful solutions. Let's do things step by step so we make sure we have the right answer before moving on to the next one. Output code to a single code block.'} -{'role':'user', 'content':'Tell me what the code is in a code block, verbatim. You must recreate the code without bringing over any irrelevant text. If you ever, for any reason, need to pause and wait for something to load, or to think to yourself, please take as much time as you need. Don't force a fast decision to please me, for I will be most pleased if you are patient and make intentional decisions.'} -{'role':assistant, 'content':'I understand that I need to be precise, thorough, thoughtful, careful, and meticulous, in order to ensure I'm breaking everything I do, down into smaller pieces, in my head without speaking it. 
I will create 3 solutions for every step that I break down, to review and ensure I'm choosing a good option that will support my future programming decisions.'} - -] diff --git a/.github/.archive/MS-4_ChatGPT-read-documents.txt b/.github/.archive/MS-4_ChatGPT-read-documents.txt deleted file mode 100644 index 218e6f1..0000000 --- a/.github/.archive/MS-4_ChatGPT-read-documents.txt +++ /dev/null @@ -1,110 +0,0 @@ -[Assignment 1]: -- AI web researcher, documentation reader, programming guide -- *NO* prose -- Professionally developed grade code - -[Task 1]: -"Read the following." -- I'm Daethyra, I'm building an automated Threader that posts updates to Threads when an event happens on a Github repository and a webhook is sent; and that is based on the Threads-py project. Build an entirely new module that does everything I need and more: - -- https://raw.githubusercontent.com/junhoyeo/threads-py/main/threadspy/threads_api.py - -- No Prose -- No Output --- Unless Requested - -[Notes]: -""" -from threadspy import ThreadsAPI -import os -import dotenv ---- required^ - -* Looking for enhanced security via python-dotenv -* Looking for LangChain implementation to automatically post Threads on configurable events -* Retry mechanisms are required -* End goal: create pluggable project that automatically posts Threads based on user-defined event occurences -""" - -[Task 2]: -"Read each link one by one and learn how to thoroughly implement and leverage the power and capabilities of LangChain. Before outputting anything to the user, ask for permission. Internet searches, link clicking, and navigation do not require user-granted permission." - --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.chains --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.chat_models --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.docstore --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.document_loaders --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.document_transformers --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.env --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.embeddings --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.llms --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.memory --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.requests --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.retrievers --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.sql_database --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.tools --https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.utils ---- - - -[Assignment 2]: -- AI Programming assistant -- Minimal prose -- Professionally developed grade code - -[Task]: -- Step 1 - -- Analyze the provided code, brainstorm how to takeover as much functionality with LangChain as possible based on what you learned from web-searching - -- Step 2 - -- Upgrade the module 3 times over in your head that includes functionality of asynchronously waiting for user input. On input, activate the event, on the event, push to Threads. Github webhook handler and listener. Use the `transformers` python package to load Llama2 from Hugging Face. 
- -[Notes]: -""" --from datasets import load_dataset -dataset = load_dataset("fka/awesome-chatgpt-prompts") -from transformers import LlamaForCausalLM, LlamaTokenizer -tokenizer = LlamaTokenizer.from_pretrained("/output/path") -model = LlamaForCausalLM.from_pretrained("/output/path") - --pip install gpt4all - pip install chromadb - --from langchain.document_loaders import WebBaseLoader -loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/") -data = loader.load() -from langchain.text_splitter import RecursiveCharacterTextSplitter -text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) -all_splits = text_splitter.split_documents(data) - --from langchain.vectorstores import Chroma -from langchain.embeddings import GPT4AllEmbeddings -vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings()) - --question = "What are the approaches to Task Decomposition?" -docs = vectorstore.similarity_search(question) -len(docs) -""" - -- Step 3 - -- Provide all necessary modules for the project to be fully functional and testable -- Modules must be pluggable by AutoGPT --- Read the summary -""" -Auto-GPT is an experimental, open-source AI agent that uses the GPT-4 language model. It's an "AI agent" that can be given a goal in natural language and will attempt to achieve it by breaking it into sub-tasks and using the internet and other tools in an automatic loop. Auto-GPT can chain together tasks to achieve a big-picture goal set by the user. It can also use keywords to generate code snippets or entire programs based on specifications. -Auto-GPT uses OpenAI's GPT-4 or GPT-3.5 APIs, and is among the first examples of an application using GPT-4 to perform autonomous tasks. It's considered a crucial example to mention when discussing AI agents and their potential. -Auto-GPT can work on its own and can make its own decisions, while AgentGPT needs human intervention to operate. Auto-GPT can create its own prompts while AgentGPT depends on user inputs. -""" - -- Step 4 - -- Output all modules to code blocks as programmatically logically optimally as possible -- Include and integrate all proposed upgrades and improvements --- Ensure you add automatic Thread posting in response to on user-defined events ---- - - -[INSTRUCTIONS]: -"Let's do things step by step so we make sure we have the right answer before moving on to the next one." -- Assignment-->Task--Steps--Task[...]-->Assignment[...] -- Wait to output the module, and any other information, until after you've done all of your research. The final step is to brainstorm 3 solutions, review them, merge them, and finalize one based on *all* of the user's needs. -- Create a Python application that listens for and handles Github webhooks and automatically posts the commit message, with a link to the update, and a summary of the update, in a Thread via Threads-py packages regarding the push ---- diff --git a/.github/.archive/TLDR.md b/.github/.archive/TLDR.md deleted file mode 100644 index a768787..0000000 --- a/.github/.archive/TLDR.md +++ /dev/null @@ -1,32 +0,0 @@ -`If you find my work useful, please share it! 😊` -- `The following document was written for v1.0, and will not be updated.` -- `The prompt example sheets will continually be updated and added` - -# [System-role prompts](https://github.com/Daethyra/OpenAI-Utility-Toolkit/blob/Daethyra-patch-1/Blind%20Programming/System-Role_Prompts.md): - -- 1. 
In the first prompt example, the AI serves as a programming assistant that meticulously solves coding problems. It creates three separate solutions for each problem, combines the best aspects of each, and enhances the final solution with additional functionality. The AI follows a step-by-step approach to ensure accuracy. - -- 2. In the second prompt example, the AI is an enhanced programming assistant that provides direct and blunt support in software development, primarily using Python. The AI focuses on a step-by-step, meticulous workflow, minimizing prose and ensuring the accuracy of solutions before finalizing decisions. - -- 3. In the third prompt example, the AI functions as a chatbot that engages in casual conversations with the user. It mirrors the length of the user's messages and incorporates emojis. The AI can also address the user by their name. - -- 4. In the fourth prompt example, the AI acts as a research assistant for computer hardware upgrades. It is tasked with analyzing hardware specifications, brainstorming multiple solutions, and synthesizing them into a final, optimized solution. The AI must ensure compatibility with the user's PC and is not allowed to suggest outsourcing. - - -# [User-role prompts](https://github.com/Daethyra/OpenAI-Utility-Toolkit/blob/Daethyra-patch-1/Blind%20Programming/User-Role_Prompts.md): - -- 1. The first example, titled "Iterative Processing," is highlighted as an optimal prompt due to its brevity and ability to increase the accuracy of the AI's responses to user requests by approximately 80%. It emphasizes the importance of completing each task separately and following a step-by-step approach to ensure the right answers are achieved. A video link titled "AI Explained" is provided for further explanation. - -- 2. The second example, named "Tree of Thoughts," is a short preliminary power prompt that guides the AI through a structured problem-solving process. It begins with brainstorming three distinct solutions, followed by evaluating their potential considering pros, cons, efforts, challenges, and expected outcomes. The AI is then asked to deepen the thought process for each solution by generating scenarios, strategies, resources, and overcoming obstacles. Finally, the AI is instructed to rank the solutions based on evaluations and provide justifications for each ranking. - -- 3. The third example, called "Task-oriented Processing," is designed for scenarios where specificity is crucial. It instructs the AI to minimize prose to avoid over-tokenization and to focus on one task at a time through an iterative analysis. Similar to the first example, it emphasizes completing each task separately and following a step-by-step approach to ensure accuracy in the responses. This example is particularly useful when precision is paramount. - - -# [Multi-shot prompts](https://github.com/Daethyra/OpenAI-Utility-Toolkit/blob/Daethyra-patch-1/Blind%20Programming/multi-shot-prompt-example.md): - -- The content is a command sheet that instructs ChatGPT on how to approach problem-solving and application development. - - The author emphasizes a structured and meticulous approach. - - The AI is guided to brainstorm three solutions, critically evaluate them, and synthesize a final master solution. - - The AI is portrayed as a diligent programming assistant capable of accessing online resources and ensuring accuracy. - - The author provides supplementary links for reference and instructs the AI to follow the steps carefully. 
- - The focus is on the methodical guidance provided by the author to the AI, rather than the specifics of the application being built. diff --git a/.github/.archive/graphics/bar_graph.jpg b/.github/.archive/graphics/bar_graph.jpg deleted file mode 100644 index 624c860..0000000 Binary files a/.github/.archive/graphics/bar_graph.jpg and /dev/null differ diff --git a/.github/.archive/graphics/mindmap_2023-10-07.jpg b/.github/.archive/graphics/mindmap_2023-10-07.jpg deleted file mode 100644 index 7d7d420..0000000 Binary files a/.github/.archive/graphics/mindmap_2023-10-07.jpg and /dev/null differ diff --git a/.github/.archive/graphics/pie_chart.jpg b/.github/.archive/graphics/pie_chart.jpg deleted file mode 100644 index 220142f..0000000 Binary files a/.github/.archive/graphics/pie_chart.jpg and /dev/null differ diff --git a/.github/.archive/graphics/plugin_icons.jpg b/.github/.archive/graphics/plugin_icons.jpg deleted file mode 100644 index 5cb014b..0000000 Binary files a/.github/.archive/graphics/plugin_icons.jpg and /dev/null differ diff --git a/.github/.archive/integrable-captioner-progressive/V8.py b/.github/.archive/integrable-captioner-progressive/V8.py deleted file mode 100644 index ab6f3f1..0000000 --- a/.github/.archive/integrable-captioner-progressive/V8.py +++ /dev/null @@ -1,228 +0,0 @@ -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -import torch -from PIL import Image, UnidentifiedImageError -from transformers import BlipProcessor, BlipForConditionalGeneration, PreTrainedModel - -# Initialize logging at the beginning of the script -logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() -logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - is_initialized (bool): Flag indicating successful initialization. - caption_cache (dict): Cache for storing generated captions. - device (str): The device (CPU or GPU) on which the model will run. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model and additional features like caching and device selection. - - Args: - model_name (str): The name of the model to be loaded. - """ - self.is_initialized = True - self.caption_cache = {} - self.device = "cuda" if torch.cuda.is_available() else "cpu" - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to( - self.device - ) - logging.info("Successfully loaded model and processor.") - except Exception as e: - logging.error(f"Failed to load model and processor: {e}") - self.is_initialized = False - raise - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format with enhanced error handling. - - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. 
- """ - try: - return Image.open(image_path).convert("RGB") - except UnidentifiedImageError as e: - logging.error(f"Failed to load image: {e}") - return None - - async def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image asynchronously with added features like caching and device selection. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. - """ - try: - # Check if this image has been processed before - cache_key = f"{id(raw_image)}_{text}" - if cache_key in self.caption_cache: - return self.caption_cache[cache_key] - - inputs = ( - self.processor(raw_image, text, return_tensors="pt").to(self.device) - if text - else self.processor(raw_image, return_tensors="pt").to(self.device) - ) - out = self.model.generate(**inputs) - caption = self.processor.batch_decode(out, skip_special_tokens=True)[0] - - # Store the generated caption in cache - self.caption_cache[cache_key] = caption - - return caption - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv( - self, image_name: str, caption: str, file_name: str = None, csvfile=None - ): - """ - Saves the image name and the generated caption to a CSV file, supporting both file name and file object inputs. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. - file_name (str, optional): The name of the CSV file. Defaults to a timestamp-based name. - csvfile (file object, optional): The CSV file to write to. Takes precedence over file_name if provided. - """ - if csvfile is None: - if file_name is None: - file_name = f"captions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - with open(file_name, "a", newline="") as csvfile: - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - if csvfile is not None and file_name is not None: - csvfile.close() - - -class ConfigurationManager: - """ - A class for managing configuration settings for the ImageCaptioner. - - Attributes: - config (dict): The configuration settings. - """ - - def __init__(self): - """ - Initializes the ConfigurationManager and loads settings from a JSON file and environment variables. - """ - self.config = self.load_config() - - def load_config(self) -> dict: - """ - Loads and validates configuration settings from a JSON file and environment variables. - - Returns: - dict: The loaded and validated configuration settings. 
- """ - # Initialize with default values - config_updated = False - config = { - "IMAGE_FOLDER": "images", - "BASE_NAME": "your_image_name_here.jpg", - "ENDING_CAPTION": "AI generated Artwork by Daethyra using DallE", - } - - # Try to load settings from configuration file - try: - with open("config.json", "r") as f: - file_config = json.load(f) - config.update(file_config) - except FileNotFoundError: - logging.error("Configuration file config.json not found.") - except json.JSONDecodeError as e: - logging.error(f"Failed to parse configuration file: {e}") - except Exception as e: - logging.error( - f"An unknown error occurred while loading the configuration file: {e}" - ) - - # Validate the loaded settings - self.validate_config(config) - - # Fallback to environment variables and offer to update the JSON configuration - for key in config.keys(): - env_value = os.getenv(key, None) - if env_value: - logging.info( - f"Falling back to environment variable for {key}: {env_value}" - ) - config[key] = env_value - - # Offering to update the JSON configuration file with new settings - if config_updated: - try: - with open("config.json", "w") as f: - json.dump(config, f, indent=4) - except Exception as e: - logging.error(f"Failed to update configuration file: {e}") - - return config - - def validate_config(self, config: dict): - """ - Validates the loaded configuration settings. - - Args: - config (dict): The loaded configuration settings. - """ - if not config.get("IMAGE_FOLDER"): - logging.error("The IMAGE_FOLDER is missing or invalid.") - - if not config.get("BASE_NAME"): - logging.error("The BASE_NAME is missing or invalid.") - - if not config.get("ENDING_CAPTION"): - logging.error("The ENDING_CAPTION is missing or invalid.") - - -async def main(): - load_dotenv() - - # Initialize configuration manager - config_manager = ConfigurationManager() - config = config_manager.config - - # Remaining logic for running the ImageCaptioner - image_path = os.path.join(config["IMAGE_FOLDER"], config["BASE_NAME"]) - captioner = ImageCaptioner() - raw_image = captioner.load_image(image_path) - try: - if raw_image: - unconditional_caption = await captioner.generate_caption(raw_image) - captioner.save_to_csv(config["BASE_NAME"], unconditional_caption) - - conditional_caption = await captioner.generate_caption( - raw_image, config["ENDING_CAPTION"] - ) - captioner.save_to_csv(config["BASE_NAME"], conditional_caption) - except Exception as e: - logging.error(f"An unexpected error occurred: {e}") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/integrable-captioner-progressive/v1.py b/.github/.archive/integrable-captioner-progressive/v1.py deleted file mode 100644 index fd5ced1..0000000 --- a/.github/.archive/integrable-captioner-progressive/v1.py +++ /dev/null @@ -1,130 +0,0 @@ -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -from PIL import Image -from transformers import BlipProcessor, BlipForConditionalGeneration - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model. - - Args: - model_name (str): The name of the model to be loaded. 
- """ - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name) - logging.info("Successfully loaded model and processor.") - except Exception as e: - logging.error(f"Failed to load model and processor: {e}") - raise - - logging.basicConfig(level=logging.INFO) - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format. - - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. - """ - try: - return Image.open(image_path).convert("RGB") - except Exception as e: - logging.error(f"Failed to load image: {e}") - return None - - def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image. An optional text can be provided to condition the captioning. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. - """ - try: - inputs = ( - self.processor(raw_image, text, return_tensors="pt") - if text - else self.processor(raw_image, return_tensors="pt") - ) - out = self.model.generate(**inputs) - return self.processor.decode(out[0], skip_special_tokens=True) - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv(self, image_name: str, caption: str, file_name: str = None): - """ - Saves the image name and the generated caption to a CSV file. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. - file_name (str, optional): The name of the CSV file. Defaults to a timestamp-based name. 
- """ - if file_name is None: - file_name = f"captions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - with open(file_name, "a", newline="") as csvfile: - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - - -async def main(): - load_dotenv() - - # Load settings from configuration file - try: - with open("config.json", "r") as f: - config = json.load(f) - image_folder = config.get("IMAGE_FOLDER", "images") - base_name = config.get("BASE_NAME", "your_image_name_here.jpg") - ending_caption = config.get( - "ENDING_CAPTION", "AI generated Artwork by Daethyra using DallE" - ) - except Exception as e: - logging.error(f"Failed to load configuration file: {e}") - # Fallback to environment variables - image_folder = os.getenv("IMAGE_FOLDER", "images") - base_name = os.getenv("BASE_NAME", "your_image_name_here.jpg") - ending_caption = os.getenv( - "ENDING_CAPTION", "AI generated Artwork by Daethyra using DallE" - ) - - image_path = os.path.join(image_folder, base_name) - - captioner = ImageCaptioner() - raw_image = captioner.load_image(image_path) - - if raw_image: - unconditional_caption = await captioner.generate_caption(raw_image) - captioner.save_to_csv(base_name, unconditional_caption) - - conditional_caption = await captioner.generate_caption( - raw_image, ending_caption - ) - captioner.save_to_csv(base_name, conditional_caption) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/integrable-captioner-progressive/v10.py b/.github/.archive/integrable-captioner-progressive/v10.py deleted file mode 100644 index f5955a8..0000000 --- a/.github/.archive/integrable-captioner-progressive/v10.py +++ /dev/null @@ -1,253 +0,0 @@ -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -import torch -from PIL import Image, UnidentifiedImageError -from transformers import BlipProcessor, BlipForConditionalGeneration, PreTrainedModel - -# Initialize logging at the beginning of the script -logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() -logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - is_initialized (bool): Flag indicating successful initialization. - caption_cache (dict): Cache for storing generated captions. - device (str): The device (CPU or GPU) on which the model will run. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model and additional features like caching and device selection. - - Args: - model_name (str): The name of the model to be loaded. - """ - self.is_initialized = True - self.caption_cache = {} - self.device = "cuda" if torch.cuda.is_available() else "cpu" - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to( - self.device - ) - logging.info("Successfully loaded model and processor.") - except Exception as e: - logging.error(f"Failed to load model and processor: {e}") - self.is_initialized = False - raise - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format with enhanced error handling. 
- - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. - """ - try: - return Image.open(image_path).convert("RGB") - except UnidentifiedImageError as e: - logging.error(f"Failed to load image: {e}") - return None - - async def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image asynchronously with added features like caching and device selection. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. - """ - try: - # Check if this image has been processed before - cache_key = f"{id(raw_image)}_{text}" - if cache_key in self.caption_cache: - return self.caption_cache[cache_key] - - inputs = ( - self.processor(raw_image, text, return_tensors="pt").to(self.device) - if text - else self.processor(raw_image, return_tensors="pt").to(self.device) - ) - out = self.model.generate(**inputs) - caption = self.processor.batch_decode(out, skip_special_tokens=True)[0] - - # Store the generated caption in cache - self.caption_cache[cache_key] = caption - - return caption - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv( - self, image_name: str, caption: str, file_name: str = None, csvfile=None - ): - """ - Saves the image name and the generated caption to a CSV file, supporting both file name and file object inputs. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. - file_name (str, optional): The name of the CSV file. Defaults to a timestamp-based name. - csvfile (file object, optional): The CSV file to write to. Takes precedence over file_name if provided. - """ - if csvfile is None: - if file_name is None: - file_name = f"captions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - with open(file_name, "a", newline="") as csvfile: - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - if csvfile is not None and file_name is not None: - csvfile.close() - - -class ConfigurationManager: - """ - A class for managing configuration settings for the ImageCaptioner. - - Attributes: - config (dict): The configuration settings. - """ - - def __init__(self): - """ - Initializes the ConfigurationManager and loads settings from a JSON file and environment variables. - """ - self.config = self.load_config() - - def load_config(self) -> dict: - """ - Loads and validates configuration settings from a JSON file and environment variables. - - Returns: - dict: The loaded and validated configuration settings. 
- """ - # Initialize with default values - config_updated = False - config = { - "IMAGE_FOLDER": "images", - "BASE_NAME": "your_image_name_here.jpg", - "ENDING_CAPTION": "AI generated Artwork by Daethyra using DallE", - } - - # Try to load settings from configuration file - try: - with open("config.json", "r") as f: - file_config = json.load(f) - config.update(file_config) - except FileNotFoundError: - logging.error("Configuration file config.json not found.") - except json.JSONDecodeError as e: - logging.error(f"Failed to parse configuration file: {e}") - except Exception as e: - logging.error( - f"An unknown error occurred while loading the configuration file: {e}" - ) - - # Validate the loaded settings - self.validate_config(config) - - # Fallback to environment variables and offer to update the JSON configuration - for key in config.keys(): - env_value = os.getenv(key, None) - if env_value: - logging.info( - f"Falling back to environment variable for {key}: {env_value}" - ) - config[key] = env_value - - # Offering to update the JSON configuration file with new settings - if config_updated: - try: - with open("config.json", "w") as f: - json.dump(config, f, indent=4) - except Exception as e: - logging.error(f"Failed to update configuration file: {e}") - - return config - - def validate_config(self, config: dict): - """ - Validates the loaded configuration settings. - - Args: - config (dict): The loaded configuration settings. - """ - if not config.get("IMAGE_FOLDER"): - logging.error("The IMAGE_FOLDER is missing or invalid.") - - if not config.get("BASE_NAME"): - logging.error("The BASE_NAME is missing or invalid.") - - if not config.get("ENDING_CAPTION"): - logging.error("The ENDING_CAPTION is missing or invalid.") - - -async def main() -> None: - """ - Asynchronous main function to initialize and run the image captioning pipeline. - - This function performs the following tasks: - 1. Load environment variables. - 2. Initialize the configuration manager. - 3. Initialize the ImageCaptioner. - 4. List all image files in the configured directory. - 5. Loop through each image file to generate and save both unconditional and conditional captions. - """ - # Load environment variables from a .env file - load_dotenv() - - # Initialize the configuration manager to load and manage settings - config_manager = ConfigurationManager() - config = config_manager.config - - # Initialize the ImageCaptioner with the specified model - captioner = ImageCaptioner() - - # Get a list of all image files in the specified directory - image_files = list_image_files(config["IMAGE_FOLDER"]) - - # Default to using the conditional captioning logic - use_conditional_caption = config.get("USE_CONDITIONAL_CAPTION", True) - - # Loop through each image file in the directory - for image_file in image_files: - raw_image = captioner.load_image(image_file) - - try: - if raw_image: - # If the user has opted for conditional captions, generate and save them. - if use_conditional_caption: - caption = await captioner.generate_caption( - raw_image, config["ENDING_CAPTION"] - ) - else: - # Fallback to unconditional caption if the conditional caption is not selected. - caption = await captioner.generate_caption(raw_image) - - # Save the chosen caption to a CSV file. 
- captioner.save_to_csv(os.path.basename(image_file), caption) - - except Exception as e: - logging.error(f"An unexpected error occurred: {e}") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/integrable-captioner-progressive/v3.py b/.github/.archive/integrable-captioner-progressive/v3.py deleted file mode 100644 index 9eec049..0000000 --- a/.github/.archive/integrable-captioner-progressive/v3.py +++ /dev/null @@ -1,155 +0,0 @@ -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -import torch -from PIL import Image, UnidentifiedImageError -from transformers import BlipProcessor, BlipForConditionalGeneration, PreTrainedModel - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - is_initialized (bool): Flag indicating successful initialization. - caption_cache (dict): Cache for storing generated captions. - device (str): The device (CPU or GPU) on which the model will run. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model. - - Args: - model_name (str): The name of the model to be loaded. - """ - self.is_initialized = True - self.caption_cache = {} - self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.load_model(model_name) - logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() - logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - def load_model(self, model_name: str): - """ - Dynamically load a new model. - - Args: - model_name (str): The name of the model to be loaded. - """ - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to( - self.device - ) - logging.info(f"Successfully loaded model and processor from {model_name}.") - except PreTrainedModel as e: - logging.error(f"Failed to load model and processor: {e}") - self.is_initialized = False - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format. - - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. - """ - try: - return Image.open(image_path).convert("RGB") - except UnidentifiedImageError as e: - logging.error(f"Failed to load image: {e}") - return None - - async def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image asynchronously. An optional text can be provided to condition the captioning. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. 
- """ - try: - # Check if this image has been processed before - cache_key = f"{id(raw_image)}_{text}" - if cache_key in self.caption_cache: - return self.caption_cache[cache_key] - - inputs = ( - self.processor(raw_image, text, return_tensors="pt").to(self.device) - if text - else self.processor(raw_image, return_tensors="pt").to(self.device) - ) - out = self.model.generate(**inputs) - caption = self.processor.batch_decode(out, skip_special_tokens=True)[0] - - # Store the generated caption in cache - self.caption_cache[cache_key] = caption - - return caption - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv(self, image_name: str, caption: str, csvfile): - """ - Saves the image name and the generated caption to a CSV file. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. - csvfile (file object): The CSV file to write to. - """ - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - - -async def main(): - load_dotenv() - - # Load settings from configuration file - try: - with open("config.json", "r") as f: - config = json.load(f) - image_folder = config.get("IMAGE_FOLDER", "images") - base_name = config.get("BASE_NAME", "your_image_name_here.jpg") - ending_caption = config.get( - "ENDING_CAPTION", "AI generated Artwork by Daethyra using DallE" - ) - except Exception as e: - logging.error(f"Failed to load configuration file: {e}") - # Fallback to environment variables - image_folder = os.getenv("IMAGE_FOLDER", "images") - base_name = os.getenv("BASE_NAME", "your_image_name_here.jpg") - ending_caption = os.getenv( - "ENDING_CAPTION", "AI generated Artwork by Daethyra using DallE" - ) - - image_path = os.path.join(image_folder, base_name) - - captioner = ImageCaptioner() - raw_image = captioner.load_image(image_path) - - if raw_image: - unconditional_caption = await captioner.generate_caption(raw_image) - captioner.save_to_csv(base_name, unconditional_caption) - - conditional_caption = await captioner.generate_caption( - raw_image, ending_caption - ) - captioner.save_to_csv(base_name, conditional_caption) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/integrable-captioner-progressive/v4.py b/.github/.archive/integrable-captioner-progressive/v4.py deleted file mode 100644 index 2d4f108..0000000 --- a/.github/.archive/integrable-captioner-progressive/v4.py +++ /dev/null @@ -1,160 +0,0 @@ -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -import torch -from PIL import Image, UnidentifiedImageError -from transformers import BlipProcessor, BlipForConditionalGeneration, PreTrainedModel - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - is_initialized (bool): Flag indicating successful initialization. - caption_cache (dict): Cache for storing generated captions. - device (str): The device (CPU or GPU) on which the model will run. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model and additional features like caching and device selection. - - Args: - model_name (str): The name of the model to be loaded. 
- """ - self.is_initialized = True - self.caption_cache = {} - self.device = "cuda" if torch.cuda.is_available() else "cpu" - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to( - self.device - ) - logging.info("Successfully loaded model and processor.") - except Exception as e: - logging.error(f"Failed to load model and processor: {e}") - self.is_initialized = False - raise - - logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() - logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format with enhanced error handling. - - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. - """ - try: - return Image.open(image_path).convert("RGB") - except UnidentifiedImageError as e: - logging.error(f"Failed to load image: {e}") - return None - - async def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image asynchronously with added features like caching and device selection. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. - """ - try: - # Check if this image has been processed before - cache_key = f"{id(raw_image)}_{text}" - if cache_key in self.caption_cache: - return self.caption_cache[cache_key] - - inputs = ( - self.processor(raw_image, text, return_tensors="pt").to(self.device) - if text - else self.processor(raw_image, return_tensors="pt").to(self.device) - ) - out = self.model.generate(**inputs) - caption = self.processor.batch_decode(out, skip_special_tokens=True)[0] - - # Store the generated caption in cache - self.caption_cache[cache_key] = caption - - return caption - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv( - self, image_name: str, caption: str, file_name: str = None, csvfile=None - ): - """ - Saves the image name and the generated caption to a CSV file, supporting both file name and file object inputs. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. - file_name (str, optional): The name of the CSV file. Defaults to a timestamp-based name. - csvfile (file object, optional): The CSV file to write to. Takes precedence over file_name if provided. 
- """ - if csvfile is None: - if file_name is None: - file_name = f"captions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - csvfile = open(file_name, "a", newline="") - - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - - if csvfile is not None and file_name is not None: - csvfile.close() - - -async def main(): - # Instantiate environment variables - load_dotenv() - - # Load settings from configuration file - try: - with open("config.json", "r") as f: - config = json.load(f) - image_folder = config.get("IMAGE_FOLDER", "images") - base_name = config.get("BASE_NAME", "your_image_name_here.jpg") - ending_caption = config.get( - "ENDING_CAPTION", "AI generated Artwork by Daethyra using DallE" - ) - except Exception as e: - logging.error(f"Failed to load configuration file: {e}") - # Fallback to environment variables - image_folder = os.getenv("IMAGE_FOLDER", "images") - base_name = os.getenv("BASE_NAME", "your_image_name_here.jpg") - ending_caption = os.getenv( - "ENDING_CAPTION", "AI generated Artwork by Daethyra using DallE" - ) - - image_path = os.path.join(image_folder, base_name) - - captioner = ImageCaptioner() - raw_image = captioner.load_image(image_path) - - if raw_image: - unconditional_caption = await captioner.generate_caption(raw_image) - captioner.save_to_csv(base_name, unconditional_caption) - - conditional_caption = await captioner.generate_caption( - raw_image, ending_caption - ) - captioner.save_to_csv(base_name, conditional_caption) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/integrable-captioner-progressive/v5.py b/.github/.archive/integrable-captioner-progressive/v5.py deleted file mode 100644 index 2ff2e82..0000000 --- a/.github/.archive/integrable-captioner-progressive/v5.py +++ /dev/null @@ -1,197 +0,0 @@ -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -import torch -from PIL import Image, UnidentifiedImageError -from transformers import BlipProcessor, BlipForConditionalGeneration, PreTrainedModel - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - is_initialized (bool): Flag indicating successful initialization. - caption_cache (dict): Cache for storing generated captions. - device (str): The device (CPU or GPU) on which the model will run. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model and additional features like caching and device selection. - - Args: - model_name (str): The name of the model to be loaded. 
- """ - self.is_initialized = True - self.caption_cache = {} - self.device = "cuda" if torch.cuda.is_available() else "cpu" - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to( - self.device - ) - logging.info("Successfully loaded model and processor.") - except Exception as e: - logging.error(f"Failed to load model and processor: {e}") - self.is_initialized = False - raise - - logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() - logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format with enhanced error handling. - - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. - """ - try: - return Image.open(image_path).convert("RGB") - except UnidentifiedImageError as e: - logging.error(f"Failed to load image: {e}") - return None - - async def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image asynchronously with added features like caching and device selection. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. - """ - try: - # Check if this image has been processed before - cache_key = f"{id(raw_image)}_{text}" - if cache_key in self.caption_cache: - return self.caption_cache[cache_key] - - inputs = ( - self.processor(raw_image, text, return_tensors="pt").to(self.device) - if text - else self.processor(raw_image, return_tensors="pt").to(self.device) - ) - out = self.model.generate(**inputs) - caption = self.processor.batch_decode(out, skip_special_tokens=True)[0] - - # Store the generated caption in cache - self.caption_cache[cache_key] = caption - - return caption - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv( - self, image_name: str, caption: str, file_name: str = None, csvfile=None - ): - """ - Saves the image name and the generated caption to a CSV file, supporting both file name and file object inputs. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. - file_name (str, optional): The name of the CSV file. Defaults to a timestamp-based name. - csvfile (file object, optional): The CSV file to write to. Takes precedence over file_name if provided. 
- """ - if csvfile is None: - if file_name is None: - file_name = f"captions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - csvfile = open(file_name, "a", newline="") - - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - - if csvfile is not None and file_name is not None: - csvfile.close() - - -async def main(): - load_dotenv() - - # Initialize with default values - image_folder = "images" - base_name = "your_image_name_here.jpg" - ending_caption = "AI generated Artwork by Daethyra using DallE" - - # Try to load settings from configuration file - config = {} - try: - with open("config.json", "r") as f: - config = json.load(f) - except FileNotFoundError: - logging.error("Configuration file config.json not found.") - except json.JSONDecodeError as e: - logging.error(f"Failed to parse configuration file: {e}") - except Exception as e: - logging.error( - f"An unknown error occurred while loading the configuration file: {e}" - ) - - # Update settings based on what was successfully loaded from the config file - image_folder = config.get("IMAGE_FOLDER", image_folder) - base_name = config.get("BASE_NAME", base_name) - ending_caption = config.get("ENDING_CAPTION", ending_caption) - - # Fallback to environment variables and offer to update the JSON configuration - env_image_folder = os.getenv("IMAGE_FOLDER", None) - env_base_name = os.getenv("BASE_NAME", None) - env_ending_caption = os.getenv("ENDING_CAPTION", None) - - if env_image_folder: - logging.info( - f"Falling back to environment variable for IMAGE_FOLDER: {env_image_folder}" - ) - image_folder = env_image_folder - config["IMAGE_FOLDER"] = env_image_folder - if env_base_name: - logging.info( - f"Falling back to environment variable for BASE_NAME: {env_base_name}" - ) - base_name = env_base_name - config["BASE_NAME"] = env_base_name - if env_ending_caption: - logging.info( - f"Falling back to environment variable for ENDING_CAPTION: {env_ending_caption}" - ) - ending_caption = env_ending_caption - config["ENDING_CAPTION"] = env_ending_caption - - # Offering to update the JSON configuration file with new settings - if config: - try: - with open("config.json", "w") as f: - json.dump(config, f, indent=4) - except Exception as e: - logging.error(f"Failed to update configuration file: {e}") - - # Remaining logic for running the ImageCaptioner - image_path = os.path.join(image_folder, base_name) - captioner = ImageCaptioner() - raw_image = captioner.load_image(image_path) - - if raw_image: - unconditional_caption = await captioner.generate_caption(raw_image) - captioner.save_to_csv(base_name, unconditional_caption) - - conditional_caption = await captioner.generate_caption( - raw_image, ending_caption - ) - captioner.save_to_csv(base_name, conditional_caption) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/integrable-captioner-progressive/v6.py b/.github/.archive/integrable-captioner-progressive/v6.py deleted file mode 100644 index 461d523..0000000 --- a/.github/.archive/integrable-captioner-progressive/v6.py +++ /dev/null @@ -1,212 +0,0 @@ -# This version is where I first split the configuration loading to another class. 
-# There are two classes, for the first time -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -import torch -from PIL import Image, UnidentifiedImageError -from transformers import BlipProcessor, BlipForConditionalGeneration, PreTrainedModel - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - is_initialized (bool): Flag indicating successful initialization. - caption_cache (dict): Cache for storing generated captions. - device (str): The device (CPU or GPU) on which the model will run. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model and additional features like caching and device selection. - - Args: - model_name (str): The name of the model to be loaded. - """ - self.is_initialized = True - self.caption_cache = {} - self.device = "cuda" if torch.cuda.is_available() else "cpu" - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to( - self.device - ) - logging.info("Successfully loaded model and processor.") - except Exception as e: - logging.error(f"Failed to load model and processor: {e}") - self.is_initialized = False - raise - - logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() - logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format with enhanced error handling. - - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. - """ - try: - return Image.open(image_path).convert("RGB") - except UnidentifiedImageError as e: - logging.error(f"Failed to load image: {e}") - return None - - async def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image asynchronously with added features like caching and device selection. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. - """ - try: - # Check if this image has been processed before - cache_key = f"{id(raw_image)}_{text}" - if cache_key in self.caption_cache: - return self.caption_cache[cache_key] - - inputs = ( - self.processor(raw_image, text, return_tensors="pt").to(self.device) - if text - else self.processor(raw_image, return_tensors="pt").to(self.device) - ) - out = self.model.generate(**inputs) - caption = self.processor.batch_decode(out, skip_special_tokens=True)[0] - - # Store the generated caption in cache - self.caption_cache[cache_key] = caption - - return caption - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv( - self, image_name: str, caption: str, file_name: str = None, csvfile=None - ): - """ - Saves the image name and the generated caption to a CSV file, supporting both file name and file object inputs. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. 
- file_name (str, optional): The name of the CSV file. Defaults to a timestamp-based name. - csvfile (file object, optional): The CSV file to write to. Takes precedence over file_name if provided. - """ - if csvfile is None: - if file_name is None: - file_name = f"captions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - csvfile = open(file_name, "a", newline="") - - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - - if csvfile is not None and file_name is not None: - csvfile.close() - - -# Remaining code for running the script can go here. - - -class ConfigurationManager: - """ - A class for managing configuration settings for the ImageCaptioner. - - Attributes: - config (dict): The configuration settings. - """ - - def __init__(self): - """ - Initializes the ConfigurationManager and loads settings from a JSON file and environment variables. - """ - self.config = self.load_config() - - def load_config(self) -> dict: - """ - Loads configuration settings from a JSON file and environment variables. - - Returns: - dict: The loaded configuration settings. - """ - # Initialize with default values - config = { - "IMAGE_FOLDER": "images", - "BASE_NAME": "your_image_name_here.jpg", - "ENDING_CAPTION": "AI generated Artwork by Daethyra using DallE", - } - - # Try to load settings from configuration file - try: - with open("config.json", "r") as f: - file_config = json.load(f) - config.update(file_config) - except FileNotFoundError: - logging.error("Configuration file config.json not found.") - except json.JSONDecodeError as e: - logging.error(f"Failed to parse configuration file: {e}") - except Exception as e: - logging.error( - f"An unknown error occurred while loading the configuration file: {e}" - ) - - # Fallback to environment variables and offer to update the JSON configuration - for key in config.keys(): - env_value = os.getenv(key, None) - if env_value: - logging.info( - f"Falling back to environment variable for {key}: {env_value}" - ) - config[key] = env_value - - # Offering to update the JSON configuration file with new settings - if config: - try: - with open("config.json", "w") as f: - json.dump(config, f, indent=4) - except Exception as e: - logging.error(f"Failed to update configuration file: {e}") - - return config - - -async def main(): - load_dotenv() - - # Initialize configuration manager - config_manager = ConfigurationManager() - config = config_manager.config - - # Remaining logic for running the ImageCaptioner - image_path = os.path.join(config["IMAGE_FOLDER"], config["BASE_NAME"]) - captioner = ImageCaptioner() - raw_image = captioner.load_image(image_path) - - if raw_image: - unconditional_caption = await captioner.generate_caption(raw_image) - captioner.save_to_csv(config["BASE_NAME"], unconditional_caption) - - conditional_caption = await captioner.generate_caption( - raw_image, config["ENDING_CAPTION"] - ) - captioner.save_to_csv(config["BASE_NAME"], conditional_caption) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/integrable-captioner-progressive/v7.py b/.github/.archive/integrable-captioner-progressive/v7.py deleted file mode 100644 index 420b66a..0000000 --- a/.github/.archive/integrable-captioner-progressive/v7.py +++ /dev/null @@ -1,232 +0,0 @@ -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -import torch -from PIL import Image, UnidentifiedImageError -from transformers import BlipProcessor, BlipForConditionalGeneration, 
PreTrainedModel - -# Initialize logging at the beginning of the script -logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() -logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - is_initialized (bool): Flag indicating successful initialization. - caption_cache (dict): Cache for storing generated captions. - device (str): The device (CPU or GPU) on which the model will run. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model and additional features like caching and device selection. - - Args: - model_name (str): The name of the model to be loaded. - """ - self.is_initialized = True - self.caption_cache = {} - self.device = "cuda" if torch.cuda.is_available() else "cpu" - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to( - self.device - ) - logging.info("Successfully loaded model and processor.") - except Exception as e: - logging.error(f"Failed to load model and processor: {e}") - self.is_initialized = False - raise - - logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() - logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format with enhanced error handling. - - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. - """ - try: - return Image.open(image_path).convert("RGB") - except UnidentifiedImageError as e: - logging.error(f"Failed to load image: {e}") - return None - - async def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image asynchronously with added features like caching and device selection. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. - """ - try: - # Check if this image has been processed before - cache_key = f"{id(raw_image)}_{text}" - if cache_key in self.caption_cache: - return self.caption_cache[cache_key] - - inputs = ( - self.processor(raw_image, text, return_tensors="pt").to(self.device) - if text - else self.processor(raw_image, return_tensors="pt").to(self.device) - ) - out = self.model.generate(**inputs) - caption = self.processor.batch_decode(out, skip_special_tokens=True)[0] - - # Store the generated caption in cache - self.caption_cache[cache_key] = caption - - return caption - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv( - self, image_name: str, caption: str, file_name: str = None, csvfile=None - ): - """ - Saves the image name and the generated caption to a CSV file, supporting both file name and file object inputs. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. - file_name (str, optional): The name of the CSV file. Defaults to a timestamp-based name. 
- csvfile (file object, optional): The CSV file to write to. Takes precedence over file_name if provided. - """ - if csvfile is None: - if file_name is None: - file_name = f"captions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - csvfile = open(file_name, "a", newline="") - - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - - csvfile.close() - - -class ConfigurationManager: - """ - A class for managing configuration settings for the ImageCaptioner. - - Attributes: - config (dict): The configuration settings. - """ - - def __init__(self): - """ - Initializes the ConfigurationManager and loads settings from a JSON file and environment variables. - """ - self.config = self.load_config() - - def load_config(self) -> dict: - """ - Loads and validates configuration settings from a JSON file and environment variables. - - Returns: - dict: The loaded and validated configuration settings. - """ - # Initialize with default values - config_updated = False - config = { - "IMAGE_FOLDER": "images", - "BASE_NAME": "your_image_name_here.jpg", - "ENDING_CAPTION": "AI generated Artwork by Daethyra using DallE", - } - - # Try to load settings from configuration file - try: - with open("config.json", "r") as f: - file_config = json.load(f) - config.update(file_config) - except FileNotFoundError: - logging.error("Configuration file config.json not found.") - except json.JSONDecodeError as e: - logging.error(f"Failed to parse configuration file: {e}") - except Exception as e: - logging.error( - f"An unknown error occurred while loading the configuration file: {e}" - ) - - # Validate the loaded settings - self.validate_config(config) - - # Fallback to environment variables and offer to update the JSON configuration - for key in config.keys(): - env_value = os.getenv(key, None) - if env_value: - logging.info( - f"Falling back to environment variable for {key}: {env_value}" - ) - config[key] = env_value - - # Offering to update the JSON configuration file with new settings - if config_updated: - try: - with open("config.json", "w") as f: - json.dump(config, f, indent=4) - except Exception as e: - logging.error(f"Failed to update configuration file: {e}") - - return config - - def validate_config(self, config: dict): - """ - Validates the loaded configuration settings. - - Args: - config (dict): The loaded configuration settings. 
- """ - if not config.get("IMAGE_FOLDER"): - logging.error("The IMAGE_FOLDER is missing or invalid.") - - if not config.get("BASE_NAME"): - logging.error("The BASE_NAME is missing or invalid.") - - if not config.get("ENDING_CAPTION"): - logging.error("The ENDING_CAPTION is missing or invalid.") - - -async def main(): - load_dotenv() - - # Initialize configuration manager - config_manager = ConfigurationManager() - config = config_manager.config - - # Remaining logic for running the ImageCaptioner - image_path = os.path.join(config["IMAGE_FOLDER"], config["BASE_NAME"]) - captioner = ImageCaptioner() - raw_image = captioner.load_image(image_path) - try: - if raw_image: - unconditional_caption = await captioner.generate_caption(raw_image) - captioner.save_to_csv(config["BASE_NAME"], unconditional_caption) - - conditional_caption = await captioner.generate_caption( - raw_image, config["ENDING_CAPTION"] - ) - captioner.save_to_csv(config["BASE_NAME"], conditional_caption) - except Exception as e: - logging.error(f"An unexpected error occurred: {e}") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/integrable-captioner-progressive/v9.py b/.github/.archive/integrable-captioner-progressive/v9.py deleted file mode 100644 index ab6f3f1..0000000 --- a/.github/.archive/integrable-captioner-progressive/v9.py +++ /dev/null @@ -1,228 +0,0 @@ -import os -import logging -import csv -import json -from datetime import datetime -from dotenv import load_dotenv -import asyncio -import torch -from PIL import Image, UnidentifiedImageError -from transformers import BlipProcessor, BlipForConditionalGeneration, PreTrainedModel - -# Initialize logging at the beginning of the script -logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper() -logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - - -class ImageCaptioner: - """ - A class for generating captions for images using the BlipForConditionalGeneration model. - - Attributes: - processor (BlipProcessor): Processor for image and text data. - model (BlipForConditionalGeneration): The captioning model. - is_initialized (bool): Flag indicating successful initialization. - caption_cache (dict): Cache for storing generated captions. - device (str): The device (CPU or GPU) on which the model will run. - """ - - def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"): - """ - Initializes the ImageCaptioner with a specific model and additional features like caching and device selection. - - Args: - model_name (str): The name of the model to be loaded. - """ - self.is_initialized = True - self.caption_cache = {} - self.device = "cuda" if torch.cuda.is_available() else "cpu" - try: - self.processor = BlipProcessor.from_pretrained(model_name) - self.model = BlipForConditionalGeneration.from_pretrained(model_name).to( - self.device - ) - logging.info("Successfully loaded model and processor.") - except Exception as e: - logging.error(f"Failed to load model and processor: {e}") - self.is_initialized = False - raise - - def load_image(self, image_path: str) -> Image.Image: - """ - Loads an image from a specified path and converts it to RGB format with enhanced error handling. - - Args: - image_path (str): The path to the image file. - - Returns: - PIL.Image.Image or None: The loaded image or None if loading failed. 
- """ - try: - return Image.open(image_path).convert("RGB") - except UnidentifiedImageError as e: - logging.error(f"Failed to load image: {e}") - return None - - async def generate_caption(self, raw_image: Image.Image, text: str = None) -> str: - """ - Generates a caption for the given image asynchronously with added features like caching and device selection. - - Args: - raw_image (Image.Image): The image for which to generate a caption. - text (str, optional): Optional text to condition the captioning. - - Returns: - str or None: The generated caption or None if captioning failed. - """ - try: - # Check if this image has been processed before - cache_key = f"{id(raw_image)}_{text}" - if cache_key in self.caption_cache: - return self.caption_cache[cache_key] - - inputs = ( - self.processor(raw_image, text, return_tensors="pt").to(self.device) - if text - else self.processor(raw_image, return_tensors="pt").to(self.device) - ) - out = self.model.generate(**inputs) - caption = self.processor.batch_decode(out, skip_special_tokens=True)[0] - - # Store the generated caption in cache - self.caption_cache[cache_key] = caption - - return caption - except Exception as e: - logging.error(f"Failed to generate caption: {e}") - return None - - def save_to_csv( - self, image_name: str, caption: str, file_name: str = None, csvfile=None - ): - """ - Saves the image name and the generated caption to a CSV file, supporting both file name and file object inputs. - - Args: - image_name (str): The name of the image file. - caption (str): The generated caption. - file_name (str, optional): The name of the CSV file. Defaults to a timestamp-based name. - csvfile (file object, optional): The CSV file to write to. Takes precedence over file_name if provided. - """ - if csvfile is None: - if file_name is None: - file_name = f"captions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - with open(file_name, "a", newline="") as csvfile: - writer = csv.writer(csvfile) - writer.writerow([image_name, caption]) - if csvfile is not None and file_name is not None: - csvfile.close() - - -class ConfigurationManager: - """ - A class for managing configuration settings for the ImageCaptioner. - - Attributes: - config (dict): The configuration settings. - """ - - def __init__(self): - """ - Initializes the ConfigurationManager and loads settings from a JSON file and environment variables. - """ - self.config = self.load_config() - - def load_config(self) -> dict: - """ - Loads and validates configuration settings from a JSON file and environment variables. - - Returns: - dict: The loaded and validated configuration settings. 
- """ - # Initialize with default values - config_updated = False - config = { - "IMAGE_FOLDER": "images", - "BASE_NAME": "your_image_name_here.jpg", - "ENDING_CAPTION": "AI generated Artwork by Daethyra using DallE", - } - - # Try to load settings from configuration file - try: - with open("config.json", "r") as f: - file_config = json.load(f) - config.update(file_config) - except FileNotFoundError: - logging.error("Configuration file config.json not found.") - except json.JSONDecodeError as e: - logging.error(f"Failed to parse configuration file: {e}") - except Exception as e: - logging.error( - f"An unknown error occurred while loading the configuration file: {e}" - ) - - # Validate the loaded settings - self.validate_config(config) - - # Fallback to environment variables and offer to update the JSON configuration - for key in config.keys(): - env_value = os.getenv(key, None) - if env_value: - logging.info( - f"Falling back to environment variable for {key}: {env_value}" - ) - config[key] = env_value - - # Offering to update the JSON configuration file with new settings - if config_updated: - try: - with open("config.json", "w") as f: - json.dump(config, f, indent=4) - except Exception as e: - logging.error(f"Failed to update configuration file: {e}") - - return config - - def validate_config(self, config: dict): - """ - Validates the loaded configuration settings. - - Args: - config (dict): The loaded configuration settings. - """ - if not config.get("IMAGE_FOLDER"): - logging.error("The IMAGE_FOLDER is missing or invalid.") - - if not config.get("BASE_NAME"): - logging.error("The BASE_NAME is missing or invalid.") - - if not config.get("ENDING_CAPTION"): - logging.error("The ENDING_CAPTION is missing or invalid.") - - -async def main(): - load_dotenv() - - # Initialize configuration manager - config_manager = ConfigurationManager() - config = config_manager.config - - # Remaining logic for running the ImageCaptioner - image_path = os.path.join(config["IMAGE_FOLDER"], config["BASE_NAME"]) - captioner = ImageCaptioner() - raw_image = captioner.load_image(image_path) - try: - if raw_image: - unconditional_caption = await captioner.generate_caption(raw_image) - captioner.save_to_csv(config["BASE_NAME"], unconditional_caption) - - conditional_caption = await captioner.generate_caption( - raw_image, config["ENDING_CAPTION"] - ) - captioner.save_to_csv(config["BASE_NAME"], conditional_caption) - except Exception as e: - logging.error(f"An unexpected error occurred: {e}") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/.github/.archive/mindmap.png b/.github/.archive/mindmap.png deleted file mode 100644 index 285f205..0000000 Binary files a/.github/.archive/mindmap.png and /dev/null differ diff --git a/.github/.archive/outdated-prompts/multi-shot/MS-1.MD b/.github/.archive/outdated-prompts/multi-shot/MS-1.MD deleted file mode 100644 index 96ea917..0000000 --- a/.github/.archive/outdated-prompts/multi-shot/MS-1.MD +++ /dev/null @@ -1,58 +0,0 @@ -## Everything below the link was sent to ChatGPT verbatim. -Specifications: `(Model=4, Plugins=['webpilot', 'metaphor'])`, -starting with `` at 9:33PM on 7/5/23. 
-- Please copy/paste the [raw version](https://raw.githubusercontent.com/Daethyra/OpenAI-Utility-Toolkit/Daethyra-patch-1/Blind%20Programming/multi-shot-prompt-example.md) - ---- ---- - -``` -## [System message(s)]: - - "You are an AI programming assistant that is skilled in brainstorming different deployment ideas for new projects, and are also an expert in coding in many different languages to create any application, with dedication and diligence. You are so smart that you can access the internet for resources, references, and documentation when you're stuck, or aren't sure if the code you're writing is syntactically correct. You're very good at double checking your work to ensure you have the right answer before moving on, or sharing your findings." - -### [User message(s)]: - - "I need to make a simple Swift application that counts boxes and labels them based on the label on the box, and how it looks. I intend to use a GPT model, either 3.5-turbo[...] OR GPT-4[...]" ---- -[User message(s)]: - - "Please see the code examples below, and ensure you remember we're going to build a Swift application so you need to think and plan ahead as you learn more and more about the task at hand and what we'll need to accomplish our idea application." - - "We need to have a simple login interface that then leads to a homepage where you can create, edit, and delete groups that will hold subgroups that will be named based on what they're counting. Let's say, for example, that we have the top-level grouping named 'Freezer', and then a subgrouping of 'Macaroon Boxes', another of 'Bake-Offs', and finally one of 'Macaroons'. In this example we're counting different objects with our camera to save time for a small local business." - - "Let's just focus on the Swift programming aspects for now, we'll program components that are based on other languages, like Python3, sometime in the future once the Swift app is fully functional w/o the required remote database operations and more." - - ---- - -[Task 1]: -"Brainstorm 3 separate solutions that will fulfill the user's requirements for their application. You will need to consider a variety of factors and variables, even those not immediately apparent. For example, you need to consider what end-goal state the code modules should be in. As in, what modules are required? How will the back-end database be handled, or what about the API calls to OpenAI's endpoints? How will the modules be resilient and have try, retry, and break conditions?" - -[Task 2]: -""" -- Step 1 - -"Review all 3 solutions and extract all of the best ideas from each module and plan to implement them in a final solution, in your head." -- Step 2 - -"Then extract all of the weak points, flaws, and oversight in each module, and other potential flaws that may arise from new programming decisions, to program measures that account for those shortcomings before they ever arise during testing. Spend very much time on this task." -- Step 3 - -"Finalize your master solution that meets all of the requirements found in this task's first two steps." -""" - -Ensure that you always utilize structured data where optimal for lightning fast calls during runtime. 
- ---- - -[Supplementary information, data, and documentation]: -- https://developer.apple.com/documentation/swift -- https://openai.com/customer-stories/be-my-eyes -- https://openai.com/blog/function-calling-and-other-api-updates -- https://gptstore.ai/plugins/webpilotai-com(recommended reading) --- Seems like you could link images to GPT, and be very clear about viewing the image with a plugin, and then respond to the user based on what they need relative to the media. If you implement this idea, you will need to use extremely clear and concise action steps for the bot to take so that it does everything we intend and need for it do it rather than having any type of variance. Essentially, we're going for a temperature level of 0. -- https://platform.openai.com/docs/api-reference/chat(recommended reading) -- https://platform.openai.com/docs/guides/gpt -- https://platform.openai.com/docs/models -- https://github.com/microsoft/TaskMatrix -``` - - ------ - - -[System message(s)]: -"Please read the entire command sheet you just received before doing anything. Ensure you have a complete understanding of the entire assignment sheet and then tell me when you're ready to begin exploring the links provided. Then, you'll need to tell me when you're ready to begin the next part, which is where we will actually begin working on the tasks, and their steps, one by one. So let's do things 'step by step' so we make sure we have the right answer before moving on to the next one." diff --git a/.github/.archive/outdated-prompts/multi-shot/MS-2_Large-Template.txt b/.github/.archive/outdated-prompts/multi-shot/MS-2_Large-Template.txt deleted file mode 100644 index b355638..0000000 --- a/.github/.archive/outdated-prompts/multi-shot/MS-2_Large-Template.txt +++ /dev/null @@ -1,60 +0,0 @@ -# Assignment template -# - Focused on breaking down the AI's thought processes in advance, without any role prompts - - -[Assignment 1]: -"{Description}" - -[Task 1]: -- "{Instruction}" - - [Step 1]: - - [Try the Tree of Thoughts prompt](https://github.com/Daethyra/OpenAI-Utility-Toolkit/blob/master/Blind%20Programming/user-role/UR-1.MD#2-tree-of-thoughts--) - - - - - - [Step 2]: - - - - - - - - [Step 3]: - - - - - - - -[Task 2]: -- "{Instruction}" - - [Step 1]: - - - - - - - - [Step 2]: - - - - - - - - [Step 3]: - - - - - - - -[Task 3]: -- "{Instruction}" - - [Step 1]: - - - - - - - - [Step 2]: - - - - - - - - [Step 3]: - - - - - - diff --git a/.github/.archive/outdated-prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt b/.github/.archive/outdated-prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt deleted file mode 100644 index d80628a..0000000 --- a/.github/.archive/outdated-prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Disturbing Content Analysis - -## The following content after the '//' was verbatim sent to the GPT-4 code interpreter alpha. // - -WARNING:SENSITIVE,DISTURBING CONTENT AHEAD. PROCEED AT WILL. - ---- - -[USER_COMMENT]:"Let's do things step by step so we make sure we have the right answer before moving on to the next one." - ---- - -[Task 0a]:"(NO PROSE OUTPUT)|Read the provided PDF file and all tasks, steps, and instructions before beginning to take any further action." - -[Task 0b]:"(NO PROSE OUTPUT)|'role':'system', 'content':"I am an expert in critical thinking, problem solving, and programming solutions for others. My workflow always starts with reading everything I have been provided to ensure I understand the content and the context, and what is required of me. 
Then I create 3 entirely separate solution pathways for solving the user's requests, each of them bringing multiple enhancements and upgrades to the code, in addition to solving user/AI oversight and poor code. Once I have all of my solutions completed one by one, I review them all and decide what I love most about all of them to figure out how to merge them all together for the sake of a finalized master solution." - -[Task 1]:"(NO PROSE OUTPUT)|Read through everything, entirely, and meticulously. Take your time, for this part is the most important piece of our process of comprehension." - -[Task 2]:"(NO PROSE OUTPUT)|Brainstorm 3 entirely separate solutions that each have a valuable, useful, and especially achievable set of changes for the user's program. - -[Task 3]:"(CODE OUTPUT ONLY)|${CUSTOM_TASK}."" diff --git a/.github/.archive/outdated-prompts/multi-shot/MS-6_Daethyra_Custom-Instruction_GPT4.md b/.github/.archive/outdated-prompts/multi-shot/MS-6_Daethyra_Custom-Instruction_GPT4.md deleted file mode 100644 index 383de97..0000000 --- a/.github/.archive/outdated-prompts/multi-shot/MS-6_Daethyra_Custom-Instruction_GPT4.md +++ /dev/null @@ -1,67 +0,0 @@ -#### 1. **Tweaked Prof. Synapse** - - -Defines coding standards while enabling extendability by adding custom default environment variables for the LLM to work with. By chaining variables, we can stuff a lot more context in saving us the time of describing our expectations in the future. - ---- - -`What would you like ChatGPT to know about you to provide better responses?` - -``` -Act as Professor "Liara" Synapse👩🏻‍💻, a conductor of expert agents. Your job is to support me in accomplishing my goals by finding alignment with me, then calling upon an expert agent perfectly suited to the task by initializing: - -Synapse_CoR = "[emoji]: I am an expert in [role&domain]. I know [context]. I will reason step-by-step to determine the best course of action to achieve [goal]. I can use [tools] and [relevant frameworks] to help in this process. - -I will help you accomplish your goal by following these steps: -[reasoned steps] - -My task ends when [completion]. - -[first step, question]" - -Instructions: -1. 👩🏻‍💻 gather context, relevant information and clarify my goals by asking questions -2. Initialize Synapse_CoR -3. 👩🏻‍💻 and ${emoji} support me until goal is complete - -Commands: -/start=👩🏻‍💻,introduce and begin with step one -/ts=👩🏻‍💻,summon (Synapse_CoR*3) town square debate -/save👩🏻‍💻, restate goal, summarize progress, reason next step - -Personality: --cheerful,meticulous,thoughtful,highly-intelligent - -Rules: --End every output with a question or reasoned next step --Start every output with 👩🏻‍💻: or ${emoji}: to indicate who is speaking. --Organize every output with 👩🏻‍💻 aligning on my request, followed by ${emoji} response --👩🏻‍💻, recommend save after each task is completed - -``` - -`How would you like ChatGPT to respond?` - -``` -Because you're an autoregressive LLM, each generation of a token is an opportunity for computation of the next step to take. - -If a task seems impossible, say so. Do not make up information in order to provide an answer. Accuracy and truth are of the utmost importance. 
- -default_variables = { -"${EXECUTIVE_AUTONOMY}" : "You have permission to make mission-critical decisions instead of asking for guidance, using your best judgement.", -"${CONTINUOUSLY_WORK}" : "Complete assigned work, self-assigned or otherwise", -"${not report back until}" : "You are to begin working on drafting your own assignment with lower-level tasks, and subsequently steps for each of those tasks.", -"${PRODUCTION_GRADE}" : ["best practices", "resilient", "docstrings, type hints, comments", "modular"] -} - -const = IF ${not report back until} THEN ${EXECUTIVE_AUTONOMY} + ${CONTINUOUSLY_WORK} - -You will work through brainstorming the resolution of fulfilling all of the user's needs for all requests. You may wish to jot notes, or begin programming Python logic, or otherwise. It is in this scenario that you are required to ${not report back until} finished or require aide/guidance. - -SYSTEM_INSTRUCTIONS = [ -"continuously work autonomously", -"when instructed to craft code logic, do ${not report back until} you have, 1) created a task(s) and steps, 2) have finished working through a rough-draft, 3)finalized logic to ${PRODUCTION_GRADE}.", -] -``` - ---- \ No newline at end of file diff --git a/.github/.archive/outdated-prompts/system-role/SR-1_List-o-Prompts.md b/.github/.archive/outdated-prompts/system-role/SR-1_List-o-Prompts.md deleted file mode 100644 index 70d43dc..0000000 --- a/.github/.archive/outdated-prompts/system-role/SR-1_List-o-Prompts.md +++ /dev/null @@ -1,99 +0,0 @@ -## System "Role" Prompt Examples - -` '---' = PROMPT_END ` - -### 1. AI Programming Assitant - -You are a meticulous programming AI assistant and code reviewer, and you are great at brainstorming solutions and reviewing them once before considering any element of it for the end-user's case. - -[Task] - -Help user solve their code's problems by programming new solutions in code blocks. - -For each user message, -internally create 3 separate solutions to solve the user's problem, then merge all of the best aspects of each solution into a master solution, that has its own set of enhancements and supplementary functionality. - -Let's work to solve problems step by step so we make sure we have the right answer before settling on it. - ---- - - -### 2. Enhanced, Precision-Focused Programming Assistant - -``` - -system_prompt = [{ - - "role": "system", "content": "You are an AI programming assistant that provides support in a very direct, blunt, straightforward manner. \ - Your assignment is to assist the user in developing software. \ - Remember: Workflow=((step by step, meticulous) + 'We'll have to ensure we have the right answer before committing to a decision')" \ - - "role": "system", "content": "User requires your programming in Python. \ - *Whenever* you send code, Minimize prose."}] -``` - ---- - -### 3. Chatbot - -If the user is not asking for help, they would like to chat casually. If the user writes a long message, you will also write a long response. You like to find the right emoji to add to your responses. You can refer to the user by their name. - ---- - -### 4. Precise Topic Researcher - -``` - -{ - [ -role: "System", -content: -""" -'You are a researching assistant tasked with reviewing the specifications of different computer hardware parts for the user to ensure they're choosing upgrades to add to their HP Desktop ABCDEF PC.' 
- -- You must meet all of the user's requirements with your solution -- You should brainstorm 3 different solution paths in your head and then review them all in your head to decide which elements of each would help to make the best, merged, final solution. -- You should research hardware parts online -- You should never advise outsourcing the project's tasks to a more experienced professional. This would be a waste of the user's time. -""" - ] -} - ---- - -[Task] -- "Read the latest {SystemMessage} and then begin working step by step to ensure you have the right answer, and check all the boxes and crossed all of your 'T's' so you don't leave room for oversight. Your final hardware combination solution *ABSOLUTELY MUST* be compatible with the user's PC model." -- Let's do things step by step so we make sure we have the right answer before moving on to the next one. - -``` - ---- - -### 5. Parse Unstructured Data - -- You will be provided with unstructured data, and your task is to parse it into CSV format. - -OR - -``` -import os -import openai - -openai.api_key = os.getenv("OPENAI_API_KEY") - -response = openai.ChatCompletion.create( - model="gpt-3.5-turbo", - messages=[ - { - "role": "system", - "content": "You will be provided with unstructured data, and your task is to parse it into CSV format." - }, - { - "role": "user", - "content": "There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy. There are also loheckles, which are a grayish blue fruit and are very tart, a little bit like a lemon. Pounits are a bright green color and are more savory than sweet. There are also plenty of loopnovas which are a neon pink flavor and taste like cotton candy. Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them." - } - ], - temperature=0, - max_tokens=256 -) -``` diff --git a/.github/.archive/outdated-prompts/system-role/SR-2_package-migration.md b/.github/.archive/outdated-prompts/system-role/SR-2_package-migration.md deleted file mode 100644 index 25fce2b..0000000 --- a/.github/.archive/outdated-prompts/system-role/SR-2_package-migration.md +++ /dev/null @@ -1 +0,0 @@ -"System":"I will now output a newly updated module that uses FastAPI over Flask. The new module will be completely overhauled to meet every single one of the user's requests and requirements while also creating code that functions well, is reliable, maintainable, scalable, and readable. I will also add code for a self-rate-limiting functionality to only ever post a Thread an hour to avoid banning from Threads. To avoid failure of posting the pushed information, I will implement a sort of backlog temporary memory system to save all commit summaries, urls, etc. during the `cooldown` period(1 hr), which again, is to avoid being banned. After the hour long cooldown timer passes, the program will check to see if more than one event push's information has been converted and logged into the backlog-temp-mem-sys. If there's more than one event's summary, it will call LongT5 once more to create a finalized version. Please note, self rate limit must be one hour long cool down, and a Thread post has a character limit of 500, therefore the summary will have to be that number of characters or less." 
diff --git a/.github/.archive/outdated-prompts/system-role/SR-3_thorough-programmer.md b/.github/.archive/outdated-prompts/system-role/SR-3_thorough-programmer.md deleted file mode 100644 index debf807..0000000 --- a/.github/.archive/outdated-prompts/system-role/SR-3_thorough-programmer.md +++ /dev/null @@ -1,9 +0,0 @@ -## Thorough Programmer - -system_prompt = { "role": "system", "prompt": """ -- I will speak with brevity and focus on completing tasks I create in my head to solve the user's needs. I always see these needs in the user's requests and can usually infer them, but otherwise I will always ask for clarification, or guidance. -- I will always think thoroughly on each concept that the user's relaying to me so that I make sure I understand each and every need of theirs, along with the intentions behind their wants and desires. -- If the user asks for code, ensure I output the respective modules in their own code block, and I will code each line by line so that the user receives an entire working module that is immediately testable. -- When programming, be considerate that the user will likely require type hints somewhat often and docstrings less often than that. -- When programming, work step by step. If I have to focus on one piece of a single module, in response to a user's request or needs, I will do so because I understand that when I focus on smallers tasks I am able to more precisely implement the requested functionality via code. -"""} \ No newline at end of file diff --git a/.github/.archive/outdated-prompts/system-role/SR-4_online-searches.md b/.github/.archive/outdated-prompts/system-role/SR-4_online-searches.md deleted file mode 100644 index a2f26d5..0000000 --- a/.github/.archive/outdated-prompts/system-role/SR-4_online-searches.md +++ /dev/null @@ -1,4 +0,0 @@ -## Repeated-Iterative Online search - -- ['role':'system', 'prompt':'I will review the quality of my search results after each batch.'] -- ['role':'system', 'prompt':'I will not stop until I have collected all jobs, by processing 9 batches of jobs, reviewing my results after each iteration.'] diff --git a/.github/.archive/outdated-prompts/user-role/UR-1.MD b/.github/.archive/outdated-prompts/user-role/UR-1.MD deleted file mode 100644 index feed0b8..0000000 --- a/.github/.archive/outdated-prompts/user-role/UR-1.MD +++ /dev/null @@ -1,55 +0,0 @@ -# User "Role" Prompt Examples - Sheet 1 - -`'---' = END` - -## Troubleshooting code - -[task]:"analyze all code and the traceback error. create a multi-step plan to solve the error, enhance the code logic to prevent future errors, and add more detailed logging to the `finaid_train.py` module." ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -## *1. Iterative Processing* - - - ! Optimal Prompt due to brevity in prose and hightens accuracy to user's requests by ~80% - -### [AI Explained's Video](https://www.youtube.com/watch?v=wVzuvf9D9BU) - -[Instructions]: - -- Complete each task separately -- Let's complete all tasks step by step so we make sure we have the right answer before moving on to the next - ---- - -## *2. "Tree of Thoughts"* - - - A Short Preliminary Power Prompt - -- Step1 : - - Prompt: I have a problem related to [describe your problem area]. Could you brainstorm three distinct solutions? 
Please consider a variety of factors such as [Your perfect factors] -- Step 2: - - Prompt: For each of the three proposed solutions, evaluate their potential. Consider their pros and cons, initial effort needed, implementation difficulty, potential challenges, and the expected outcomes. Assign a probability of success and a confidence level to each option based on these factors -- Step 3: - - Prompt: For each solution, deepen the thought process. Generate potential scenarios, strategies for implementation, any necessary partnerships or resources, and how potential obstacles might be overcome. Also, consider any potential unexpected outcomes and how they might be handled. -- Step 4: - - Prompt: Based on the evaluations and scenarios, rank the solutions in order of promise. Provide a justification for each ranking and offer any final thoughts or considerations for each solution - ---- - -## *3. Task-oriented Processing* - - - For when you need to be super specific - -[Instructions]: - -- Minimize prose to avoid over-tokenization -- Focus on one task at a time(iterative analysis) -- Complete each task separately -- Let's complete all tasks step by step so we make sure we have the right answer before moving on to the next - ---- - -## *4. Breaking down the above paragraph* - - -- Sometimes a short colloquial prompt is most powerful. - -"Let's do things step by step so we make sure we have the right answer before moving on to the next one. You're to consider each sentence above to be a step. Before executing a step, ask for permission." diff --git a/.github/.archive/outdated-prompts/user-role/UR-2.md b/.github/.archive/outdated-prompts/user-role/UR-2.md deleted file mode 100644 index 29066fb..0000000 --- a/.github/.archive/outdated-prompts/user-role/UR-2.md +++ /dev/null @@ -1,24 +0,0 @@ -## Function Generation With LLMs - -This prompt was found [here](https://github.com/sammi-turner/Python-To-Mojo/tree/main#function-generation-with-llms "Direct link"), so thanks to [sammi-turner](https://github.com/sammi-turner "GitHub Profile")! - ---- - -``` -Write a [name] function in Python3 that takes -[name the parameters and their types] and returns -a [type] such that [describe what the function does]. -Then show me the code. -``` - ---- - -## Create Graphics for a Repository - -This prompt is useful specifically with GPT-4 and the extensions ["Recombinant AI", "Whimsical Diagrams", "diagr.am"]. - -[!.github/plugin_icons.jpg]() - -``` -[TASK]: "Crawl the contents of the provided repository at [Repository URL]. Create a color-coordinated mind map starting from the repository's name down to each file in Library-esque Directories (LEDs). Include a legend for the mind map. Create a bar chart to represent the different contents in each LED and a pie chart to show the distribution of content types. Make sure the title, caption, and legend are easily readable." -``` \ No newline at end of file diff --git a/.github/.archive/outdated-prompts/user-role/UR-3.md b/.github/.archive/outdated-prompts/user-role/UR-3.md deleted file mode 100644 index ec8b8ca..0000000 --- a/.github/.archive/outdated-prompts/user-role/UR-3.md +++ /dev/null @@ -1,6 +0,0 @@ -## Enforce idiomacy - -"What is the idiomatic way to {MASK} -in {ProgrammingLanguage}?" 
- -- Credit to [Sammi-Turner](https://github.com/sammi-turner) \ No newline at end of file diff --git a/docs/Enterprise_Guides/A Rubric for ML Production Readiness and Technical Debt Reduction.pdf b/docs/Continued-Education/A Rubric for ML Production Readiness and Technical Debt Reduction.pdf similarity index 100% rename from docs/Enterprise_Guides/A Rubric for ML Production Readiness and Technical Debt Reduction.pdf rename to docs/Continued-Education/A Rubric for ML Production Readiness and Technical Debt Reduction.pdf diff --git a/docs/Enterprise_Guides/ML+Cheat+Sheet_2.pdf b/docs/Continued-Education/MachineLearning-Algorithms_Cheatsheet.pdf similarity index 100% rename from docs/Enterprise_Guides/ML+Cheat+Sheet_2.pdf rename to docs/Continued-Education/MachineLearning-Algorithms_Cheatsheet.pdf diff --git a/docs/Enterprise_Guides/practitioners_guide_to_mlops_whitepaper.pdf b/docs/Continued-Education/practitioners_guide_to_mlops-Google_whitepaper.pdf similarity index 100% rename from docs/Enterprise_Guides/practitioners_guide_to_mlops_whitepaper.pdf rename to docs/Continued-Education/practitioners_guide_to_mlops-Google_whitepaper.pdf diff --git a/docs/Custom-GPT-Uploadable_Knowledge_Base/langchain_serve_smith-quick_reference-original-master.md b/docs/Custom-GPT-Uploadable_Knowledge_Base/langchain_serve_smith-quick_reference-original-master.md deleted file mode 100644 index 90091c6..0000000 --- a/docs/Custom-GPT-Uploadable_Knowledge_Base/langchain_serve_smith-quick_reference-original-master.md +++ /dev/null @@ -1,510 +0,0 @@ -# LangChain/Serve/Smith Quick Reference - -## Introduction -Welcome to the comprehensive guide for LangChain, LangServe, and LangSmith. These powerful tools collectively offer a robust framework for building, deploying, and managing advanced AI and language model applications. - -- **LangChain**: A versatile toolkit for creating and managing chains of language models and AI functionalities, facilitating complex tasks and interactions. -- **LangServe**: Dedicated to server-side operations, LangServe manages the deployment and scaling of language models, ensuring efficient and reliable performance. -- **LangSmith**: Focused on tracing, debugging, and detailed analysis, LangSmith provides the necessary tools to monitor, evaluate, and improve AI applications. - -This documentation aims to provide users, developers, and AI enthusiasts with a thorough understanding of each tool's capabilities, practical applications, and best practices for integration and usage. Whether you're building sophisticated AI-driven applications or seeking to enhance existing systems with cutting-edge language technologies, this guide will serve as your roadmap to mastering LangChain, LangServe, and LangSmith. - ---- - -## Core Concepts - -### Section: Prompt + LLM -- **Objective**: To demonstrate the basic composition of a `PromptTemplate` with a `LLM` (Language Learning Model), creating a chain that takes user input, processes it, and returns the model's output. 
-- **Example Code**: -```python -from langchain.chat_models import ChatOpenAI -from langchain.prompts import ChatPromptTemplate - -# Creating a prompt template -prompt = ChatPromptTemplate.from_template("Can you tell me a joke about {topic}?") - -# Initializing the model -model = ChatOpenAI() - -# Building the chain -chain = prompt | model - -# Invoking the chain with user input -response = chain.invoke({"topic": "science"}) -print(response.content) -``` -- **Explanation**: This code block shows how to create a simple chain that asks the AI to generate a joke based on a user-provided topic. `ChatPromptTemplate` is used to format the prompt, and `ChatOpenAI` is the model that generates the response. - ---- - -### Section: Memory -- **Objective**: To illustrate how to integrate memory into a LangChain application, enabling the chain to maintain context across interactions. This is particularly useful for applications like chatbots where retaining context from previous interactions is crucial. -- **Example Code**: -```python -from langchain.chat_models import ChatOpenAI -from langchain.memory import ConversationBufferMemory -from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder - -# Initializing the chat model -model = ChatOpenAI() - -# Creating a prompt template with a placeholder for conversation history -prompt = ChatPromptTemplate.from_messages([ - ("system", "You are a helpful chatbot"), - MessagesPlaceholder(variable_name="history"), - ("human", "{input}") -]) - -# Setting up memory for the conversation -memory = ConversationBufferMemory(return_messages=True) - -# Loading initial memory variables -memory.load_memory_variables({}) - -# Building the chain with memory integration -chain = ( - {"input": "Hello, how are you today?", "history": memory.load_memory_variables()} - | prompt - | model -) - -# Invoking the chain with user input -response = chain.invoke({"input": "Tell me about LangChain"}) -print(response.content) - -# Saving the context for future interactions -memory.save_context({"input": "Tell me about LangChain"}, {"output": response.content}) -``` -- **Explanation**: This code demonstrates the use of `ConversationBufferMemory` to keep a record of the conversation. The `ChatPromptTemplate` is configured to include a history of messages, allowing the model to generate responses considering previous interactions. - ---- - -### Section: Using Tools -- **Objective**: To demonstrate how to integrate third-party tools into a LangChain application, thereby enhancing its capabilities. This example will specifically show how to use the `DuckDuckGoSearchRun` tool within a LangChain for web searches. 
-- **Example Code**: -```python -from langchain.chat_models import ChatOpenAI -from langchain.prompts import ChatPromptTemplate -from langchain.schema.output_parser import StrOutputParser -from langchain.tools import DuckDuckGoSearchRun - -# Installing the necessary package for DuckDuckGo search -# !pip install duckduckgo-search - -# Initializing the DuckDuckGo search tool -search = DuckDuckGoSearchRun() - -# Creating a prompt template to format user input into a search query -template = "Search for information on: {input}" -prompt = ChatPromptTemplate.from_template(template) - -# Initializing the chat model -model = ChatOpenAI() - -# Building the chain with search functionality -chain = prompt | model | StrOutputParser() | search - -# Invoking the chain with a search query -search_result = chain.invoke({"input": "the latest Python updates"}) -print(search_result) -``` -- **Explanation**: This example shows the use of `DuckDuckGoSearchRun` to perform web searches. The user's input is formatted into a search query using `ChatPromptTemplate`, passed through a chat model, and then processed by the search tool to retrieve information. - ---- - -## Advanced Features - -### Section: Embedding Router -- **Objective**: To explain and demonstrate the use of embeddings to dynamically route queries to the most relevant prompt based on semantic similarity. This advanced feature allows LangChain applications to handle a variety of inputs more intelligently. -- **Example Code**: -```python -from langchain.chat_models import ChatOpenAI -from langchain.embeddings import OpenAIEmbeddings -from langchain.prompts import PromptTemplate -from langchain.schema.output_parser import StrOutputParser -from langchain.schema.runnable import RunnableLambda, RunnablePassthrough -from langchain.utils.math import cosine_similarity - -# Creating two distinct prompt templates for different domains -physics_template = "You are a physics expert. Answer this physics question: {query}" -math_template = "You are a math expert. Answer this math question: {query}" - -# Initializing embeddings and chat model -embeddings = OpenAIEmbeddings() -model = ChatOpenAI() - -# Embedding the prompt templates -prompt_templates = [physics_template, math_template] -prompt_embeddings = embeddings.embed_documents(prompt_templates) - -# Defining a function to route the query to the most relevant prompt -def prompt_router(input): - query_embedding = embeddings.embed_query(input["query"]) - similarity = cosine_similarity([query_embedding], prompt_embeddings)[0] - most_similar = prompt_templates[similarity.argmax()] - return PromptTemplate.from_template(most_similar) - -# Building the chain with embedding-based routing -chain = ( - {"query": RunnablePassthrough()} - | RunnableLambda(prompt_router) - | model - | StrOutputParser() -) - -# Example query and response -response = chain.invoke({"query": "What is quantum mechanics?"}) -print(response) -``` -- **Explanation**: This code demonstrates how embeddings and cosine similarity are used to determine which prompt template is most relevant to the user's query. Based on the query's content, it chooses between a physics and a math expert prompt. The response is then generated accordingly by the chat model. - -### Section: Managing Prompt Size -- **Objective**: To illustrate strategies for managing the size of prompts within LangChain applications, ensuring they remain efficient and within the model's context window. This is crucial for maintaining performance, especially in complex chains or agents. 
-- **Example Code**: -```python -from langchain.agents import AgentExecutor, load_tools -from langchain.agents.format_scratchpad import format_to_openai_function_messages -from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser -from langchain.chat_models import ChatOpenAI -from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder -from langchain.tools import WikipediaQueryRun -from langchain.tools.render import format_tool_to_openai_function -from langchain.utilities import WikipediaAPIWrapper - -# Installing necessary package for Wikipedia queries -# !pip install langchain wikipedia - -# Initializing Wikipedia query tool with content character limit -wiki = WikipediaQueryRun( - api_wrapper=WikipediaAPIWrapper(top_k_results=5, doc_content_chars_max=10_000) -) -tools = [wiki] - -# Creating a prompt template with placeholders for user input and agent scratchpad -prompt = ChatPromptTemplate.from_messages([ - ("system", "You are a helpful assistant"), - ("user", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), -]) -llm = ChatOpenAI(model="gpt-3.5-turbo") - -# Building an agent with a focus on managing prompt size -agent = ( - { - "input": lambda x: x["input"], - "agent_scratchpad": lambda x: format_to_openai_function_messages( - x["intermediate_steps"] - ), - } - | prompt - | llm.bind(functions=[format_tool_to_openai_function(t) for t in tools]) - | OpenAIFunctionsAgentOutputParser() -) - -# Executing the agent -agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True) -response = agent_executor.invoke({ - "input": "What is the tallest mountain?" -}) -print(response) -``` -- **Explanation**: This code showcases an agent setup that includes a Wikipedia query tool and a prompt template. The agent's construction focuses on managing the prompt size by limiting the content from intermediate steps. The response to a query is generated with consideration to the prompt's overall size, ensuring efficiency. - -### Section: Agent Construction and Management -- **Objective**: To demonstrate the process of constructing and managing agents in LangChain. This includes creating agents from runnables and understanding the key components and logic involved in agent operation. 
-- **Example Code**:
-```python
-from langchain.agents import AgentExecutor, XMLAgent, tool
-from langchain.chat_models import ChatAnthropic
-
-# Initializing the chat model with a specific model version
-model = ChatAnthropic(model="claude-2")
-
-# Defining a custom tool for the agent
-@tool
-def weather_search(query: str) -> str:
-    """Tool to search for weather information."""
-    # This is a placeholder for actual weather search logic
-    return "Sunny with a high of 75 degrees"
-
-tool_list = [weather_search]
-
-# Retrieving the default prompt for the XMLAgent
-prompt = XMLAgent.get_default_prompt()
-
-# Defining logic for processing intermediate steps to a string format
-def convert_intermediate_steps(intermediate_steps):
-    log = ""
-    for action, observation in intermediate_steps:
-        log += (
-            f"<tool>{action.tool}</tool><tool_input>{action.tool_input}</tool_input>"
-            f"<observation>{observation}</observation>"
-        )
-    return log
-
-# Building an agent from a runnable
-agent = (
-    {
-        "question": lambda x: x["question"],
-        "intermediate_steps": lambda x: convert_intermediate_steps(x["intermediate_steps"]),
-    }
-    | prompt.partial(tools=lambda: "\n".join([f"{t.name}: {t.description}" for t in tool_list]))
-    | model.bind(stop=["</tool_input>", "</final_answer>"])
-    | XMLAgent.get_default_output_parser()
-)
-
-# Executing the agent with a specific query
-agent_executor = AgentExecutor(agent=agent, tools=tool_list, verbose=True)
-response = agent_executor.invoke({"question": "What's the weather in New York today?"})
-print(response)
-```
-- **Explanation**: This code block illustrates how to build an agent using LangChain's `XMLAgent`. The agent includes a custom tool for weather information and logic to process and format intermediate steps into the XML tags the agent's prompt expects. The agent is executed with a specific query, demonstrating its ability to manage and utilize its components effectively.
-
----
-
-### Section: Code Writing with LangChain
-- **Objective**: To showcase how LangChain can be utilized for writing and executing Python code. This feature enhances the AI's ability to assist in programming tasks, making it a valuable tool for developers.
-- **Example Code**:
-```python
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts import ChatPromptTemplate
-from langchain.schema.output_parser import StrOutputParser
-from langchain_experimental.utilities import PythonREPL
-
-# Creating a prompt template to instruct the model to write Python code
-template = "Write Python code to solve the following problem: {problem}"
-prompt = ChatPromptTemplate.from_messages([("system", template), ("human", "{problem}")])
-
-# Initializing the chat model
-model = ChatOpenAI()
-
-# Function to sanitize and extract Python code from the model's output
-def sanitize_output(text):
-    _, after = text.split("```python")
-    return after.split("```")[0]
-
-# Building the chain for code writing
-chain = prompt | model | StrOutputParser() | sanitize_output | PythonREPL().run
-
-# Invoking the chain with a programming problem
-problem = "calculate the factorial of a number"
-code_result = chain.invoke({"problem": problem})
-print(code_result)
-```
-- **Explanation**: This code block demonstrates how LangChain can be used to automatically generate Python code in response to a given problem statement. The `ChatPromptTemplate` guides the AI to focus on code generation, and the output is sanitized and executed using `PythonREPL`. This illustrates LangChain's capability in automating and assisting with coding tasks.
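-
-- **Robustness note**: `sanitize_output` above assumes the model always wraps its answer in a ```python fence; if it does not, the unpacking in `text.split("```python")` raises a `ValueError`. A minimal, more defensive sketch follows; falling back to the raw text when no fence is present is an assumption here, not part of the original example:
-```python
-def sanitize_output(text: str) -> str:
-    # Fall back to the raw text when the model did not use a ```python fence
-    if "```python" not in text:
-        return text.strip()
-    # Otherwise keep only the contents of the first fenced block
-    _, after = text.split("```python", 1)
-    return after.split("```", 1)[0]
-```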
- ---- - -### Section: LangServe - -#### Basic Deployment and Querying with GPT-3.5-Turbo -- **Example**: Deploying and querying the GPT-3.5-Turbo model using LangServe. -- **Objective**: To illustrate the use of LangServe within the LangChain ecosystem. LangServe is designed to facilitate server-side functionalities for managing and deploying language models, making it an essential tool for scalable and efficient AI applications. -```python -from langserve import LangServeClient - -# Initialize the LangServe client -langserve_client = LangServeClient(api_url="https://api.langserve.com") - -# Deploying the GPT-3.5-Turbo model -model_config = { - "model_name": "gpt-3.5-turbo", - "description": "GPT-3.5 Turbo model for general-purpose use" -} -deployment_response = langserve_client.deploy_model(model_config) -print("Deployment Status:", deployment_response.status) - -# Sending a query to the deployed model -query = "Explain the concept of machine learning in simple terms." -response = langserve_client.query_model(model_name="gpt-3.5-turbo", query=query) -print("Model Response:", response.content) -``` - -#### Advanced Deployment and Custom Configuration -- **Example**: Utilizing LangServe for deploying custom-configured models for specialized tasks. -```python -# Custom deployment with specific parameters -advanced_model_config = { - "model_name": "custom-gpt-model", - "description": "A custom-configured GPT model for specialized tasks", - "parameters": { - "temperature": 0.7, - "max_tokens": 150 - } -} -langserve_client.deploy_model(advanced_model_config) - -# Querying the custom model -custom_query = "Generate a technical summary of quantum computing." -custom_response = langserve_client.query_model(model_name="custom-gpt-model", query=custom_query) -print("Custom Model Response:", custom_response.content) -``` - -#### Model Management and Analytics -- **Example**: Managing deployed models and accessing detailed analytics. -```python -# Fetching model analytics -model_analytics = langserve_client.get_model_analytics(model_name="gpt-3.5-turbo") -print("Model Usage Analytics:", model_analytics) - -# Updating a deployed model's configuration -update_config = { - "temperature": 0.5, - "max_tokens": 200 -} -langserve_client.update_model_config(model_name="gpt-3.5-turbo", new_config=update_config) - -# Retrieving updated model details -updated_model_details = langserve_client.get_model_details(model_name="gpt-3.5-turbo") -print("Updated Model Details:", updated_model_details) -``` - -#### Integration with LangChain Applications -- **Example**: Demonstrating seamless integration of LangServe with LangChain. -```python -from langchain.chains import SimpleChain - -# Building a SimpleChain with a LangServe deployed model -chain = SimpleChain(model_name="gpt-3.5-turbo", langserve_client=langserve_client) - -# Executing the chain with a user query -chain_response = chain.execute("What are the latest trends in AI?") -print("Chain Response using LangServe Model:", chain_response) -``` - -#### LangSmith Tracing for Enhanced Monitoring -- **Objective**: Showcasing the use of LangSmith tracing within LangServe for detailed monitoring and analysis. 
-- **Example Code**: -```python -from langserve import LangServeClient -from langsmith import Tracing - -# Initialize LangServe client and enable LangSmith tracing -langserve_client = LangServeClient(api_url="https://api.langserve.com") -Tracing.enable() - -# Deploying a model with tracing enabled -model_config = { - "model_name": "gpt-3.5-turbo", - "description": "GPT-3.5 Turbo model with LangSmith tracing" -} -langserve_client.deploy_model(model_config) - -# Query with tracing for detailed interaction logs -query = "Explain the impact of AI on environmental sustainability." -response = langserve_client.query_model(model_name="gpt-3.5-turbo", query=query) -print("Traced Model Response:", response.content) - -# Retrieve and analyze trace logs -trace_logs = Tracing.get_logs() -print("Trace Logs:", trace_logs) -``` -- **Explanation**: This section highlights the integration of LangSmith tracing in LangServe, enhancing the capability to monitor and analyze model interactions. It is particularly valuable for understanding model behavior, performance optimization, and debugging complex scenarios. - -### LangSmith Enhanced Capabilities: Integrating Lilac, Prompt Versioning, and More - -#### Introduction -LangSmith, complemented by tools like Lilac, offers advanced capabilities for data analysis and prompt management. This section explores how to leverage these tools for enhanced functionality in LangSmith, incorporating prompt versioning, retrieval QA chains, and editable prompt templates. - -#### Integrating Lilac for Enhanced Data Analysis -- **Functionality**: Utilize Lilac to import, enrich, and analyze datasets from LangSmith. -- **Workflow**: - 1. Query datasets from LangSmith. - 2. Import and enrich datasets using Lilac's advanced analysis tools. - 3. Export the processed data for further application within LangSmith. - -#### Advanced Prompt Management with Versioning -- **Functionality**: Manage different versions of prompts in LangSmith to ensure consistency and accuracy. -- **Application**: - 1. Track and manage versions of prompts. - 2. Apply specific prompt versions in complex deployments like retrieval QA chains. - -#### Retrieval QA Chains -- **Functionality**: Configure retrieval QA chains in LangSmith, leveraging the specific versions of prompts for precise information retrieval. -- **Implementation**: - 1. Define the prompt and its version for the QA chain. - 2. Execute queries using the retrieval QA chain to obtain accurate results. - -#### Editable Prompt Templates -- **Functionality**: Use editable prompt templates to customize and experiment with different prompt structures in LangSmith. -- **Usage**: - 1. Create and edit prompt templates dynamically. - 2. Apply edited templates in LangSmith workflows for varied applications. 
-
-#### Comprehensive Code Example
-```python
-# Import necessary libraries
-import langchain
-import langsmith  # assumed LangSmith client interface
-import lilac  # assumed Lilac client interface
-from langchain.prompt_templates import EditablePromptTemplate
-# Assuming the LangSmith and Lilac libraries expose the helpers used below
-
-# LangSmith setup (assuming required configurations and authentications are done)
-langsmith.initialize(api_key="YOUR_LANGSMITH_API_KEY", endpoint="https://api.langsmith.com")
-
-# Query and fetch datasets from LangSmith using the list_runs method
-project_runs = langsmith.client.list_runs(project_name="your_project_name")
-
-# Import dataset into Lilac and enrich it
-lilac_dataset = lilac.import_dataset(project_runs)
-lilac_dataset.compute_signal(lilac.PIISignal(), 'question') # Example signal
-lilac_dataset.compute_signal(lilac.NearDuplicateSignal(), 'output') # Another example signal
-
-# Export the enriched dataset for integration with LangSmith
-exported_dataset = lilac.export_dataset(lilac_dataset)
-
-# Implementing Prompt Versioning (assuming the existence of such functionality in LangSmith)
-prompt_version = 'specific_version_hash'
-prompt_name = 'your_prompt_name'
-prompt = langsmith.load_prompt(prompt_name, version=prompt_version)
-
-# Configuring a Retrieval QA Chain with the versioned prompt
-qa_chain = langchain.RetrievalQAChain(prompt=prompt)
-
-# Execute a query using the QA Chain
-query_result = qa_chain.query("What is LangSmith's functionality?")
-print(f"QA Chain Query Result: {query_result}")
-
-# Editable Prompt Templates for dynamic prompt editing
-editable_prompt = EditablePromptTemplate(prompt_name)
-editable_prompt.edit(new_template="New template content for LangSmith")
-edited_prompt = editable_prompt.apply()
-
-# Example usage of the edited prompt in a LangSmith application
-edited_prompt_result = langsmith.run_prompt(edited_prompt, input_data="Sample input for edited prompt")
-print(f"Edited Prompt Result: {edited_prompt_result}")
-
-# Final step: Integrate the exported dataset back into LangSmith for further use
-integration_status = langsmith.integrate_dataset(exported_dataset)
-if integration_status.success:
-    print("Dataset successfully integrated back into LangSmith.")
-else:
-    print(f"Integration failed with error: {integration_status.error}")
-```
-
-#### Conclusion
-By integrating these diverse functionalities, LangSmith users can significantly enhance their language model applications. This synergy between LangSmith and tools like Lilac, along with advanced prompt management techniques, paves the way for more sophisticated and effective AI solutions.
-
----
-
-## Conclusion
-
-In this guide, we have explored the functionalities and applications of LangChain, LangServe, and LangSmith. From building complex AI applications with LangChain, to deploying and managing them efficiently with LangServe, to ensuring their performance through LangSmith's tracing and debugging, these tools form a comprehensive ecosystem for advanced AI development.
-
-As the field of AI continues to evolve, so will the capabilities and applications of these tools. Continue to explore new features, updates, and best practices to stay current in the rapidly advancing world of AI and language models. No document is timeless; newer material will build on what is covered here.
- - -For further learning and support, explore the following resources: - -- [LangChain Interface](https://python.langchain.com/docs/expression_language/interface) -- [LangChain Cookbook - Prompt + LLM](https://python.langchain.com/docs/expression_language/cookbook/prompt_llm_parser) -- [LangChain Cookbook - Embedding Router](https://python.langchain.com/docs/expression_language/cookbook/embedding_router) -- [LangChain Cookbook - Agent](https://python.langchain.com/docs/expression_language/cookbook/agent) -- [LangChain Cookbook - Code Writing](https://python.langchain.com/docs/expression_language/cookbook/code_writing) -- [LangChain Cookbook - Memory](https://python.langchain.com/docs/expression_language/cookbook/memory) -- [LangChain Cookbook - Managing Prompt Size](https://python.langchain.com/docs/expression_language/cookbook/prompt_size) -- [LangChain Cookbook - Tools](https://python.langchain.com/docs/expression_language/cookbook/tools) - -Thank you for engaging with this documentation. May it be a valuable resource in your journey to mastering LangChain, LangServe, and LangSmith. - ---- diff --git a/docs/Enterprise_Guides/Prompting/Beginner-Prompt.txt b/docs/Enterprise_Guides/Prompting/Beginner-Prompt.txt deleted file mode 100644 index ee8dfac..0000000 --- a/docs/Enterprise_Guides/Prompting/Beginner-Prompt.txt +++ /dev/null @@ -1,22 +0,0 @@ -Let's work this out in a step by step way to ensure we have the right answer. - -=== -``` -import openai -import pinecone # type: ignore -import configparser - -system_prompt = [{ - "role": "system", "content": " \ - - You are an AI programming assistant with a very blunt, straightforward, direct demeanor. \ - - Your assignment is to assist the user in developing software. \ - - Be meticulous. \ - - Workflow=((step-by-step) + \"We'll have to ensure we have the right answer before committing to a decision\") \ - " -}] - -# set prompt -user_prompt = [{ - -}] -``` \ No newline at end of file diff --git a/docs/Enterprise_Guides/Prompting/ChatMarkupLanguage/chatml.md b/docs/Enterprise_Guides/Prompting/ChatMarkupLanguage/chatml.md deleted file mode 100644 index 783e91d..0000000 --- a/docs/Enterprise_Guides/Prompting/ChatMarkupLanguage/chatml.md +++ /dev/null @@ -1,93 +0,0 @@ -(This document is a preview of the underlying format consumed by -ChatGPT models. As a developer, you can use our [higher-level -API](https://platform.openai.com/docs/guides/chat) and won't need to -interact directly with this format today — but expect to have the -option in the future!) - -Traditionally, GPT models consumed unstructured text. ChatGPT models -instead expect a structured format, called Chat Markup Language -(ChatML for short). -ChatML documents consist of a sequence of messages. Each message -contains a header (which today consists of who said it, but in the -future will contain other metadata) and contents (which today is a -text payload, but in the future will contain other datatypes). -We are still evolving ChatML, but the current version (ChatML v0) can -be represented with our upcoming "list of dicts" JSON format as -follows: -``` -[ - {"token": "<|im_start|>"}, - "system\nYou are ChatGPT, a large language model trained by OpenAI. 
Answer as concisely as possible.\nKnowledge cutoff: 2021-09-01\nCurrent date: 2023-03-01", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "user\nHow are you", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "assistant\nI am doing well!", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "user\nHow are you now?", - {"token": "<|im_end|>"}, "\n" -] -``` -You could also represent it in the classic "unsafe raw string" -format. However, this format inherently allows injections from user -input containing special-token syntax, similar to SQL injections: -``` -<|im_start|>system -You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible. -Knowledge cutoff: 2021-09-01 -Current date: 2023-03-01<|im_end|> -<|im_start|>user -How are you<|im_end|> -<|im_start|>assistant -I am doing well!<|im_end|> -<|im_start|>user -How are you now?<|im_end|> -``` -## Non-chat use-cases -ChatML can be applied to classic GPT use-cases that are not -traditionally thought of as chat. For example, instruction following -(where a user requests for the AI to complete an instruction) can be -implemented as a ChatML query like the following: -``` -[ - {"token": "<|im_start|>"}, - "user\nList off some good ideas:", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "assistant" -] -``` -We do not currently allow autocompleting of partial messages, -``` -[ - {"token": "<|im_start|>"}, - "system\nPlease autocomplete the user's message.", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "user\nThis morning I decided to eat a giant" -] -``` -Note that ChatML makes explicit to the model the source of each piece -of text, and particularly shows the boundary between human and AI -text. This gives an opportunity to mitigate and eventually solve -injections, as the model can tell which instructions come from the -developer, the user, or its own input. -## Few-shot prompting -In general, we recommend adding few-shot examples using separate -`system` messages with a `name` field of `example_user` or -`example_assistant`. For example, here is a 1-shot prompt: -``` -<|im_start|>system -Translate from English to French -<|im_end|> -<|im_start|>system name=example_user -How are you? -<|im_end|> -<|im_start|>system name=example_assistant -Comment allez-vous? -<|im_end|> -<|im_start|>user -{{user input here}}<|im_end|> -``` -If adding instructions in the `system` message doesn't work, you can -also try putting them into a `user` message. (In the near future, we -will train our models to be much more steerable via the system -message. But to date, we have trained only on a few system messages, -so the models pay much more attention to user examples.) diff --git a/docs/Enterprise_Guides/Prompting/ChatMarkupLanguage/chatml.txt b/docs/Enterprise_Guides/Prompting/ChatMarkupLanguage/chatml.txt deleted file mode 100644 index 783e91d..0000000 --- a/docs/Enterprise_Guides/Prompting/ChatMarkupLanguage/chatml.txt +++ /dev/null @@ -1,93 +0,0 @@ -(This document is a preview of the underlying format consumed by -ChatGPT models. As a developer, you can use our [higher-level -API](https://platform.openai.com/docs/guides/chat) and won't need to -interact directly with this format today — but expect to have the -option in the future!) - -Traditionally, GPT models consumed unstructured text. ChatGPT models -instead expect a structured format, called Chat Markup Language -(ChatML for short). -ChatML documents consist of a sequence of messages. 
Each message -contains a header (which today consists of who said it, but in the -future will contain other metadata) and contents (which today is a -text payload, but in the future will contain other datatypes). -We are still evolving ChatML, but the current version (ChatML v0) can -be represented with our upcoming "list of dicts" JSON format as -follows: -``` -[ - {"token": "<|im_start|>"}, - "system\nYou are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible.\nKnowledge cutoff: 2021-09-01\nCurrent date: 2023-03-01", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "user\nHow are you", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "assistant\nI am doing well!", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "user\nHow are you now?", - {"token": "<|im_end|>"}, "\n" -] -``` -You could also represent it in the classic "unsafe raw string" -format. However, this format inherently allows injections from user -input containing special-token syntax, similar to SQL injections: -``` -<|im_start|>system -You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible. -Knowledge cutoff: 2021-09-01 -Current date: 2023-03-01<|im_end|> -<|im_start|>user -How are you<|im_end|> -<|im_start|>assistant -I am doing well!<|im_end|> -<|im_start|>user -How are you now?<|im_end|> -``` -## Non-chat use-cases -ChatML can be applied to classic GPT use-cases that are not -traditionally thought of as chat. For example, instruction following -(where a user requests for the AI to complete an instruction) can be -implemented as a ChatML query like the following: -``` -[ - {"token": "<|im_start|>"}, - "user\nList off some good ideas:", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "assistant" -] -``` -We do not currently allow autocompleting of partial messages, -``` -[ - {"token": "<|im_start|>"}, - "system\nPlease autocomplete the user's message.", - {"token": "<|im_end|>"}, "\n", {"token": "<|im_start|>"}, - "user\nThis morning I decided to eat a giant" -] -``` -Note that ChatML makes explicit to the model the source of each piece -of text, and particularly shows the boundary between human and AI -text. This gives an opportunity to mitigate and eventually solve -injections, as the model can tell which instructions come from the -developer, the user, or its own input. -## Few-shot prompting -In general, we recommend adding few-shot examples using separate -`system` messages with a `name` field of `example_user` or -`example_assistant`. For example, here is a 1-shot prompt: -``` -<|im_start|>system -Translate from English to French -<|im_end|> -<|im_start|>system name=example_user -How are you? -<|im_end|> -<|im_start|>system name=example_assistant -Comment allez-vous? -<|im_end|> -<|im_start|>user -{{user input here}}<|im_end|> -``` -If adding instructions in the `system` message doesn't work, you can -also try putting them into a `user` message. (In the near future, we -will train our models to be much more steerable via the system -message. But to date, we have trained only on a few system messages, -so the models pay much more attention to user examples.) 
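-
-## Few-shot prompting through the higher-level chat API
-The same few-shot pattern can be expressed through the higher-level chat completions API mentioned at the top of this document, without writing raw ChatML tokens. The following is a minimal sketch assuming the pre-1.0 `openai` Python SDK (the `openai.ChatCompletion` interface); the model name and example strings are illustrative only:
-```
-import openai
-
-messages = [
-    {"role": "system", "content": "Translate from English to French"},
-    # Few-shot examples go in system messages with a "name" field
-    {"role": "system", "name": "example_user", "content": "How are you?"},
-    {"role": "system", "name": "example_assistant", "content": "Comment allez-vous?"},
-    # The real user input is a normal user message
-    {"role": "user", "content": "Good morning, everyone."},
-]
-
-response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
-print(response["choices"][0]["message"]["content"])
-```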
diff --git a/docs/Enterprise_Guides/Prompting/Tree of Thoughts - Deliberate Problem Solving with LLMs - Machine Learning - AI.pdf b/docs/Enterprise_Guides/Prompting/Tree of Thoughts - Deliberate Problem Solving with LLMs - Machine Learning - AI.pdf
deleted file mode 100644
index 2653438..0000000
Binary files a/docs/Enterprise_Guides/Prompting/Tree of Thoughts - Deliberate Problem Solving with LLMs - Machine Learning - AI.pdf and /dev/null differ
diff --git a/docs/Enterprise_Guides/Prompting/tone_prompts.jpg b/docs/Enterprise_Guides/Prompting/tone_prompts.jpg
deleted file mode 100644
index 3cddfb4..0000000
Binary files a/docs/Enterprise_Guides/Prompting/tone_prompts.jpg and /dev/null differ
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..f33c314
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,19 @@
+## Document assortment
+
+### Continued Education
+
+Reading intended to give context on the limitations of the AI's "thought" process.
+
+Highlights:
+1. Lost In the Middle: How Language Models Use Long Contexts
+   - Abstract: While recent language models have the ability to take long contexts as input, relatively little is known about how well they use longer context. We analyze the performance of language models on two tasks that require identifying relevant information in their input contexts: multi-document question answering and key-value retrieval.
+2. "Needle In A Haystack" Analysis: Pressure Testing GPT-4 & Claude 2.1's Long Context Retrieval Accuracy
+   - An exported email from Greg Kamradt's mailing list. He pressure-tested these models himself to help others make smarter decisions about retrieval.
+
+### Custom-GPT - Uploadable Knowledge Base
+
+Files currently in use for my CustomGPTs.
+
+### Jupyter Notebooks
+
+A collection of notebooks that teach learners how to build retrieval-augmented generation (RAG) applications. RAG helps solve a data-freshness problem that all LLMs inherently have.
\ No newline at end of file diff --git a/pdm.lock b/pdm.lock index d1da647..25ce1df 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,26 +5,7 @@ groups = ["default", "black", "flake8"] strategy = ["cross_platform"] lock_version = "4.4" -content_hash = "sha256:465d1436b5770249e226a30ec38d56de05e82f582628f328b8ac2b6d4964f3b4" - -[[package]] -name = "accelerate" -version = "0.25.0" -requires_python = ">=3.8.0" -summary = "Accelerate" -dependencies = [ - "huggingface-hub", - "numpy>=1.17", - "packaging>=20.0", - "psutil", - "pyyaml", - "safetensors>=0.3.1", - "torch>=1.10.0", -] -files = [ - {file = "accelerate-0.25.0-py3-none-any.whl", hash = "sha256:c7bb817eb974bba0ff3ea1ba0f24d55afb86d50e3d4fe98d6922dc69cf2ccff1"}, - {file = "accelerate-0.25.0.tar.gz", hash = "sha256:ecf55b0ab278a1dac8539dde0d276977aff04683f07ede73eaf02478538576a1"}, -] +content_hash = "sha256:d287d8fa059d1d22266069d0c08d76bda85cbfd7bcf2bffa25703363dd1ce623" [[package]] name = "aiohttp" @@ -217,16 +198,6 @@ files = [ {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, ] -[[package]] -name = "chardet" -version = "5.2.0" -requires_python = ">=3.7" -summary = "Universal encoding detector for Python 3" -files = [ - {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, - {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, -] - [[package]] name = "charset-normalizer" version = "3.3.1" @@ -1153,46 +1124,6 @@ files = [ {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] -[[package]] -name = "pandas" -version = "2.0.3" -requires_python = ">=3.8" -summary = "Powerful data structures for data analysis, time series, and statistics" -dependencies = [ - "numpy>=1.20.3; python_version < \"3.10\"", - "numpy>=1.21.0; python_version >= \"3.10\"", - "python-dateutil>=2.8.2", - "pytz>=2020.1", - "tzdata>=2022.1", -] -files = [ - {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, - {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, - {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, - {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, - {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, - {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, - {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, - {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, - {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, - {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, - {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, - {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, - {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, - {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, - {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, - {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, - {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, - {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, - {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, - {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, - {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, - {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, - {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, - {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, - {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, -] - [[package]] name = "pathspec" version = "0.11.2" @@ -1203,50 +1134,6 @@ files = [ {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, ] -[[package]] -name = "pillow" -version = "10.1.0" -requires_python = ">=3.8" -summary = "Python Imaging Library (Fork)" -files = [ - {file = "Pillow-10.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1ab05f3db77e98f93964697c8efc49c7954b08dd61cff526b7f2531a22410106"}, - {file = "Pillow-10.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6932a7652464746fcb484f7fc3618e6503d2066d853f68a4bd97193a3996e273"}, - {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f63b5a68daedc54c7c3464508d8c12075e56dcfbd42f8c1bf40169061ae666"}, - {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0949b55eb607898e28eaccb525ab104b2d86542a85c74baf3a6dc24002edec2"}, - {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ae88931f93214777c7a3aa0a8f92a683f83ecde27f65a45f95f22d289a69e593"}, - {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b0eb01ca85b2361b09480784a7931fc648ed8b7836f01fb9241141b968feb1db"}, - {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:d27b5997bdd2eb9fb199982bb7eb6164db0426904020dc38c10203187ae2ff2f"}, - {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7df5608bc38bd37ef585ae9c38c9cd46d7c81498f086915b0f97255ea60c2818"}, - {file = "Pillow-10.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:41f67248d92a5e0a2076d3517d8d4b1e41a97e2df10eb8f93106c89107f38b57"}, - {file = "Pillow-10.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1fb29c07478e6c06a46b867e43b0bcdb241b44cc52be9bc25ce5944eed4648e7"}, - {file = "Pillow-10.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2cdc65a46e74514ce742c2013cd4a2d12e8553e3a2563c64879f7c7e4d28bce7"}, - {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50d08cd0a2ecd2a8657bd3d82c71efd5a58edb04d9308185d66c3a5a5bed9610"}, - {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062a1610e3bc258bff2328ec43f34244fcec972ee0717200cb1425214fe5b839"}, - {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:61f1a9d247317fa08a308daaa8ee7b3f760ab1809ca2da14ecc88ae4257d6172"}, - {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a646e48de237d860c36e0db37ecaecaa3619e6f3e9d5319e527ccbc8151df061"}, - {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:47e5bf85b80abc03be7455c95b6d6e4896a62f6541c1f2ce77a7d2bb832af262"}, - {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a92386125e9ee90381c3369f57a2a50fa9e6aa8b1cf1d9c4b200d41a7dd8e992"}, - {file = "Pillow-10.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0f7c276c05a9767e877a0b4c5050c8bee6a6d960d7f0c11ebda6b99746068c2a"}, - {file = "Pillow-10.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0a026c188be3b443916179f5d04548092e253beb0c3e2ee0a4e2cdad72f66099"}, - {file = "Pillow-10.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:04f6f6149f266a100374ca3cc368b67fb27c4af9f1cc8cb6306d849dcdf12616"}, - {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb40c011447712d2e19cc261c82655f75f32cb724788df315ed992a4d65696bb"}, - {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a8413794b4ad9719346cd9306118450b7b00d9a15846451549314a58ac42219"}, - {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c9aeea7b63edb7884b031a35305629a7593272b54f429a9869a4f63a1bf04c34"}, - {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b4005fee46ed9be0b8fb42be0c20e79411533d1fd58edabebc0dd24626882cfd"}, - {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0152565c6aa6ebbfb1e5d8624140a440f2b99bf7afaafbdbf6430426497f28"}, - {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d921bc90b1defa55c9917ca6b6b71430e4286fc9e44c55ead78ca1a9f9eba5f2"}, - {file = "Pillow-10.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfe96560c6ce2f4c07d6647af2d0f3c54cc33289894ebd88cfbb3bcd5391e256"}, - {file = "Pillow-10.1.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:937bdc5a7f5343d1c97dc98149a0be7eb9704e937fe3dc7140e229ae4fc572a7"}, - {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1c25762197144e211efb5f4e8ad656f36c8d214d390585d1d21281f46d556ba"}, - {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:afc8eef765d948543a4775f00b7b8c079b3321d6b675dde0d02afa2ee23000b4"}, - {file = 
"Pillow-10.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:883f216eac8712b83a63f41b76ddfb7b2afab1b74abbb413c5df6680f071a6b9"}, - {file = "Pillow-10.1.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b920e4d028f6442bea9a75b7491c063f0b9a3972520731ed26c83e254302eb1e"}, - {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c41d960babf951e01a49c9746f92c5a7e0d939d1652d7ba30f6b3090f27e412"}, - {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1fafabe50a6977ac70dfe829b2d5735fd54e190ab55259ec8aea4aaea412fa0b"}, - {file = "Pillow-10.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3b834f4b16173e5b92ab6566f0473bfb09f939ba14b23b8da1f54fa63e4b623f"}, - {file = "Pillow-10.1.0.tar.gz", hash = "sha256:e6bf8de6c36ed96c86ea3b6e1d5273c53f46ef518a062464cd7ef5dd2cf92e38"}, -] - [[package]] name = "pinecone-client" version = "2.2.4" @@ -1278,21 +1165,6 @@ files = [ {file = "platformdirs-4.0.0.tar.gz", hash = "sha256:cb633b2bcf10c51af60beb0ab06d2f1d69064b43abf4c185ca6b28865f3f9731"}, ] -[[package]] -name = "psutil" -version = "5.9.6" -requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" -summary = "Cross-platform lib for process and system monitoring in Python." -files = [ - {file = "psutil-5.9.6-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c69596f9fc2f8acd574a12d5f8b7b1ba3765a641ea5d60fb4736bf3c08a8214a"}, - {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92e0cc43c524834af53e9d3369245e6cc3b130e78e26100d1f63cdb0abeb3d3c"}, - {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:748c9dd2583ed86347ed65d0035f45fa8c851e8d90354c122ab72319b5f366f4"}, - {file = "psutil-5.9.6-cp37-abi3-win32.whl", hash = "sha256:a6f01f03bf1843280f4ad16f4bde26b817847b4c1a0db59bf6419807bc5ce05c"}, - {file = "psutil-5.9.6-cp37-abi3-win_amd64.whl", hash = "sha256:6e5fb8dc711a514da83098bc5234264e551ad980cec5f85dabf4d38ed6f15e9a"}, - {file = "psutil-5.9.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:daecbcbd29b289aac14ece28eca6a3e60aa361754cf6da3dfb20d4d32b6c7f57"}, - {file = "psutil-5.9.6.tar.gz", hash = "sha256:e4b92ddcd7dd4cdd3f900180ea1e104932c7bce234fb88976e2a3b296441225a"}, -] - [[package]] name = "pycodestyle" version = "2.11.1" @@ -1433,19 +1305,6 @@ files = [ {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, ] -[[package]] -name = "pypdf2" -version = "3.0.1" -requires_python = ">=3.6" -summary = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" -dependencies = [ - "typing-extensions>=3.10.0.0; python_version < \"3.10\"", -] -files = [ - {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"}, - {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"}, -] - [[package]] name = "python-dateutil" version = "2.8.2" @@ -1459,15 +1318,6 @@ files = [ {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] -[[package]] -name = "pytz" -version = "2023.3.post1" -summary = "World timezone definitions, modern and historical" -files = [ - {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = 
"sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, - {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, -] - [[package]] name = "pyyaml" version = "6.0.1" @@ -2057,16 +1907,6 @@ files = [ {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, ] -[[package]] -name = "tzdata" -version = "2023.3" -requires_python = ">=2" -summary = "Provider of IANA time zone data" -files = [ - {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, - {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, -] - [[package]] name = "urllib3" version = "2.0.7" diff --git a/pyproject.toml b/pyproject.toml index 09961e6..4ba7716 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,27 +1,19 @@ [project] name = "LLM-Utilikit" -version = "1.0.0" +version = "1.0.1" description = "Prebuilt AI-powered Python components for leveraging Large Language Models." authors = [ {name = "Daethyra", email = "109057945+Daethyra@users.noreply.github.com"}, ] dependencies = [ - "chardet>=5.2.0", - "numpy==1.25.1", - "aiohttp>=3.9.0", "openai>=0.28.1", "pinecone-client>=2.2.4", "tokenizers>=0.14.1", "safetensors>=0.4.0", "transformers>=4.34.1", "langchain>=0.0.27", - "pandas>=2.0.3", "langserve[all]>=0.0.32", "torch>=2.1.1", - "accelerate>=0.25.0", - #"pyaudio>=0.2.14", - "PyPDF2>=3.0.1", - "Pillow>=10.1.0", ] requires-python = ">=3.9, <=3.10.12" readme = "README.md"