From 8d59c15e2aaf800a63e21a31781a2a6815302ec0 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Fri, 17 Mar 2023 14:14:25 +0530 Subject: [PATCH 01/23] add copy to clipboard option --- gpt_repository_loader.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gpt_repository_loader.py b/gpt_repository_loader.py index 68c663d..b589fdf 100755 --- a/gpt_repository_loader.py +++ b/gpt_repository_loader.py @@ -3,6 +3,7 @@ import os import sys import fnmatch +import pyperclip def get_ignore_list(ignore_file_path): ignore_list = [] @@ -58,4 +59,11 @@ def process_repository(repo_path, ignore_list, output_file): with open('output.txt', 'a') as output_file: output_file.write("--END--") print("Repository contents written to output.txt.") + + # Copy the output to the clipboard if the -c flag is provided + if "-c" in sys.argv: + with open('output.txt', 'r') as output_file: + clipboard_contents = output_file.read() + pyperclip.copy(clipboard_contents) + print("Repository contents copied to clipboard.") \ No newline at end of file From b094525c1869cb1c48fe2a88825e9cc075fc5fbd Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Fri, 17 Mar 2023 14:15:38 +0530 Subject: [PATCH 02/23] add requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2069479 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pyperclip \ No newline at end of file From 5fb35a7c6d7d46135e8d3b04a571025e983260b7 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Fri, 17 Mar 2023 15:25:11 +0530 Subject: [PATCH 03/23] add script to install gpt_repository_loader as a console script --- .gitignore | 2 + .gptignore | 4 +- gpt_repository_loader/__init__.py | 1 + .../gpt_repository_loader.py | 6 ++- setup.py | 37 +++++++++++++++++++ 5 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 gpt_repository_loader/__init__.py rename gpt_repository_loader.py => 
gpt_repository_loader/gpt_repository_loader.py (99%) create mode 100755 setup.py diff --git a/.gitignore b/.gitignore index fe4f192..b06452a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ __pycache__/ *.pyo *.pyd +dist +build # Output file output.txt diff --git a/.gptignore b/.gptignore index a0f1e1c..8071c8f 100644 --- a/.gptignore +++ b/.gptignore @@ -4,4 +4,6 @@ __pycache__/ .git/* .gptignore LICENSE -.github/* \ No newline at end of file +.github/* +dist +build \ No newline at end of file diff --git a/gpt_repository_loader/__init__.py b/gpt_repository_loader/__init__.py new file mode 100644 index 0000000..144e4df --- /dev/null +++ b/gpt_repository_loader/__init__.py @@ -0,0 +1 @@ +from .gpt_repository_loader import main \ No newline at end of file diff --git a/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py similarity index 99% rename from gpt_repository_loader.py rename to gpt_repository_loader/gpt_repository_loader.py index b589fdf..5a60cdb 100755 --- a/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -31,7 +31,7 @@ def process_repository(repo_path, ignore_list, output_file): output_file.write(f"{relative_file_path}\n") output_file.write(f"{contents}\n") -if __name__ == "__main__": +def main(): if len(sys.argv) < 2: print("Usage: python git_to_text.py /path/to/git/repository [-p /path/to/preamble.txt]") sys.exit(1) @@ -66,4 +66,6 @@ def process_repository(repo_path, ignore_list, output_file): clipboard_contents = output_file.read() pyperclip.copy(clipboard_contents) print("Repository contents copied to clipboard.") - \ No newline at end of file + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..4fbda69 --- /dev/null +++ b/setup.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +from setuptools import setup, find_packages + +with open("README.md", "r") as fh: + long_description = fh.read() + +setup( + 
name="gpt-repository-loader", + version="0.1.2", + author="Felvin", + author_email="team@felvin.com", + description="A utility to convert a Git repository into a text representation.", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/felvin-search/gpt-repository-loader", + packages=find_packages(), + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + ], + python_requires=">=3.6", + install_requires=["pyperclip"], + entry_points={ + "console_scripts": [ + "gpt-repository-loader=gpt_repository_loader:main", + ], + }, +) + From 20bab53c76a0fb6834aceb33c863d017d99d29f6 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Fri, 17 Mar 2023 17:03:02 +0530 Subject: [PATCH 04/23] fix the ignoring of dist and build --- .gitignore | 2 ++ .gptignore | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b06452a..eaff616 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ __pycache__/ dist build +dist/* +build/* # Output file output.txt diff --git a/.gptignore b/.gptignore index 8071c8f..d2aa9af 100644 --- a/.gptignore +++ b/.gptignore @@ -6,4 +6,6 @@ __pycache__/ LICENSE .github/* dist -build \ No newline at end of file +build +dist/* +build/* \ No newline at end of file From bfbee00d3ac5071c2ccb182413a51d777a3bb576 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Sun, 19 Mar 2023 10:58:06 +0530 Subject: [PATCH 05/23] allow using the script as a library as well --- README.md | 18 ++++++- gpt_repository_loader/__init__.py | 2 +- .../gpt_repository_loader.py | 54 +++++++++++-------- setup.py | 2 +- 4 files changed, 50 insertions(+), 26 
deletions(-) diff --git a/README.md b/README.md index f551d06..81cd5ac 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,26 @@ `gpt-repository-loader` is a command-line tool that converts the contents of a Git repository into a text format, preserving the structure of the files and file contents. The generated output can be interpreted by AI language models, allowing them to process the repository's contents for various tasks, such as code review or documentation generation. +## Installation & Usage +`pip install gpt-repository-loader` install the project locally. + +Now you'll be able to use the console script `gpt-repository-loader` with any project. + +```gpt-repository-loader /path/to/git/repository -c``` +this will copy the content of the project into clipboard which you can directly paste into + +You can also now use `gpt-repository-loader` as a library. + +```python +from gpt_repository_loader import git_repo_to_text +repo_text = git_repo_to_text('/path/to/repository') +``` + + ## Contributing Some context around building this is [located here](https://github.com/mpoon/gpt-repository-loader/discussions/18). Appreciate any issues and pull requests in the spirit of having mostly GPT build out this tool. Using [ChatGPT Plus](https://chat.openai.com/) is recommended for quick access to GPT-4. 
-## Getting Started +## Getting Started with contribution To get started with `gpt-repository-loader`, follow these steps: diff --git a/gpt_repository_loader/__init__.py b/gpt_repository_loader/__init__.py index 144e4df..bf8d4b5 100644 --- a/gpt_repository_loader/__init__.py +++ b/gpt_repository_loader/__init__.py @@ -1 +1 @@ -from .gpt_repository_loader import main \ No newline at end of file +from .gpt_repository_loader import main, git_repo_to_text \ No newline at end of file diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 5a60cdb..82f3684 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -4,6 +4,7 @@ import sys import fnmatch import pyperclip +import io def get_ignore_list(ignore_file_path): ignore_list = [] @@ -18,7 +19,7 @@ def should_ignore(file_path, ignore_list): return True return False -def process_repository(repo_path, ignore_list, output_file): +def process_repository(repo_path, ignore_list, output_stream): for root, _, files in os.walk(repo_path): for file in files: file_path = os.path.join(root, file) @@ -27,9 +28,32 @@ def process_repository(repo_path, ignore_list, output_file): if not should_ignore(relative_file_path, ignore_list): with open(file_path, 'r', errors='ignore') as file: contents = file.read() - output_file.write("-" * 4 + "\n") - output_file.write(f"{relative_file_path}\n") - output_file.write(f"{contents}\n") + output_stream.write("-" * 4 + "\n") + output_stream.write(f"{relative_file_path}\n") + output_stream.write(f"{contents}\n") + +def git_repo_to_text(repo_path, preamble_file=None): + ignore_file_path = os.path.join(repo_path, ".gptignore") + + if os.path.exists(ignore_file_path): + ignore_list = get_ignore_list(ignore_file_path) + else: + ignore_list = [] + + output_stream = io.StringIO() + + if preamble_file: + with open(preamble_file, 'r') as pf: + preamble_text = pf.read() + 
output_stream.write(f"{preamble_text}\n") + else: + output_stream.write("The following text is a Git repository with code. The structure of the text are sections that begin with ----, followed by a single line containing the file path and file name, followed by a variable amount of lines containing the file contents. The text representing the Git repository ends when the symbols --END-- are encounted. Any further text beyond --END-- are meant to be interpreted as instructions using the aforementioned Git repository as context.\n") + + process_repository(repo_path, ignore_list, output_stream) + + output_stream.write("--END--") + + return output_stream.getvalue() def main(): if len(sys.argv) < 2: @@ -37,34 +61,18 @@ def main(): sys.exit(1) repo_path = sys.argv[1] - ignore_file_path = os.path.join(repo_path, ".gptignore") - preamble_file = None if "-p" in sys.argv: preamble_file = sys.argv[sys.argv.index("-p") + 1] - if os.path.exists(ignore_file_path): - ignore_list = get_ignore_list(ignore_file_path) - else: - ignore_list = [] + repo_as_text = git_repo_to_text(repo_path, preamble_file) with open('output.txt', 'w') as output_file: - if preamble_file: - with open(preamble_file, 'r') as pf: - preamble_text = pf.read() - output_file.write(f"{preamble_text}\n") - else: - output_file.write("The following text is a Git repository with code. The structure of the text are sections that begin with ----, followed by a single line containing the file path and file name, followed by a variable amount of lines containing the file contents. The text representing the Git repository ends when the symbols --END-- are encounted. 
Any further text beyond --END-- are meant to be interpreted as instructions using the aforementioned Git repository as context.\n") - process_repository(repo_path, ignore_list, output_file) - with open('output.txt', 'a') as output_file: - output_file.write("--END--") + output_file.write(repo_as_text) print("Repository contents written to output.txt.") - # Copy the output to the clipboard if the -c flag is provided if "-c" in sys.argv: - with open('output.txt', 'r') as output_file: - clipboard_contents = output_file.read() - pyperclip.copy(clipboard_contents) + pyperclip.copy(repo_as_text) print("Repository contents copied to clipboard.") if __name__ == "__main__": diff --git a/setup.py b/setup.py index 4fbda69..785d84c 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.1.2", + version="0.1.4", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From a214ffc950cb8f6bceddf13202ed072ed9ecad96 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Sun, 19 Mar 2023 11:16:30 +0530 Subject: [PATCH 06/23] add a basic release script --- release.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100755 release.sh diff --git a/release.sh b/release.sh new file mode 100755 index 0000000..42d0915 --- /dev/null +++ b/release.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# Remember you'll need to manually update the version in setup.py +# Also TWINE_USERNAME and TWINE_PASSWORD env variables should be set +python setup.py sdist bdist_wheel +twine upload dist/* --verbose \ No newline at end of file From 4de57686ace018359c64ede4c786e417e02d9517 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Sun, 19 Mar 2023 11:23:12 +0530 Subject: [PATCH 07/23] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 785d84c..96bc7e3 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( 
name="gpt-repository-loader", - version="0.1.4", + version="0.1.5", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From 204766d0c1df7db5e27051017cc9f7c00efc3fb9 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Sun, 19 Mar 2023 14:04:47 +0530 Subject: [PATCH 08/23] Use .gitignore if .gptignore is not present, also ignore .git --- gpt_repository_loader/gpt_repository_loader.py | 16 ++++++++++------ setup.py | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 82f3684..0c07d09 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -33,12 +33,15 @@ def process_repository(repo_path, ignore_list, output_stream): output_stream.write(f"{contents}\n") def git_repo_to_text(repo_path, preamble_file=None): - ignore_file_path = os.path.join(repo_path, ".gptignore") - - if os.path.exists(ignore_file_path): - ignore_list = get_ignore_list(ignore_file_path) - else: - ignore_list = [] + gpt_ignore_path = os.path.join(repo_path, ".gptignore") + git_ignore_path = os.path.join(repo_path, ".gitignore") + + ignore_list = ['.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore'] + + if os.path.exists(gpt_ignore_path): + ignore_list += get_ignore_list(gpt_ignore_path) + elif os.path.exists(git_ignore_path): + ignore_list += get_ignore_list(git_ignore_path) output_stream = io.StringIO() @@ -55,6 +58,7 @@ def git_repo_to_text(repo_path, preamble_file=None): return output_stream.getvalue() + def main(): if len(sys.argv) < 2: print("Usage: python git_to_text.py /path/to/git/repository [-p /path/to/preamble.txt]") diff --git a/setup.py b/setup.py index 96bc7e3..4376485 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.1.5", + version="0.1.7", author="Felvin", 
author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From 3c95317a86f00fa44df445d76d473963f8acde2d Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Tue, 21 Mar 2023 11:48:49 +0530 Subject: [PATCH 09/23] print directory structure --- gpt_repository_loader/__init__.py | 2 +- .../gpt_repository_loader.py | 57 ++++++++++++++----- setup.py | 2 +- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/gpt_repository_loader/__init__.py b/gpt_repository_loader/__init__.py index bf8d4b5..f4edfd7 100644 --- a/gpt_repository_loader/__init__.py +++ b/gpt_repository_loader/__init__.py @@ -1 +1 @@ -from .gpt_repository_loader import main, git_repo_to_text \ No newline at end of file +from .gpt_repository_loader import main, git_repo_to_text, print_directory_structure,get_ignore_list \ No newline at end of file diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 0c07d09..0bca283 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -6,13 +6,6 @@ import pyperclip import io -def get_ignore_list(ignore_file_path): - ignore_list = [] - with open(ignore_file_path, 'r') as ignore_file: - for line in ignore_file: - ignore_list.append(line.strip()) - return ignore_list - def should_ignore(file_path, ignore_list): for pattern in ignore_list: if fnmatch.fnmatch(file_path, pattern): @@ -32,16 +25,35 @@ def process_repository(repo_path, ignore_list, output_stream): output_stream.write(f"{relative_file_path}\n") output_stream.write(f"{contents}\n") -def git_repo_to_text(repo_path, preamble_file=None): +def get_ignore_list(repo_path): + ignore_list = [] + ignore_file_path = None + gpt_ignore_path = os.path.join(repo_path, ".gptignore") git_ignore_path = os.path.join(repo_path, ".gitignore") - - ignore_list = ['.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore'] - + if 
os.path.exists(gpt_ignore_path): - ignore_list += get_ignore_list(gpt_ignore_path) + ignore_file_path = gpt_ignore_path elif os.path.exists(git_ignore_path): - ignore_list += get_ignore_list(git_ignore_path) + ignore_file_path = git_ignore_path + else: + print("No ignore file present") + + if ignore_file_path: + with open(ignore_file_path, 'r') as ignore_file: + for line in ignore_file: + line = line.strip() + if not line or line.startswith("#"): + continue + ignore_list.append(line) + + default_ignore_list = ['.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*'] + ignore_list += default_ignore_list + + return ignore_list + +def git_repo_to_text(repo_path, preamble_file=None): + ignore_list = get_ignore_list(repo_path) output_stream = io.StringIO() @@ -58,7 +70,6 @@ def git_repo_to_text(repo_path, preamble_file=None): return output_stream.getvalue() - def main(): if len(sys.argv) < 2: print("Usage: python git_to_text.py /path/to/git/repository [-p /path/to/preamble.txt]") @@ -79,5 +90,23 @@ def main(): pyperclip.copy(repo_as_text) print("Repository contents copied to clipboard.") + +def print_directory_structure(repo_path, indent=0, max_depth=2, ignore_list=None): + if ignore_list is None: + ignore_list = get_ignore_list(repo_path) + + if indent <= max_depth: + for item in os.listdir(repo_path): + full_path = os.path.join(repo_path, item) + if os.path.isdir(full_path): + if should_ignore(full_path, ignore_list) or should_ignore(item, ignore_list): + continue + print("| " * indent + "|--" + item + "/") + print_directory_structure(full_path, indent + 1, max_depth, ignore_list) + else: + if should_ignore(full_path, ignore_list) or should_ignore(item, ignore_list): + continue + print("| " * indent + "|--" + item) + if __name__ == "__main__": main() \ No newline at end of file diff --git a/setup.py b/setup.py index 4376485..db1619e 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - 
version="0.1.7", + version="0.1.9", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From 17b3b1942d72802f1b1fc394ab958518b058cbce Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Wed, 22 Mar 2023 15:46:52 +0530 Subject: [PATCH 10/23] add pycache in default ignore list --- gpt_repository_loader/gpt_repository_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 0bca283..a5c7dd1 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -47,7 +47,7 @@ def get_ignore_list(repo_path): continue ignore_list.append(line) - default_ignore_list = ['.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*'] + default_ignore_list = ['.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*', '__pycache__', '__pycache__/*'] ignore_list += default_ignore_list return ignore_list From 24229d0e4d942a7ebe56d8b92e9b7bce7ae12350 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Thu, 23 Mar 2023 11:52:28 +0530 Subject: [PATCH 11/23] add node_modules in the default ignore list --- .../gpt_repository_loader.py | 29 ++++++++++--------- setup.py | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index a5c7dd1..9dd3eed 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -12,19 +12,6 @@ def should_ignore(file_path, ignore_list): return True return False -def process_repository(repo_path, ignore_list, output_stream): - for root, _, files in os.walk(repo_path): - for file in files: - file_path = os.path.join(root, file) - relative_file_path = os.path.relpath(file_path, repo_path) - 
- if not should_ignore(relative_file_path, ignore_list): - with open(file_path, 'r', errors='ignore') as file: - contents = file.read() - output_stream.write("-" * 4 + "\n") - output_stream.write(f"{relative_file_path}\n") - output_stream.write(f"{contents}\n") - def get_ignore_list(repo_path): ignore_list = [] ignore_file_path = None @@ -47,11 +34,25 @@ def get_ignore_list(repo_path): continue ignore_list.append(line) - default_ignore_list = ['.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*', '__pycache__', '__pycache__/*'] + default_ignore_list = ['dist', 'dist/','dist/*','sdist', 'sdist/','sdist/*' '.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*', '__pycache__', '__pycache__/*'] ignore_list += default_ignore_list return ignore_list +def process_repository(repo_path, ignore_list, output_stream): + for root, _, files in os.walk(repo_path): + for file in files: + file_path = os.path.join(root, file) + relative_file_path = os.path.relpath(file_path, repo_path) + + if not should_ignore(relative_file_path, ignore_list): + with open(file_path, 'r', errors='ignore') as file: + contents = file.read() + output_stream.write("-" * 4 + "\n") + output_stream.write(f"{relative_file_path}\n") + output_stream.write(f"{contents}\n") + + def git_repo_to_text(repo_path, preamble_file=None): ignore_list = get_ignore_list(repo_path) diff --git a/setup.py b/setup.py index db1619e..6a7a943 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.1.9", + version="0.1.10", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From c714082dafc0160cb2071b9512d4f21c37ebccba Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Thu, 28 Mar 2024 07:58:00 +0530 Subject: [PATCH 12/23] use argparse for argument parsing --- .../gpt_repository_loader.py | 21 ++++++++----------- 1 file 
changed, 9 insertions(+), 12 deletions(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 9dd3eed..50deec9 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import os -import sys +import argparse import fnmatch import pyperclip import io @@ -72,22 +72,19 @@ def git_repo_to_text(repo_path, preamble_file=None): return output_stream.getvalue() def main(): - if len(sys.argv) < 2: - print("Usage: python git_to_text.py /path/to/git/repository [-p /path/to/preamble.txt]") - sys.exit(1) + parser = argparse.ArgumentParser(description="Convert a Git repository to text.") + parser.add_argument("repo_path", help="Path to the Git repository.") + parser.add_argument("-p", "--preamble", help="Path to a preamble file.") + parser.add_argument("-c", "--copy", action="store_true", help="Copy the repository contents to clipboard.") + args = parser.parse_args() - repo_path = sys.argv[1] - preamble_file = None - if "-p" in sys.argv: - preamble_file = sys.argv[sys.argv.index("-p") + 1] - - repo_as_text = git_repo_to_text(repo_path, preamble_file) + repo_as_text = git_repo_to_text(args.repo_path, args.preamble) with open('output.txt', 'w') as output_file: output_file.write(repo_as_text) print("Repository contents written to output.txt.") - if "-c" in sys.argv: + if args.copy: pyperclip.copy(repo_as_text) print("Repository contents copied to clipboard.") @@ -110,4 +107,4 @@ def print_directory_structure(repo_path, indent=0, max_depth=2, ignore_list=None print("| " * indent + "|--" + item) if __name__ == "__main__": - main() \ No newline at end of file + main() From ca767dda6558955bad34cd9ae15eb85008cf6598 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Thu, 28 Mar 2024 07:59:13 +0530 Subject: [PATCH 13/23] bump patch version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py 
index 6a7a943..5ea4d67 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.1.10", + version="0.1.11", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From 2748a928bd280dac6c52288055adb3730a34171e Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Fri, 29 Mar 2024 09:47:03 +0530 Subject: [PATCH 14/23] don't write to repo if -c flag is there --- gpt_repository_loader/gpt_repository_loader.py | 8 ++++---- release.sh | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 50deec9..7d3b760 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -80,13 +80,13 @@ def main(): repo_as_text = git_repo_to_text(args.repo_path, args.preamble) - with open('output.txt', 'w') as output_file: - output_file.write(repo_as_text) - print("Repository contents written to output.txt.") - if args.copy: pyperclip.copy(repo_as_text) print("Repository contents copied to clipboard.") + else: + with open('output.txt', 'w') as output_file: + output_file.write(repo_as_text) + print("Repository contents written to output.txt.") def print_directory_structure(repo_path, indent=0, max_depth=2, ignore_list=None): diff --git a/release.sh b/release.sh index 42d0915..860c4a4 100755 --- a/release.sh +++ b/release.sh @@ -3,4 +3,5 @@ # Remember you'll need to manually update the version in setup.py # Also TWINE_USERNAME and TWINE_PASSWORD env variables should be set python setup.py sdist bdist_wheel -twine upload dist/* --verbose \ No newline at end of file +source .env +twine upload dist/* --verbose --username $TWINE_USERNAME --password $TWINE_PASSWORD \ No newline at end of file From 825fe7e9403afdf9b83dda139e6ca24d29e1a65e Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Thu, 16 May 2024 23:40:30 +0530 
Subject: [PATCH 15/23] Update README.md --- README.md | 59 +++++++++++-------------------------------------------- 1 file changed, 11 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 81cd5ac..8ba4d06 100644 --- a/README.md +++ b/README.md @@ -1,54 +1,17 @@ # gpt-repository-loader -`gpt-repository-loader` is a command-line tool that converts the contents of a Git repository into a text format, preserving the structure of the files and file contents. The generated output can be interpreted by AI language models, allowing them to process the repository's contents for various tasks, such as code review or documentation generation. +## Installation -## Installation & Usage -`pip install gpt-repository-loader` install the project locally. +`pip install gpt-repository-loader` -Now you'll be able to use the console script `gpt-repository-loader` with any project. +## How to use? +Go to the directory you are interested in, run +```gpt-repository-loader . -c``` +This will copy ALL the git tracked content in the repository on clipboard and then you can use [Gemini](https://aistudio.google.com/app/prompts/new_chat)/[Claude](https://claude.ai)/[ChatGPT](https://chatgpt.com) to ask questions on it. -```gpt-repository-loader /path/to/git/repository -c``` -this will copy the content of the project into clipboard which you can directly paste into +## What to use it for? +- Build a README for codebases +- Work with Legacy code +- Debug issues -You can also now use `gpt-repository-loader` as a library. - -```python -from gpt_repository_loader import git_repo_to_text -repo_text = git_repo_to_text('/path/to/repository') -``` - - -## Contributing -Some context around building this is [located here](https://github.com/mpoon/gpt-repository-loader/discussions/18). Appreciate any issues and pull requests in the spirit of having mostly GPT build out this tool. Using [ChatGPT Plus](https://chat.openai.com/) is recommended for quick access to GPT-4. 
- -## Getting Started with contribution - -To get started with `gpt-repository-loader`, follow these steps: - -1. Ensure you have Python 3 installed on your system. -2. Clone or download the `gpt-repository-loader` repository. -3. Navigate to the repository's root directory in your terminal. -4. Run `gpt-repository-loader` with the following command: - - ```bash - python gpt_repository_loader.py /path/to/git/repository - ``` - Replace `/path/to/git/repository` with the path to the Git repository you want to process. - -5. The tool will generate an output.txt file containing the text representation of the repository. You can now use this file as input for AI language models or other text-based processing tasks. - -## Running Tests - -To run the tests for `gpt-repository-loader`, follow these steps: - -1. Ensure you have Python 3 installed on your system. -2. Navigate to the repository's root directory in your terminal. -3. Run the tests with the following command: - - ```bash - python -m unittest test_gpt_repository_loader.py - ``` -Now, the test harness is added to the `gpt-repository-loader` project. You can run the tests by executing the command `python -m unittest test_gpt_repository_loader.py` in your terminal. - -## License -This project is licensed under the MIT License - see the LICENSE file for details. +Gemini's 1M context window is REALLLY big, and it under utilized. 
From 4d974a020e5e7a687e71854bcc2baabd7b5408f3 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Thu, 16 May 2024 23:18:16 +0530 Subject: [PATCH 16/23] use git ls-files to get the list of tracked files --- .../gpt_repository_loader.py | 22 +++++++++---------- setup.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 7d3b760..40dbf55 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -5,6 +5,7 @@ import fnmatch import pyperclip import io +import subprocess def should_ignore(file_path, ignore_list): for pattern in ignore_list: @@ -40,17 +41,16 @@ def get_ignore_list(repo_path): return ignore_list def process_repository(repo_path, ignore_list, output_stream): - for root, _, files in os.walk(repo_path): - for file in files: - file_path = os.path.join(root, file) - relative_file_path = os.path.relpath(file_path, repo_path) - - if not should_ignore(relative_file_path, ignore_list): - with open(file_path, 'r', errors='ignore') as file: - contents = file.read() - output_stream.write("-" * 4 + "\n") - output_stream.write(f"{relative_file_path}\n") - output_stream.write(f"{contents}\n") + git_files = subprocess.check_output(["git", "ls-files"], cwd=repo_path, universal_newlines=True).splitlines() + + for file_path in git_files: + if not should_ignore(file_path, ignore_list): + full_path = os.path.join(repo_path, file_path) + with open(full_path, 'r', errors='ignore') as file: + contents = file.read() + output_stream.write("-" * 4 + "\n") + output_stream.write(f"{file_path}\n") + output_stream.write(f"{contents}\n") def git_repo_to_text(repo_path, preamble_file=None): diff --git a/setup.py b/setup.py index 5ea4d67..9ef73ca 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.1.11", + version="0.1.12", author="Felvin", 
author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From 05f9d4948da4ecddcf9c1d729447194a3e1cb36c Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Thu, 16 May 2024 23:19:41 +0530 Subject: [PATCH 17/23] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9ef73ca..b36097c 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.1.12", + version="0.9.0", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From 62082559a5deaf117e854e0f0b6f3cb69f7746f2 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Tue, 21 May 2024 11:22:57 +0530 Subject: [PATCH 18/23] add package-lock.json in default ignore list --- gpt_repository_loader/gpt_repository_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 40dbf55..e775b19 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -35,7 +35,7 @@ def get_ignore_list(repo_path): continue ignore_list.append(line) - default_ignore_list = ['dist', 'dist/','dist/*','sdist', 'sdist/','sdist/*' '.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*', '__pycache__', '__pycache__/*'] + default_ignore_list = ['dist', 'dist/','dist/*','sdist', 'sdist/','sdist/*' '.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*', '__pycache__', '__pycache__/*', 'package-lock.json'] ignore_list += default_ignore_list return ignore_list From b7a331185b5aaae046f3fd101fd7bacc5a3d5515 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Wed, 29 May 2024 10:51:40 +0530 Subject: [PATCH 19/23] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/setup.py b/setup.py index b36097c..bcdb11e 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.9.0", + version="0.9.1", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From 0189bfbc04a5b19abe6914806e203d23bb4cc7c8 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Wed, 29 May 2024 10:57:53 +0530 Subject: [PATCH 20/23] update readme --- README.md | 18 ++++++++++++++++-- setup.py | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8ba4d06..aef0aa5 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,28 @@ `pip install gpt-repository-loader` +## Linux Requirements +On Linux, ensure that you have `xclip` installed for clipboard functionality. You can install it using: +```bash +sudo apt-get install xclip # Debian/Ubuntu +sudo yum install xclip # Fedora/CentOS +``` + ## How to use? Go to the directory you are interested in, run -```gpt-repository-loader . -c``` +``` +gpt-repository-loader . -c +``` This will copy ALL the git tracked content in the repository on clipboard and then you can use [Gemini](https://aistudio.google.com/app/prompts/new_chat)/[Claude](https://claude.ai)/[ChatGPT](https://chatgpt.com) to ask questions on it. +### Available Command Line Flags +* `repo_path`: (Required) Path to the Git repository. +* `-p`, `--preamble`: Path to a preamble file to include before the repository content. +* `-c`, `--copy`: Copies the repository contents to the clipboard. If not provided, the output will be written to a file named `output.txt` in the current directory. + ## What to use it for? - Build a README for codebases - Work with Legacy code - Debug issues -Gemini's 1M context window is REALLLY big, and it under utilized. +Gemini's 1M context window is REALLY big, and it is underutilized. 
\ No newline at end of file diff --git a/setup.py b/setup.py index bcdb11e..7a35623 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.9.1", + version="0.9.2", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.", From 9ecffe8bbec4787b71e3e99b57087878bdb4c4d6 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Fri, 31 May 2024 23:12:18 +0530 Subject: [PATCH 21/23] ignore image, video and audio files --- gpt_repository_loader/gpt_repository_loader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index e775b19..17c487c 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -36,7 +36,10 @@ def get_ignore_list(repo_path): ignore_list.append(line) default_ignore_list = ['dist', 'dist/','dist/*','sdist', 'sdist/','sdist/*' '.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*', '__pycache__', '__pycache__/*', 'package-lock.json'] - ignore_list += default_ignore_list + image_ignore_list = ['*.png', '*.jpg', '*.jpeg', '*.gif', '*.bmp', '*.ico', '*.cur', '*.tiff', '*.webp', '*.avif'] + video_ignore_list = ['*.mp4', '*.mov', '*.wmv', '*.avi', '*.mkv', '*.flv', '*.webm', '*.mp3', '*.wav', '*.aac', '*.m4a', '*.mpa', '*.mpeg', '*.mpe', '*.mpg', '*.mpi', '*.mpt', '*.mpx', '*.ogv', '*.webm', '*.wmv', '*.yuv'] + audio_ignore_list = ['*.mp3', '*.wav', '*.aac', '*.m4a', '*.mpa', '*.mpeg', '*.mpe', '*.mpg', '*.mpi', '*.mpt', '*.mpx', '*.ogv', '*.webm', '*.wmv', '*.yuv'] + ignore_list += default_ignore_list + image_ignore_list + video_ignore_list + audio_ignore_list return ignore_list From 4465230f7eb00d35e5b5eb3710a99175b79a0e8a Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Fri, 31 May 2024 23:14:26 +0530 Subject: [PATCH 22/23] ignore 
yarn files --- gpt_repository_loader/gpt_repository_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpt_repository_loader/gpt_repository_loader.py b/gpt_repository_loader/gpt_repository_loader.py index 17c487c..8106f18 100755 --- a/gpt_repository_loader/gpt_repository_loader.py +++ b/gpt_repository_loader/gpt_repository_loader.py @@ -35,7 +35,7 @@ def get_ignore_list(repo_path): continue ignore_list.append(line) - default_ignore_list = ['dist', 'dist/','dist/*','sdist', 'sdist/','sdist/*' '.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*', '__pycache__', '__pycache__/*', 'package-lock.json'] + default_ignore_list = ['dist', 'dist/','dist/*','sdist', 'sdist/','sdist/*' '.git/', '/.git/', '.git', '.git/*', '.gptignore', '.gitignore', 'node_modules', 'node_modules/*', '__pycache__', '__pycache__/*', 'package-lock.json', 'yarn.lock', 'yarn-error.log'] image_ignore_list = ['*.png', '*.jpg', '*.jpeg', '*.gif', '*.bmp', '*.ico', '*.cur', '*.tiff', '*.webp', '*.avif'] video_ignore_list = ['*.mp4', '*.mov', '*.wmv', '*.avi', '*.mkv', '*.flv', '*.webm', '*.mp3', '*.wav', '*.aac', '*.m4a', '*.mpa', '*.mpeg', '*.mpe', '*.mpg', '*.mpi', '*.mpt', '*.mpx', '*.ogv', '*.webm', '*.wmv', '*.yuv'] audio_ignore_list = ['*.mp3', '*.wav', '*.aac', '*.m4a', '*.mpa', '*.mpeg', '*.mpe', '*.mpg', '*.mpi', '*.mpt', '*.mpx', '*.ogv', '*.webm', '*.wmv', '*.yuv'] From a5ce37a71823b9540d4f120500fe60d1e5f1d7c5 Mon Sep 17 00:00:00 2001 From: Harsh Gupta Date: Fri, 31 May 2024 23:15:28 +0530 Subject: [PATCH 23/23] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7a35623..c2604d5 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="gpt-repository-loader", - version="0.9.2", + version="0.9.3", author="Felvin", author_email="team@felvin.com", description="A utility to convert a Git repository into a text representation.",