From 0822a2e2379da877f71ce8a78885216f891af524 Mon Sep 17 00:00:00 2001 From: wrongu Date: Mon, 19 Jun 2023 17:06:56 -0400 Subject: [PATCH 01/13] add test fixture for OverleafGitPaperRemote. Currently broken! --- .../paper_remote/OverleafGitPaperRemote.py | 7 ++- .../test_OverleafGitPaperRemote.py | 57 +++++++++++++++++++ test_data/dummy_overleaf_git_project/main.tex | 26 +++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 llm4papers/paper_remote/test_OverleafGitPaperRemote.py create mode 100644 test_data/dummy_overleaf_git_project/main.tex diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 7dda210..f389e47 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -119,7 +119,7 @@ def _doc_id_to_path(self, doc_id: DocumentID) -> pathlib.Path: # so we can cast to a string on this next line: return pathlib.Path(git_root) / str(doc_id) - def refresh(self): + def refresh(self, retry: bool = True): """ This is a fallback method (that likely needs some love) to ensure that the repo is up to date with the latest upstream changes. @@ -161,7 +161,10 @@ def refresh(self): self._cached_repo = None # recursively delete the repo shutil.rmtree(f"/tmp/{self._reposlug}") - self.refresh() + if retry: + self.refresh(retry=False) + else: + raise e def list_doc_ids(self) -> list[DocumentID]: """ diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py new file mode 100644 index 0000000..c5796f4 --- /dev/null +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -0,0 +1,57 @@ +from .OverleafGitPaperRemote import OverleafGitPaperRemote +from ..models import EditTrigger, DocumentRange +import pytest +import git +from pathlib import Path + + +def _recursive_delete(path: Path): + """Delete a file or directory, recursively. + """ + if path.is_file(): + path.unlink() + elif path.is_dir(): + for child in path.iterdir(): + _recursive_delete(child) + path.rmdir() + + +# Create a local git repo to test with. Upon the start of each test, copy the contents +# of +@pytest.fixture +def temporary_git_paper_repo(): + src = Path("../test_data/dummy_overleaf_git_project/") + # Remove the .git directory from the dummy project to make a fresh start. + git_root = src / ".git" + if git_root.exists(): + _recursive_delete(git_root) + repo = git.Repo.init(src, mkdir=False) + repo.git.add(".") + repo.index.commit("Initial commit.") + # If /tmp/dummy_overleaf_git_project exists, delete it, since this is where + # OverleafGitPaperRemote will clone the repo to. + dst = Path("/tmp/dummy_overleaf_git_project") + if dst.exists(): + _recursive_delete(dst) + # Use the file:// protocol explicitly so that the repo is "cloned" from the local + # filesystem, and adding file://username:password@src works + return "file://" + str(src.resolve()) + + +def test_can_always_make_edits_to_overleaf_git_paper_remote(temporary_git_paper_repo): + """ + Confirm that edits can always be made to an in-memory paper remote. + + """ + remote = OverleafGitPaperRemote(temporary_git_paper_repo) + assert remote.is_edit_ok( + EditTrigger( + input_ranges=[ + DocumentRange(doc_id="main.tex", revision_id=0, selection=(0, 1)) + ], + output_ranges=[ + DocumentRange(doc_id="main.tex", revision_id=0, selection=(0, 1)) + ], + request_text="Nothin'!", + ) + ) diff --git a/test_data/dummy_overleaf_git_project/main.tex b/test_data/dummy_overleaf_git_project/main.tex new file mode 100644 index 0000000..ddff804 --- /dev/null +++ b/test_data/dummy_overleaf_git_project/main.tex @@ -0,0 +1,26 @@ +% Preamble +\documentclass[11pt]{article} + +% Packages +\usepackage{amsmath} + +% Document +\begin{document} + +\section{Quick fox, lazy dog} + +In a world not quite like our own, where animals could speak and had societies akin to those of humans, there lived a +quick brown fox named Felix and a lazy dog named Dudley. The two were known across the vast expanses of the Woodlands, +but for entirely different reasons. Felix, agile and swift, was a famed courier, his russet fur a blur as he darted +through the forest, delivering messages and parcels from the crow's nest in the towering pines to the rabbit warrens in +the cool, dark earth. Dudley, on the other hand, was known for his unmatched languor, a dog who preferred sunbathing on +the riverbank to any form of physical exertion, his only movement being the occasional flick of his tail or turn of his +head. One day, a challenge arose from the animal kingdom — a test of speed and agility. Felix, eager to prove his +prowess, was quick to accept. The challenge was simple: he had to leap over Dudley, who was sprawled in his usual spot +by the river. The whole of the Woodlands gathered, curious critters watching as Felix took a running start. His muscles +coiled, and with a burst of speed, he jumped, his silhouette briefly blocking out the sun. Gasps echoed as he cleared +Dudley with room to spare, landing gracefully on the other side. That day, the quick brown fox didn't just jump over the + lazy dog, he soared above him, further cementing his reputation in the annals of Woodland's history, while Dudley + merely yawned, rolled over, and resumed his nap under the warm sun. + +\end{document} \ No newline at end of file From 920cdeb2f78822742f698c7e9980264ece2b42d9 Mon Sep 17 00:00:00 2001 From: wrongu Date: Mon, 19 Jun 2023 17:17:10 -0400 Subject: [PATCH 02/13] black formatting and fixed relative path in tests --- llm4papers/paper_remote/test_OverleafGitPaperRemote.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py index c5796f4..5d58106 100644 --- a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -6,8 +6,7 @@ def _recursive_delete(path: Path): - """Delete a file or directory, recursively. - """ + """Delete a file or directory, recursively.""" if path.is_file(): path.unlink() elif path.is_dir(): @@ -20,7 +19,7 @@ def _recursive_delete(path: Path): # of @pytest.fixture def temporary_git_paper_repo(): - src = Path("../test_data/dummy_overleaf_git_project/") + src = Path("test_data/dummy_overleaf_git_project/") # Remove the .git directory from the dummy project to make a fresh start. git_root = src / ".git" if git_root.exists(): From 2713a25a38bb6c671e343dbac26969dd6996948f Mon Sep 17 00:00:00 2001 From: wrongu Date: Tue, 20 Jun 2023 10:18:01 -0400 Subject: [PATCH 03/13] better git diff handling; most basic OverleafGitPaperRemote test works --- .../paper_remote/OverleafGitPaperRemote.py | 59 +++++++++++-------- .../test_OverleafGitPaperRemote.py | 23 +++++++- .../dummy_overleaf_git_project/main-0.tex | 10 ++++ .../{main.tex => main-1.tex} | 0 .../dummy_overleaf_git_project/main-2.tex | 29 +++++++++ 5 files changed, 95 insertions(+), 26 deletions(-) create mode 100644 test_data/dummy_overleaf_git_project/main-0.tex rename test_data/dummy_overleaf_git_project/{main.tex => main-1.tex} (100%) create mode 100644 test_data/dummy_overleaf_git_project/main-2.tex diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index f389e47..fb1741c 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -9,14 +9,34 @@ import datetime from urllib.parse import quote from git import Repo # type: ignore +from typing import Iterable +import re -from llm4papers.models import EditTrigger, EditResult, EditType, DocumentID, RevisionID +from llm4papers.models import EditTrigger, EditResult, EditType, DocumentID, RevisionID, LineRange from llm4papers.paper_remote.MultiDocumentPaperRemote import MultiDocumentPaperRemote from llm4papers.logger import logger +def _diff_to_ranges(diff: str) -> Iterable[LineRange]: + """Given a git diff, return LineRange object(s) indicating which lines in the + original document were changed. + """ + diff_edit_re = r"@{2,}\s*-(?P\d+),(?P\d+)\s*\+\d+,\d+\s*@{2,}" + for match in re.finditer(diff_edit_re, diff): + git_line_number = int(match.group("line")) + git_line_count = int(match.group("count")) + # Git counts from 1 and gives (start, length). LineRange counts from 0 and gives + # (start, end). + yield (git_line_number - 1, git_line_number - 1 + git_line_count) + + +def _ranges_overlap(a: LineRange, b: LineRange) -> bool: + """Given two LineRanges, return True if they overlap, False otherwise.""" + return not (a[1] < b[0] or b[1] < a[0]) + + def _too_close_to_human_edits( - repo: Repo, filename: str, line_number: int, last_n: int = 2 + repo: Repo, filename: str, line_range: LineRange, last_n: int = 2 ) -> bool: """ Determine if the line `line_number` of the file `filename` was changed in @@ -44,19 +64,13 @@ def _too_close_to_human_edits( # Get the diff for HEAD~n: total_diff = repo.git.diff(f"HEAD~{last_n}", filename, unified=0) - # Get the current repo state of that line: - current_line = repo.git.show(f"HEAD:{filename}").split("\n")[line_number] - - logger.debug("Diff: " + total_diff) - logger.debug("Current line: " + current_line) - - # Match the line in the diff: - if current_line in total_diff: - logger.info( - f"Found current line ({current_line[:10]}...) in diff, rejecting edit." - ) - return True - + for git_line_range in _diff_to_ranges(total_diff): + if _ranges_overlap(git_line_range, line_range): + logger.info( + f"Line range {line_range} overlaps with git-edited {git_line_range}, " + f"rejecting edit." + ) + return True return False @@ -199,14 +213,13 @@ def is_edit_ok(self, edit: EditTrigger) -> bool: # want to wait for the user to move on to the next line. for doc_range in edit.input_ranges + edit.output_ranges: repo_scoped_file = str(self._doc_id_to_path(doc_range.doc_id)) - for i in range(doc_range.selection[0], doc_range.selection[1]): - if _too_close_to_human_edits(self._get_repo(), repo_scoped_file, i): - logger.info( - f"Temporarily skipping edit request in {doc_range.doc_id}" - " at line {i} because it was still in progress" - " in the last commit." - ) - return False + if _too_close_to_human_edits(self._get_repo(), repo_scoped_file, doc_range.selection): + logger.info( + f"Temporarily skipping edit request in {doc_range.doc_id}" + " at line {i} because it was still in progress" + " in the last commit." + ) + return False return True def to_dict(self): diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py index 5d58106..c85dcde 100644 --- a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -3,6 +3,7 @@ import pytest import git from pathlib import Path +import shutil def _recursive_delete(path: Path): @@ -20,21 +21,37 @@ def _recursive_delete(path: Path): @pytest.fixture def temporary_git_paper_repo(): src = Path("test_data/dummy_overleaf_git_project/") + # Remove the .git directory from the dummy project to make a fresh start. git_root = src / ".git" if git_root.exists(): _recursive_delete(git_root) + + # Create initial commit with (mostly empty) main.tex file, then add content to it by + # copying main-1.tex, main-2.tex, etc to main.tex. The purpose of this is to ensure + # that the git history has a few commits. repo = git.Repo.init(src, mkdir=False) - repo.git.add(".") - repo.index.commit("Initial commit.") + i = 0 + while (file := src / f"main-{i}.tex").exists(): + shutil.copyfile(file, src / "main.tex") + repo.git.add("main.tex") + repo.index.commit(f"Commit {i}") + i += 1 + # If /tmp/dummy_overleaf_git_project exists, delete it, since this is where # OverleafGitPaperRemote will clone the repo to. dst = Path("/tmp/dummy_overleaf_git_project") if dst.exists(): _recursive_delete(dst) + # Use the file:// protocol explicitly so that the repo is "cloned" from the local # filesystem, and adding file://username:password@src works - return "file://" + str(src.resolve()) + yield "file://" + str(src.resolve()) + + # Run this after the test is done (after yield) + (src / "main.tex").unlink() + repo.close() + _recursive_delete(src / ".git") def test_can_always_make_edits_to_overleaf_git_paper_remote(temporary_git_paper_repo): diff --git a/test_data/dummy_overleaf_git_project/main-0.tex b/test_data/dummy_overleaf_git_project/main-0.tex new file mode 100644 index 0000000..bf36842 --- /dev/null +++ b/test_data/dummy_overleaf_git_project/main-0.tex @@ -0,0 +1,10 @@ +% Preamble +\documentclass[11pt]{article} + +% Packages +\usepackage{amsmath} + +% Document +\begin{document} + +\end{document} \ No newline at end of file diff --git a/test_data/dummy_overleaf_git_project/main.tex b/test_data/dummy_overleaf_git_project/main-1.tex similarity index 100% rename from test_data/dummy_overleaf_git_project/main.tex rename to test_data/dummy_overleaf_git_project/main-1.tex diff --git a/test_data/dummy_overleaf_git_project/main-2.tex b/test_data/dummy_overleaf_git_project/main-2.tex new file mode 100644 index 0000000..81a5e5c --- /dev/null +++ b/test_data/dummy_overleaf_git_project/main-2.tex @@ -0,0 +1,29 @@ +% Preamble +\documentclass[11pt]{article} + +% Packages +\usepackage{amsmath} + +\title{The Quick Brown Fox Jumps Over the Lazy Dog} +\author{GPT-4} + +% Document +\begin{document} + +\section{Quick fox, lazy dog} + +In a world not quite like our own, where animals could speak and had societies akin to those of humans, there lived a +quick brown fox named Felix and a lazy dog named Dudley. The two were known across the vast expanses of the Woodlands, +but for entirely different reasons. Felix, agile and swift, was a famed courier, his russet fur a blur as he darted +through the forest, delivering messages and parcels from the crow's nest in the towering pines to the rabbit warrens in +the cool, dark earth. Dudley, on the other hand, was known for his unmatched languor, a dog who preferred sunbathing on +the riverbank to any form of physical exertion, his only movement being the occasional flick of his tail or turn of his +head. One day, a challenge arose from the animal kingdom — a test of speed and agility. Felix, eager to prove his +prowess, was quick to accept. The challenge was simple: he had to leap over Dudley, who was sprawled in his usual spot +by the river. The whole of the Woodlands gathered, curious critters watching as Felix took a running start. His muscles +coiled, and with a burst of speed, he jumped, his silhouette briefly blocking out the sun. Gasps echoed as he cleared +Dudley with room to spare, landing gracefully on the other side. That day, the quick brown fox didn't just jump over the + lazy dog, he soared above him, further cementing his reputation in the annals of Woodland's history, while Dudley + merely yawned, rolled over, and resumed his nap under the warm sun. + +\end{document} \ No newline at end of file From ec325055a69a455abc6348b0c6c89bccc568b55c Mon Sep 17 00:00:00 2001 From: wrongu Date: Tue, 20 Jun 2023 12:08:49 -0400 Subject: [PATCH 04/13] OverleafGitPaperRemote test passing --- .../paper_remote/OverleafGitPaperRemote.py | 32 ++++++--- .../test_OverleafGitPaperRemote.py | 65 ++++++++++++++++++- 2 files changed, 87 insertions(+), 10 deletions(-) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index fb1741c..d86ff84 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -12,22 +12,34 @@ from typing import Iterable import re -from llm4papers.models import EditTrigger, EditResult, EditType, DocumentID, RevisionID, LineRange +from llm4papers.models import ( + EditTrigger, + EditResult, + EditType, + DocumentID, + RevisionID, + LineRange, +) from llm4papers.paper_remote.MultiDocumentPaperRemote import MultiDocumentPaperRemote from llm4papers.logger import logger +diff_line_edit_re = re.compile( + r"@{2,}\s*-(?P\d+),(?P\d+)\s*\+(?P\d+),(?P\d+)\s*@{2,}" +) + + def _diff_to_ranges(diff: str) -> Iterable[LineRange]: """Given a git diff, return LineRange object(s) indicating which lines in the original document were changed. """ - diff_edit_re = r"@{2,}\s*-(?P\d+),(?P\d+)\s*\+\d+,\d+\s*@{2,}" - for match in re.finditer(diff_edit_re, diff): - git_line_number = int(match.group("line")) - git_line_count = int(match.group("count")) - # Git counts from 1 and gives (start, length). LineRange counts from 0 and gives - # (start, end). - yield (git_line_number - 1, git_line_number - 1 + git_line_count) + for match in diff_line_edit_re.finditer(diff): + git_line_number = int(match.group("new_line")) + git_line_count = int(match.group("new_count")) + # Git counts from 1 and gives (start, length), inclusive. LineRange counts from + # 0 and gives start:end indices (exclusive). + zero_index_start = git_line_number - 1 + yield zero_index_start, zero_index_start + git_line_count def _ranges_overlap(a: LineRange, b: LineRange) -> bool: @@ -213,7 +225,9 @@ def is_edit_ok(self, edit: EditTrigger) -> bool: # want to wait for the user to move on to the next line. for doc_range in edit.input_ranges + edit.output_ranges: repo_scoped_file = str(self._doc_id_to_path(doc_range.doc_id)) - if _too_close_to_human_edits(self._get_repo(), repo_scoped_file, doc_range.selection): + if _too_close_to_human_edits( + self._get_repo(), repo_scoped_file, doc_range.selection + ): logger.info( f"Temporarily skipping edit request in {doc_range.doc_id}" " at line {i} because it was still in progress" diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py index c85dcde..ad964cd 100644 --- a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -1,9 +1,10 @@ from .OverleafGitPaperRemote import OverleafGitPaperRemote -from ..models import EditTrigger, DocumentRange +from ..models import EditTrigger, EditResult, EditType, DocumentRange import pytest import git from pathlib import Path import shutil +import time def _recursive_delete(path: Path): @@ -44,6 +45,11 @@ def temporary_git_paper_repo(): if dst.exists(): _recursive_delete(dst) + # Set it so that the 'origin' repo (in test_data) does not have any branches + # checked out, allowing 'push' from the /tmp/ clone to work. Do this by checking + # out the current commit directly (detached HEAD state) + repo.git.checkout(repo.head.commit.hexsha) + # Use the file:// protocol explicitly so that the repo is "cloned" from the local # filesystem, and adding file://username:password@src works yield "file://" + str(src.resolve()) @@ -71,3 +77,60 @@ def test_can_always_make_edits_to_overleaf_git_paper_remote(temporary_git_paper_ request_text="Nothin'!", ) ) + + +def test_edit_ok_if_different_part_of_doc(temporary_git_paper_repo): + # Add a comment at the end. This doesn't overlap with anything in the document, so + # it should be immediately accepted. + remote = OverleafGitPaperRemote(temporary_git_paper_repo) + num_lines = len(remote.get_lines("main.tex")) + new_last_line = "\n% --- end of document ---\n" + end_of_doc_comment_edit = EditTrigger( + input_ranges=[ + DocumentRange( + doc_id="main.tex", revision_id=0, selection=(num_lines, num_lines + 1) + ) + ], + output_ranges=[ + DocumentRange( + doc_id="main.tex", revision_id=0, selection=(num_lines, num_lines + 1) + ) + ], + request_text=new_last_line, + ) + assert remote.is_edit_ok(end_of_doc_comment_edit) + remote.perform_edit( + EditResult( + type=EditType.replace, + range=end_of_doc_comment_edit.output_ranges[0], + content=new_last_line, + ) + ) + assert remote.get_lines("main.tex")[-1].strip() == new_last_line.strip() + + +def test_edit_reject_or_accept_given_delay(temporary_git_paper_repo): + """ + Confirm that edits are rejected if "human edits" happened within the last 10s, but + accepted if they happened more than 10s ago. + """ + remote = OverleafGitPaperRemote(temporary_git_paper_repo) + # Find the line where the "\title{}" is, and make an edit to it. + with open(remote._doc_id_to_path("main.tex"), "r") as f: + i_title = next(i for i, line in enumerate(f) if r"\title" in line) + title_edit = EditTrigger( + input_ranges=[ + DocumentRange( + doc_id="main.tex", revision_id=0, selection=(i_title, i_title + 1) + ) + ], + output_ranges=[ + DocumentRange( + doc_id="main.tex", revision_id=0, selection=(i_title, i_title + 1) + ) + ], + request_text=r"\title{A much snazzier title}\n", + ) + assert not remote.is_edit_ok(title_edit) + time.sleep(10) + assert remote.is_edit_ok(title_edit) From 13084866c4af159548a2b501b9faba07774aa38a Mon Sep 17 00:00:00 2001 From: wrongu Date: Tue, 20 Jun 2023 12:57:42 -0400 Subject: [PATCH 05/13] Handling edits' revision_id in OverleafGitPaperRemote. Snazzy `with paper.rewind(commit_id)` syntax. --- .../paper_remote/OverleafGitPaperRemote.py | 53 ++++++++---- .../test_OverleafGitPaperRemote.py | 82 +++++++++++++++++-- 2 files changed, 110 insertions(+), 25 deletions(-) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index d86ff84..271b8f7 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -110,8 +110,6 @@ class OverleafGitPaperRemote(MultiDocumentPaperRemote): PaperRemote protocol for use by the AI editor. """ - current_revision_id: RevisionID - def __init__(self, git_cached_repo: str): """ Saves the git repo to a local temporary directory using gitpython. @@ -126,6 +124,10 @@ def __init__(self, git_cached_repo: str): self._cached_repo: Repo | None = None self.refresh() + @property + def current_revision_id(self): + return self._get_repo().head.commit.hexsha + def _get_repo(self) -> Repo: if self._cached_repo is None: # TODO - this makes me anxious about race conditions. every time we refresh, @@ -169,7 +171,6 @@ def refresh(self, retry: bool = True): f"Latest change at {self._get_repo().head.commit.committed_datetime}" ) logger.info(f"Repo dirty: {self._get_repo().is_dirty()}") - self.current_revision_id = self._get_repo().head.commit.hexsha try: self._get_repo().git.stash("pop") except Exception as e: @@ -253,22 +254,15 @@ def perform_edit(self, edit: EditResult) -> bool: """ logger.info(f"Performing edit {edit} on remote {self._reposlug}") - if edit.type == EditType.replace: - success = self._perform_replace(edit) - elif edit.type == EditType.comment: - success = self._perform_comment(edit) - else: - raise ValueError(f"Unknown edit type {edit.type}") + with self.rewind(edit.range.revision_id, message="AI edit") as paper: + if edit.type == EditType.replace: + success = paper._perform_replace(edit) + elif edit.type == EditType.comment: + success = paper._perform_comment(edit) + else: + raise ValueError(f"Unknown edit type {edit.type}") if success: - # TODO - apply edit relative to the edit.range.revision_id commit and then - # rebase onto HEAD for poor-man's operational transforms - self._get_repo().index.add([self._doc_id_to_path(str(edit.range.doc_id))]) - self._get_repo().index.commit("AI edit completed.") - # Instead of just pushing, we need to rebase and then push. - # This is because we want to make sure that the AI edits are always - # on top of the stack. - self._get_repo().git.pull() # TODO: We could do a better job catching WARNs here and then maybe setting # success = False self._get_repo().git.push() @@ -314,3 +308,28 @@ def _perform_comment(self, edit: EditResult) -> bool: # TODO - implement this for real logger.info(f"Performing comment edit {edit} on remote {self._reposlug}") return True + + def rewind(self, commit: str, message: str): + return self.RewindContext(self, commit, message) + + # Create an inner class for "with" semantics so that we can do + # `with remote.rewind(commit)` to rewind to a particular commit and play some edits + # onto it, then merge when the 'with' context exits. + class RewindContext: + def __init__(self, remote: "OverleafGitPaperRemote", commit: str, message: str): + self._remote = remote + self._message = message + self._rewind_commit = commit + self._restore_branch = remote._get_repo().active_branch + + def __enter__(self): + self._remote._get_repo().git.checkout(self._rewind_commit) + self._remote._get_repo().git.checkout(b="tmp-edit-branch") + return self._remote + + def __exit__(self, exc_type, exc_val, exc_tb): + self._remote._get_repo().git.add(all=True) + self._remote._get_repo().index.commit(self._message) + self._remote._get_repo().git.checkout(self._restore_branch.name) + self._remote._get_repo().git.merge("tmp-edit-branch") + self._remote._get_repo().git.branch("-D", "tmp-edit-branch") diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py index ad964cd..3848844 100644 --- a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -60,6 +60,55 @@ def temporary_git_paper_repo(): _recursive_delete(src / ".git") +def test_merge_resolution_from_revision_id(temporary_git_paper_repo): + remote = OverleafGitPaperRemote(temporary_git_paper_repo) + # Test that we can make *two* edits to different parts of the file by rebasing + # the second edit onto the first one. + + # First, add a line early in the paper (after \section{} before text) + lines = remote.get_lines("main.tex") + i_section = next(i for i, line in enumerate(lines) if r"\section" in line) + i_end_doc = next(i for i, line in enumerate(lines) if r"\end{document}" in line) + + edit1 = EditResult( + type=EditType.replace, + range=DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(i_section + 2, i_section + 2), + ), + content="% This is a comment\n% that spans\n% multiple lines\n", + ) + + # Second, add some stuff at the end of the paper *after* the \end{document}, using + # the same revision ID as the first edit. Since the first edit spans multiple lines, + # this fails unless we rebase the second edit onto the first one. + edit2 = EditResult( + type=EditType.replace, + range=DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(i_end_doc + 1, i_end_doc + 1), + ), + content="\n% This is a comment that should appear AFTER the end-document\n", + ) + + # Perform the two edits in succession + remote.perform_edit(edit1) + remote.perform_edit(edit2) + + # Confirm that both edits happened + lines = remote.get_lines("main.tex") + assert any("multiple lines" in line for line in lines) + assert any("appear AFTER" in line for line in lines) + + # Confirm that the second edit was rebased onto the first one by checking that the + # comment did indeed appear after the \end{document} + i_end_doc_2 = next(i for i, line in enumerate(lines) if r"\end{document}" in line) + i_second_edit = next(i for i, line in enumerate(lines) if "appear AFTER" in line) + assert i_second_edit > i_end_doc_2 + + def test_can_always_make_edits_to_overleaf_git_paper_remote(temporary_git_paper_repo): """ Confirm that edits can always be made to an in-memory paper remote. @@ -69,10 +118,18 @@ def test_can_always_make_edits_to_overleaf_git_paper_remote(temporary_git_paper_ assert remote.is_edit_ok( EditTrigger( input_ranges=[ - DocumentRange(doc_id="main.tex", revision_id=0, selection=(0, 1)) + DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(0, 1), + ) ], output_ranges=[ - DocumentRange(doc_id="main.tex", revision_id=0, selection=(0, 1)) + DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(0, 1), + ) ], request_text="Nothin'!", ) @@ -88,12 +145,16 @@ def test_edit_ok_if_different_part_of_doc(temporary_git_paper_repo): end_of_doc_comment_edit = EditTrigger( input_ranges=[ DocumentRange( - doc_id="main.tex", revision_id=0, selection=(num_lines, num_lines + 1) + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(num_lines, num_lines + 1), ) ], output_ranges=[ DocumentRange( - doc_id="main.tex", revision_id=0, selection=(num_lines, num_lines + 1) + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(num_lines, num_lines + 1), ) ], request_text=new_last_line, @@ -116,17 +177,22 @@ def test_edit_reject_or_accept_given_delay(temporary_git_paper_repo): """ remote = OverleafGitPaperRemote(temporary_git_paper_repo) # Find the line where the "\title{}" is, and make an edit to it. - with open(remote._doc_id_to_path("main.tex"), "r") as f: - i_title = next(i for i, line in enumerate(f) if r"\title" in line) + lines = remote.get_lines("main.tex") + i_title = next(i for i, line in enumerate(lines) if r"\title" in line) + title_edit = EditTrigger( input_ranges=[ DocumentRange( - doc_id="main.tex", revision_id=0, selection=(i_title, i_title + 1) + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(i_title, i_title + 1), ) ], output_ranges=[ DocumentRange( - doc_id="main.tex", revision_id=0, selection=(i_title, i_title + 1) + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(i_title, i_title + 1), ) ], request_text=r"\title{A much snazzier title}\n", From f8c5815486a3a4bc831a4702b541729b69e7f6ee Mon Sep 17 00:00:00 2001 From: wrongu Date: Tue, 20 Jun 2023 15:31:09 -0400 Subject: [PATCH 06/13] tryna fix github tests that pass locally --- llm4papers/paper_remote/OverleafGitPaperRemote.py | 2 +- llm4papers/paper_remote/test_OverleafGitPaperRemote.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 271b8f7..23767f4 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -125,7 +125,7 @@ def __init__(self, git_cached_repo: str): self.refresh() @property - def current_revision_id(self): + def current_revision_id(self) -> RevisionID: return self._get_repo().head.commit.hexsha def _get_repo(self) -> Repo: diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py index 3848844..acde47c 100644 --- a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -28,10 +28,15 @@ def temporary_git_paper_repo(): if git_root.exists(): _recursive_delete(git_root) + # Initialize src as a git repo and add user config to prevent git from complaining + # about missing user info. + repo = git.Repo.init(src, mkdir=False) + repo.config_writer().set_value("user", "name", "dummy").release() + repo.config_writer().set_value("user", "email", "dummy@agi.ai").release() + # Create initial commit with (mostly empty) main.tex file, then add content to it by # copying main-1.tex, main-2.tex, etc to main.tex. The purpose of this is to ensure # that the git history has a few commits. - repo = git.Repo.init(src, mkdir=False) i = 0 while (file := src / f"main-{i}.tex").exists(): shutil.copyfile(file, src / "main.tex") From b4cfa65969ccd00373058a3e928b8790ea354578 Mon Sep 17 00:00:00 2001 From: wrongu Date: Tue, 20 Jun 2023 15:48:44 -0400 Subject: [PATCH 07/13] testing (+fixes) for graceful recovery from merge conflict --- .../paper_remote/OverleafGitPaperRemote.py | 39 +++++++++------ .../test_OverleafGitPaperRemote.py | 49 +++++++++++++++++++ 2 files changed, 74 insertions(+), 14 deletions(-) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 23767f4..601cfbb 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -8,7 +8,7 @@ import shutil import datetime from urllib.parse import quote -from git import Repo # type: ignore +from git import Repo, GitCommandError # type: ignore from typing import Iterable import re @@ -173,12 +173,12 @@ def refresh(self, retry: bool = True): logger.info(f"Repo dirty: {self._get_repo().is_dirty()}") try: self._get_repo().git.stash("pop") - except Exception as e: + except GitCommandError as e: # TODO: this just means there was nothing to pop, but # we should handle this more gracefully. logger.debug(f"Nothing to pop: {e}") pass - except Exception as e: + except GitCommandError as e: logger.error( f"Error pulling from repo {self._reposlug}: {e}. " "Falling back on DESTRUCTION!!!" @@ -254,18 +254,24 @@ def perform_edit(self, edit: EditResult) -> bool: """ logger.info(f"Performing edit {edit} on remote {self._reposlug}") - with self.rewind(edit.range.revision_id, message="AI edit") as paper: - if edit.type == EditType.replace: - success = paper._perform_replace(edit) - elif edit.type == EditType.comment: - success = paper._perform_comment(edit) - else: - raise ValueError(f"Unknown edit type {edit.type}") + try: + with self.rewind(edit.range.revision_id, message="AI edit") as paper: + if edit.type == EditType.replace: + success = paper._perform_replace(edit) + elif edit.type == EditType.comment: + success = paper._perform_comment(edit) + else: + raise ValueError(f"Unknown edit type {edit.type}") + except GitCommandError as e: + logger.error( + f"Git error performing edit {edit} on remote {self._reposlug}: {e}" + ) + success = False if success: - # TODO: We could do a better job catching WARNs here and then maybe setting - # success = False self._get_repo().git.push() + else: + self.refresh() return success @@ -331,5 +337,10 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._remote._get_repo().git.add(all=True) self._remote._get_repo().index.commit(self._message) self._remote._get_repo().git.checkout(self._restore_branch.name) - self._remote._get_repo().git.merge("tmp-edit-branch") - self._remote._get_repo().git.branch("-D", "tmp-edit-branch") + try: + self._remote._get_repo().git.merge("tmp-edit-branch") + except GitCommandError as e: + self._remote._get_repo().git.merge("--abort") + raise e + finally: + self._remote._get_repo().git.branch("-D", "tmp-edit-branch") diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py index acde47c..3d09707 100644 --- a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -114,6 +114,55 @@ def test_merge_resolution_from_revision_id(temporary_git_paper_repo): assert i_second_edit > i_end_doc_2 +def test_recover_gracefully_from_merge_conflict(temporary_git_paper_repo): + remote = OverleafGitPaperRemote(temporary_git_paper_repo) + + good_edit = EditResult( + type=EditType.replace, + range=DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(20, 21), + ), + content="Hey look\nI'm overwriting\nsome stuff\nin the middle of the\ndoc.\n", + ) + + conflict_edit = EditResult( + type=EditType.replace, + range=DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(20, 22), + ), + content="Hey so am I!\n", + ) + + i_end_doc = len(remote.get_lines("main.tex")) - 1 + good_edit_2 = EditResult( + type=EditType.replace, + range=DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(i_end_doc + 1, i_end_doc + 1), + ), + content="\nThis line should appear somewhere at the end\n", + ) + + # Test that we can do good/conflict/good and get the two good edits to succeed + # while the conflict edit fails silently (logged but not raised) + remote.perform_edit(good_edit) + remote.perform_edit(conflict_edit) + remote.perform_edit(good_edit_2) + + lines = remote.get_lines("main.tex") + # Assert good_edit went through + assert any("I'm overwriting" in line for line in lines) + # Assert conflict_edit did not go through + assert not any("Hey so am I!" in line for line in lines) + # Assert good_edit_2 went through + assert any("This line should appear" in line for line in lines) + + def test_can_always_make_edits_to_overleaf_git_paper_remote(temporary_git_paper_repo): """ Confirm that edits can always be made to an in-memory paper remote. From 2036d283ef5ae2ae1677fc3c7d86e1ff66fafe4c Mon Sep 17 00:00:00 2001 From: wrongu Date: Tue, 20 Jun 2023 15:55:18 -0400 Subject: [PATCH 08/13] testing (+fixes) for graceful recovery from bad EditResult objects --- .../paper_remote/OverleafGitPaperRemote.py | 3 ++ .../test_OverleafGitPaperRemote.py | 39 ++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 601cfbb..45e8705 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -287,6 +287,9 @@ def _perform_replace(self, edit: EditResult) -> bool: """ doc_range, text = edit.range, edit.content try: + num_lines = len(self.get_lines(doc_range.doc_id)) + if any(i < 0 for i in doc_range.selection) or doc_range.selection[1] < doc_range.selection[0] or any(i > len(self.get_lines(doc_range.doc_id)) for i in doc_range.selection): + raise IndexError(f"Invalid selection {doc_range.selection} for document {doc_range.doc_id} with {num_lines} lines.") lines = self.get_lines(doc_range.doc_id) lines = ( lines[: doc_range.selection[0]] diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py index 3d09707..14e5021 100644 --- a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -163,6 +163,41 @@ def test_recover_gracefully_from_merge_conflict(temporary_git_paper_repo): assert any("This line should appear" in line for line in lines) +def test_recover_gracefully_from_bad_edit(temporary_git_paper_repo): + remote = OverleafGitPaperRemote(temporary_git_paper_repo) + + good_edit = EditResult( + type=EditType.replace, + range=DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(20, 21), + ), + content="Hey look\nI'm overwriting\nsome stuff\nin the middle of the\ndoc.\n", + ) + + # Edit is bad because file is far fewer than 100 lines long + bad_edit = EditResult( + type=EditType.replace, + range=DocumentRange( + doc_id="main.tex", + revision_id=remote.current_revision_id, + selection=(100, 101), + ), + content="Hey so am I!\n", + ) + + # Test that we can do bad/good and get just the good edit to succeed + remote.perform_edit(bad_edit) + remote.perform_edit(good_edit) + + lines = remote.get_lines("main.tex") + # Assert good_edit went through + assert any("I'm overwriting" in line for line in lines) + # Assert conflict_edit did not go through + assert not any("Hey so am I!" in line for line in lines) + + def test_can_always_make_edits_to_overleaf_git_paper_remote(temporary_git_paper_repo): """ Confirm that edits can always be made to an in-memory paper remote. @@ -201,14 +236,14 @@ def test_edit_ok_if_different_part_of_doc(temporary_git_paper_repo): DocumentRange( doc_id="main.tex", revision_id=remote.current_revision_id, - selection=(num_lines, num_lines + 1), + selection=(num_lines, num_lines), ) ], output_ranges=[ DocumentRange( doc_id="main.tex", revision_id=remote.current_revision_id, - selection=(num_lines, num_lines + 1), + selection=(num_lines, num_lines), ) ], request_text=new_last_line, From 870ba6f4adb0136bad0e40350fedf77bcddb06ab Mon Sep 17 00:00:00 2001 From: wrongu Date: Tue, 20 Jun 2023 15:56:05 -0400 Subject: [PATCH 09/13] black + ruff formatting --- .../paper_remote/OverleafGitPaperRemote.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 45e8705..3eecc87 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -288,8 +288,18 @@ def _perform_replace(self, edit: EditResult) -> bool: doc_range, text = edit.range, edit.content try: num_lines = len(self.get_lines(doc_range.doc_id)) - if any(i < 0 for i in doc_range.selection) or doc_range.selection[1] < doc_range.selection[0] or any(i > len(self.get_lines(doc_range.doc_id)) for i in doc_range.selection): - raise IndexError(f"Invalid selection {doc_range.selection} for document {doc_range.doc_id} with {num_lines} lines.") + if ( + any(i < 0 for i in doc_range.selection) + or doc_range.selection[1] < doc_range.selection[0] + or any( + i > len(self.get_lines(doc_range.doc_id)) + for i in doc_range.selection + ) + ): + raise IndexError( + f"Invalid selection {doc_range.selection} for document " + f"{doc_range.doc_id} with {num_lines} lines." + ) lines = self.get_lines(doc_range.doc_id) lines = ( lines[: doc_range.selection[0]] @@ -332,13 +342,17 @@ def __init__(self, remote: "OverleafGitPaperRemote", commit: str, message: str): self._restore_branch = remote._get_repo().active_branch def __enter__(self): + # TODO is there a more gitpython-ic way to do this? self._remote._get_repo().git.checkout(self._rewind_commit) self._remote._get_repo().git.checkout(b="tmp-edit-branch") return self._remote def __exit__(self, exc_type, exc_val, exc_tb): + # TODO - add only modified files not all files self._remote._get_repo().git.add(all=True) self._remote._get_repo().index.commit(self._message) + # TODO - Using branch name here feels like a hack. Can we checkout the + # branch 'object'? self._remote._get_repo().git.checkout(self._restore_branch.name) try: self._remote._get_repo().git.merge("tmp-edit-branch") From 47669e1047ade7eb1a098a2e5482765b2356666b Mon Sep 17 00:00:00 2001 From: wrongu Date: Tue, 20 Jun 2023 16:00:49 -0400 Subject: [PATCH 10/13] comment to future us about local vs GH actions --- llm4papers/paper_remote/OverleafGitPaperRemote.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 3eecc87..5911de7 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -357,6 +357,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): try: self._remote._get_repo().git.merge("tmp-edit-branch") except GitCommandError as e: + # TODO - why does this line fail on GH actions but pass locally? self._remote._get_repo().git.merge("--abort") raise e finally: From bf31df80d6605f890b53875895ce9269c3f49c6a Mon Sep 17 00:00:00 2001 From: wrongu Date: Wed, 21 Jun 2023 11:00:01 -0400 Subject: [PATCH 11/13] improved gitpython merging... but still could be better using IndexFile --- .../paper_remote/OverleafGitPaperRemote.py | 41 ++++++++++++------- .../test_OverleafGitPaperRemote.py | 14 ++++++- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 5911de7..2f8dd9c 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -73,7 +73,9 @@ def _too_close_to_human_edits( logger.info(f"Last commit was {sec_since_last_commit}s ago, approving edit.") return False - # Get the diff for HEAD~n: + # Get the diff for HEAD~n. Note that the gitpython DiffIndex and Diff objects drop the line number info (!) so we + # can't use the gitpython object-oriented API to do this. Calling repo.git.diff is pretty much a direct pass-through + # to running "git diff HEAD~n -- " on the command line. total_diff = repo.git.diff(f"HEAD~{last_n}", filename, unified=0) for git_line_range in _diff_to_ranges(total_diff): @@ -252,6 +254,10 @@ def perform_edit(self, edit: EditResult) -> bool: Returns: True if the edit was successful, False otherwise """ + if not self._doc_id_to_path(edit.range.doc_id).exists(): + logger.error(f"Document {edit.range.doc_id} does not exist.") + return False + logger.info(f"Performing edit {edit} on remote {self._reposlug}") try: @@ -335,30 +341,35 @@ def rewind(self, commit: str, message: str): # `with remote.rewind(commit)` to rewind to a particular commit and play some edits # onto it, then merge when the 'with' context exits. class RewindContext: + # TODO - there are tricks in gitpython where an IndexFile can be used to handle changes to files in-memory + # without having to call checkout() and (briefly) modify the state of things on disk. This would be an + # improvement, but would require using the gitpython API more directly inside of perform_edit, + # such as calling git.IndexFile.write() instead of python's open() and write() + def __init__(self, remote: "OverleafGitPaperRemote", commit: str, message: str): self._remote = remote self._message = message self._rewind_commit = commit - self._restore_branch = remote._get_repo().active_branch def __enter__(self): - # TODO is there a more gitpython-ic way to do this? - self._remote._get_repo().git.checkout(self._rewind_commit) - self._remote._get_repo().git.checkout(b="tmp-edit-branch") + repo = self._remote._get_repo() + self._restore_ref = repo.head.ref + self._new_branch = repo.create_head("tmp-edit-branch", commit=self._rewind_commit) + self._new_branch.checkout() return self._remote def __exit__(self, exc_type, exc_val, exc_tb): - # TODO - add only modified files not all files - self._remote._get_repo().git.add(all=True) - self._remote._get_repo().index.commit(self._message) - # TODO - Using branch name here feels like a hack. Can we checkout the - # branch 'object'? - self._remote._get_repo().git.checkout(self._restore_branch.name) + repo = self._remote._get_repo() + assert repo.active_branch == self._new_branch, "Branch changed unexpectedly mid-`with`" + # Add files that changed + repo.index.add([_file for (_file, _), _ in repo.index.entries.items()]) + repo.index.commit(self._message) + self._restore_ref.checkout() try: - self._remote._get_repo().git.merge("tmp-edit-branch") + repo.git.merge("tmp-edit-branch") except GitCommandError as e: - # TODO - why does this line fail on GH actions but pass locally? - self._remote._get_repo().git.merge("--abort") + # Hard reset on failure + repo.git.reset("--hard", self._restore_ref.commit.hexsha) raise e finally: - self._remote._get_repo().git.branch("-D", "tmp-edit-branch") + repo.delete_head(self._new_branch, force=True) diff --git a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py index 14e5021..58c5eb5 100644 --- a/llm4papers/paper_remote/test_OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/test_OverleafGitPaperRemote.py @@ -187,8 +187,20 @@ def test_recover_gracefully_from_bad_edit(temporary_git_paper_repo): content="Hey so am I!\n", ) - # Test that we can do bad/good and get just the good edit to succeed + # Edit is bad because file "dne.tex" does not exist + bad_edit_2 = EditResult( + type=EditType.replace, + range=DocumentRange( + doc_id="dne.tex", + revision_id=remote.current_revision_id, + selection=(0, 1), + ), + content="Blah blah blah\n", + ) + + # Test that we can do bad/bad/good and get just the good edit to succeed remote.perform_edit(bad_edit) + remote.perform_edit(bad_edit_2) remote.perform_edit(good_edit) lines = remote.get_lines("main.tex") From 7e27b453ad5d9921e4fd63a491ce040b5f1ac8b8 Mon Sep 17 00:00:00 2001 From: wrongu Date: Wed, 21 Jun 2023 11:14:23 -0400 Subject: [PATCH 12/13] attempting to fit missing-git-author issue --- .../paper_remote/OverleafGitPaperRemote.py | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 2f8dd9c..67210bd 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -8,7 +8,7 @@ import shutil import datetime from urllib.parse import quote -from git import Repo, GitCommandError # type: ignore +from git import Repo, GitCommandError, Actor # type: ignore from typing import Iterable import re @@ -73,9 +73,10 @@ def _too_close_to_human_edits( logger.info(f"Last commit was {sec_since_last_commit}s ago, approving edit.") return False - # Get the diff for HEAD~n. Note that the gitpython DiffIndex and Diff objects drop the line number info (!) so we - # can't use the gitpython object-oriented API to do this. Calling repo.git.diff is pretty much a direct pass-through - # to running "git diff HEAD~n -- " on the command line. + # Get the diff for HEAD~n. Note that the gitpython DiffIndex and Diff objects + # drop the line number info (!) so we can't use the gitpython object-oriented API + # to do this. Calling repo.git.diff is pretty much a direct pass-through to + # running "git diff HEAD~n -- " on the command line. total_diff = repo.git.diff(f"HEAD~{last_n}", filename, unified=0) for git_line_range in _diff_to_ranges(total_diff): @@ -105,6 +106,16 @@ def _add_auth(uri: str): return uri +def _git_author() -> Actor: + try: + from llm4papers.config import OverleafConfig + + return Actor(name="AI Editor", email=OverleafConfig().username) + except ImportError: + logger.debug("No config file found, assuming public repo.") + return Actor(name="AI Editor", email="unknown@identity.com") + + class OverleafGitPaperRemote(MultiDocumentPaperRemote): """ Overleaf exposes a git remote for each project. This class handles reading @@ -341,29 +352,36 @@ def rewind(self, commit: str, message: str): # `with remote.rewind(commit)` to rewind to a particular commit and play some edits # onto it, then merge when the 'with' context exits. class RewindContext: - # TODO - there are tricks in gitpython where an IndexFile can be used to handle changes to files in-memory - # without having to call checkout() and (briefly) modify the state of things on disk. This would be an - # improvement, but would require using the gitpython API more directly inside of perform_edit, - # such as calling git.IndexFile.write() instead of python's open() and write() + # TODO - there are tricks in gitpython where an IndexFile can be used to + # handle changes to files in-memory without having to call checkout() and + # (briefly) modify the state of things on disk. This would be an improvement, + # but would require using the gitpython API more directly inside of + # perform_edit, such as calling git.IndexFile.write() instead of python's + # open() and write() def __init__(self, remote: "OverleafGitPaperRemote", commit: str, message: str): self._remote = remote self._message = message self._rewind_commit = commit + self._auth = _git_author() def __enter__(self): repo = self._remote._get_repo() self._restore_ref = repo.head.ref - self._new_branch = repo.create_head("tmp-edit-branch", commit=self._rewind_commit) + self._new_branch = repo.create_head( + "tmp-edit-branch", commit=self._rewind_commit + ) self._new_branch.checkout() return self._remote def __exit__(self, exc_type, exc_val, exc_tb): repo = self._remote._get_repo() - assert repo.active_branch == self._new_branch, "Branch changed unexpectedly mid-`with`" + assert ( + repo.active_branch == self._new_branch + ), "Branch changed unexpectedly mid-`with`" # Add files that changed repo.index.add([_file for (_file, _), _ in repo.index.entries.items()]) - repo.index.commit(self._message) + repo.index.commit(self._message, author=self._auth, committer=self._auth) self._restore_ref.checkout() try: repo.git.merge("tmp-edit-branch") From 8c86dd548f004c1c34b721d93c350893f2f12b7e Mon Sep 17 00:00:00 2001 From: wrongu Date: Wed, 21 Jun 2023 14:57:51 -0400 Subject: [PATCH 13/13] attempting again to fix missing-git-author issue --- llm4papers/config.example.py | 4 ++++ .../paper_remote/OverleafGitPaperRemote.py | 17 +++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/llm4papers/config.example.py b/llm4papers/config.example.py index f93382d..416f254 100644 --- a/llm4papers/config.example.py +++ b/llm4papers/config.example.py @@ -7,8 +7,12 @@ class OpenAIConfig(BaseSettings): class OverleafConfig(BaseSettings): + # Username and password for logging into your overleaf account username: str = "###" password: str = "###" + # Author name and email that should appear in git history + git_name: str = "AI assistant" + git_email: str = "dummy@agi.ai" class Settings(BaseSettings): diff --git a/llm4papers/paper_remote/OverleafGitPaperRemote.py b/llm4papers/paper_remote/OverleafGitPaperRemote.py index 67210bd..0323572 100644 --- a/llm4papers/paper_remote/OverleafGitPaperRemote.py +++ b/llm4papers/paper_remote/OverleafGitPaperRemote.py @@ -8,7 +8,7 @@ import shutil import datetime from urllib.parse import quote -from git import Repo, GitCommandError, Actor # type: ignore +from git import Repo, GitCommandError # type: ignore from typing import Iterable import re @@ -106,14 +106,19 @@ def _add_auth(uri: str): return uri -def _git_author() -> Actor: +def _add_git_user_from_config(repo: Repo) -> None: try: from llm4papers.config import OverleafConfig - return Actor(name="AI Editor", email=OverleafConfig().username) + config = OverleafConfig() + repo.config_writer().set_value("user", "name", config.git_name).release() + repo.config_writer().set_value("user", "email", config.git_email).release() except ImportError: logger.debug("No config file found, assuming public repo.") - return Actor(name="AI Editor", email="unknown@identity.com") + repo.config_writer().set_value("user", "name", "no-config").release() + repo.config_writer().set_value( + "user", "email", "no-config@placeholder.com" + ).release() class OverleafGitPaperRemote(MultiDocumentPaperRemote): @@ -175,6 +180,7 @@ def refresh(self, retry: bool = True): ) self._cached_repo = Repo(f"/tmp/{self._reposlug}") + _add_git_user_from_config(self._cached_repo) logger.info(f"Pulling latest from repo {self._reposlug}.") try: @@ -363,7 +369,6 @@ def __init__(self, remote: "OverleafGitPaperRemote", commit: str, message: str): self._remote = remote self._message = message self._rewind_commit = commit - self._auth = _git_author() def __enter__(self): repo = self._remote._get_repo() @@ -381,7 +386,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): ), "Branch changed unexpectedly mid-`with`" # Add files that changed repo.index.add([_file for (_file, _), _ in repo.index.entries.items()]) - repo.index.commit(self._message, author=self._auth, committer=self._auth) + repo.index.commit(self._message) self._restore_ref.checkout() try: repo.git.merge("tmp-edit-branch")