Merge branch 'main' into main

hackclub · Dec 19, 2023 · f72aeca · f72aeca
2 parents 0ae3d1c + b802ae9
commit f72aeca
Show file tree

Hide file tree

Showing 9 changed files with 963 additions and 61 deletions.
diff --git a/.github/scripts/extract_percentages.py b/.github/scripts/extract_percentages.py
@@ -1,43 +1,52 @@
 from bs4 import BeautifulSoup
 import os
 import sys
+import time
+
+def log(message):
+    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+    print(f"[{timestamp}] {message}")
 
 def extract_similarity_percentage(html_file):
-    with open(html_file, 'r', encoding='utf-8') as file:
-        soup = BeautifulSoup(file, 'html.parser')
-        file_name_tag = soup.select_one("#textright > div > h4")
-        if file_name_tag:
-            percentage_text = file_name_tag.find("span", class_="text-secondary small").text.strip("()%")
-            return int(percentage_text)
-        else:
-            return None
+    try:
+        with open(html_file, 'r', encoding='utf-8') as file:
+            soup = BeautifulSoup(file, 'html.parser')
+            file_name_tag = soup.select_one("#textright > div > h4")
+            if file_name_tag:
+                percentage_text = file_name_tag.find("span", class_="text-secondary small").text.strip("()%")
+                return int(percentage_text)
+            else:
+                return None
+    except Exception as e:
+        log(f"Error processing file {html_file}: {e}")
+        return None
 
-def process_html_files(directory, threshold=10):
-    results = {}
+def process_html_files(directory, threshold=50):
+    log("Processing HTML files for plagiarism results...")
+    high_plagiarism_detected = False
     for filename in os.listdir(directory):
         if filename.endswith(".html"):
             file_path = os.path.join(directory, filename)
             percentage = extract_similarity_percentage(file_path)
-            if percentage is not None:
-                results[filename.replace('.html', '.js')] = percentage
-
-    filtered_sorted_results = sorted(
-        ((file, percent) for file, percent in results.items() if percent >= threshold),
-        key=lambda x: x[1], reverse=True
-    )
+            if percentage is not None and percentage >= threshold:
+                log(f"High plagiarism detected - {filename.replace('.html', '.js')}: {percentage}%")
+                high_plagiarism_detected = True
 
-    with open('plagiarism_results.txt', 'w') as output_file:
-        output_file.write("\nFiltered and Sorted Results (Above 10%):\n")
-        for file, percent in filtered_sorted_results:
-            output_file.write(f"{file}: {percent}%\n")
+    return high_plagiarism_detected
 
 def main():
     if len(sys.argv) != 2:
+        log("Incorrect number of arguments provided.")
         print("Usage: python extract_percentages.py <saved_dir_path>")
         sys.exit(1)
 
     saved_dir_path = sys.argv[1]
-    process_html_files(saved_dir_path)
+    log(f"Received saved directory path: {saved_dir_path}")
+    if process_html_files(saved_dir_path):
+        log("High plagiarism percentages detected.")
+        sys.exit(1)
+    else:
+        log("No high plagiarism percentages detected.")
 
 if __name__ == "__main__":
     main()
diff --git a/.github/scripts/plagiarism_check.py b/.github/scripts/plagiarism_check.py
@@ -3,11 +3,17 @@
 import os
 import glob
 import shutil
+import time
+
+def log(message):
+    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+    print(f"[{timestamp}] {message}")
 
 def run_compare50(single_file, directory, output_dir, saved_dir_base):
     try:
         if not os.path.exists(saved_dir_base):
             os.makedirs(saved_dir_base)
+            log("Created base directory for saved files.")
 
         all_js_files = glob.glob(os.path.join(directory, "*.js"))
         total_files = len(all_js_files)
@@ -16,38 +22,46 @@ def run_compare50(single_file, directory, output_dir, saved_dir_base):
         for file in all_js_files:
             current_file_number += 1
             if os.path.abspath(file) == os.path.abspath(single_file):
+                log(f"Skipping comparison for the same file: {file}")
                 continue
 
-            print(f"Processing file {current_file_number} of {total_files}: {file}")
+            log(f"Processing file {current_file_number} of {total_files}: {file}")
             if os.path.exists(output_dir):
                 shutil.rmtree(output_dir)
+                log(f"Cleaned existing output directory: {output_dir}")
 
             command = [
                 "compare50",
-                single_file,
-                file,
-                "--output", output_dir,
+                f'"{single_file}"',
+                f'"{file}"',
+                "--output", f'"{output_dir}"',
                 "--max-file-size", str(1024 * 1024 * 100),
                 "--passes", "text"
             ]
 
-            subprocess.run(command, check=True)
+            command_str = ' '.join(command)
+            log(f"Running command: {command_str}")
+            subprocess.run(command_str, shell=True, check=True)
+            log("Compare50 command executed successfully.")
 
             match_file = os.path.join(output_dir, "match_1.html")
 
             if os.path.exists(match_file):
                 new_filename = os.path.basename(file).replace('.js', '.html')
                 saved_file_path = os.path.join(saved_dir_base, new_filename)
-                print(f"Moving {match_file} to {saved_file_path}")
+                log(f"Match found. Moving {match_file} to {saved_file_path}")
                 shutil.move(match_file, saved_file_path)
+            else:
+                log(f"No match found for file: {file}")
 
     except subprocess.CalledProcessError as e:
-        print("Error in running Compare50:", e)
+        log(f"Error in running Compare50: {e}")
     except Exception as e:
-        print(f"An error occurred: {e}")
+        log(f"An error occurred: {e}")
 
 def main():
     if len(sys.argv) != 5:
+        log("Incorrect number of arguments provided.")
         print("Usage: python plagiarism_check.py <single_file> <directory> <output_dir> <saved_dir_base>")
         sys.exit(1)
 
@@ -56,7 +70,18 @@ def main():
     output_dir = sys.argv[3]
     saved_dir_base = sys.argv[4]
 
+    log(f"Starting plagiarism check with the following arguments:")
+    log(f"Single file: {single_file}")
+    log(f"Directory: {directory}")
+    log(f"Output directory: {output_dir}")
+    log(f"Saved directory base: {saved_dir_base}")
+
+    log(f"Listing all JavaScript files in directory '{directory}':")
+    for f in glob.glob(os.path.join(directory, "*.js")):
+        log(f)
+
     run_compare50(single_file, directory, output_dir, saved_dir_base)
+    log("Plagiarism check completed.")
 
 if __name__ == "__main__":
     main()
diff --git a/.github/workflows/check_plagiarism.yml b/.github/workflows/check_plagiarism.yml
@@ -19,6 +19,7 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: '3.10'
+
       - name: Install Compare50 && beautifulsoup4
         run: pip install compare50 beautifulsoup4
 
@@ -29,38 +30,21 @@ jobs:
           head_sha="${{ github.event.pull_request.head.sha }}"
           js_files=$(git diff --name-only --diff-filter=AM $base_sha..$head_sha | grep 'games/.*\.js$' | xargs)
           echo "FILES=$js_files" >> $GITHUB_ENV
-
+          
       - name: Run Plagiarism Detection Script
         run: python .github/scripts/plagiarism_check.py ${{ env.FILES }} games output_dir saved_dir
 
       - name: Extract and Display Similarity Percentages
         run: python .github/scripts/extract_percentages.py saved_dir/
-
-      - name: Post Plagiarism Results Comment
-        if: success()
-        uses: actions/github-script@v7
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            const fs = require('fs');
-            const output = fs.readFileSync('plagiarism_results.txt', 'utf8');
-            github.rest.issues.createComment({
-              issue_number: context.issue.number,
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              body: output
-            });
-
-      - name: Check for High Plagiarism Percentages
-        if: success()
-        run: |
-          if grep -qE "(\d{2,3})%" plagiarism_results.txt; then
-            echo "Plagiarism percentage over threshold detected."
-            exit 1
-          fi
+        id: extract-percentages
 
       - name: Upload Compare50 Results as Artifacts
+        if: always()
         uses: actions/upload-artifact@v3
         with:
           name: compare50-results
           path: saved_dir/
+
+      - name: Check for High Plagiarism Percentages
+        if: steps.extract-percentages.outcome == 'failure'
+        run: echo "Plagiarism percentage over threshold detected."
diff --git a/docs/SPRIG_APP_REVIEW.md b/docs/SPRIG_APP_REVIEW.md
@@ -0,0 +1,32 @@
+# Sprig App Review
+
+### Triage
+1. Determine if there is a new app to review via `/slacker gimme sprig games`.  If there are no new Sprig games, triage is complete and you should start the **Review** section below.
+2. Click the GitHub link to the Sprig game.
+3. Add the 'submission' tag to the PR.
+4. Assign the issue to yourself in GitHub.
+5. Welcome the game author with an encouraging/congratulatory comment.
+6. Resolve the Slacker action item.
+7. Repeat step #1.
+
+### Review
+1. Perform your checklist
+   * Ensure that the author has provided their name.
+   * Ensure that the author has provided an "about" blurb for their game.
+   * Ensure that the author has provided a brief gameplay description ("How do you play your game?").
+   * Ensure that the title of the game is unique (see the top of ). <!-- TODO: the reference after "see the top of" is missing; restore the intended link or file name. -->
+   * Ensure that the author has checked ALL of the checkboxes in the PR template.
+   * Ensure that the name of the game file contains only alphanumeric characters (or `-` or `_`) and does not conflict with any other game.
+   * Ensure that the game is placed in the /games directory.
+   * Ensure that the game screenshot is placed in the /games/img directory, and that the name of the image matches the name of the game file.
+   * Check the plagiarism script (it should comment on PRs every time code is changed).  Ensure that it reports no overlaps of 50% or more with other games.
+   * Navigate to the preview deployment of the game and ensure that the game is able to launch and displays (and is discoverable) as expected in the gallery.
+   * Run the game.  Ensure that it has at least 1 minute of novel gameplay.
+
+
+2. Communicate your review decision to the PR author via a code review
+   * If you found an issue during your checklist, **Request changes** on the PR, and clearly communicate the issue or issues that the author needs to resolve before being able to land their changes.  After requesting changes, add the author to the current assignees.  Stop here.
+   * If you determine that the game is ready to be landed, **Approve** the PR with a congratulatory comment.
+   * Merge the PR to main.
+   * Add a comment stating that if the author is a teen, they can request their Sprig device at https://sprig-order.hackclub.dev/.  You can also suggest that they share a few sentences in our #ship Slack channel.
+