From 3a6173ba460c12237d6192af3cf2456f70b0b47b Mon Sep 17 00:00:00 2001
From: DevIos01 <GamingCraft52@gmail.com>
Date: Tue, 19 Dec 2023 17:19:55 +0100
Subject: [PATCH] Add timestamped logs to plagiarism checker; drop PR comment step

---
 .github/scripts/extract_percentages.py | 40 +++++++++++++++++++-------
 .github/scripts/plagiarism_check.py    | 36 ++++++++++++++++-------
 .github/workflows/check_plagiarism.yml | 27 ++++-------------
 3 files changed, 61 insertions(+), 42 deletions(-)

diff --git a/.github/scripts/extract_percentages.py b/.github/scripts/extract_percentages.py
index 28e1888041..d62c7e3ea7 100644
--- a/.github/scripts/extract_percentages.py
+++ b/.github/scripts/extract_percentages.py
@@ -1,25 +1,36 @@
 from bs4 import BeautifulSoup
 import os
 import sys
+import time
+
+def log(message):
+    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+    print(f"[{timestamp}] {message}")
 
 def extract_similarity_percentage(html_file):
-    with open(html_file, 'r', encoding='utf-8') as file:
-        soup = BeautifulSoup(file, 'html.parser')
-        file_name_tag = soup.select_one("#textright > div > h4")
-        if file_name_tag:
-            percentage_text = file_name_tag.find("span", class_="text-secondary small").text.strip("()%")
-            return int(percentage_text)
-        else:
-            return None
+    try:
+        with open(html_file, 'r', encoding='utf-8') as file:
+            soup = BeautifulSoup(file, 'html.parser')
+            file_name_tag = soup.select_one("#textright > div > h4")
+            if file_name_tag:
+                percentage_text = file_name_tag.find("span", class_="text-secondary small").text.strip("()%")
+                return int(percentage_text)
+            else:
+                return None
+    except Exception as e:
+        log(f"Error processing file {html_file}: {e}")
+        return None
 
 def process_html_files(directory, threshold=10):
     results = {}
+    log("Processing HTML files for plagiarism results...")
     for filename in os.listdir(directory):
         if filename.endswith(".html"):
             file_path = os.path.join(directory, filename)
             percentage = extract_similarity_percentage(file_path)
             if percentage is not None:
                 results[filename.replace('.html', '.js')] = percentage
+                log(f"Extracted {percentage}% similarity from {filename}")
 
     filtered_sorted_results = sorted(
         ((file, percent) for file, percent in results.items() if percent >= threshold),
@@ -27,17 +38,26 @@ def process_html_files(directory, threshold=10):
     )
 
     with open('plagiarism_results.txt', 'w') as output_file:
-        output_file.write("\nFiltered and Sorted Results (Above 10%):\n")
+        log("Writing results to plagiarism_results.txt")
+        output_file.write("Filtered and Sorted Results (Above Threshold):\n")
         for file, percent in filtered_sorted_results:
-            output_file.write(f"{file}: {percent}%\n")
+            line = f"{file}: {percent}%\n"
+            output_file.write(line)
+            log(line.strip())
+        if not filtered_sorted_results:
+            output_file.write("No results exceeding threshold.\n")
+            log("No results exceeding threshold.")
 
 def main():
     if len(sys.argv) != 2:
+        log("Incorrect number of arguments provided.")
         print("Usage: python extract_percentages.py <saved_dir_path>")
         sys.exit(1)
 
     saved_dir_path = sys.argv[1]
+    log(f"Received saved directory path: {saved_dir_path}")
     process_html_files(saved_dir_path)
+    log("Extraction of plagiarism percentages completed.")
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/.github/scripts/plagiarism_check.py b/.github/scripts/plagiarism_check.py
index bdf38ffdae..4946bc127e 100644
--- a/.github/scripts/plagiarism_check.py
+++ b/.github/scripts/plagiarism_check.py
@@ -3,11 +3,17 @@
 import os
 import glob
 import shutil
+import time
+
+def log(message):
+    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+    print(f"[{timestamp}] {message}")
 
 def run_compare50(single_file, directory, output_dir, saved_dir_base):
     try:
         if not os.path.exists(saved_dir_base):
             os.makedirs(saved_dir_base)
+            log("Created base directory for saved files.")
 
         all_js_files = glob.glob(os.path.join(directory, "*.js"))
         total_files = len(all_js_files)
@@ -16,11 +22,13 @@ def run_compare50(single_file, directory, output_dir, saved_dir_base):
         for file in all_js_files:
             current_file_number += 1
             if os.path.abspath(file) == os.path.abspath(single_file):
+                log(f"Skipping comparison for the same file: {file}")
                 continue
 
-            print(f"Processing file {current_file_number} of {total_files}: {file}")
+            log(f"Processing file {current_file_number} of {total_files}: {file}")
             if os.path.exists(output_dir):
                 shutil.rmtree(output_dir)
+                log(f"Cleaned existing output directory: {output_dir}")
             
             command = [
                 "compare50",
@@ -32,23 +40,28 @@ def run_compare50(single_file, directory, output_dir, saved_dir_base):
             ]
 
             command_str = ' '.join(command)
+            log(f"Running command: {command_str}")
             subprocess.run(command_str, shell=True, check=True)
+            log("Compare50 command executed successfully.")
 
             match_file = os.path.join(output_dir, "match_1.html")
 
             if os.path.exists(match_file):
                 new_filename = os.path.basename(file).replace('.js', '.html')
                 saved_file_path = os.path.join(saved_dir_base, new_filename)
-                print(f"Moving {match_file} to {saved_file_path}")
+                log(f"Match found. Moving {match_file} to {saved_file_path}")
                 shutil.move(match_file, saved_file_path)
+            else:
+                log(f"No match found for file: {file}")
 
     except subprocess.CalledProcessError as e:
-        print("Error in running Compare50:", e)
+        log(f"Error in running Compare50: {e}")
     except Exception as e:
-        print(f"An error occurred: {e}")
+        log(f"An error occurred: {e}")
 
 def main():
     if len(sys.argv) != 5:
+        log("Incorrect number of arguments provided.")
         print("Usage: python plagiarism_check.py <single_file> <directory> <output_dir> <saved_dir_base>")
         sys.exit(1)
 
@@ -57,17 +70,18 @@ def main():
     output_dir = sys.argv[3]
     saved_dir_base = sys.argv[4]
 
-    print(f"Received arguments:")
-    print(f"Single file: {single_file}")
-    print(f"Directory: {directory}")
-    print(f"Output directory: {output_dir}")
-    print(f"Saved directory base: {saved_dir_base}")
+    log(f"Starting plagiarism check with the following arguments:")
+    log(f"Single file: {single_file}")
+    log(f"Directory: {directory}")
+    log(f"Output directory: {output_dir}")
+    log(f"Saved directory base: {saved_dir_base}")
 
-    print(f"All files in directory '{directory}':")
+    log(f"Listing all JavaScript files in directory '{directory}':")
     for f in glob.glob(os.path.join(directory, "*.js")):
-        print(f)
+        log(f)
 
     run_compare50(single_file, directory, output_dir, saved_dir_base)
+    log("Plagiarism check completed.")
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/.github/workflows/check_plagiarism.yml b/.github/workflows/check_plagiarism.yml
index f268936e9a..7d8f52c43f 100644
--- a/.github/workflows/check_plagiarism.yml
+++ b/.github/workflows/check_plagiarism.yml
@@ -30,27 +30,18 @@ jobs:
           head_sha="${{ github.event.pull_request.head.sha }}"
           js_files=$(git diff --name-only --diff-filter=AM $base_sha..$head_sha | grep 'games/.*\.js$' | xargs)
           echo "FILES=$js_files" >> $GITHUB_ENV
+          
       - name: Run Plagiarism Detection Script
-
         run: python .github/scripts/plagiarism_check.py ${{ env.FILES }} games output_dir saved_dir
 
       - name: Extract and Display Similarity Percentages
         run: python .github/scripts/extract_percentages.py saved_dir/
 
-      - name: Post Plagiarism Results Comment
-        if: success()
-        uses: actions/github-script@v7
+      - name: Upload Compare50 Results as Artifacts
+        uses: actions/upload-artifact@v3
         with:
-          github-token: ${{ secrets.PR_COMMENT_PRIVATE_KEY }}
-          script: |
-            const fs = require('fs');
-            const output = fs.readFileSync('plagiarism_results.txt', 'utf8');
-            github.rest.issues.createComment({
-              issue_number: context.issue.number,
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              body: output
-            });
+          name: compare50-results
+          path: saved_dir/
 
       - name: Check for High Plagiarism Percentages
         if: success()
@@ -58,10 +49,4 @@ jobs:
           if grep -qE "(\d{2,3})%" plagiarism_results.txt; then
             echo "Plagiarism percentage over threshold detected."
             exit 1
-          fi
-
-      - name: Upload Compare50 Results as Artifacts
-        uses: actions/upload-artifact@v3
-        with:
-          name: compare50-results
-          path: saved_dir/
\ No newline at end of file
+          fi
\ No newline at end of file