Skip to content

Commit

Permalink
Merge branch 'main' into test-fake-sprig-app
Browse files Browse the repository at this point in the history
  • Loading branch information
DevIos01 authored Dec 19, 2023
2 parents 70ed299 + 3a6173b commit 807eeda
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 42 deletions.
40 changes: 30 additions & 10 deletions .github/scripts/extract_percentages.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,63 @@
from bs4 import BeautifulSoup
import os
import sys
import time

def log(message):
    """Print ``message`` to stdout prefixed with a local-time timestamp.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] message``.

    Args:
        message: Text to log; it is interpolated with ``str`` formatting.
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    # Flush right away: when stdout is a pipe (e.g. on a CI runner) Python
    # block-buffers it, and progress lines would otherwise show up late.
    print(f"[{timestamp}] {message}", flush=True)

def extract_similarity_percentage(html_file):
    """Return the similarity percentage stored in one saved match page.

    The page is parsed with BeautifulSoup; the first element matching
    ``#textright > div > h4`` is looked up, and the text of the
    ``<span class="text-secondary small">`` inside it is converted to an
    integer after surrounding ``(``, ``)`` and ``%`` characters are removed
    (presumably the span reads like ``(85%)`` -- confirm against real
    compare50 output).

    Args:
        html_file: Path of the HTML file to read (opened as UTF-8).

    Returns:
        The percentage as an ``int``, or ``None`` when the heading or the
        percentage span is missing, or when the file cannot be read or its
        content cannot be converted to an integer.
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as file:
            soup = BeautifulSoup(file, 'html.parser')

        file_name_tag = soup.select_one("#textright > div > h4")
        if file_name_tag is None:
            return None

        # find() returns None when the span is absent; check explicitly so
        # the log says what is missing instead of reporting an opaque
        # AttributeError on NoneType.
        percentage_tag = file_name_tag.find("span", class_="text-secondary small")
        if percentage_tag is None:
            log(f"No similarity percentage found in {html_file}")
            return None

        # Strip whitespace both outside and inside the "(...%)" wrapper.
        percentage_text = percentage_tag.get_text(strip=True).strip("()%").strip()
        return int(percentage_text)
    except Exception as e:
        # Deliberately broad: this is the per-file boundary, and one
        # unreadable or malformed page must not abort the whole run.
        log(f"Error processing file {html_file}: {e}")
        return None

def process_html_files(directory, threshold=10):
    """Collect similarity percentages from saved match pages and report them.

    Every ``*.html`` file directly inside ``directory`` is passed to
    ``extract_similarity_percentage``. Results at or above ``threshold`` are
    written to ``plagiarism_results.txt`` in the current working directory,
    highest percentage first, and each written line is also logged.

    Args:
        directory: Directory containing the saved ``.html`` match pages.
        threshold: Minimum percentage (inclusive) for a file to be reported.
    """
    html_suffix = ".html"
    results = {}
    log("Processing HTML files for plagiarism results...")
    # Sorted listing keeps the log order reproducible across platforms.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(html_suffix):
            continue
        percentage = extract_similarity_percentage(os.path.join(directory, filename))
        if percentage is None:
            continue
        # Swap only the trailing extension; str.replace would also rewrite
        # any ".html" occurring earlier in the name.
        js_name = filename[:-len(html_suffix)] + ".js"
        results[js_name] = percentage
        log(f"Extracted {percentage}% similarity from {filename}")

    # Highest percentage first; ties are ordered by file name so the report
    # does not depend on directory listing order.
    filtered_sorted_results = sorted(
        ((file, percent) for file, percent in results.items() if percent >= threshold),
        key=lambda item: (-item[1], item[0]),
    )

    with open('plagiarism_results.txt', 'w', encoding='utf-8') as output_file:
        log("Writing results to plagiarism_results.txt")
        # The header intentionally contains no "<number>%" text: the results
        # file is scanned for percentages by a later workflow step.
        output_file.write("Filtered and Sorted Results (Above Threshold):\n")
        for file, percent in filtered_sorted_results:
            line = f"{file}: {percent}%\n"
            output_file.write(line)
            log(line.strip())
        if not filtered_sorted_results:
            output_file.write("No results exceeding threshold.\n")
            log("No results exceeding threshold.")

def main():
    """Command-line entry point.

    Expects exactly one argument, the directory holding the saved match
    pages; otherwise logs the problem, prints usage and exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        log("Incorrect number of arguments provided.")
        print("Usage: python extract_percentages.py <saved_dir_path>")
        sys.exit(1)

    (saved_dir_path,) = cli_args
    log(f"Received saved directory path: {saved_dir_path}")
    process_html_files(saved_dir_path)
    log("Extraction of plagiarism percentages completed.")


if __name__ == "__main__":
    main()
36 changes: 25 additions & 11 deletions .github/scripts/plagiarism_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@
import os
import glob
import shutil
import time

def log(message):
    """Print ``message`` to stdout prefixed with a local-time timestamp.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] message``.

    Args:
        message: Text to log; it is interpolated with ``str`` formatting.
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    # Flush right away: this script launches compare50 through subprocess,
    # and the child writes to the same stdout. Without a flush, buffered log
    # lines can appear after the child's output when stdout is a pipe.
    print(f"[{timestamp}] {message}", flush=True)

def run_compare50(single_file, directory, output_dir, saved_dir_base):
try:
if not os.path.exists(saved_dir_base):
os.makedirs(saved_dir_base)
log("Created base directory for saved files.")

all_js_files = glob.glob(os.path.join(directory, "*.js"))
total_files = len(all_js_files)
Expand All @@ -16,11 +22,13 @@ def run_compare50(single_file, directory, output_dir, saved_dir_base):
for file in all_js_files:
current_file_number += 1
if os.path.abspath(file) == os.path.abspath(single_file):
log(f"Skipping comparison for the same file: {file}")
continue

print(f"Processing file {current_file_number} of {total_files}: {file}")
log(f"Processing file {current_file_number} of {total_files}: {file}")
if os.path.exists(output_dir):
shutil.rmtree(output_dir)
log(f"Cleaned existing output directory: {output_dir}")

command = [
"compare50",
Expand All @@ -32,23 +40,28 @@ def run_compare50(single_file, directory, output_dir, saved_dir_base):
]

command_str = ' '.join(command)
log(f"Running command: {command_str}")
subprocess.run(command_str, shell=True, check=True)
log("Compare50 command executed successfully.")

match_file = os.path.join(output_dir, "match_1.html")

if os.path.exists(match_file):
new_filename = os.path.basename(file).replace('.js', '.html')
saved_file_path = os.path.join(saved_dir_base, new_filename)
print(f"Moving {match_file} to {saved_file_path}")
log(f"Match found. Moving {match_file} to {saved_file_path}")
shutil.move(match_file, saved_file_path)
else:
log(f"No match found for file: {file}")

except subprocess.CalledProcessError as e:
print("Error in running Compare50:", e)
log(f"Error in running Compare50: {e}")
except Exception as e:
print(f"An error occurred: {e}")
log(f"An error occurred: {e}")

def main():
if len(sys.argv) != 5:
log("Incorrect number of arguments provided.")
print("Usage: python plagiarism_check.py <single_file> <directory> <output_dir> <saved_dir_base>")
sys.exit(1)

Expand All @@ -57,17 +70,18 @@ def main():
output_dir = sys.argv[3]
saved_dir_base = sys.argv[4]

print(f"Received arguments:")
print(f"Single file: {single_file}")
print(f"Directory: {directory}")
print(f"Output directory: {output_dir}")
print(f"Saved directory base: {saved_dir_base}")
log(f"Starting plagiarism check with the following arguments:")
log(f"Single file: {single_file}")
log(f"Directory: {directory}")
log(f"Output directory: {output_dir}")
log(f"Saved directory base: {saved_dir_base}")

print(f"All files in directory '{directory}':")
log(f"Listing all JavaScript files in directory '{directory}':")
for f in glob.glob(os.path.join(directory, "*.js")):
print(f)
log(f)

run_compare50(single_file, directory, output_dir, saved_dir_base)
log("Plagiarism check completed.")

if __name__ == "__main__":
main()
27 changes: 6 additions & 21 deletions .github/workflows/check_plagiarism.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,38 +30,23 @@ jobs:
head_sha="${{ github.event.pull_request.head.sha }}"
js_files=$(git diff --name-only --diff-filter=AM $base_sha..$head_sha | grep 'games/.*\.js$' | xargs)
echo "FILES=$js_files" >> $GITHUB_ENV
- name: Run Plagiarism Detection Script

run: python .github/scripts/plagiarism_check.py ${{ env.FILES }} games output_dir saved_dir

- name: Extract and Display Similarity Percentages
run: python .github/scripts/extract_percentages.py saved_dir/

- name: Post Plagiarism Results Comment
if: success()
uses: actions/github-script@v7
- name: Upload Compare50 Results as Artifacts
uses: actions/upload-artifact@v3
with:
github-token: ${{ secrets.PR_COMMENT_PRIVATE_KEY }}
script: |
const fs = require('fs');
const output = fs.readFileSync('plagiarism_results.txt', 'utf8');
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: output
});
name: compare50-results
path: saved_dir/

- name: Check for High Plagiarism Percentages
  if: success()
  run: |
    # grep -E (POSIX ERE) has no \d shorthand, so "\d{2,3}" never matched a
    # result line; use a bracket expression for the two- or three-digit
    # percentage instead.
    if grep -qE "[0-9]{2,3}%" plagiarism_results.txt; then
      echo "Plagiarism percentage over threshold detected."
      exit 1
    fi
- name: Upload Compare50 Results as Artifacts
uses: actions/upload-artifact@v3
with:
name: compare50-results
path: saved_dir/
fi

0 comments on commit 807eeda

Please sign in to comment.