diff --git a/README.md b/README.md index 7821824..edf8421 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Try it at https://videorecipegen.com/ ## Features - Transcribes YouTube cooking videos into text. -- Summarizes the transcription into a recipe using OpenAI's GPT-3 model. +- Summarizes the transcription into a recipe using OpenAI's GPT model. - Formats the response into a recipe card. - Limits the number of requests per user to prevent abuse. @@ -39,4 +39,9 @@ Try it at https://videorecipegen.com/ - The application limits the number of requests to 5 per user per day. This is done using the IP address of the user. - The application limits the duration of the video to 45 minutes. This is to prevent inputs greater than the models max tokens. -- The accuracy of the recipe depends on the quality of the video transcription and the performance of the GPT-3 model. \ No newline at end of file +- The accuracy of the recipe depends on the quality of the video transcription and the performance of the GPT model. + +## Tests + +Test coverage is still being built out. +Run the tests from the repository root with `pytest --envfile flask/.env`, which loads the environment variables from `flask/.env`. 
diff --git a/flask/main.py b/flask/main.py index 7b7fb57..7f482ff 100644 --- a/flask/main.py +++ b/flask/main.py @@ -1,5 +1,13 @@ import os -from flask import Flask, request, render_template, make_response, url_for, send_file, abort +from flask import ( + Flask, + request, + render_template, + make_response, + url_for, + send_file, + abort, +) from video import validate_url, get_video_id, transcribe_video, validate_video_content from recipe import create_recipe from config import REDIS_HOST, REDIS_PORT, REQUEST_LIMIT, REQUEST_TIMEOUT_SECS @@ -26,8 +34,9 @@ def max_requests(): @app.route("/recipe", methods=["POST"]) def submit_video(): url = request.form["video"] - if validate_url(url): - recipe.video_id = get_video_id(url) + video_id = get_video_id(url) + if video_id: + recipe.video_id = video_id if not validate_video_content(recipe.video_id): return render_template("invalid-recipe.html") # Check if the user has exceeded the rate limit @@ -66,7 +75,7 @@ def export_pdf(video_id: str) -> bytes: @app.route("/recipe/url", methods=["POST"]) def validate_video(): url = request.form["video"] - if not validate_url(url): + if not get_video_id(url): return render_template("invalid-video.html", url=url) else: return render_template("valid-video.html", url=url) diff --git a/flask/requirements.txt b/flask/requirements.txt index dcff9ae..71cb8e0 100644 --- a/flask/requirements.txt +++ b/flask/requirements.txt @@ -62,6 +62,7 @@ Pygments==2.17.2 pyparsing==3.1.1 pyphen==0.14.0 pytest==8.0.0 +pytest-dotenv==0.5.2 python-dotenv==1.0.0 PyYAML==6.0.1 redis==5.0.1 diff --git a/flask/video.py b/flask/video.py index 64836dc..aa428f4 100644 --- a/flask/video.py +++ b/flask/video.py @@ -13,21 +13,15 @@ # TODO: Create a video class -def validate_url(url: str) -> bool: - # TODO: Make this logic more robust, probably needs a regex - valid_prefixes = [ - "https://www.youtube.com/watch?v=", - "wwww.youtube.com/watch?v=", - "youtube.com/watch?v=", - "https://youtu.be/6gwF8mG3UUY?si=RRz938" - ] - 
if any(map(url.startswith, valid_prefixes)): - logger.info("Valid youtube url", video_url=url) - return True - else: +def get_video_id(url: str) -> str: + reg = r"^.*(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/|shorts\/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*" + video_id = re.search(reg, url) + if video_id is None: logger.info("Invalid youtube url", video_url=url) return False + return video_id.group(1) + def validate_video_content(url: str) -> bool: with build("youtube", "v3", developerKey=YOUTUBE_API_KEY) as yt_service: @@ -41,7 +35,11 @@ def validate_video_content(url: str) -> bool: return False if parse_duration(duration) > parse_duration(MAX_VIDEO_LENGTH): - logger.info("Video duration too long", video_duration=duration, max_duration=MAX_VIDEO_LENGTH) + logger.info( + "Video duration too long", + video_duration=duration, + max_duration=MAX_VIDEO_LENGTH, + ) return False title = response["items"][0]["snippet"]["title"] @@ -58,14 +56,8 @@ def validate_video_content(url: str) -> bool: logger.info("Cooking tags found", video_tags=tags, video_url=url) return True else: - logger.info("No cooking tags found", video_tags=tags, video_url=url) - return False - - -def get_video_id(url: str) -> str: - reg = r"^.*(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/|shorts\/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*" - video_id = re.search(reg, url).group(1) - return video_id + logger.info("No cooking tags found", video_tags=tags, video_url=url) + return False def transcribe_video(video_id: str) -> str: diff --git a/tests/test_video.py b/tests/test_video.py index 81b20b8..1ea80bf 100644 --- a/tests/test_video.py +++ b/tests/test_video.py @@ -5,9 +5,22 @@ import video -def test_validate_url(): - assert video.validate_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ") == True - assert video.validate_url("not a url") == False +def test_get_video_id(): + valid_urls = { + "https://www.youtube.com/watch?v=dQw4w9WgXcQ": "dQw4w9WgXcQ", + 
"https://youtu.be/6tvBVCm9lmg?si=VuQYxsVc6tvA26KH": "6tvBVCm9lmg", + "m.youtube.com/watch?v=S_7SE_Uzk-I": "S_7SE_Uzk-I", + "youtube.com/watch?v=vYu4Zu5Ra_k": "vYu4Zu5Ra_k" + } + invalid_urls = [ + "this is a url", + "https://www.reddit.com/" + ] + + for url, id in valid_urls.items(): + assert video.get_video_id(url) == id + for url in invalid_urls: + assert video.get_video_id(url) == False def test_validate_video_content(): # Cooking vid id