Skip to content

Commit

Permalink
Merge pull request #13 from tk-hap/refactor-valid-video-check
Browse files Browse the repository at this point in the history
Refactor the validate URL function
  • Loading branch information
tk-hap authored Mar 29, 2024
2 parents 2f0e0cb + 344d377 commit 111b94e
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 30 deletions.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Try it at https://videorecipegen.com/
## Features

- Transcribes YouTube cooking videos into text.
- Summarizes the transcription into a recipe using OpenAI's GPT-3 model.
- Summarizes the transcription into a recipe using OpenAI's GPT model.
- Formats the response into a recipe card.
- Limits the number of requests per user to prevent abuse.

Expand Down Expand Up @@ -39,4 +39,9 @@ Try it at https://videorecipegen.com/

- The application limits the number of requests to 5 per user per day. This is done using the IP address of the user.
- The application limits the duration of the video to 45 minutes. This prevents inputs that exceed the model's maximum token limit.
- The accuracy of the recipe depends on the quality of the video transcription and the performance of the GPT-3 model.
- The accuracy of the recipe depends on the quality of the video transcription and the performance of the GPT model.

## Tests

Test coverage is still being built out.
Tests can be run with pytest from the root directory using the command `pytest --envfile flask/.env` to set up the env.
17 changes: 13 additions & 4 deletions flask/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import os
from flask import Flask, request, render_template, make_response, url_for, send_file, abort
from flask import (
Flask,
request,
render_template,
make_response,
url_for,
send_file,
abort,
)
from video import validate_url, get_video_id, transcribe_video, validate_video_content
from recipe import create_recipe
from config import REDIS_HOST, REDIS_PORT, REQUEST_LIMIT, REQUEST_TIMEOUT_SECS
Expand All @@ -26,8 +34,9 @@ def max_requests():
@app.route("/recipe", methods=["POST"])
def submit_video():
url = request.form["video"]
if validate_url(url):
recipe.video_id = get_video_id(url)
video_id = get_video_id(url)
if video_id:
recipe.video_id = video_id
if not validate_video_content(recipe.video_id):
return render_template("invalid-recipe.html")
# Check if the user has exceeded the rate limit
Expand Down Expand Up @@ -66,7 +75,7 @@ def export_pdf(video_id: str) -> bytes:
@app.route("/recipe/url", methods=["POST"])
def validate_video():
url = request.form["video"]
if not validate_url(url):
if not get_video_id(url):
return render_template("invalid-video.html", url=url)
else:
return render_template("valid-video.html", url=url)
Expand Down
1 change: 1 addition & 0 deletions flask/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Pygments==2.17.2
pyparsing==3.1.1
pyphen==0.14.0
pytest==8.0.0
pytest-dotenv==0.5.2
python-dotenv==1.0.0
PyYAML==6.0.1
redis==5.0.1
Expand Down
34 changes: 13 additions & 21 deletions flask/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,15 @@
# TODO: Create a video class


def validate_url(url: str) -> bool:
# TODO: Make this logic more robust, probably needs a regex
valid_prefixes = [
"https://www.youtube.com/watch?v=",
"wwww.youtube.com/watch?v=",
"youtube.com/watch?v=",
"https://youtu.be/6gwF8mG3UUY?si=RRz938"
]
if any(map(url.startswith, valid_prefixes)):
logger.info("Valid youtube url", video_url=url)
return True
else:
def get_video_id(url: str) -> "str | bool":
    """Extract the YouTube video id from ``url``.

    The pattern recognises the common YouTube URL shapes (``watch?v=``,
    ``youtu.be/``, ``embed/``, ``shorts/``, ``v/``, ...) and captures the
    characters that follow, up to the next ``#``, ``&`` or ``?``.

    Args:
        url: The URL as submitted by the user.

    Returns:
        The video id string when one is found, otherwise ``False``. Callers
        rely on the truthiness of the result to accept or reject the URL.
    """
    reg = r"^.*(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/|shorts\/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"
    # Submitted URLs can carry stray surrounding whitespace or a trailing
    # newline; without stripping, the capture group would swallow it into
    # the id because the character class only stops at '#', '&' and '?'.
    match = re.search(reg, url.strip())
    # A URL such as "https://youtu.be/" matches the pattern but yields an
    # empty id, which is just as unusable as no match at all.
    video_id = match.group(1) if match else ""
    if not video_id:
        logger.info("Invalid youtube url", video_url=url)
        return False

    return video_id


def validate_video_content(url: str) -> bool:
with build("youtube", "v3", developerKey=YOUTUBE_API_KEY) as yt_service:
Expand All @@ -41,7 +35,11 @@ def validate_video_content(url: str) -> bool:
return False

if parse_duration(duration) > parse_duration(MAX_VIDEO_LENGTH):
logger.info("Video duration too long", video_duration=duration, max_duration=MAX_VIDEO_LENGTH)
logger.info(
"Video duration too long",
video_duration=duration,
max_duration=MAX_VIDEO_LENGTH,
)
return False

title = response["items"][0]["snippet"]["title"]
Expand All @@ -58,14 +56,8 @@ def validate_video_content(url: str) -> bool:
logger.info("Cooking tags found", video_tags=tags, video_url=url)
return True
else:
logger.info("No cooking tags found", video_tags=tags, video_url=url)
return False


def get_video_id(url: str) -> str:
reg = r"^.*(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/|shorts\/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"
video_id = re.search(reg, url).group(1)
return video_id
logger.info("No cooking tags found", video_tags=tags, video_url=url)
return False


def transcribe_video(video_id: str) -> str:
Expand Down
19 changes: 16 additions & 3 deletions tests/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,22 @@
import video


def test_validate_url():
assert video.validate_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ") == True
assert video.validate_url("not a url") == False
def test_get_video_id():
    """Check that get_video_id extracts ids from valid URLs and rejects others."""
    # Maps each accepted URL shape to the id that should be extracted from it.
    valid_urls = {
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ": "dQw4w9WgXcQ",
        "https://youtu.be/6tvBVCm9lmg?si=VuQYxsVc6tvA26KH": "6tvBVCm9lmg",
        "m.youtube.com/watch?v=S_7SE_Uzk-I": "S_7SE_Uzk-I",
        "youtube.com/watch?v=vYu4Zu5Ra_k": "vYu4Zu5Ra_k",
    }
    invalid_urls = [
        "this is a url",
        "https://www.reddit.com/",
    ]

    # The URL is passed as the assertion message so a failure names the
    # offending input instead of only reporting the loop body.
    for url, expected_id in valid_urls.items():
        assert video.get_video_id(url) == expected_id, url
    for url in invalid_urls:
        # Rejected URLs are signalled with the literal False; an identity
        # check avoids accepting other values that merely compare equal.
        assert video.get_video_id(url) is False, url

def test_validate_video_content():
# Cooking vid id
Expand Down

0 comments on commit 111b94e

Please sign in to comment.