From 3d1ec4f25c8497db7b84e2d854e278769b60e26d Mon Sep 17 00:00:00 2001 From: Martin Michalec Date: Fri, 20 Nov 2020 15:23:15 +0100 Subject: [PATCH 1/3] added Musixmatch lyrics source --- swaglyrics/__init__.py | 1 + swaglyrics/cli.py | 55 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/swaglyrics/__init__.py b/swaglyrics/__init__.py index 79286da6..38852a6c 100644 --- a/swaglyrics/__init__.py +++ b/swaglyrics/__init__.py @@ -37,6 +37,7 @@ def user_data_dir(file_name): backend_url = 'https://api.swaglyrics.dev' api_timeout = 10 genius_timeout = 20 +musixmatch_timeout = 20 unsupported_txt = user_data_dir("unsupported.txt") # create unsupported.txt if it doesn't exist diff --git a/swaglyrics/cli.py b/swaglyrics/cli.py index 4b56fd62..941bd43e 100644 --- a/swaglyrics/cli.py +++ b/swaglyrics/cli.py @@ -8,7 +8,7 @@ from html import unescape from unidecode import unidecode -from swaglyrics import __version__, unsupported_txt, backend_url, api_timeout, genius_timeout +from swaglyrics import __version__, unsupported_txt, backend_url, api_timeout, genius_timeout, musixmatch_timeout def clear() -> None: @@ -61,7 +61,29 @@ def stripper(song: str, artist: str) -> str: return url_data -def get_lyrics(song: str, artist: str) -> Optional[str]: +def get_lyrics(song: str, artist: str, sources = ["Genius", "Musixmatch"]) -> Optional[str]: + """ + Get lyrics from given the song and artist. + Default lyrics source is Genius + :param song: currently playing song + :param artist: song artist + :param sources: ordered list of lyrics sources to attempt to query + :return: song lyrics or None if lyrics unavailable + """ + get_lyrics_from = { + "Genius" : get_lyrics_from_genius, + "Musixmatch" : get_lyrics_from_musixmatch, + } + for source in sources: + try: + lyrics = get_lyrics_from[source](song, artist) + if lyrics: return lyrics + except KeyError: + raise ValueError(f'"{source}" is invalid source') + return None + + +def get_lyrics_from_genius(song: str, artist: str) -> Optional[str]: """ Get lyrics from Genius given the song and artist. Formats the URL with the stripped url path to fetch the lyrics. @@ -100,6 +122,35 @@ def get_lyrics(song: str, artist: str) -> Optional[str]: return lyrics +def get_lyrics_from_musixmatch(song: str, artist: str) -> Optional[str]: + """ + Get lyrics from Musixmatch given the song and artist. + Formats the URL with the stripped url path to fetch the lyrics. + :param song: currently playing song + :param artist: song artist + :return: song lyrics or None if lyrics unavailable + """ + # fake legitimate browser lookup with custom header + headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} + + # get search page from Musixmatch + search_url = f'https://www.musixmatch.com/search/{artist}%20{song}/tracks' + search_page = requests.get(search_url, headers=headers, timeout=musixmatch_timeout) + + # get first track page from search page + html = BeautifulSoup(search_page.content, "html.parser") + track_url = 'https://www.musixmatch.com' + html.find("a", {'class': "title"})['href'] + first_track_page = requests.get(track_url, headers=headers, timeout=musixmatch_timeout) + + # get lyrics from paragraphs + html = BeautifulSoup(first_track_page.content, "html.parser") + lyrics = "" + for p in html.find_all("p", {'class': "mxm-lyrics__content"}): + lyrics += p.text + + return lyrics + + def lyrics(song: str, artist: str, make_issue: bool = True) -> str: """ Displays the fetched lyrics if song playing and handles if lyrics unavailable. From 2f040e0221dd40b6764788307e214bbb0fed31a9 Mon Sep 17 00:00:00 2001 From: Martin Michalec Date: Fri, 20 Nov 2020 16:21:24 +0100 Subject: [PATCH 2/3] pep8fix --- swaglyrics/cli.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/swaglyrics/cli.py b/swaglyrics/cli.py index 941bd43e..80a64886 100644 --- a/swaglyrics/cli.py +++ b/swaglyrics/cli.py @@ -61,7 +61,7 @@ def stripper(song: str, artist: str) -> str: return url_data -def get_lyrics(song: str, artist: str, sources = ["Genius", "Musixmatch"]) -> Optional[str]: +def get_lyrics(song: str, artist: str, sources=["Genius", "Musixmatch"]) -> Optional[str]: """ Get lyrics from given the song and artist. Default lyrics source is Genius @@ -71,13 +71,14 @@ def get_lyrics(song: str, artist: str, sources = ["Genius", "Musixmatch"]) -> Op :return: song lyrics or None if lyrics unavailable """ get_lyrics_from = { - "Genius" : get_lyrics_from_genius, - "Musixmatch" : get_lyrics_from_musixmatch, + "Genius": get_lyrics_from_genius, + "Musixmatch": get_lyrics_from_musixmatch, } for source in sources: try: lyrics = get_lyrics_from[source](song, artist) - if lyrics: return lyrics + if lyrics: + return lyrics except KeyError: raise ValueError(f'"{source}" is invalid source') return None @@ -131,7 +132,12 @@ def get_lyrics_from_musixmatch(song: str, artist: str) -> Optional[str]: :return: song lyrics or None if lyrics unavailable """ # fake legitimate browser lookup with custom header - headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} + headers = {'User-Agent': 'Mozilla/5.0 \ + (Macintosh; Intel Mac OS X 10_10_1) \ + AppleWebKit/537.36 \ + (KHTML, like Gecko) \ + Chrome/39.0.2171.95 \ + Safari/537.36'} # get search page from Musixmatch search_url = f'https://www.musixmatch.com/search/{artist}%20{song}/tracks' From 34d2b6ea8db6d57be6548886c3c8c986682a1de5 Mon Sep 17 00:00:00 2001 From: Martin Michalec Date: Fri, 20 Nov 2020 16:54:46 +0100 Subject: [PATCH 3/3] hotfix --- swaglyrics/cli.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/swaglyrics/cli.py b/swaglyrics/cli.py index 80a64886..31622d42 100644 --- a/swaglyrics/cli.py +++ b/swaglyrics/cli.py @@ -144,8 +144,11 @@ def get_lyrics_from_musixmatch(song: str, artist: str) -> Optional[str]: search_page = requests.get(search_url, headers=headers, timeout=musixmatch_timeout) # get first track page from search page - html = BeautifulSoup(search_page.content, "html.parser") - track_url = 'https://www.musixmatch.com' + html.find("a", {'class': "title"})['href'] + try: + html = BeautifulSoup(search_page.content, "html.parser") + track_url = 'https://www.musixmatch.com' + html.find("a", {'class': "title"})['href'] + except TypeError: + return None first_track_page = requests.get(track_url, headers=headers, timeout=musixmatch_timeout) # get lyrics from paragraphs