From 1bb0500e5088c1516993e40009137835f8fbce5d Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Mon, 30 Oct 2023 23:12:38 -0700 Subject: [PATCH 01/13] feat: get metadata for a post --- ocflib/lab/announcements.py | 39 +++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 ocflib/lab/announcements.py diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py new file mode 100644 index 0000000..17bc7ce --- /dev/null +++ b/ocflib/lab/announcements.py @@ -0,0 +1,39 @@ +"""Announcements handling""" +import requests +import json + +ANNOUNCEMENTS_REPO = ( + "https://api.github.com/repos/ocf/announcements/contents/announcements/{id}" +) + + +def get_all_announcements() -> str: + """Get announcements from the announcements repo""" + r = requests.get( + url=ANNOUNCEMENTS_REPO.format(id=""), + headers={"Accept": "application/vnd.github+json"}, + ) + r.raise_for_status() + + return json.dumps(r.json(), indent=4) + + +def get_announcement(id: str) -> str: + """Get one particular announcement from the announcements repo""" + r = requests.get( + url=ANNOUNCEMENTS_REPO.format(id=id + ".md"), + headers={"Accept": "application/vnd.github.raw"}, + ) + r.raise_for_status() + + return r.text + + +def get_metadata(post: str) -> str: + """Get the metadata from one announcement""" + return post.split("---")[1] + + +print(get_all_announcements()) +print(get_announcement("2002-01-01-00")) +print(get_metadata(get_announcement("2002-01-01-00"))) From 0e416bd22caea4cd6a7d885e5152498736c67526 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Sun, 5 Nov 2023 00:15:39 -0700 Subject: [PATCH 02/13] metadata to dictionary --- ocflib/lab/announcements.py | 59 ++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index 17bc7ce..18a9ace 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -1,39 +1,70 @@ """Announcements handling""" -import requests -import json +from collections import namedtuple +from requests import get + +from yaml import safe_load +from typing import NamedTuple ANNOUNCEMENTS_REPO = ( "https://api.github.com/repos/ocf/announcements/contents/announcements/{id}" ) +# TODO: cache +# TODO: request session for better efficiency + +Metadata = namedtuple("Metadata", ["title", "date", "author", "tags", "summary"]) + -def get_all_announcements() -> str: +def get_all_announcements(): """Get announcements from the announcements repo""" - r = requests.get( + posts = get( url=ANNOUNCEMENTS_REPO.format(id=""), headers={"Accept": "application/vnd.github+json"}, ) - r.raise_for_status() + posts.raise_for_status() - return json.dumps(r.json(), indent=4) + return posts.json() def get_announcement(id: str) -> str: """Get one particular announcement from the announcements repo""" - r = requests.get( + posts = get( url=ANNOUNCEMENTS_REPO.format(id=id + ".md"), headers={"Accept": "application/vnd.github.raw"}, ) - r.raise_for_status() + posts.raise_for_status() - return r.text + return posts.text -def get_metadata(post: str) -> str: +def get_metadata(id: str) -> Metadata: """Get the metadata from one announcement""" - return post.split("---")[1] + post = get_announcement(id) + meta_dict = safe_load(post.split("---")[1]) + + metadata = Metadata( + title=meta_dict["title"], + date=meta_dict["date"], + author=meta_dict["author"], + tags=meta_dict["tags"], + summary=meta_dict["summary"], + ) + return metadata + + +def get_all_ids(): + """Get all announcement ids""" + posts = get_all_announcements() + return [post["name"][:-3] for post in posts] + + +def get_ids(num: int): + """Get the last num announcement ids""" + posts = get_all_announcements() + return [post["name"][:-3] for post in posts[:num]] -print(get_all_announcements()) -print(get_announcement("2002-01-01-00")) -print(get_metadata(get_announcement("2002-01-01-00"))) +# print(get_all_announcements()) +# print(get_announcement("2002-01-01-00")) +# print(get_metadata("2002-01-01-00")) +# print(get_all_ids()) From 1ab98e40be80e188f2a1b4eb7b21e5f346fa7137 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Sun, 5 Nov 2023 10:59:21 -0800 Subject: [PATCH 03/13] get last n posts --- ocflib/lab/announcements.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index 18a9ace..ccda05e 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -37,9 +37,13 @@ def get_announcement(id: str) -> str: return posts.text -def get_metadata(id: str) -> Metadata: +def get_id(post: str) -> str: + """Get announcement id""" + return post["name"][:-3] + + +def get_metadata(post: str) -> Metadata: """Get the metadata from one announcement""" - post = get_announcement(id) meta_dict = safe_load(post.split("---")[1]) metadata = Metadata( @@ -52,19 +56,16 @@ def get_metadata(id: str) -> Metadata: return metadata -def get_all_ids(): - """Get all announcement ids""" +def get_announcements(num: int) -> list[str]: + """Get the last num announcements""" posts = get_all_announcements() - return [post["name"][:-3] for post in posts] - -def get_ids(num: int): - """Get the last num announcement ids""" - posts = get_all_announcements() - return [post["name"][:-3] for post in posts[:num]] + # get the last num posts in reverse order + return [get_announcement(get_id(post)) for post in posts[-1 : -num - 1 : -1]] # print(get_all_announcements()) # print(get_announcement("2002-01-01-00")) -# print(get_metadata("2002-01-01-00")) +# print(get_metadata(get_announcement("2002-01-01-00"))) # print(get_all_ids()) +# print(get_announcements(2)) From 6847b6e54d1f3942da959ffd71b5c24ad6bf8b26 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Wed, 8 Nov 2023 17:54:39 -0800 Subject: [PATCH 04/13] resolving pr comments --- ocflib/lab/announcements.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index ccda05e..5989ef6 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -39,6 +39,7 @@ def get_announcement(id: str) -> str: def get_id(post: str) -> str: """Get announcement id""" + # Since the id is the filename, remove the .md extension return post["name"][:-3] @@ -62,10 +63,3 @@ def get_announcements(num: int) -> list[str]: # get the last num posts in reverse order return [get_announcement(get_id(post)) for post in posts[-1 : -num - 1 : -1]] - - -# print(get_all_announcements()) -# print(get_announcement("2002-01-01-00")) -# print(get_metadata(get_announcement("2002-01-01-00"))) -# print(get_all_ids()) -# print(get_announcements(2)) From 8e81ee29c27eb978c71539cbf195e72f9e2c993a Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Sat, 11 Nov 2023 15:59:22 -0800 Subject: [PATCH 05/13] implement cache --- ocflib/lab/announcements.py | 100 +++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 25 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index 5989ef6..e977127 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -1,24 +1,29 @@ """Announcements handling""" from collections import namedtuple from requests import get - +from datetime import datetime from yaml import safe_load -from typing import NamedTuple +from collections import deque -ANNOUNCEMENTS_REPO = ( +# The default branch is main +ANNOUNCEMENTS_URL = ( "https://api.github.com/repos/ocf/announcements/contents/announcements/{id}" ) +CACHE_LEN = 10 + +# post_cache is a dict of id: post +post_cache = {} +id_cache = deque(maxlen=CACHE_LEN) -# TODO: cache -# TODO: request session for better efficiency Metadata = namedtuple("Metadata", ["title", "date", "author", "tags", "summary"]) -def get_all_announcements(): +def get_all_announcements() -> list[dict]: """Get announcements from the announcements repo""" + posts = get( - url=ANNOUNCEMENTS_REPO.format(id=""), + url=ANNOUNCEMENTS_URL.format(id=""), headers={"Accept": "application/vnd.github+json"}, ) posts.raise_for_status() @@ -28,38 +33,83 @@ def get_all_announcements(): def get_announcement(id: str) -> str: """Get one particular announcement from the announcements repo""" + + if id in post_cache: + return post_cache[id] + posts = get( - url=ANNOUNCEMENTS_REPO.format(id=id + ".md"), + url=ANNOUNCEMENTS_URL.format(id=id + ".md"), headers={"Accept": "application/vnd.github.raw"}, ) posts.raise_for_status() + post_cache[id] = posts.text + + # add the most recent id to the left of the deque + if id not in id_cache: + id_cache.appendleft(id) + return posts.text -def get_id(post: str) -> str: - """Get announcement id""" +def get_id(post: dict) -> str: + """Get announcement id based on the json response""" + # Since the id is the filename, remove the .md extension - return post["name"][:-3] + try: + id = post["name"][:-3] + except KeyError: + raise KeyError("Missing id in announcement") + + # Check if the id is a valid date + try: + datetime.strptime(id, "%Y-%m-%d") + except ValueError: + raise ValueError("Invalid announcement id") + + return id def get_metadata(post: str) -> Metadata: """Get the metadata from one announcement""" - meta_dict = safe_load(post.split("---")[1]) - - metadata = Metadata( - title=meta_dict["title"], - date=meta_dict["date"], - author=meta_dict["author"], - tags=meta_dict["tags"], - summary=meta_dict["summary"], - ) + + try: + meta_dict = safe_load(post.split("---")[1]) + except IndexError: + raise IndexError("Missing metadata in announcement") + + try: + metadata = Metadata( + title=meta_dict["title"], + date=meta_dict["date"], + author=meta_dict["author"], + tags=meta_dict["tags"], + summary=meta_dict["summary"], + ) + except KeyError: + raise KeyError("Missing metadata in announcement") + return metadata -def get_announcements(num: int) -> list[str]: - """Get the last num announcements""" - posts = get_all_announcements() +def get_last_n_announcements(n: int) -> list[str]: + """Get the last n announcements""" + + result = [] + + # Get announcements from the cache (latest first) + for id in list(id_cache)[:n]: + result.append(post_cache[id]) + + # Fetch additional announcements if needed + if n > CACHE_LEN: + posts = get_all_announcements() + needed = n - CACHE_LEN + + # The posts returned are in reverse chronological order + # So we first need to grab the last CACHE_LEN + 1 posts to avoid duplicates + # Then we need to reverse the order and grab the first needed posts + for post in posts[-CACHE_LEN - 1 :: -1][:needed]: + result.append(get_announcement(get_id(post))) - # get the last num posts in reverse order - return [get_announcement(get_id(post)) for post in posts[-1 : -num - 1 : -1]] + return result From 2be017f25e3bff1a5fbb09e41943ba8c6d13407d Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Sat, 11 Nov 2023 22:56:31 -0800 Subject: [PATCH 06/13] draft test + modified Makefile --- Makefile | 5 +-- ocflib/lab/announcements.py | 62 ++++++++++++++++++--------------- tests/lab/announcements_test.py | 33 ++++++++++++++++++ tox.ini | 2 +- 4 files changed, 71 insertions(+), 31 deletions(-) create mode 100644 tests/lab/announcements_test.py diff --git a/Makefile b/Makefile index 342c01b..dfea75c 100644 --- a/Makefile +++ b/Makefile @@ -11,11 +11,12 @@ install-hooks: venv # after running tests .PHONY: test test: - tox + tox -e py37 -- $(TEST_FILE) ifneq ($(strip $(COVERALLS_REPO_TOKEN)),) - .tox/py37/bin/coveralls + .tox/py37/bin/coveralls endif + .PHONY: release-pypi release-pypi: clean autoversion python3 setup.py sdist bdist_wheel diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index e977127..ed8e179 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -1,45 +1,55 @@ """Announcements handling""" +from collections import deque from collections import namedtuple -from requests import get from datetime import datetime + +from requests import get from yaml import safe_load -from collections import deque # The default branch is main ANNOUNCEMENTS_URL = ( - "https://api.github.com/repos/ocf/announcements/contents/announcements/{id}" + 'https://api.github.com/repos/ocf/announcements/contents/{folder}/{id}' ) CACHE_LEN = 10 # post_cache is a dict of id: post post_cache = {} id_cache = deque(maxlen=CACHE_LEN) +Metadata = namedtuple('Metadata', ['title', 'date', 'author', 'tags', 'summary']) + +def _check_id(id: str) -> bool: + """Check if the id is a valid date""" -Metadata = namedtuple("Metadata", ["title", "date", "author", "tags", "summary"]) + try: + datetime.strptime(id, '%Y-%m-%d-%H') # TODO: if this %H is right + except ValueError: + raise ValueError('Invalid id') -def get_all_announcements() -> list[dict]: +def get_all_announcements(folder='announcements') -> [dict]: """Get announcements from the announcements repo""" posts = get( - url=ANNOUNCEMENTS_URL.format(id=""), - headers={"Accept": "application/vnd.github+json"}, + url=ANNOUNCEMENTS_URL.format(folder=folder, id=''), + headers={'Accept': 'application/vnd.github+json'}, ) posts.raise_for_status() return posts.json() -def get_announcement(id: str) -> str: +def get_announcement(id: str, folder='announcements') -> dict: """Get one particular announcement from the announcements repo""" + _check_id(id) + if id in post_cache: return post_cache[id] posts = get( - url=ANNOUNCEMENTS_URL.format(id=id + ".md"), - headers={"Accept": "application/vnd.github.raw"}, + url=ANNOUNCEMENTS_URL.format(folder=folder, id=id + '.md'), + headers={'Accept': 'application/vnd.github.raw'}, ) posts.raise_for_status() @@ -49,7 +59,7 @@ def get_announcement(id: str) -> str: if id not in id_cache: id_cache.appendleft(id) - return posts.text + return posts.json() def get_id(post: dict) -> str: @@ -57,15 +67,11 @@ def get_id(post: dict) -> str: # Since the id is the filename, remove the .md extension try: - id = post["name"][:-3] + id = post['name'][:-3] except KeyError: - raise KeyError("Missing id in announcement") + raise KeyError('Missing id in announcement') - # Check if the id is a valid date - try: - datetime.strptime(id, "%Y-%m-%d") - except ValueError: - raise ValueError("Invalid announcement id") + _check_id(id) return id @@ -74,25 +80,25 @@ def get_metadata(post: str) -> Metadata: """Get the metadata from one announcement""" try: - meta_dict = safe_load(post.split("---")[1]) + meta_dict = safe_load(post.split('---')[1]) except IndexError: - raise IndexError("Missing metadata in announcement") + raise IndexError('Missing metadata in announcement') try: metadata = Metadata( - title=meta_dict["title"], - date=meta_dict["date"], - author=meta_dict["author"], - tags=meta_dict["tags"], - summary=meta_dict["summary"], + title=meta_dict['title'], + date=meta_dict['date'], + author=meta_dict['author'], + tags=meta_dict['tags'], + summary=meta_dict['summary'], ) except KeyError: - raise KeyError("Missing metadata in announcement") + raise KeyError('Missing metadata in announcement') return metadata -def get_last_n_announcements(n: int) -> list[str]: +def get_last_n_announcements(n: int) -> [str]: """Get the last n announcements""" result = [] @@ -109,7 +115,7 @@ def get_last_n_announcements(n: int) -> list[str]: # The posts returned are in reverse chronological order # So we first need to grab the last CACHE_LEN + 1 posts to avoid duplicates # Then we need to reverse the order and grab the first needed posts - for post in posts[-CACHE_LEN - 1 :: -1][:needed]: + for post in posts[-CACHE_LEN - 1:: -1][:needed]: result.append(get_announcement(get_id(post))) return result diff --git a/tests/lab/announcements_test.py b/tests/lab/announcements_test.py new file mode 100644 index 0000000..f9e259c --- /dev/null +++ b/tests/lab/announcements_test.py @@ -0,0 +1,33 @@ +import pytest + +from ocflib.lab.announcements import get_announcement + +TEST_FOLDER = 'tests' + + +def test_health_check(): + assert True + + +@pytest.mark.parametrize( + 'content, id', + [ + ('this is a 1st test announcements', '2002-01-00-00'), + ('this is a 2nd test announcements', '2002-01-01-00'), + ('this is a 3rd test announcements', '2002-01-02-00'), + ], +) +def test_get_announcement_pass(content, id): + assert content in get_announcement(id, folder=TEST_FOLDER).text + + +def test_get_announcement_bad_id(): + with pytest.raises(ValueError): + get_announcement('2002-01-01-102p', folder=TEST_FOLDER) + + +# @pytest.mark.parametrize +# def test_get_id_pass(): +# assert "2002-01-00-00" == get_id( +# get_announcement("2002-01-01-10", folder=TEST_FOLDER) +# ) diff --git a/tox.ini b/tox.ini index 80e00ed..7413731 100644 --- a/tox.ini +++ b/tox.ini @@ -9,7 +9,7 @@ venv_update = install= -r {toxinidir}/requirements-dev.txt -e {toxinidir} commands = {[testenv]venv_update} - py.test --cov --cov-report=term-missing -n 1 -v tests -vv + py.test --cov --cov-report=term-missing -n 1 -v tests/{posargs} -vv pre-commit run --all-files [flake8] From 5a6fa692fa60373c5f060b1318781cba50bd50cf Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Sun, 12 Nov 2023 17:13:53 -0800 Subject: [PATCH 07/13] more test --- ocflib/lab/announcements.py | 31 ++++----- tests/lab/announcements_test.py | 107 +++++++++++++++++++++++++++----- 2 files changed, 108 insertions(+), 30 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index ed8e179..d5ea4c3 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -2,6 +2,7 @@ from collections import deque from collections import namedtuple from datetime import datetime +from typing import Dict from requests import get from yaml import safe_load @@ -12,8 +13,8 @@ ) CACHE_LEN = 10 -# post_cache is a dict of id: post -post_cache = {} +# post_cache is a dict of {id: post content} +post_cache: Dict[str, str] = {} id_cache = deque(maxlen=CACHE_LEN) Metadata = namedtuple('Metadata', ['title', 'date', 'author', 'tags', 'summary']) @@ -22,7 +23,7 @@ def _check_id(id: str) -> bool: """Check if the id is a valid date""" try: - datetime.strptime(id, '%Y-%m-%d-%H') # TODO: if this %H is right + datetime.strptime(id, '%Y-%m-%d-%M') # TODO: if this %M (minute) is sufficient except ValueError: raise ValueError('Invalid id') @@ -39,7 +40,7 @@ def get_all_announcements(folder='announcements') -> [dict]: return posts.json() -def get_announcement(id: str, folder='announcements') -> dict: +def get_announcement(id: str, folder='announcements') -> str: """Get one particular announcement from the announcements repo""" _check_id(id) @@ -47,27 +48,27 @@ def get_announcement(id: str, folder='announcements') -> dict: if id in post_cache: return post_cache[id] - posts = get( + post = get( url=ANNOUNCEMENTS_URL.format(folder=folder, id=id + '.md'), headers={'Accept': 'application/vnd.github.raw'}, ) - posts.raise_for_status() + post.raise_for_status() - post_cache[id] = posts.text + post_cache[id] = post.text # add the most recent id to the left of the deque if id not in id_cache: id_cache.appendleft(id) - return posts.json() + return post.text -def get_id(post: dict) -> str: +def get_id(post_json: dict) -> str: """Get announcement id based on the json response""" # Since the id is the filename, remove the .md extension try: - id = post['name'][:-3] + id = post_json['name'][:-3] except KeyError: raise KeyError('Missing id in announcement') @@ -76,11 +77,11 @@ def get_id(post: dict) -> str: return id -def get_metadata(post: str) -> Metadata: +def get_metadata(post_text: str) -> Metadata: """Get the metadata from one announcement""" try: - meta_dict = safe_load(post.split('---')[1]) + meta_dict = safe_load(post_text.split('---')[1]) except IndexError: raise IndexError('Missing metadata in announcement') @@ -98,8 +99,10 @@ def get_metadata(post: str) -> Metadata: return metadata -def get_last_n_announcements(n: int) -> [str]: - """Get the last n announcements""" +def get_last_n_announcements_text(n: int) -> [str]: + """Get the text of last n announcements""" + + assert n > 0, 'n must be positive' result = [] diff --git a/tests/lab/announcements_test.py b/tests/lab/announcements_test.py index f9e259c..ef1b8d3 100644 --- a/tests/lab/announcements_test.py +++ b/tests/lab/announcements_test.py @@ -1,33 +1,108 @@ import pytest +from requests.exceptions import HTTPError +from ocflib.lab.announcements import get_all_announcements from ocflib.lab.announcements import get_announcement +from ocflib.lab.announcements import get_id TEST_FOLDER = 'tests' +TEST_IDS = [ + '2002-01-01-00', + '2002-01-01-01', + '2002-01-02-00', + '2023-09-01-00', + '2023-10-01-00', + '2023-11-01-00', +] -def test_health_check(): - assert True +# scope = module means that the fixture is only run once per module +@pytest.fixture(scope='module') +def test_annoucement_health_check() -> [dict]: + return get_all_announcements(folder=TEST_FOLDER) @pytest.mark.parametrize( - 'content, id', + 'id', + TEST_IDS, +) +def test_get_announcement_pass(id): + assert 'testing' in get_announcement(id, folder=TEST_FOLDER) + + +@pytest.mark.parametrize( + 'id', [ - ('this is a 1st test announcements', '2002-01-00-00'), - ('this is a 2nd test announcements', '2002-01-01-00'), - ('this is a 3rd test announcements', '2002-01-02-00'), + '2002-01-00-00212', + '2002-01-01-aa', + '2002-01-02-21a', + '202-01-02-00', + '2002-223-02-00', + '2002-01-80-00', ], ) -def test_get_announcement_pass(content, id): - assert content in get_announcement(id, folder=TEST_FOLDER).text +def test_get_announcement_bad_id(id): + with pytest.raises(ValueError): + get_announcement(id, folder=TEST_FOLDER) -def test_get_announcement_bad_id(): - with pytest.raises(ValueError): - get_announcement('2002-01-01-102p', folder=TEST_FOLDER) +# Those announcements don't exist in the test folder +@pytest.mark.parametrize( + 'id', + [ + '2002-01-01-10', + '2002-01-01-12', + '2002-01-02-30', + ], +) +def test_get_announcement_fail(id): + with pytest.raises(HTTPError): + get_announcement(id, folder=TEST_FOLDER) + + +@pytest.mark.parametrize('id', TEST_IDS) +def test_get_id_pass(id, test_annoucement_health_check): + found = False + for post in test_annoucement_health_check: + if id == get_id(post): + found = True + break + assert found, f'ID {id} not found in announcements' + + +@pytest.mark.parametrize( + 'id', + [ + '2002-01-01-10', + '2002-01-01-12', + '2002-01-02-30', + ], +) +def test_get_id_fail(id, test_annoucement_health_check): + for post in test_annoucement_health_check: + assert id != get_id(post), f'Unexpected ID {id} found in announcements' + + +# @pytest.mark.parametrize() +# def test_get_metadata_pass(content): +# pass + + +# @pytest.mark.parametrize() +# def test_get_metadata_bad_format(content): +# pass + + +# @pytest.mark.parametrize() +# def test_get_metadata_missing_metadata(content): +# pass + + +# @pytest.mark.parametrize() +# def test_get_last_n_announcements_text_pass(content): +# pass -# @pytest.mark.parametrize -# def test_get_id_pass(): -# assert "2002-01-00-00" == get_id( -# get_announcement("2002-01-01-10", folder=TEST_FOLDER) -# ) +# @pytest.mark.parametrize() +# def test_get_last_n_announcements_text_bad_n(content): +# pass From 636ab8e3a5127d421cd102abb06781c5da086dd3 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Wed, 15 Nov 2023 10:44:04 -0800 Subject: [PATCH 08/13] more tests --- tests/lab/announcements_test.py | 60 ++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/tests/lab/announcements_test.py b/tests/lab/announcements_test.py index ef1b8d3..8abee81 100644 --- a/tests/lab/announcements_test.py +++ b/tests/lab/announcements_test.py @@ -4,6 +4,7 @@ from ocflib.lab.announcements import get_all_announcements from ocflib.lab.announcements import get_announcement from ocflib.lab.announcements import get_id +from ocflib.lab.announcements import get_metadata TEST_FOLDER = 'tests' TEST_IDS = [ @@ -18,10 +19,18 @@ # scope = module means that the fixture is only run once per module @pytest.fixture(scope='module') -def test_annoucement_health_check() -> [dict]: +def get_all() -> [dict]: return get_all_announcements(folder=TEST_FOLDER) +# scope = module means that the fixture is only run once per module +@pytest.fixture(scope='module') +def announcement_data(): + # Fetch data once for all tests in this module + return {id: get_announcement(id, folder=TEST_FOLDER) for id in TEST_IDS} + + +# Health check @pytest.mark.parametrize( 'id', TEST_IDS, @@ -30,13 +39,13 @@ def test_get_announcement_pass(id): assert 'testing' in get_announcement(id, folder=TEST_FOLDER) +# Those ids are invalid @pytest.mark.parametrize( 'id', [ '2002-01-00-00212', '2002-01-01-aa', '2002-01-02-21a', - '202-01-02-00', '2002-223-02-00', '2002-01-80-00', ], @@ -60,16 +69,18 @@ def test_get_announcement_fail(id): get_announcement(id, folder=TEST_FOLDER) +# Those ids are valid @pytest.mark.parametrize('id', TEST_IDS) -def test_get_id_pass(id, test_annoucement_health_check): +def test_get_id_pass(id, get_all): found = False - for post in test_annoucement_health_check: + for post in get_all: if id == get_id(post): found = True break assert found, f'ID {id} not found in announcements' +# Those ids don't exist in the test folder @pytest.mark.parametrize( 'id', [ @@ -78,24 +89,35 @@ def test_get_id_pass(id, test_annoucement_health_check): '2002-01-02-30', ], ) -def test_get_id_fail(id, test_annoucement_health_check): - for post in test_annoucement_health_check: +def test_get_id_fail(id, get_all): + for post in get_all: assert id != get_id(post), f'Unexpected ID {id} found in announcements' -# @pytest.mark.parametrize() -# def test_get_metadata_pass(content): -# pass - - -# @pytest.mark.parametrize() -# def test_get_metadata_bad_format(content): -# pass - - -# @pytest.mark.parametrize() -# def test_get_metadata_missing_metadata(content): -# pass +@pytest.mark.parametrize('id', TEST_IDS) +def test_get_metadata_pass(id, announcement_data): + content = announcement_data[id] + assert 'Victor' == get_metadata(content).author, 'author not found in metadata' + + +def test_get_metadata_missing_metadata(): + content = """ + --- + title: test + date: 2020-01-01 + --- + """ + with pytest.raises(KeyError): + get_metadata(content) + + +def test_get_metadata_bad_format(): + content = """ + title: test + date: 2020-01-01 + """ + with pytest.raises(IndexError): + get_metadata(content) # @pytest.mark.parametrize() From 664b9ee0bb00600f5b177facda6c0cb2ba9347a4 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Mon, 4 Dec 2023 17:27:24 -0800 Subject: [PATCH 09/13] new cache implementation --- ocflib/lab/announcements.py | 66 +++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index d5ea4c3..34752ee 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -1,8 +1,6 @@ """Announcements handling""" -from collections import deque from collections import namedtuple from datetime import datetime -from typing import Dict from requests import get from yaml import safe_load @@ -11,11 +9,13 @@ ANNOUNCEMENTS_URL = ( 'https://api.github.com/repos/ocf/announcements/contents/{folder}/{id}' ) -CACHE_LEN = 10 +# 1 day in seconds +TIME_TO_LIVE = 60 * 60 * 24 # post_cache is a dict of {id: post content} -post_cache: Dict[str, str] = {} -id_cache = deque(maxlen=CACHE_LEN) +post_cache = {} +last_updated = datetime.now() + Metadata = namedtuple('Metadata', ['title', 'date', 'author', 'tags', 'summary']) @@ -23,13 +23,25 @@ def _check_id(id: str) -> bool: """Check if the id is a valid date""" try: - datetime.strptime(id, '%Y-%m-%d-%M') # TODO: if this %M (minute) is sufficient + datetime.strptime(id, '%Y-%m-%d-%M') except ValueError: raise ValueError('Invalid id') +def _clear_cache() -> None: + """Clear the cache if it's too old""" + + global last_updated + if (datetime.now() - last_updated).total_seconds() > TIME_TO_LIVE: + post_cache.clear() + last_updated = datetime.now() + + def get_all_announcements(folder='announcements') -> [dict]: - """Get announcements from the announcements repo""" + """ + Get announcements from the announcements repo + The result is a list of post metadatas + """ posts = get( url=ANNOUNCEMENTS_URL.format(folder=folder, id=''), @@ -41,9 +53,14 @@ def get_all_announcements(folder='announcements') -> [dict]: def get_announcement(id: str, folder='announcements') -> str: - """Get one particular announcement from the announcements repo""" + """ + Get one particular announcement from the announcements repo + The result is the post content + """ _check_id(id) + # if the cache is too old, clear it + _clear_cache() if id in post_cache: return post_cache[id] @@ -56,10 +73,6 @@ def get_announcement(id: str, folder='announcements') -> str: post_cache[id] = post.text - # add the most recent id to the left of the deque - if id not in id_cache: - id_cache.appendleft(id) - return post.text @@ -82,21 +95,18 @@ def get_metadata(post_text: str) -> Metadata: try: meta_dict = safe_load(post_text.split('---')[1]) - except IndexError: - raise IndexError('Missing metadata in announcement') - try: - metadata = Metadata( + data = Metadata( title=meta_dict['title'], date=meta_dict['date'], author=meta_dict['author'], tags=meta_dict['tags'], summary=meta_dict['summary'], ) - except KeyError: - raise KeyError('Missing metadata in announcement') + except (IndexError, KeyError) as e: + raise ValueError(f'Error parsing metadata: {e}') - return metadata + return data def get_last_n_announcements_text(n: int) -> [str]: @@ -106,19 +116,11 @@ def get_last_n_announcements_text(n: int) -> [str]: result = [] - # Get announcements from the cache (latest first) - for id in list(id_cache)[:n]: - result.append(post_cache[id]) - - # Fetch additional announcements if needed - if n > CACHE_LEN: - posts = get_all_announcements() - needed = n - CACHE_LEN + # the res returned are in reverse chronological order + # so we need to reverse it first then take the first n + res = get_all_announcements()[::-1][:n] - # The posts returned are in reverse chronological order - # So we first need to grab the last CACHE_LEN + 1 posts to avoid duplicates - # Then we need to reverse the order and grab the first needed posts - for post in posts[-CACHE_LEN - 1:: -1][:needed]: - result.append(get_announcement(get_id(post))) + for item in res: + result.append(get_announcement(get_id(item))) return result From 3443b2a2c61c0001cc1296e743ae86bb3c95ee37 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Tue, 5 Dec 2023 11:54:31 -0800 Subject: [PATCH 10/13] cache class --- ocflib/lab/announcements.py | 39 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index 34752ee..c53fdc5 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -11,12 +11,24 @@ ) # 1 day in seconds TIME_TO_LIVE = 60 * 60 * 24 +Metadata = namedtuple('Metadata', ['title', 'date', 'author', 'tags', 'summary']) -# post_cache is a dict of {id: post content} -post_cache = {} -last_updated = datetime.now() -Metadata = namedtuple('Metadata', ['title', 'date', 'author', 'tags', 'summary']) +class _AnnouncementCache: + def __init__(self) -> None: + # post_cache is a dict of {id: post content} + self.cache = {} + self.last_updated = datetime.now() + + def clear_cache(self) -> None: + """Clear the cache if it's too old""" + + if (datetime.now() - self.last_updated).total_seconds() > TIME_TO_LIVE: + self.cache.clear() + self.last_updated = datetime.now() + + +_announcement_cache_instance = _AnnouncementCache() def _check_id(id: str) -> bool: @@ -28,15 +40,6 @@ def _check_id(id: str) -> bool: raise ValueError('Invalid id') -def _clear_cache() -> None: - """Clear the cache if it's too old""" - - global last_updated - if (datetime.now() - last_updated).total_seconds() > TIME_TO_LIVE: - post_cache.clear() - last_updated = datetime.now() - - def get_all_announcements(folder='announcements') -> [dict]: """ Get announcements from the announcements repo @@ -59,11 +62,11 @@ def get_announcement(id: str, folder='announcements') -> str: """ _check_id(id) - # if the cache is too old, clear it - _clear_cache() + # check if the cache is too old + _announcement_cache_instance.clear_cache() - if id in post_cache: - return post_cache[id] + if id in _announcement_cache_instance.cache: + return _announcement_cache_instance.cache[id] post = get( url=ANNOUNCEMENTS_URL.format(folder=folder, id=id + '.md'), @@ -71,7 +74,7 @@ def get_announcement(id: str, folder='announcements') -> str: ) post.raise_for_status() - post_cache[id] = post.text + _announcement_cache_instance.cache[id] = post.text return post.text From c82a1513b7946e77aceee147866fe9ea4d5e1500 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Tue, 5 Dec 2023 11:59:15 -0800 Subject: [PATCH 11/13] Metadata class --- ocflib/lab/announcements.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index c53fdc5..c5606c4 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -1,5 +1,4 @@ """Announcements handling""" -from collections import namedtuple from datetime import datetime from requests import get @@ -11,7 +10,15 @@ ) # 1 day in seconds TIME_TO_LIVE = 60 * 60 * 24 -Metadata = namedtuple('Metadata', ['title', 'date', 'author', 'tags', 'summary']) + + +class Metadata: + def __init__(self, title, date, author, tags, summary): + self.title = title + self.date = date + self.author = author + self.tags = tags + self.summary = summary class _AnnouncementCache: From 76f6e69f8f3c43849ef4a9288884cc6c4aba27d7 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Tue, 12 Dec 2023 14:14:40 -0800 Subject: [PATCH 12/13] added ID cache and function for return the last IDs --- ocflib/lab/announcements.py | 61 ++++++++++++++++++++++++--------- tests/lab/announcements_test.py | 21 +++--------- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/ocflib/lab/announcements.py b/ocflib/lab/announcements.py index c5606c4..364bb73 100644 --- a/ocflib/lab/announcements.py +++ b/ocflib/lab/announcements.py @@ -23,15 +23,18 @@ def __init__(self, title, date, author, tags, summary): class _AnnouncementCache: def __init__(self) -> None: - # post_cache is a dict of {id: post content} - self.cache = {} + # text_cache is a dict of {id: post content} + self.text_cache = {} + # id_cache is a list of ids, ordered by latest to oldest + self.id_cache = [] self.last_updated = datetime.now() def clear_cache(self) -> None: """Clear the cache if it's too old""" if (datetime.now() - self.last_updated).total_seconds() > TIME_TO_LIVE: - self.cache.clear() + self.text_cache.clear() + self.id_cache.clear() self.last_updated = datetime.now() @@ -47,10 +50,10 @@ def _check_id(id: str) -> bool: raise ValueError('Invalid id') -def get_all_announcements(folder='announcements') -> [dict]: +def get_all_announcements(folder='announcements') -> [str]: """ Get announcements from the announcements repo - The result is a list of post metadatas + The result is a list of IDs from latest to oldest """ posts = get( @@ -59,7 +62,17 @@ def get_all_announcements(folder='announcements') -> [dict]: ) posts.raise_for_status() - return posts.json() + res = [] + + for post in posts.json(): + res.append(get_id(post)) + + # Reverse the list so that the order is latest to oldest + res = res[::-1] + + _announcement_cache_instance.id_cache = res + + return res def get_announcement(id: str, folder='announcements') -> str: @@ -69,11 +82,9 @@ def get_announcement(id: str, folder='announcements') -> str: """ _check_id(id) - # check if the cache is too old - _announcement_cache_instance.clear_cache() - if id in _announcement_cache_instance.cache: - return _announcement_cache_instance.cache[id] + if id in _announcement_cache_instance.text_cache: + return _announcement_cache_instance.text_cache[id] post = get( url=ANNOUNCEMENTS_URL.format(folder=folder, id=id + '.md'), @@ -81,7 +92,7 @@ def get_announcement(id: str, folder='announcements') -> str: ) post.raise_for_status() - _announcement_cache_instance.cache[id] = post.text + _announcement_cache_instance.text_cache[id] = post.text return post.text @@ -119,18 +130,36 @@ def get_metadata(post_text: str) -> Metadata: return data +def get_last_n_announcements(n: int) -> [dict]: + """Get the IDs of last n announcements""" + + assert n > 0, 'n must be positive' + + # check if the cache is too old + _announcement_cache_instance.clear_cache() + + if _announcement_cache_instance.id_cache: + return _announcement_cache_instance.id_cache[:n] + + return get_all_announcements()[:n] + + def get_last_n_announcements_text(n: int) -> [str]: """Get the text of last n announcements""" assert n > 0, 'n must be positive' + # check if the cache is too old + _announcement_cache_instance.clear_cache() + result = [] - # the res returned are in reverse chronological order - # so we need to reverse it first then take the first n - res = get_all_announcements()[::-1][:n] + if _announcement_cache_instance.id_cache: + res = _announcement_cache_instance.id_cache[:n] + else: + res = get_all_announcements()[:n] - for item in res: - result.append(get_announcement(get_id(item))) + for id in res: + result.append(get_announcement(id)) return result diff --git a/tests/lab/announcements_test.py b/tests/lab/announcements_test.py index 8abee81..9459a93 100644 --- a/tests/lab/announcements_test.py +++ b/tests/lab/announcements_test.py @@ -3,7 +3,6 @@ from ocflib.lab.announcements import get_all_announcements from ocflib.lab.announcements import get_announcement -from ocflib.lab.announcements import get_id from ocflib.lab.announcements import get_metadata TEST_FOLDER = 'tests' @@ -19,7 +18,7 @@ # scope = module means that the fixture is only run once per module @pytest.fixture(scope='module') -def get_all() -> [dict]: +def get_all() -> [str]: return get_all_announcements(folder=TEST_FOLDER) @@ -74,7 +73,7 @@ def test_get_announcement_fail(id): def test_get_id_pass(id, get_all): found = False for post in get_all: - if id == get_id(post): + if id == post: found = True break assert found, f'ID {id} not found in announcements' @@ -91,7 +90,7 @@ def test_get_id_pass(id, get_all): ) def test_get_id_fail(id, get_all): for post in get_all: - assert id != get_id(post), f'Unexpected ID {id} found in announcements' + assert id != post, f'Unexpected ID {id} found in announcements' @pytest.mark.parametrize('id', TEST_IDS) @@ -107,7 +106,7 @@ def test_get_metadata_missing_metadata(): date: 2020-01-01 --- """ - with pytest.raises(KeyError): + with pytest.raises(ValueError): get_metadata(content) @@ -116,15 +115,5 @@ def test_get_metadata_bad_format(): title: test date: 2020-01-01 """ - with pytest.raises(IndexError): + with pytest.raises(ValueError): get_metadata(content) - - -# @pytest.mark.parametrize() -# def test_get_last_n_announcements_text_pass(content): -# pass - - -# @pytest.mark.parametrize() -# def test_get_last_n_announcements_text_bad_n(content): -# pass From fc549cb90ad8d09dbe9cd423eb043dee4cdc3553 Mon Sep 17 00:00:00 2001 From: Victor Shi Date: Tue, 12 Dec 2023 14:57:55 -0800 Subject: [PATCH 13/13] update README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 582b8ac..5d3d5ee 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,10 @@ The `tests` directory contains automated tests which you're encouraged to add to (and not break). The `tests-manual` directory contains scripts intended for testing. +To run the a specific test file, run `make tests TEST_FILE=/`. + +For example, to run the `announcements_test.py` file in the `lab` folder, run `make tests TEST_FILE=lab/announcements_test.py` + #### Using pre-commit