From bd99cd8bb8faabdea01debd435b610b6cb3d0b39 Mon Sep 17 00:00:00 2001
From: RusticPotato <3542090+RusticPotatoes@users.noreply.github.com>
Date: Tue, 21 Nov 2023 13:01:26 -0500
Subject: [PATCH] Post Feedback (#31)

* test case and model change

* define route for post

* wip daily

* min size vs min value

* cleanup anon user

* stuck, daily

* lower case labels

* checkpoint

* checkpoint, add player working

* working validator for feedback input

* Update foreign key references in Feedback model

* cleanup of ok debug returns

* checkpoint working post of feedback!

* working code, need to fix tests

* minor fix

* better but not perfect

* bug fix

* change to self.assertions

* add anon users to players, 10

* self.asserts and worked on post feedback valid anon

* Refactor test_post_feedback_valid_anon method to
generate random data

* remove duplicate players and subjects

* remove duplicate players

* remove print

* revert

* valid player names from reports

* Refactor player API tests

* use and_ and first()

* rename class to TestReportAPI

* cleanup debug output

* improve logging

* remove unnecessary and

* hardcoded anonymous users

* improve report

---------

Co-authored-by: extreme4all <>
---
 docker-compose.yaml                          |   2 +-
 kafka_setup/setup_kafka.py                   |   2 +-
 mysql/docker-entrypoint-initdb.d/02_data.sql |  43 ++++-
 src/api/v2/__init__.py                       |   3 +-
 src/api/v2/feedback.py                       |  28 ++++
 src/api/v2/report.py                         |   4 +-
 src/app/models/feedback.py                   |  70 ++++++++
 src/app/models/player.py                     |   8 +
 src/app/models/report.py                     |  13 +-
 src/app/views/input/feedback.py              |  50 ++----
 src/core/database/models/feedback.py         |   4 +-
 src/core/fastapi/dependencies/kafka.py       |  68 ++++++++
 src/core/kafka/engine.py                     |  77 ---------
 src/core/kafka/report.py                     |  10 --
 src/core/server.py                           |  29 +---
 tests/test_feedback_api.py                   | 165 +++++++++++++++----
 tests/test_player_api.py                     |  92 +++++++++--
 tests/test_prediction_api.py                 |  82 ---------
 tests/test_report_api.py                     |   2 +-
 19 files changed, 466 insertions(+), 286 deletions(-)
 create mode 100644 src/api/v2/feedback.py
 create mode 100644 src/app/models/feedback.py
 create mode 100644 src/core/fastapi/dependencies/kafka.py
 delete mode 100644 src/core/kafka/engine.py
 delete mode 100644 src/core/kafka/report.py
 delete mode 100644 tests/test_prediction_api.py

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 363dcfb4..c79ea518 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -79,7 +79,7 @@ services:
       dockerfile: Dockerfile
       target: base
       args:
-        root_path: /
+        root_path: ""
         api_port: 5000
     image: public_api
     # command: bash -c "apt update && apt install -y curl && sleep infinity"
diff --git a/kafka_setup/setup_kafka.py b/kafka_setup/setup_kafka.py
index 2c685754..3abe3264 100644
--- a/kafka_setup/setup_kafka.py
+++ b/kafka_setup/setup_kafka.py
@@ -34,7 +34,7 @@ def create_topics():
                 replication_factor=1,
             ),
             NewTopic(
-                name="reports",
+                name="report",
                 num_partitions=4,
                 replication_factor=1,
             ),
diff --git a/mysql/docker-entrypoint-initdb.d/02_data.sql b/mysql/docker-entrypoint-initdb.d/02_data.sql
index d1b3957e..0b4e5931 100644
--- a/mysql/docker-entrypoint-initdb.d/02_data.sql
+++ b/mysql/docker-entrypoint-initdb.d/02_data.sql
@@ -51,11 +51,10 @@ call InsertRandomPlayers(100, 0,0,1);
 UPDATE Players
 SET
     name = CONCAT('player', id),
-    created_at = NOW() - INTERVAL FLOOR(RAND(42) * 365) DAY,
-    updated_at = NOW() - INTERVAL FLOOR(RAND(41) * 365) DAY,
     normalized_name = CONCAT('player', id)
 ;
 
+
 -- Insert data into the Reports table
 INSERT INTO
     Reports (
@@ -167,7 +166,9 @@ SELECT
         TIMESTAMPDIFF(SECOND, '2020-01-01 00:00:00', '2022-12-31 23:59:59') * RAND(42) 
         + UNIX_TIMESTAMP('2020-01-01 00:00:00')
     )
-FROM `Players` 
+FROM `Players`
+where 1=1
+    AND name not LIKE 'anonymoususer%'
 ORDER BY RAND(42) 
 LIMIT 250
 ;
@@ -235,4 +236,40 @@ UPDATE PredictionsFeedback
     SET proposed_label = prediction
 WHERE 1=1
     AND vote = 1
+;
+
+DELIMITER $$
+
+INSERT INTO Players (
+    name,
+    created_at,
+    updated_at,
+    possible_ban,
+    confirmed_ban,
+    confirmed_player,
+    label_id,
+    label_jagex
+) VALUES 
+    ("anonymoususer 382e728f 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e7259 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e7221 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e71ee 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e71bb 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e7179 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e7133 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e70ef 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e7089 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0),
+    ("anonymoususer 382e6def 87ea 11ee aab6 0242ac120002", NOW(), NOW(), 0, 0, 0, 0, 0)
+;
+
+UPDATE `Players`
+SET
+    created_at = NOW() - INTERVAL FLOOR(RAND(42) * 365) DAY,
+    updated_at = NOW() - INTERVAL FLOOR(RAND(41) * 365) DAY
+;
+UPDATE `Players`
+SET
+    name=replace(name,'-',' '),
+    normalized_name=replace(name,'-',' ')
+WHERE name LIKE 'anonymoususer%'
 ;
\ No newline at end of file
diff --git a/src/api/v2/__init__.py b/src/api/v2/__init__.py
index a6097332..7ae6966c 100644
--- a/src/api/v2/__init__.py
+++ b/src/api/v2/__init__.py
@@ -1,7 +1,8 @@
 from fastapi import APIRouter
 
-from . import player, report
+from . import feedback, player, report
 
 router = APIRouter()
 router.include_router(player.router)
 router.include_router(report.router)
+router.include_router(feedback.router)
diff --git a/src/api/v2/feedback.py b/src/api/v2/feedback.py
new file mode 100644
index 00000000..dc90dbee
--- /dev/null
+++ b/src/api/v2/feedback.py
@@ -0,0 +1,28 @@
+import logging
+
+from fastapi import APIRouter, Depends, HTTPException, status
+
+from src.app.models.feedback import Feedback
+from src.app.views.input.feedback import FeedbackInput
+from src.app.views.response.ok import Ok
+from src.core.fastapi.dependencies.session import get_session
+from src.core.fastapi.dependencies.to_jagex_name import to_jagex_name
+
+router = APIRouter(tags=["Feedback"])
+logger = logging.getLogger(__name__)
+
+
+@router.post("/feedback", response_model=Ok, status_code=status.HTTP_201_CREATED)
+async def post_feedback(
+    feedback: FeedbackInput,
+    session=Depends(get_session),
+) -> Ok:
+    """Store one feedback vote; respond 422 with the reason when it is rejected."""
+    _feedback = Feedback(session)
+    # presumably normalises the name to its stored form - confirm in to_jagex_name
+    feedback.player_name = await to_jagex_name(feedback.player_name)
+    # insert_feedback reports a rejection as (False, reason) instead of raising
+    success, detail = await _feedback.insert_feedback(feedback=feedback)
+    if not success:
+        raise HTTPException(status_code=422, detail=detail)
+    return Ok()
diff --git a/src/api/v2/report.py b/src/api/v2/report.py
index 4ea92110..f219196f 100644
--- a/src/api/v2/report.py
+++ b/src/api/v2/report.py
@@ -4,14 +4,14 @@
 from src.app.models.report import Report
 from src.app.views.input.report import Detection
 from src.app.views.response.ok import Ok
-from src.core.kafka.report import report_engine
+from src.core.fastapi.dependencies import kafka
 
 router = APIRouter(tags=["Report"])
 
 
 @router.post("/report", status_code=status.HTTP_201_CREATED, response_model=Ok)
 async def post_reports(detection: list[Detection]):
-    report = Report(kafka_engine=report_engine)
+    report = Report()
     data = await report.parse_data(detection)
     if not data:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, detail="invalid data")
diff --git a/src/app/models/feedback.py b/src/app/models/feedback.py
new file mode 100644
index 00000000..88f58379
--- /dev/null
+++ b/src/app/models/feedback.py
@@ -0,0 +1,70 @@
+import logging
+import time
+
+from fastapi.encoders import jsonable_encoder
+from sqlalchemy import and_, func, insert, select
+from sqlalchemy.ext.asyncio import AsyncResult, AsyncSession
+from sqlalchemy.sql.expression import Insert, Select
+
+from src.app.views.input.feedback import FeedbackInput
+from src.core.database.models.feedback import PredictionFeedback as dbFeedback
+from src.core.database.models.player import Player as dbPlayer
+
+logger = logging.getLogger(__name__)
+
+
+class Feedback:
+    """Persist prediction feedback through the supplied async session."""
+
+    def __init__(self, session: AsyncSession) -> None:
+        self.session = session
+
+    async def insert_feedback(self, feedback: FeedbackInput) -> tuple[bool, str]:
+        """Insert one feedback row for the voter named by `feedback.player_name`.
+
+        Returns (True, "success"), or (False, reason) with reason being
+        "voter_does_not_exist" or "duplicate_record".
+        """
+        sql_select: Select = select(dbPlayer.id)
+        sql_select = sql_select.where(dbPlayer.name == feedback.player_name)
+
+        sql_insert: Insert = insert(dbFeedback)
+        data = {
+            "subject_id": feedback.subject_id,
+            "prediction": feedback.prediction,
+            "confidence": feedback.confidence,
+            "vote": feedback.vote,
+            "feedback_text": feedback.feedback_text,
+            "proposed_label": feedback.proposed_label,
+        }
+
+        async with self.session:
+            result: AsyncResult = await self.session.execute(sql_select)
+            voter = result.first()
+            if not voter:
+                # Log the rejected payload itself, not the FeedbackInput class.
+                logger.info({"voter_does_not_exist": feedback})
+                await self.session.rollback()
+                return False, "voter_does_not_exist"
+
+            # Positional access works on every SQLAlchemy Row flavour.
+            voter_id = voter[0]
+            sql_dupe_check: Select = select(dbFeedback).where(
+                and_(
+                    dbFeedback.voter_id == voter_id,
+                    dbFeedback.prediction == feedback.prediction,
+                    dbFeedback.subject_id == feedback.subject_id,
+                )
+            )
+            result = await self.session.execute(sql_dupe_check)
+            if result.first():
+                logger.info({"duplicate_record": feedback, "voter id": voter_id})
+                await self.session.rollback()
+                return False, "duplicate_record"
+
+            # add voter_id and insert
+            data["voter_id"] = voter_id
+            sql_insert = sql_insert.values(data)
+            await self.session.execute(sql_insert)
+            await self.session.commit()
+        return True, "success"
diff --git a/src/app/models/player.py b/src/app/models/player.py
index fb064e1c..e510604f 100644
--- a/src/app/models/player.py
+++ b/src/app/models/player.py
@@ -1,14 +1,22 @@
+import logging
+import time
+
 from fastapi.encoders import jsonable_encoder
 from sqlalchemy import func, select
 from sqlalchemy.engine import Result
+from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import AsyncResult, AsyncSession
 from sqlalchemy.orm import aliased
 from sqlalchemy.sql.expression import Select
 
+from src.app.views.input.feedback import FeedbackInput
 from src.core.database.models.feedback import PredictionFeedback as dbFeedback
 from src.core.database.models.player import Player as dbPlayer
 from src.core.database.models.prediction import Prediction as dbPrediction
 from src.core.database.models.report import Report as dbReport
+from src.core.fastapi.dependencies.to_jagex_name import to_jagex_name
+
+logger = logging.getLogger(__name__)
 
 
 class Player:
diff --git a/src/app/models/report.py b/src/app/models/report.py
index 1f8866e9..2f32f4be 100644
--- a/src/app/models/report.py
+++ b/src/app/models/report.py
@@ -1,15 +1,15 @@
+import asyncio
 import logging
 import time
 
 from src.app.views.input.report import Detection
-from src.core.kafka.engine import AioKafkaEngine
+from src.core.fastapi.dependencies import kafka
 
 logger = logging.getLogger(__name__)
 
 
 class Report:
-    def __init__(self, kafka_engine: AioKafkaEngine) -> None:
-        self.kafka_engine = kafka_engine
+    def __init__(self) -> None:
         pass
 
     def _check_data_size(self, data: list[Detection]) -> list[Detection] | None:
@@ -45,7 +45,8 @@ async def parse_data(self, data: list[dict]) -> list[Detection] | None:
         return data
 
     async def send_to_kafka(self, data: list[Detection]) -> None:
-        for detection in data:
-            detection = detection.model_dump_json()
-            self.kafka_engine.message_queue.put_nowait(detection)
+        detections = [d.model_dump(mode="json") for d in data]
+        await asyncio.gather(
+            *[kafka.report_send_queue.put(detection) for detection in detections]
+        )
         return
diff --git a/src/app/views/input/feedback.py b/src/app/views/input/feedback.py
index cd50c428..49ef7da8 100644
--- a/src/app/views/input/feedback.py
+++ b/src/app/views/input/feedback.py
@@ -1,7 +1,6 @@
-import re
-from typing import List, Optional
+from typing import Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, constr, validator
 
 
 class FeedbackInput(BaseModel):
@@ -9,19 +8,17 @@ class FeedbackInput(BaseModel):
     Class representing prediction feedback input.
     """
 
-    player_name: str = Field(
+    player_name: constr(strip_whitespace=True) = Field(
         ...,
         example="Player1",
         min_length=1,
-        max_length=13,
+        max_length=50,
         description="Name of the player",
     )
     vote: int = Field(..., ge=-1, le=1, description="Vote is -1, 0 or 1")
     prediction: str = Field(
         ...,
-        example="Real_Player",
-        min_length=1,
-        max_length=13,
+        example="real_player",
         description="Prediction for the player",
     )
     confidence: Optional[float] = Field(
@@ -35,31 +32,14 @@ class FeedbackInput(BaseModel):
         None,
         example="real_player",
         description="Proposed label for the player",
-        enum=[
-            "real_player",
-            "pvm_melee_bot",
-            "smithing_bot",
-            "magic_bot",
-            "fishing_bot",
-            "mining_bot",
-            "crafting_bot",
-            "pvm_ranged_magic_bot",
-            "pvm_ranged_bot",
-            "hunter_bot",
-            "fletching_bot",
-            "clue_scroll_bot",
-            "lms_bot",
-            "agility_bot",
-            "wintertodt_bot",
-            "runecrafting_bot",
-            "zalcano_bot",
-            "woodcutting_bot",
-            "thieving_bot",
-            "soul_wars_bot",
-            "cooking_bot",
-            "vorkath_bot",
-            "barrows_bot",
-            "herblore_bot",
-            "unknown_bot",
-        ],
     )
+
+    @validator("player_name")
+    def uuid_format(cls, value: str):
+        """Accept names of 1-12 characters, or any name starting 'anonymoususer'."""
+        if 1 <= len(value) <= 12:
+            return value
+        # anonymous names are exempt from the 12-character cap (case-insensitive)
+        if value.lower().startswith("anonymoususer"):
+            return value
+        raise ValueError("Invalid format for player_name")
diff --git a/src/core/database/models/feedback.py b/src/core/database/models/feedback.py
index c6e1bdbc..4e639b02 100644
--- a/src/core/database/models/feedback.py
+++ b/src/core/database/models/feedback.py
@@ -17,8 +17,8 @@ class PredictionFeedback(Base):
 
     id = Column(Integer, primary_key=True, autoincrement=True)
     ts = Column(TIMESTAMP, nullable=False, server_default="CURRENT_TIMESTAMP")
-    voter_id = Column(Integer, ForeignKey("FK_Voter_ID"), nullable=False)
-    subject_id = Column(Integer, ForeignKey("FK_Subject_ID"), nullable=False)
+    voter_id = Column(Integer, ForeignKey("Players.id"), nullable=False)
+    subject_id = Column(Integer, ForeignKey("Players.id"), nullable=False)
     prediction = Column(String(50), nullable=False)
     confidence = Column(Float, nullable=False)
     vote = Column(Integer, nullable=False, server_default="0")
diff --git a/src/core/fastapi/dependencies/kafka.py b/src/core/fastapi/dependencies/kafka.py
new file mode 100644
index 00000000..704d8ad1
--- /dev/null
+++ b/src/core/fastapi/dependencies/kafka.py
@@ -0,0 +1,68 @@
+import asyncio
+import json
+import logging
+from asyncio import Queue
+from time import time
+
+from aiokafka import AIOKafkaConsumer, AIOKafkaProducer
+
+from src.core.config import settings
+
+logger = logging.getLogger(__name__)
+
+
+async def kafka_producer(producer: AIOKafkaProducer) -> AIOKafkaProducer:
+    await producer.start()  # start the caller-supplied producer...
+    return producer  # ...and hand that same object back
+
+
+async def kafka_player_consumer(topic: str, group: str) -> AIOKafkaConsumer:
+    consumer = AIOKafkaConsumer(
+        topic,
+        bootstrap_servers=[settings.KAFKA_HOST],
+        group_id=group,
+        value_deserializer=lambda x: json.loads(x.decode("utf-8")),  # bytes -> JSON
+        auto_offset_reset="earliest",
+    )
+    await consumer.start()  # the consumer is returned already started
+    return consumer
+
+
+async def receive_messages(consumer: AIOKafkaConsumer, receive_queue: Queue) -> None:
+    logger.info("start receiving messages")
+    async for message in consumer:
+        value = message.value  # only the payload is forwarded
+        await receive_queue.put(value)  # suspends here if a bounded queue is full
+
+
+async def send_messages(topic: str, producer: AIOKafkaProducer, send_queue: Queue):
+    """Forward queued messages to `topic` forever, logging throughput every 100."""
+    logger.info("start sending messages")
+    last_interval = time()
+    messages_sent = 0
+    while True:
+        # Queue.get() already suspends while the queue is empty; no polling sleep.
+        message = await send_queue.get()
+        try:
+            await producer.send(topic, value=message)
+        except Exception:
+            # Stay alive: a dead sender would let the bounded queue fill up.
+            logger.exception("failed to send message to kafka")
+        else:
+            messages_sent += 1
+        send_queue.task_done()
+        if messages_sent >= 100:
+            elapsed_time = time() - last_interval
+            speed = messages_sent / elapsed_time if elapsed_time else 0.0
+            logger.info(
+                f"processed {messages_sent} in {elapsed_time:.2f} seconds, {speed:.2f} msg/sec"
+            )
+            last_interval = time()
+            messages_sent = 0
+
+
+report_send_queue = Queue(maxsize=500)
+producer = AIOKafkaProducer(
+    bootstrap_servers=[settings.KAFKA_HOST],
+    value_serializer=lambda v: json.dumps(v).encode(),
+)
diff --git a/src/core/kafka/engine.py b/src/core/kafka/engine.py
deleted file mode 100644
index b18b576d..00000000
--- a/src/core/kafka/engine.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import asyncio
-import json
-from asyncio import Queue
-
-from aiokafka import AIOKafkaConsumer, AIOKafkaProducer
-
-
-def retry_on_exception(max_retries=3, retry_interval=5):
-    def decorator(func):
-        async def wrapper(*args, **kwargs):
-            retries = 0
-            while retries < max_retries:
-                try:
-                    await func(*args, **kwargs)
-                except Exception as e:
-                    print(f"Error: {e}")
-                    retries += 1
-                    await asyncio.sleep(retry_interval)
-                else:
-                    break
-
-        return wrapper
-
-    return decorator
-
-
-class AioKafkaEngine:
-    def __init__(self, bootstrap_servers: list[str], topic: str, message_queue: Queue):
-        self.bootstrap_servers = bootstrap_servers
-        self.topic = topic
-        self.consumer = None
-        self.producer = None
-        self.message_queue = message_queue
-
-    async def start_consumer(self, group_id: str):
-        self.consumer = AIOKafkaConsumer(
-            self.topic,
-            bootstrap_servers=self.bootstrap_servers,
-            value_serializer=lambda v: json.dumps(v).encode(),
-            group_id=group_id,
-        )
-        await self.consumer.start()
-
-    async def start_producer(self):
-        self.producer = AIOKafkaProducer(
-            bootstrap_servers=self.bootstrap_servers,
-            value_serializer=lambda v: json.dumps(v).encode(),
-        )
-        await self.producer.start()
-
-    @retry_on_exception(max_retries=3, retry_interval=5)
-    async def consume_messages(self):
-        if self.consumer is None:
-            raise ValueError("Consumer not started. Call start_consumer() first.")
-
-        async for message in self.consumer:
-            value = message.value
-            self.message_queue.put_nowait(value)
-
-    @retry_on_exception(max_retries=3, retry_interval=5)
-    async def produce_messages(self):
-        if self.producer is None:
-            raise ValueError("Producer not started. Call start_producer() first.")
-        while True:
-            message = await self.message_queue.get()
-            await self.producer.send(self.topic, value=message)
-
-    async def stop_consumer(self):
-        if self.consumer:
-            await self.consumer.stop()
-
-    async def stop_producer(self):
-        if self.producer:
-            await self.producer.stop()
-
-    def is_ready(self):
-        return self.consumer is not None or self.producer is not None
diff --git a/src/core/kafka/report.py b/src/core/kafka/report.py
deleted file mode 100644
index 1652f589..00000000
--- a/src/core/kafka/report.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from asyncio import Queue
-
-from src.core.config import settings
-
-from .engine import AioKafkaEngine
-
-report_queue = Queue()
-report_engine = AioKafkaEngine(
-    bootstrap_servers=[settings.KAFKA_HOST], topic="reports", message_queue=report_queue
-)
diff --git a/src/core/server.py b/src/core/server.py
index f34878f4..ebf1e7d5 100644
--- a/src/core/server.py
+++ b/src/core/server.py
@@ -1,13 +1,14 @@
 import asyncio
 import logging
+from asyncio import Queue
 
 from fastapi import FastAPI
 from fastapi.middleware import Middleware
 from fastapi.middleware.cors import CORSMiddleware
 
 from src import api
+from src.core.fastapi.dependencies import kafka
 from src.core.fastapi.middleware.logging import LoggingMiddleware
-from src.core.kafka.report import report_engine
 
 from . import logging_config  # needed for log formatting
 
@@ -57,28 +58,16 @@ async def root():
 
 @app.on_event("startup")
 async def startup_event():
-    global report_engine
     logger.info("startup initiated")
-    while True:
-        try:
-            await report_engine.start_producer()
-            logger.info("report_engine started")
-            break
-        except Exception as e:
-            logger.error(e)
-            await asyncio.sleep(5)
-            continue
-
-    while True:
-        if report_engine.is_ready():
-            asyncio.ensure_future(report_engine.produce_messages())
-            break
-        logger.info("not ready")
-        await asyncio.sleep(5)
+    producer = await kafka.kafka_producer(kafka.producer)
+    asyncio.create_task(
+        kafka.send_messages(
+            topic="report", producer=producer, send_queue=kafka.report_send_queue
+        )
+    )
 
 
 @app.on_event("shutdown")
 async def shutdown_event():
-    global report_engine
     logger.info("shutdown initiated")
-    await report_engine.stop_producer()
+    await kafka.producer.stop()
diff --git a/tests/test_feedback_api.py b/tests/test_feedback_api.py
index 2dfc08bc..dce35633 100644
--- a/tests/test_feedback_api.py
+++ b/tests/test_feedback_api.py
@@ -1,57 +1,168 @@
+import hashlib
+import json
+import random
+import string
 import unittest
+import uuid
 
+import hypothesis
 import requests
-from hypothesis import given, settings
+from hypothesis import assume, given, settings
 from hypothesis import strategies as st
 
 
 class TestFeedbackAPI(unittest.TestCase):
-    API_ENDPOINT = "http://localhost:5000/v2/player/feedback/score"
+    API_ENDPOINT_GET = "http://localhost:5000/v2/player/feedback/score"
+    API_ENDPOINT_POST = "http://localhost:5000/v2/feedback"
 
     # fmt: off
     PLAYER_IDS = [
-        3, 5, 19, 23, 26, 29, 30, 34, 34, 38, 39, 42, 42, 45, 46, 52, 52, 57, 57, 58,
-        58, 69, 74, 78, 79, 80, 81, 81, 82, 85, 92, 92, 95, 98, 98, 100, 108, 112, 112,
-        113, 114, 116, 121, 123, 123, 124, 134, 139, 141, 142, 146, 146, 149, 154, 156,
-        157, 158, 158, 161, 162, 166, 168, 171, 173, 178, 180, 181, 187, 190, 191, 195,
-        197, 199, 202, 202, 202, 204, 206, 207, 208, 212, 215, 220, 222, 222, 225, 226,
-        226, 233, 236, 242, 261, 264, 265, 266, 268, 268, 276, 277, 282
+        3, 5, 19, 23, 26, 29, 30, 34, 38, 39, 42, 45, 46, 52, 57, 58, 69, 74, 78, 79,
+        80, 81, 82, 85, 92, 95, 98, 100, 108, 112, 113, 114, 116, 121, 123, 124, 134,
+        139, 141, 142, 146, 149, 154, 156, 157, 158, 161, 162, 166, 168, 171, 173, 178,
+        180, 181, 187, 190, 191, 195, 197, 199, 202, 204, 206, 207, 208, 212, 215, 220,
+        222, 225, 226, 233, 236, 242, 261, 264, 265, 266, 268, 276, 277, 282
     ]
 
-    COMMON_LABELS = [
-        "Real_Player", "PVM_Melee_bot", "Smithing_bot", "Magic_bot", "Fishing_bot",
-        "Mining_bot", "Crafting_bot", "PVM_Ranged_Magic_bot", "Hunter_bot", "Fletching_bot",
-        "LMS_bot", "Agility_bot", "Wintertodt_bot", "Runecrafting_bot", "Zalcano_bot",
-        "Woodcutting_bot", "Thieving_bot", "Soul_Wars_bot", "Cooking_bot", "Vorkath_bot",
-        "Barrows_bot", "Herblore_bot", "Zlrah_bot", "Unknown_bot", "Something_else", "Unsure"
+    SUBJECT_IDS = [
+        2, 6, 7, 8, 9, 11, 12, 13, 14, 20, 21, 22, 24, 28, 31, 32, 33, 43, 44, 47, 63,
+        65, 71, 72, 83, 84, 89, 93, 94, 99, 102, 103, 104, 105, 109, 111, 117, 118, 125,
+        128, 131, 137, 150, 152, 153, 155, 159, 160, 163, 165, 167, 182, 183, 184, 189,
+        192, 198, 200, 201, 210, 217, 219, 224, 228, 232, 241, 243, 247
     ]
     # fmt: on
 
+    ANON_IDS = [301, 302, 303, 304, 305, 306, 307, 308, 309, 310]
+
+    COMMON_LABELS = ["real_player", "fishing_bot", "mining_bot"]
+
     # Define a Hypothesis strategy for player names
     PLAYERS = [f"player{i}" for i in PLAYER_IDS]
     PLAYER_NAME_STRATEGY = st.sampled_from(PLAYERS)
 
+    ANON = [
+        "anonymoususer 382e728f 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e7259 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e7221 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e71ee 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e71bb 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e7179 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e7133 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e70ef 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e7089 87ea 11ee aab6 0242ac120002",
+        "anonymoususer 382e6def 87ea 11ee aab6 0242ac120002",
+    ]
+
+    ANON_NAME_STRATEGY = st.sampled_from(ANON)
+
+    # define a Hypothesis strategy for subject ids
+    SUBJECT_ID_STRATEGY = st.sampled_from(SUBJECT_IDS)
+
+    # define a hypothesis strategy for anon subject ids
+    ANON_ID_STRATEGY = st.sampled_from(ANON_IDS)
+
+    # TODO: anon player must first exist
+    def test_post_feedback_valid_anon(self) -> None:
+        for player_name in self.ANON:
+            for subject_id in self.SUBJECT_IDS:
+                prediction = random.choice(self.COMMON_LABELS)
+                proposed_label = random.choice(self.COMMON_LABELS)
+                while proposed_label == prediction:  # re-draw until the labels differ
+                    proposed_label = random.choice(self.COMMON_LABELS)
+                # NOTE(review): a re-run on the same DB may hit duplicate_record (422)
+                data = {
+                    "player_name": player_name,
+                    "vote": random.randint(-1, 1),
+                    "prediction": prediction,
+                    "confidence": random.random(),
+                    "subject_id": subject_id,
+                    "feedback_text": "".join(
+                        random.choices(
+                            string.ascii_letters + string.digits,
+                            k=random.randint(0, 250),
+                        )
+                    ),
+                    "proposed_label": proposed_label,
+                }
+
+                # Send the POST request
+                response = requests.post(url=self.API_ENDPOINT_POST, json=data)
+
+                if response.status_code != 201:
+                    print({"status": response.status_code})
+                    print({"data": data, "response": response.json()})
+
+                # Assert that the response is as expected
+                self.assertEqual(
+                    first=response.status_code,
+                    second=201,
+                    msg=f"expected: 201, received: {response.status_code}, data: {json.dumps(data)}",
+                )
+
+    @given(
+        player_name=st.text(min_size=1, max_size=13),  # PLAYER_NAME_STRATEGY,
+        vote=st.integers(min_value=-1, max_value=1),
+        prediction=st.sampled_from(COMMON_LABELS),
+        confidence=st.floats(min_value=0, max_value=1),
+        subject_id=SUBJECT_ID_STRATEGY,
+        feedback_text=st.text(min_size=0, max_size=250),
+        proposed_label=st.sampled_from(COMMON_LABELS),
+    )
+    def test_post_feedback_invalid_player(
+        self,
+        player_name,
+        vote,
+        prediction,
+        confidence,
+        subject_id,
+        feedback_text,
+        proposed_label,
+    ) -> None:
+        assume(prediction != proposed_label)  # discard draws where the labels match
+        # Define the data to send
+        data = {
+            "player_name": player_name,
+            "vote": vote,
+            "prediction": prediction,
+            "confidence": confidence,
+            "subject_id": subject_id,
+            "feedback_text": feedback_text,
+            "proposed_label": proposed_label,
+        }
+        # NOTE(review): relies on random text never matching a stored player name
+        # Send the POST request
+        response = requests.post(url=self.API_ENDPOINT_POST, json=data)
+        # Collected only to make a failing assertion message self-explanatory
+        debug_data = {
+            "data": data,
+            "response_status": response.status_code,
+            "response": response.text,
+        }
+
+        # Assert that the response is as expected
+        error_msg = f"expected status code: <422>, received: <{response.status_code}>, {json.dumps(debug_data)}"
+        self.assertEqual(first=response.status_code, second=422, msg=error_msg)
+
     # Test valid players and check if feedback scores are returned
     @settings(deadline=500)
     @given(valid_player_names=st.lists(PLAYER_NAME_STRATEGY, min_size=1, max_size=5))
     def test_valid_players(self, valid_player_names):
         params = {"name": valid_player_names}
-        response = requests.get(url=self.API_ENDPOINT, params=params)
-
-        # Check if the response status code is 200
-        if response.status_code != 200:
-            print({"status": response.status_code})
-            print({"params": params, "response": response.json()})
+        response = requests.get(url=self.API_ENDPOINT_GET, params=params)
 
         # Check that the response contains report scores for all specified players
         json_data = response.json()
-        self.assertEqual(response.status_code, 200)
+        self.assertEqual(
+            first=response.status_code,
+            second=200,
+            msg=f"Expected 200, received: {response.status_code}",  # TODO:improve
+        )
 
         error = "List is empty"
-        assert len(json_data) > 0, error
+        self.assertGreater(len(json_data), 0, msg=error)
 
         error = "Not all items in the list are dictionaries"
-        assert all(isinstance(item, dict) for item in json_data), error
+        self.assertTrue(all(isinstance(item, dict) for item in json_data), msg=error)
 
     @given(
         invalid_player_names=st.lists(
@@ -60,16 +171,10 @@ def test_valid_players(self, valid_player_names):
     )
     def test_invalid_players(self, invalid_player_names):
         params = {"name": invalid_player_names}
-        response = requests.get(url=self.API_ENDPOINT, params=params)
-
-        # Check if the response status code is 200
-        if response.status_code != 200:
-            print({"status": response.status_code})
-            print({"params": params, "response": response.json()})
+        response = requests.get(url=self.API_ENDPOINT_GET, params=params)
 
         # Check that the response is an empty list
         self.assertEqual(response.status_code, 200)
-        assert response.json() == []
 
 
 if __name__ == "__main__":
diff --git a/tests/test_player_api.py b/tests/test_player_api.py
index c303f92d..13515f92 100644
--- a/tests/test_player_api.py
+++ b/tests/test_player_api.py
@@ -1,34 +1,37 @@
 import unittest
+from unittest import TestCase
 
+import hypothesis.strategies as st
 import requests
 from hypothesis import given, settings
 from hypothesis import strategies as st
 
 
 class TestPlayerAPI(unittest.TestCase):
-    API_ENDPOINT = "http://localhost:5000/v2/player/report/score"
+    API_ENDPOINT_REPORT = "http://localhost:5000/v2/player/report/score"
+    API_ENDPOINT_PREDICTION = "http://localhost:5000/v2/player/prediction"
 
     # fmt: off
-    PLAYER_IDS = [
-        3, 5, 19, 23, 26, 29, 30, 34, 34, 38, 39, 42, 42, 45, 46, 52, 52, 57, 57, 58,
-        58, 69, 74, 78, 79, 80, 81, 81, 82, 85, 92, 92, 95, 98, 98, 100, 108, 112, 112,
-        113, 114, 116, 121, 123, 123, 124, 134, 139, 141, 142, 146, 146, 149, 154, 156,
-        157, 158, 158, 161, 162, 166, 168, 171, 173, 178, 180, 181, 187, 190, 191, 195,
-        197, 199, 202, 202, 202, 204, 206, 207, 208, 212, 215, 220, 222, 222, 225, 226,
-        226, 233, 236, 242, 261, 264, 265, 266, 268, 268, 276, 277, 282
+    REPORT_IDS = [
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 24, 30, 47, 86,
+        91, 126, 149, 183, 184, 194, 217, 249, 272, 284
     ]
     # fmt: on
 
     # Define a Hypothesis strategy for player names
-    PLAYERS = [f"player{i}" for i in PLAYER_IDS]
-    PLAYER_NAME_STRATEGY = st.sampled_from(PLAYERS)
+    def name_strategy(player_ids):
+        players = [f"player{i}" for i in player_ids]
+        return players
+
+    REPORTS = name_strategy(REPORT_IDS)
+    REPORT_NAME_STRATEGY = st.sampled_from(REPORTS)
 
     # Test valid players and check if report scores are returned
     @settings(deadline=500)
-    @given(valid_player_names=st.lists(PLAYER_NAME_STRATEGY, min_size=1, max_size=5))
-    def test_valid_players(self, valid_player_names):
+    @given(valid_player_names=st.lists(REPORT_NAME_STRATEGY, min_size=1, max_size=5))
+    def test_report_valid_players(self, valid_player_names):
         params = {"name": valid_player_names}
-        response = requests.get(url=self.API_ENDPOINT, params=params)
+        response = requests.get(url=self.API_ENDPOINT_REPORT, params=params)
 
         # Check if the response status code is 200
         if response.status_code != 200:
@@ -51,9 +54,9 @@ def test_valid_players(self, valid_player_names):
             st.text(min_size=1, max_size=13), min_size=1, max_size=5
         )
     )
-    def test_invalid_players(self, invalid_player_names):
+    def test_report_invalid_players(self, invalid_player_names):
         params = {"name": invalid_player_names}
-        response = requests.get(url=self.API_ENDPOINT, params=params)
+        response = requests.get(url=self.API_ENDPOINT_REPORT, params=params)
 
         # Check if the response status code is 200
         if response.status_code != 200:
@@ -64,6 +67,65 @@ def test_invalid_players(self, invalid_player_names):
         self.assertEqual(response.status_code, 200)
         assert response.json() == []
 
+    # multiple valid players: expects 200 and a non-empty list[dict]
+    @settings(deadline=500)  # Increase the deadline to 500 milliseconds
+    @given(
+        prediction_tuple=st.tuples(
+            st.lists(REPORT_NAME_STRATEGY, min_size=1, max_size=5),
+            st.booleans(),
+        )
+    )
+    def test_prediction_valid_players(self, prediction_tuple):
+        player_names_count_valid, breakdown = prediction_tuple
+
+        params = {"name": player_names_count_valid, "breakdown": breakdown}
+        response = requests.get(url=self.API_ENDPOINT_PREDICTION, params=params)
+
+        # Check if the response status code is 200
+        if response.status_code != 200:
+            print({"status": response.status_code})
+            print({"params": params, "response": response.json()})
+
+        # Check that the prediction endpoint returned a non-empty list of dictionaries
+        json_data: list[dict] = response.json()
+        self.assertEqual(response.status_code, 200)
+
+        error = "List is empty"
+        assert len(json_data) > 0, error
+
+        error = "Not all items in the list are dictionaries"
+        assert all(isinstance(item, dict) for item in json_data), error
+
+        for item in json_data:
+            predictions_breakdown = item.get("predictions_breakdown", {})
+            if breakdown:
+                error = "'predictions_breakdown' is not a populated dictionary"
+                assert predictions_breakdown, error
+            else:
+                error = "'predictions_breakdown' is not an empty dictionary"
+                assert not predictions_breakdown, error
+
+    # invalid player(s) check returns 404 with detail "Player not found"
+    @given(
+        prediction_tuple=st.tuples(
+            st.lists(st.text(min_size=1, max_size=13), min_size=1, max_size=5),
+            st.booleans(),
+        )
+    )
+    def test__prediction_invalid_players(self, prediction_tuple):
+        player_names_count_invalid, breakdown = prediction_tuple
+
+        params = {"name": player_names_count_invalid, "breakdown": breakdown}
+        response = requests.get(url=self.API_ENDPOINT_PREDICTION, params=params)
+
+        if response.status_code != 404:
+            print({"status": response.status_code})
+            print({"params": params, "response": response.json()})
+
+        self.assertEqual(response.status_code, 404)
+
+        self.assertEqual(response.json(), {"detail": "Player not found"})
+
 
 # Run the tests
 if __name__ == "__main__":
diff --git a/tests/test_prediction_api.py b/tests/test_prediction_api.py
deleted file mode 100644
index e1dbdce9..00000000
--- a/tests/test_prediction_api.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from unittest import TestCase
-
-import hypothesis.strategies as st
-import requests
-from hypothesis import given, settings
-
-
-class TestPredictionAPI(TestCase):
-    API_ENDPOINT = "http://localhost:5000/v2/player/prediction"
-
-    # fmt: off
-    PLAYER_IDS = [
-        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24,
-        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 41, 43, 44, 45, 46, 47, 48,
-        50, 51, 52, 53, 54, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73,
-        76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
-        98, 99, 100, 102, 103, 104, 105, 106, 107, 108, 109, 113, 114, 115
-    ]
-    # fmt: on
-
-    # Define a Hypothesis strategy for player names
-    PLAYERS = [f"player{i}" for i in PLAYER_IDS]
-    PLAYER_NAME_STRATEGY = st.sampled_from(PLAYERS)
-
-    # multi valid check returns list[dict]
-    @settings(deadline=500)  # Increase the deadline to 500 milliseconds
-    @given(
-        prediction_tuple=st.tuples(
-            st.lists(PLAYER_NAME_STRATEGY, min_size=1, max_size=5),
-            st.booleans(),
-        )
-    )
-    def test_valid_players(self, prediction_tuple):
-        player_names_count_valid, breakdown = prediction_tuple
-
-        params = {"name": player_names_count_valid, "breakdown": breakdown}
-        response = requests.get(url=self.API_ENDPOINT, params=params)
-
-        # Check if the response status code is 200
-        if response.status_code != 200:
-            print({"status": response.status_code})
-            print({"params": params, "response": response.json()})
-
-        # Check that the response contains report scores for all specified players
-        json_data: list[dict] = response.json()
-        self.assertEqual(response.status_code, 200)
-
-        error = "List is empty"
-        assert len(json_data) > 0, error
-
-        error = "Not all items in the list are dictionaries"
-        assert all(isinstance(item, dict) for item in json_data), error
-
-        for item in json_data:
-            predictions_breakdown = item.get("predictions_breakdown", {})
-            if breakdown:
-                error = "'predictions_breakdown' is not a populated dictionary"
-                assert predictions_breakdown, error
-            else:
-                error = "'predictions_breakdown' is not an empty dictionary"
-                assert not predictions_breakdown, error
-
-    # invalid player(s) check returns empty list
-    @given(
-        prediction_tuple=st.tuples(
-            st.lists(st.text(min_size=1, max_size=13), min_size=1, max_size=5),
-            st.booleans(),
-        )
-    )
-    def test_invalid_players(self, prediction_tuple):
-        player_names_count_invalid, breakdown = prediction_tuple
-
-        params = {"name": player_names_count_invalid, "breakdown": breakdown}
-        response = requests.get(url=self.API_ENDPOINT, params=params)
-
-        if response.status_code != 404:
-            print({"status": response.status_code})
-            print({"params": params, "response": response.json()})
-
-        self.assertEqual(response.status_code, 404)
-
-        self.assertEqual(response.json(), {"detail": "Player not found"})
diff --git a/tests/test_report_api.py b/tests/test_report_api.py
index 4558d86f..f119bf45 100644
--- a/tests/test_report_api.py
+++ b/tests/test_report_api.py
@@ -25,7 +25,7 @@
 unique_reporter = str(uuid4()).replace("-", "")[:13]
 
 
-class TestPostReportAPI(unittest.TestCase):
+class TestReportAPI(unittest.TestCase):
     API_ENDPOINT = "http://localhost:5000/v2/report"
 
     @given(