Skip to content

Commit

Permalink
Slight ParsedRawReport cleanup
Browse files Browse the repository at this point in the history
Removes a bunch of has/get methods in favor of accessing fields directly.
Removes the `env` field completely.
Avoids a `BytesIO` indirection in the renamed `as_readable` method.
  • Loading branch information
Swatinem committed Dec 3, 2024
1 parent dcf7304 commit 3ba6405
Show file tree
Hide file tree
Showing 12 changed files with 148 additions and 233 deletions.
3 changes: 2 additions & 1 deletion services/path_fixer/fixpaths.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def clean_toc(toc: str) -> list[str]:
continue

# This path is good; save it.
rv.append(path)
if path:
rv.append(path)

return rv
55 changes: 28 additions & 27 deletions services/path_fixer/tests/unit/test_fixpaths.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pytest

from services.path_fixer import fixpaths
from test_utils.base import BaseTestCase

# Hand-written TOCs.
paths = [
Expand All @@ -24,33 +23,35 @@
}


class TestFixpaths(BaseTestCase):
@pytest.mark.parametrize("toc, result", paths)
def test_clean_toc(self, toc, result):
assert fixpaths.clean_toc(toc) == result

def test_clean_toc_with_space(self):
assert fixpaths.clean_toc("a\\ b") == ["a b"]

@pytest.mark.parametrize("path, result", list(unquoted_files.items()))
def test_unquote_git_path(self, path, result):
assert fixpaths.unquote_git_path(path) == result

def test_some_real_git_paths(self):
prefix = "services/path_fixer/tests/testdir"
filenames = [
"café.txt",
"comma,txt",
"🍭.txt",
'fixture/get_breakdown_values_escaped_".json',
]
joined = [os.path.join(prefix, filename) for filename in filenames]
toc = """"services/path_fixer/tests/testdir/caf\\303\\251.txt"
@pytest.mark.parametrize("toc, result", paths)
def test_clean_toc(toc, result):
assert fixpaths.clean_toc(toc) == result


def test_clean_toc_with_space():
assert fixpaths.clean_toc("a\\ b") == ["a b"]


@pytest.mark.parametrize("path, result", list(unquoted_files.items()))
def test_unquote_git_path(path, result):
assert fixpaths.unquote_git_path(path) == result


def test_some_real_git_paths():
prefix = "services/path_fixer/tests/testdir"
filenames = [
"café.txt",
"comma,txt",
"🍭.txt",
'fixture/get_breakdown_values_escaped_".json',
]
joined = [os.path.join(prefix, filename) for filename in filenames]
toc = """"services/path_fixer/tests/testdir/caf\\303\\251.txt"
services/path_fixer/tests/testdir/comma,txt
"services/path_fixer/tests/testdir/\\360\\237\\215\\255.txt"
"services/path_fixer/tests/testdir/fixture/get_breakdown_values_escaped_\\".json"
"""
cleaned = fixpaths.clean_toc(toc)
joined.sort()
cleaned.sort()
assert joined == cleaned
cleaned = fixpaths.clean_toc(toc)
joined.sort()
cleaned.sort()
assert joined == cleaned
5 changes: 1 addition & 4 deletions services/processing/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,4 @@ def rewrite_or_delete_upload(

elif isinstance(report_info.raw_report, VersionOneParsedRawReport):
# only a version 1 report needs to be "rewritten readable"

archive_service.write_file(
archive_url, report_info.raw_report.content().getvalue()
)
archive_service.write_file(archive_url, report_info.raw_report.as_readable())

Check warning on line 102 in services/processing/processing.py

View check run for this annotation

Codecov Notifications / codecov/patch

services/processing/processing.py#L102

Added line #L102 was not covered by tests
12 changes: 2 additions & 10 deletions services/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,14 +539,8 @@ def parse_raw_report_from_storage(
"""
archive_service = self.get_archive_service(repo)
archive_url = upload.storage_path

log.info(
"Parsing the raw report from storage",
extra=dict(
commit=upload.report.commit_id,
repoid=repo.repoid,
archive_url=archive_url,
),
"Parsing the raw report from storage", extra=dict(archive_url=archive_url)
)

archive_file = archive_service.read_file(archive_url)
Expand All @@ -559,13 +553,11 @@ def parse_raw_report_from_storage(

raw_uploaded_report = parser.parse_raw_report_from_bytes(archive_file)

raw_report_count = len(raw_uploaded_report.get_uploaded_files())
raw_report_count = len(raw_uploaded_report.uploaded_files)
if raw_report_count < 1:
log.warning(
"Raw upload contains no uploaded files",
extra=dict(
commit=upload.report.commit_id,
repoid=repo.repoid,
raw_report_count=raw_report_count,
upload_version=upload_version,
archive_url=archive_url,
Expand Down
37 changes: 16 additions & 21 deletions services/report/parser/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import sentry_sdk

from services.path_fixer.fixpaths import clean_toc
from services.report.parser.types import LegacyParsedRawReport, ParsedUploadedReportFile


Expand Down Expand Up @@ -67,7 +68,6 @@ def cut_sections(self, raw_report: bytes):
This function takes the proper steps to find all the relevant sections of a report:
- toc: the 'network', list of files present on this report
- env: the envvars the user set on the upload
- uploaded_files: the actual report files
- report_fixes: the report fixes some languages need
Expand Down Expand Up @@ -99,8 +99,8 @@ def cut_sections(self, raw_report: bytes):
while i_start < i_end and raw_report[i_start] in whitespaces:
i_start += 1
yield {
"contents": raw_report[i_start:i_end],
"filename": filename,
"contents": raw_report[i_start:i_end],
"footer": separator,
}

Expand All @@ -110,32 +110,27 @@ def parse_raw_report_from_bytes(self, raw_report: bytes) -> LegacyParsedRawRepor
self.ignore_from_now_on_marker
)
sections = self.cut_sections(raw_report)
res = self._generate_parsed_report_from_sections(sections)
return res
return self._generate_parsed_report_from_sections(sections)

def _generate_parsed_report_from_sections(self, sections):
uploaded_files = []
toc_section = None
env_section = None
report_fixes_section = None
toc = None
report_fixes = None
for sect in sections:
if sect["footer"] == self.network_separator:
toc_section = sect["contents"]
toc = clean_toc(sect["contents"].decode(errors="replace").strip())
elif sect["footer"] == self.env_separator:
env_section = sect["contents"]
pass
elif sect["filename"] == "fixes":
report_fixes = sect["contents"]
else:
if sect["filename"] == "fixes":
report_fixes_section = sect["contents"]
else:
uploaded_files.append(
ParsedUploadedReportFile(
filename=sect.get("filename"),
file_contents=sect["contents"],
)
)
file = ParsedUploadedReportFile(
filename=sect["filename"], file_contents=sect["contents"]
)
uploaded_files.append(file)

return LegacyParsedRawReport(
toc=toc_section,
env=env_section,
toc=toc or [],
uploaded_files=uploaded_files,
report_fixes=report_fixes_section,
report_fixes=report_fixes,
)
18 changes: 8 additions & 10 deletions services/report/parser/tests/unit/test_version_one_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from services.report.parser.version_one import (
ParsedUploadedReportFile,
VersionOneReportParser,
_parse_coverage_file_contents,
)

input_data = b"""{
Expand Down Expand Up @@ -47,7 +48,6 @@
def test_version_one_parser():
subject = VersionOneReportParser()
res = subject.parse_raw_report_from_bytes(input_data)
assert res.get_env() is None
assert res.get_report_fixes(None) == {
"SwiftExample/AppDelegate.swift": {
"eof": 15,
Expand All @@ -58,13 +58,13 @@ def test_version_one_parser():
"lines": [1, 17, 3, 22, 7, 9, 12, 14],
},
}
assert res.get_toc() == [
assert res.toc == [
"path/to/file1.c",
"path/from/another.cpp",
"path/from/aaaaaa.cpp",
]
assert len(res.get_uploaded_files()) == 2
first_file, second_file = res.get_uploaded_files()
assert len(res.uploaded_files) == 2
first_file, second_file = res.uploaded_files
assert isinstance(first_file, ParsedUploadedReportFile)
assert first_file.filename == "coverage.xml"
assert (
Expand All @@ -82,28 +82,26 @@ def test_version_one_parser():
assert second_file.labels == ["simple", "a.py::fileclass::test_simple"]

assert (
res.content().getvalue().decode("utf-8")
== f"path/to/file1.c\npath/from/another.cpp\npath/from/aaaaaa.cpp\n<<<<<< network\n\n# path=coverage.xml\n{first_file.contents.decode('utf-8')}\n<<<<<< EOF\n\n# path=another.coverage.json\n{second_file.contents.decode('utf-8')}\n<<<<<< EOF\n\n"
res.as_readable()
== f"path/to/file1.c\npath/from/another.cpp\npath/from/aaaaaa.cpp\n<<<<<< network\n\n# path=coverage.xml\n{first_file.contents.decode()}\n<<<<<< EOF\n\n# path=another.coverage.json\n{second_file.contents.decode()}\n<<<<<< EOF\n\n".encode()
)


def test_version_one_parser_parse_coverage_file_contents_bad_format():
subject = VersionOneReportParser()
coverage_file = {"format": "unknown", "data": b"simple", "filename": "filename.py"}
assert subject._parse_coverage_file_contents(coverage_file) == b"simple"
assert _parse_coverage_file_contents(coverage_file) == b"simple"


def test_version_one_parser_parse_coverage_file_contents_base64_zip_format():
original_input = b"some_cool_string right \n here"
formatted_input = base64.b64encode(zlib.compress(original_input))
# An assert for the sake of showing the result
assert formatted_input == b"eJwrzs9NjU/Oz8+JLy4pysxLVyjKTM8oUeBSyEgtSgUArOcK4w=="
subject = VersionOneReportParser()
coverage_file = {
"format": "base64+compressed",
"data": formatted_input,
"filename": "filename.py",
}
res = subject._parse_coverage_file_contents(coverage_file)
res = _parse_coverage_file_contents(coverage_file)
assert isinstance(res, bytes)
assert res == b"some_cool_string right \n here"
74 changes: 22 additions & 52 deletions services/report/parser/types.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from io import BytesIO
from typing import Any

from services.path_fixer.fixpaths import clean_toc
from services.report.fixes import get_fixes_from_raw


Expand Down Expand Up @@ -30,8 +29,6 @@ class ParsedRawReport(object):
toc
table of contents, this lists the files relevant to the report,
i.e. the files contained in the repository
env
list of env vars in environment of uploader (legacy only)
uploaded_files
list of class ParsedUploadedReportFile describing uploaded coverage files
report_fixes
Expand All @@ -41,41 +38,31 @@ class ParsedRawReport(object):

def __init__(
self,
toc: Any,
env: Any,
toc: list[str],
uploaded_files: list[ParsedUploadedReportFile],
report_fixes: Any,
):
self.toc = toc
self.env = env
self.uploaded_files = uploaded_files
self.report_fixes = report_fixes

def has_toc(self) -> bool:
return self.toc is not None

def has_env(self) -> bool:
return self.env is not None

def has_report_fixes(self) -> bool:
return self.report_fixes is not None

@property
def size(self):
return sum(f.size for f in self.uploaded_files)

def content(self) -> BytesIO:
buffer = BytesIO()
if self.has_toc():
for file in self.get_toc():
buffer.write(f"{file}\n".encode("utf-8"))
buffer.write(b"<<<<<< network\n\n")
for file in self.uploaded_files:
buffer.write(f"# path={file.filename}\n".encode("utf-8"))
buffer.write(file.contents)
buffer.write(b"\n<<<<<< EOF\n\n")
buffer.seek(0)
return buffer

class LegacyParsedRawReport(ParsedRawReport):
"""
report_fixes : bytes
<filename>:<line number>,<line number>,...
"""

def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
report_fixes = self.report_fixes.decode(errors="replace")
return get_fixes_from_raw(report_fixes, path_fixer)


class VersionOneParsedRawReport(ParsedRawReport):
Expand All @@ -90,34 +77,17 @@ class VersionOneParsedRawReport(ParsedRawReport):
}
"""

def get_toc(self) -> list[str]:
return self.toc

def get_env(self):
return self.env

def get_uploaded_files(self):
return self.uploaded_files

def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
return self.report_fixes


class LegacyParsedRawReport(ParsedRawReport):
"""
report_fixes : BinaryIO
<filename>:<line number>,<line number>,...
"""

def get_toc(self) -> list[str]:
return clean_toc(self.toc.decode(errors="replace").strip())

def get_env(self):
return self.env.decode(errors="replace")

def get_uploaded_files(self):
return self.uploaded_files

def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
report_fixes = self.report_fixes.decode(errors="replace")
return get_fixes_from_raw(report_fixes, path_fixer)
def as_readable(self) -> bytes:
buffer = b""
if self.toc:
for path in self.toc:
buffer += f"{path}\n".encode()
buffer += b"<<<<<< network\n\n"
for file in self.uploaded_files:
buffer += f"# path={file.filename}\n".encode()
buffer += file.contents
buffer += b"\n<<<<<< EOF\n\n"
return buffer
Loading

0 comments on commit 3ba6405

Please sign in to comment.