Skip to content

Commit

Permalink
Revert "Slight ParsedRawReport cleanup" (#935)
Browse files Browse the repository at this point in the history
  • Loading branch information
trent-codecov authored Dec 3, 2024
1 parent 2cbfc8c commit 3eed85d
Show file tree
Hide file tree
Showing 12 changed files with 233 additions and 148 deletions.
3 changes: 1 addition & 2 deletions services/path_fixer/fixpaths.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def clean_toc(toc: str) -> list[str]:
continue

# This path is good; save it.
if path:
rv.append(path)
rv.append(path)

return rv
55 changes: 27 additions & 28 deletions services/path_fixer/tests/unit/test_fixpaths.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest

from services.path_fixer import fixpaths
from test_utils.base import BaseTestCase

# Hand-written TOCs.
paths = [
Expand All @@ -23,35 +24,33 @@
}


@pytest.mark.parametrize("toc, result", paths)
def test_clean_toc(toc, result):
assert fixpaths.clean_toc(toc) == result


def test_clean_toc_with_space():
assert fixpaths.clean_toc("a\\ b") == ["a b"]


@pytest.mark.parametrize("path, result", list(unquoted_files.items()))
def test_unquote_git_path(path, result):
assert fixpaths.unquote_git_path(path) == result


def test_some_real_git_paths():
prefix = "services/path_fixer/tests/testdir"
filenames = [
"café.txt",
"comma,txt",
"🍭.txt",
'fixture/get_breakdown_values_escaped_".json',
]
joined = [os.path.join(prefix, filename) for filename in filenames]
toc = """"services/path_fixer/tests/testdir/caf\\303\\251.txt"
class TestFixpaths(BaseTestCase):
@pytest.mark.parametrize("toc, result", paths)
def test_clean_toc(self, toc, result):
assert fixpaths.clean_toc(toc) == result

def test_clean_toc_with_space(self):
assert fixpaths.clean_toc("a\\ b") == ["a b"]

@pytest.mark.parametrize("path, result", list(unquoted_files.items()))
def test_unquote_git_path(self, path, result):
assert fixpaths.unquote_git_path(path) == result

def test_some_real_git_paths(self):
prefix = "services/path_fixer/tests/testdir"
filenames = [
"café.txt",
"comma,txt",
"🍭.txt",
'fixture/get_breakdown_values_escaped_".json',
]
joined = [os.path.join(prefix, filename) for filename in filenames]
toc = """"services/path_fixer/tests/testdir/caf\\303\\251.txt"
services/path_fixer/tests/testdir/comma,txt
"services/path_fixer/tests/testdir/\\360\\237\\215\\255.txt"
"services/path_fixer/tests/testdir/fixture/get_breakdown_values_escaped_\\".json"
"""
cleaned = fixpaths.clean_toc(toc)
joined.sort()
cleaned.sort()
assert joined == cleaned
cleaned = fixpaths.clean_toc(toc)
joined.sort()
cleaned.sort()
assert joined == cleaned
5 changes: 4 additions & 1 deletion services/processing/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,7 @@ def rewrite_or_delete_upload(

elif isinstance(report_info.raw_report, VersionOneParsedRawReport):
# only a version 1 report needs to be "rewritten readable"
archive_service.write_file(archive_url, report_info.raw_report.as_readable())

archive_service.write_file(
archive_url, report_info.raw_report.content().getvalue()
)
12 changes: 10 additions & 2 deletions services/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,8 +539,14 @@ def parse_raw_report_from_storage(
"""
archive_service = self.get_archive_service(repo)
archive_url = upload.storage_path

log.info(
"Parsing the raw report from storage", extra=dict(archive_url=archive_url)
"Parsing the raw report from storage",
extra=dict(
commit=upload.report.commit_id,
repoid=repo.repoid,
archive_url=archive_url,
),
)

archive_file = archive_service.read_file(archive_url)
Expand All @@ -553,11 +559,13 @@ def parse_raw_report_from_storage(

raw_uploaded_report = parser.parse_raw_report_from_bytes(archive_file)

raw_report_count = len(raw_uploaded_report.uploaded_files)
raw_report_count = len(raw_uploaded_report.get_uploaded_files())
if raw_report_count < 1:
log.warning(
"Raw upload contains no uploaded files",
extra=dict(
commit=upload.report.commit_id,
repoid=repo.repoid,
raw_report_count=raw_report_count,
upload_version=upload_version,
archive_url=archive_url,
Expand Down
37 changes: 21 additions & 16 deletions services/report/parser/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import sentry_sdk

from services.path_fixer.fixpaths import clean_toc
from services.report.parser.types import LegacyParsedRawReport, ParsedUploadedReportFile


Expand Down Expand Up @@ -68,6 +67,7 @@ def cut_sections(self, raw_report: bytes):
This function takes the proper steps to find all the relevant sections of a report:
- toc: the 'network', list of files present on this report
- env: the envvars the user set on the upload
- uploaded_files: the actual report files
- report_fixes: the report fixes some languages need
Expand Down Expand Up @@ -99,8 +99,8 @@ def cut_sections(self, raw_report: bytes):
while i_start < i_end and raw_report[i_start] in whitespaces:
i_start += 1
yield {
"filename": filename,
"contents": raw_report[i_start:i_end],
"filename": filename,
"footer": separator,
}

Expand All @@ -110,27 +110,32 @@ def parse_raw_report_from_bytes(self, raw_report: bytes) -> LegacyParsedRawRepor
self.ignore_from_now_on_marker
)
sections = self.cut_sections(raw_report)
return self._generate_parsed_report_from_sections(sections)
res = self._generate_parsed_report_from_sections(sections)
return res

def _generate_parsed_report_from_sections(self, sections):
uploaded_files = []
toc = None
report_fixes = None
toc_section = None
env_section = None
report_fixes_section = None
for sect in sections:
if sect["footer"] == self.network_separator:
toc = clean_toc(sect["contents"].decode(errors="replace").strip())
toc_section = sect["contents"]
elif sect["footer"] == self.env_separator:
pass
elif sect["filename"] == "fixes":
report_fixes = sect["contents"]
env_section = sect["contents"]
else:
file = ParsedUploadedReportFile(
filename=sect["filename"], file_contents=sect["contents"]
)
uploaded_files.append(file)

if sect["filename"] == "fixes":
report_fixes_section = sect["contents"]
else:
uploaded_files.append(
ParsedUploadedReportFile(
filename=sect.get("filename"),
file_contents=sect["contents"],
)
)
return LegacyParsedRawReport(
toc=toc or [],
toc=toc_section,
env=env_section,
uploaded_files=uploaded_files,
report_fixes=report_fixes,
report_fixes=report_fixes_section,
)
18 changes: 10 additions & 8 deletions services/report/parser/tests/unit/test_version_one_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from services.report.parser.version_one import (
ParsedUploadedReportFile,
VersionOneReportParser,
_parse_coverage_file_contents,
)

input_data = b"""{
Expand Down Expand Up @@ -48,6 +47,7 @@
def test_version_one_parser():
subject = VersionOneReportParser()
res = subject.parse_raw_report_from_bytes(input_data)
assert res.get_env() is None
assert res.get_report_fixes(None) == {
"SwiftExample/AppDelegate.swift": {
"eof": 15,
Expand All @@ -58,13 +58,13 @@ def test_version_one_parser():
"lines": [1, 17, 3, 22, 7, 9, 12, 14],
},
}
assert res.toc == [
assert res.get_toc() == [
"path/to/file1.c",
"path/from/another.cpp",
"path/from/aaaaaa.cpp",
]
assert len(res.uploaded_files) == 2
first_file, second_file = res.uploaded_files
assert len(res.get_uploaded_files()) == 2
first_file, second_file = res.get_uploaded_files()
assert isinstance(first_file, ParsedUploadedReportFile)
assert first_file.filename == "coverage.xml"
assert (
Expand All @@ -82,26 +82,28 @@ def test_version_one_parser():
assert second_file.labels == ["simple", "a.py::fileclass::test_simple"]

assert (
res.as_readable()
== f"path/to/file1.c\npath/from/another.cpp\npath/from/aaaaaa.cpp\n<<<<<< network\n\n# path=coverage.xml\n{first_file.contents.decode()}\n<<<<<< EOF\n\n# path=another.coverage.json\n{second_file.contents.decode()}\n<<<<<< EOF\n\n".encode()
res.content().getvalue().decode("utf-8")
== f"path/to/file1.c\npath/from/another.cpp\npath/from/aaaaaa.cpp\n<<<<<< network\n\n# path=coverage.xml\n{first_file.contents.decode('utf-8')}\n<<<<<< EOF\n\n# path=another.coverage.json\n{second_file.contents.decode('utf-8')}\n<<<<<< EOF\n\n"
)


def test_version_one_parser_parse_coverage_file_contents_bad_format():
subject = VersionOneReportParser()
coverage_file = {"format": "unknown", "data": b"simple", "filename": "filename.py"}
assert _parse_coverage_file_contents(coverage_file) == b"simple"
assert subject._parse_coverage_file_contents(coverage_file) == b"simple"


def test_version_one_parser_parse_coverage_file_contents_base64_zip_format():
original_input = b"some_cool_string right \n here"
formatted_input = base64.b64encode(zlib.compress(original_input))
# An assert for the sake of showing the result
assert formatted_input == b"eJwrzs9NjU/Oz8+JLy4pysxLVyjKTM8oUeBSyEgtSgUArOcK4w=="
subject = VersionOneReportParser()
coverage_file = {
"format": "base64+compressed",
"data": formatted_input,
"filename": "filename.py",
}
res = _parse_coverage_file_contents(coverage_file)
res = subject._parse_coverage_file_contents(coverage_file)
assert isinstance(res, bytes)
assert res == b"some_cool_string right \n here"
74 changes: 52 additions & 22 deletions services/report/parser/types.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from io import BytesIO
from typing import Any

from services.path_fixer.fixpaths import clean_toc
from services.report.fixes import get_fixes_from_raw


Expand Down Expand Up @@ -29,6 +30,8 @@ class ParsedRawReport(object):
toc
table of contents, this lists the files relevant to the report,
i.e. the files contained in the repository
env
list of env vars in environment of uploader (legacy only)
uploaded_files
list of class ParsedUploadedReportFile describing uploaded coverage files
report_fixes
Expand All @@ -38,31 +41,41 @@ class ParsedRawReport(object):

def __init__(
self,
toc: list[str],
toc: Any,
env: Any,
uploaded_files: list[ParsedUploadedReportFile],
report_fixes: Any,
):
self.toc = toc
self.env = env
self.uploaded_files = uploaded_files
self.report_fixes = report_fixes

def has_toc(self) -> bool:
return self.toc is not None

def has_env(self) -> bool:
return self.env is not None

def has_report_fixes(self) -> bool:
return self.report_fixes is not None

@property
def size(self):
return sum(f.size for f in self.uploaded_files)


class LegacyParsedRawReport(ParsedRawReport):
"""
report_fixes : bytes
<filename>:<line number>,<line number>,...
"""

def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
report_fixes = self.report_fixes.decode(errors="replace")
return get_fixes_from_raw(report_fixes, path_fixer)
def content(self) -> BytesIO:
buffer = BytesIO()
if self.has_toc():
for file in self.get_toc():
buffer.write(f"{file}\n".encode("utf-8"))
buffer.write(b"<<<<<< network\n\n")
for file in self.uploaded_files:
buffer.write(f"# path={file.filename}\n".encode("utf-8"))
buffer.write(file.contents)
buffer.write(b"\n<<<<<< EOF\n\n")
buffer.seek(0)
return buffer


class VersionOneParsedRawReport(ParsedRawReport):
Expand All @@ -77,17 +90,34 @@ class VersionOneParsedRawReport(ParsedRawReport):
}
"""

def get_toc(self) -> list[str]:
return self.toc

def get_env(self):
return self.env

def get_uploaded_files(self):
return self.uploaded_files

def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
return self.report_fixes

def as_readable(self) -> bytes:
buffer = b""
if self.toc:
for path in self.toc:
buffer += f"{path}\n".encode()
buffer += b"<<<<<< network\n\n"
for file in self.uploaded_files:
buffer += f"# path={file.filename}\n".encode()
buffer += file.contents
buffer += b"\n<<<<<< EOF\n\n"
return buffer

class LegacyParsedRawReport(ParsedRawReport):
"""
report_fixes : BinaryIO
<filename>:<line number>,<line number>,...
"""

def get_toc(self) -> list[str]:
return clean_toc(self.toc.decode(errors="replace").strip())

def get_env(self):
return self.env.decode(errors="replace")

def get_uploaded_files(self):
return self.uploaded_files

def get_report_fixes(self, path_fixer) -> dict[str, dict[str, Any]]:
report_fixes = self.report_fixes.decode(errors="replace")
return get_fixes_from_raw(report_fixes, path_fixer)
Loading

0 comments on commit 3eed85d

Please sign in to comment.