diff --git a/ReleaseHistory.md b/ReleaseHistory.md index e34c4c66d..2a0210306 100644 --- a/ReleaseHistory.md +++ b/ReleaseHistory.md @@ -10,7 +10,6 @@ * BUG: Fix `UnsupportedOperationException` in `ZipArchiveArtifact`. * BUG: Fix `MultithreadedAnalyzeCommandBase` to return rich return code with the `--rich-return-code` option. * NEW: Add `IsBinary` property to `IEnumeratedArtifact` and implement the property in `ZipArchiveArtifact`. -* NEW: Switch to content-based `IsBinary` categorization for `ZipArchiveArtifact`s. * PRF: Change default `max-file-size-in-kb` parameter to 10 megabytes. * PRF: Add support for efficiently peeking into non-seekable streams for binary/text categorization. * NEW: Add a new `--timeout-in-seconds` parameter to `AnalyzeOptionsBase`, which will override the `TimeoutInMilliseconds` property in `AnalyzeContextBase`. diff --git a/src/Sarif/ZipArchiveArtifact.cs b/src/Sarif/ZipArchiveArtifact.cs index fb39a4d54..26b9da28e 100644 --- a/src/Sarif/ZipArchiveArtifact.cs +++ b/src/Sarif/ZipArchiveArtifact.cs @@ -34,8 +34,8 @@ public bool IsBinary { get { - GetArtifactData(); - return this.bytes != null; + string extension = Path.GetExtension(Uri.ToString()); + return this.binaryExtensions.Contains(extension); } } @@ -91,20 +91,8 @@ public byte[] Bytes { if (this.contents == null && this.bytes == null) { - const int PeekWindowBytes = 1024; - var peekable = new PeekableStream(this.Stream, PeekWindowBytes); - - byte[] header = new byte[PeekWindowBytes]; - int length = this.Stream.Read(header, 0, header.Length); - bool isText = FileEncoding.IsTextualData(header, 0, length); - - peekable.Rewind(); - - if (isText) - { - this.contents = new StreamReader(Stream).ReadToEnd(); - } - else + string extension = Path.GetExtension(Uri.ToString()); + if (this.binaryExtensions.Contains(extension)) { // The underlying System.IO.Compression.DeflateStream throws on reads to get_Length. using var ms = new MemoryStream((int)SizeInBytes.Value); @@ -125,9 +113,12 @@ public byte[] Bytes ms.Read(this.bytes, 0, this.bytes.Length); } } + else + { + this.contents = new StreamReader(Stream).ReadToEnd(); + } } } - this.entry = null; } diff --git a/src/Test.UnitTests.Sarif/ArtifactProviderTests.cs b/src/Test.UnitTests.Sarif/ArtifactProviderTests.cs index b8faa0bd5..0916c47fc 100644 --- a/src/Test.UnitTests.Sarif/ArtifactProviderTests.cs +++ b/src/Test.UnitTests.Sarif/ArtifactProviderTests.cs @@ -6,6 +6,7 @@ using System.IO; using System.IO.Compression; using System.Linq; +using System.Text; using FluentAssertions; @@ -31,19 +32,14 @@ public void MultithreadedZipArchiveArtifactProvider_RetrieveSizeInBytesBeforeRet [Fact] public void MultithreadedZipArchiveArtifactProvider_RetrieveSizeInBytesBeforeRetrievingBytes() { - string filePath = this.GetType().Assembly.Location; - using FileStream reader = File.OpenRead(filePath); - - int headerSize = 1024; - byte[] data = new byte[headerSize]; - reader.Read(data, 0, data.Length); + string entryContents = $"{Guid.NewGuid()}"; - // Note that even thought we populate an archive with binary contents, the extension - // of the archive entry indicates a text file. We still expect binary data on expansion. - ZipArchive zip = CreateZipArchiveWithBinaryContents("test.txt", data); + // Note that even thought we populate an archive with text contents, the extension + // of the archive entry indicates a binary file. So we expect binary data on expansion. + ZipArchive zip = CreateZipArchiveWithTextContents("test.exe", entryContents); var artifactProvider = new MultithreadedZipArchiveArtifactProvider(zip, FileSystem.Instance); - ValidateBinaryContents(artifactProvider.Artifacts, data); + ValidateBinaryContents(artifactProvider.Artifacts, Encoding.UTF8.GetBytes(entryContents)); } [Fact]