-
Notifications
You must be signed in to change notification settings - Fork 294
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix bug 447: MsExcelDecoder.DecodeAsync only works on text data types (#450)
See #447. Fix the Excel decoder to better support cell types and to export numbers and other values. The solution is not perfect for dates, currencies and percentages, due to limitations of the underlying library; more investigation is required to work around these.
- Loading branch information
Showing
6 changed files
with
106 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,25 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System; | ||
using System.Globalization; | ||
|
||
namespace Microsoft.KernelMemory.DataFormats.Office; | ||
|
||
/// <summary>
/// Options bag for the MS Excel decoder. Every property has a default,
/// so an instance created with <c>new MsExcelDecoderConfig()</c> is usable as is.
/// NOTE(review): how each option is consumed is defined by the decoder, which is
/// not part of this file; the descriptions below are inferred from names and
/// default values and should be confirmed against the decoder.
/// </summary>
public class MsExcelDecoderConfig
{
    /// <summary>Defaults to <c>true</c>. Presumably enables the worksheet header built from <see cref="WorksheetNumberTemplate"/>.</summary>
    public bool WithWorksheetNumber { get; set; } = true;

    /// <summary>Defaults to <c>false</c>. Presumably enables the trailer built from <see cref="EndOfWorksheetMarkerTemplate"/>.</summary>
    public bool WithEndOfWorksheetMarker { get; set; }

    /// <summary>Defaults to <c>true</c>. Presumably wraps each exported cell value in double quotes.</summary>
    public bool WithQuotes { get; set; } = true;

    /// <summary>Worksheet header template; the default contains a <c>{number}</c> placeholder.</summary>
    public string WorksheetNumberTemplate { get; set; } = "\n# Worksheet {number}\n";

    /// <summary>End-of-worksheet template; the default contains a <c>{number}</c> placeholder.</summary>
    public string EndOfWorksheetMarkerTemplate { get; set; } = "\n# End of worksheet {number}";

    /// <summary>Text associated with the start of a row. Empty by default.</summary>
    public string RowPrefix { get; set; } = string.Empty;

    /// <summary>Text placed between columns. Defaults to a comma followed by a space.</summary>
    public string ColumnSeparator { get; set; } = ", ";

    /// <summary>Text associated with the end of a row. Empty by default.</summary>
    public string RowSuffix { get; set; } = string.Empty;

    /// <summary>Text used for blank cells. Empty by default.</summary>
    public string BlankCellValue { get; set; } = string.Empty;

    /// <summary>Text used for boolean <c>true</c> cells. Defaults to <c>TRUE</c>.</summary>
    public string BooleanTrueValue { get; set; } = "TRUE";

    /// <summary>Text used for boolean <c>false</c> cells. Defaults to <c>FALSE</c>.</summary>
    public string BooleanFalseValue { get; set; } = "FALSE";

    /// <summary>Format string for time span values. Defaults to <c>"g"</c>.</summary>
    public string TimeSpanFormat { get; set; } = "g";

    /// <summary>
    /// Format provider for time span values. Defaults to the culture that is
    /// current when this object is created, so output varies by machine/thread culture.
    /// </summary>
    public IFormatProvider TimeSpanProvider { get; set; } = CultureInfo.CurrentCulture;

    /// <summary>Format string for date values. Defaults to <c>"d"</c>.</summary>
    public string DateFormat { get; set; } = "d";

    /// <summary>
    /// Format provider for date values. Defaults to the culture that is
    /// current when this object is created, so output varies by machine/thread culture.
    /// </summary>
    public IFormatProvider DateFormatProvider { get; set; } = CultureInfo.CurrentCulture;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
46 changes: 46 additions & 0 deletions
46
service/tests/Core.FunctionalTests/DataFormats/Office/MsExcelDecoderTest.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Microsoft.KernelMemory.DataFormats; | ||
using Microsoft.KernelMemory.DataFormats.Office; | ||
using Microsoft.TestHelpers; | ||
using Xunit.Abstractions; | ||
|
||
namespace Microsoft.Core.FunctionalTests.DataFormats.Office; | ||
|
||
/// <summary>
/// Tests that the MS Excel decoder exports non-text cell types
/// (numbers, percentages, currencies, dates, booleans) as well as text.
/// </summary>
public class MsExcelDecoderTest : BaseFunctionalTestCase
{
    public MsExcelDecoderTest(IConfiguration cfg, ITestOutputHelper output) : base(cfg, output)
    {
    }

    /// <summary>
    /// Decodes the sample workbook with the default decoder settings and checks
    /// that one representative value of each cell type appears in the output.
    /// </summary>
    [Fact]
    [Trait("Category", "UnitTest")]
    [Trait("Category", "DataFormats")]
    public async Task ItExtractsAllTypes()
    {
        // The default decoder configuration formats dates with CultureInfo.CurrentCulture,
        // while the expectations below are en-US renderings ("1/12/2009", '.' as decimal
        // separator). Pin the culture so the test does not depend on the machine running it.
        // NOTE(review): assumes the decoder creates its default configuration no earlier
        // than the constructor call below - confirm against MsExcelDecoder.
        var previousCulture = System.Globalization.CultureInfo.CurrentCulture;
        System.Globalization.CultureInfo.CurrentCulture = System.Globalization.CultureInfo.GetCultureInfo("en-US");

        try
        {
            // Arrange
            const string file = "file3-data.xlsx";
            var decoder = new MsExcelDecoder();

            // Act
            FileContent result = await decoder.DecodeAsync(file);

            // One line per section, each terminated by a newline.
            string content = string.Concat(result.Sections.Select(s => s.Content + "\n"));
            Console.WriteLine(content);

            // Assert
            Assert.Contains("\"0.5\"", content); // 50% percentage
            Assert.Contains("\"512.99\"", content); // number
            Assert.Contains("\"3.99999999\"", content); // number
            Assert.Contains("\"0.25\"", content); // fraction
            Assert.Contains("\"123.6\"", content); // currency
            Assert.Contains("\"4518\"", content); // currency
            Assert.Contains("\"444666\"", content); // currency
            Assert.Contains("\"United States of America\"", content); // text
            Assert.Contains("\"Rome\", \"\", \"Tokyo\"", content); // text with empty columns
            Assert.Contains("\"1/12/2009\"", content); // date
            Assert.Contains("\"12/25/2090\"", content); // date
            Assert.Contains("\"98001\"", content); // zip code
            Assert.Contains("\"15554000600\"", content); // phone number
            Assert.Contains("\"TRUE\"", content); // boolean
        }
        finally
        {
            // Restore the culture so the override cannot affect other code on this thread.
            System.Globalization.CultureInfo.CurrentCulture = previousCulture;
        }
    }
}
Binary file not shown.