Skip to content

Commit

Permalink
Fix JSON serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Jan 24, 2025
1 parent 3a4047a commit 3a6c3cf
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ WordAlignmentPlatformApi.WordAlignmentPlatformApiClient client
{
private readonly WordAlignmentPlatformApi.WordAlignmentPlatformApiClient _client = client;
private static readonly JsonSerializerOptions JsonSerializerOptions =
new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase };
new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase, Converters = { new WordAlignmentConverter() } };

public string OutboxId => ServalWordAlignmentPlatformOutboxConstants.OutboxId;

Expand Down Expand Up @@ -52,8 +52,6 @@ await _client.BuildRestartingAsync(
);
break;
case ServalWordAlignmentPlatformOutboxConstants.InsertInferences:
var jsonSerializerOptions = new JsonSerializerOptions(JsonSerializerOptions);
jsonSerializerOptions.Converters.Add(new WordAlignmentJsonConverter());
IAsyncEnumerable<Models.WordAlignment> wordAlignments = JsonSerializer
.DeserializeAsyncEnumerable<Models.WordAlignment>(
contentStream!,
Expand Down Expand Up @@ -109,32 +107,83 @@ await _client.IncrementTrainEngineCorpusSizeAsync(
};
}
}
}

public class WordAlignmentJsonConverter : JsonConverter<object>
{
public override object Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
internal class WordAlignmentConverter : JsonConverter<Models.WordAlignment>
{
switch (reader.TokenType)
public override Models.WordAlignment Read(
ref Utf8JsonReader reader,
Type typeToConvert,
JsonSerializerOptions options
)
{
case JsonTokenType.True:
return true;
case JsonTokenType.False:
return false;
case JsonTokenType.Number when reader.TryGetInt64(out long l):
return l;
case JsonTokenType.Number:
return reader.GetDouble();
case JsonTokenType.String:
var str = reader.GetString();
if (SIL.Machine.Corpora.AlignedWordPair.TryParse(str, out var alignedWordPair))
return alignedWordPair;
return str!;
default:
throw new JsonException();
if (reader.TokenType != JsonTokenType.StartObject)
{
throw new JsonException(
$"Expected StartObject token at the beginning of WordAlignment object but instead encountered {reader.TokenType}"
);
}
string corpusId = "",
textId = "";
IReadOnlyList<double> confidences = [];
IReadOnlyList<string> refs = [],
sourceTokens = [],
targetTokens = [];
IReadOnlyList<SIL.Machine.Corpora.AlignedWordPair> alignedWordPairs = [];
while (reader.Read() && reader.TokenType != JsonTokenType.EndObject)
{
if (reader.TokenType == JsonTokenType.PropertyName)
{
string s = reader.GetString()!;
switch (s)
{
case "corpus_id":
reader.Read();
corpusId = reader.GetString()!;
break;
case "text_id":
reader.Read();
textId = reader.GetString()!;
break;
case "confidences":
reader.Read();
confidences = JsonSerializer.Deserialize<IList<double>>(ref reader, options)!.ToArray();
break;
case "refs":
reader.Read();
refs = JsonSerializer.Deserialize<IList<string>>(ref reader, options)!.ToArray();
break;
case "source_tokens":
reader.Read();
sourceTokens = JsonSerializer.Deserialize<IList<string>>(ref reader, options)!.ToArray();
break;
case "target_tokens":
reader.Read();
targetTokens = JsonSerializer.Deserialize<IList<string>>(ref reader, options)!.ToArray();
break;
case "alignment":
reader.Read();
alignedWordPairs = SIL.Machine.Corpora.AlignedWordPair.Parse(reader.GetString()).ToArray();
break;
default:
throw new JsonException(
$"Unexpected property name {s} when deserializing WordAlignment object"
);
}
}
}
return new Models.WordAlignment()
{
CorpusId = corpusId,
TextId = textId,
Refs = refs,
Alignment = alignedWordPairs,
Confidences = confidences,
SourceTokens = sourceTokens,
TargetTokens = targetTokens
};
}
}

public override void Write(Utf8JsonWriter writer, object objectToWrite, JsonSerializerOptions options) =>
JsonSerializer.Serialize(writer, objectToWrite, objectToWrite.GetType(), options);
public override void Write(Utf8JsonWriter writer, Models.WordAlignment value, JsonSerializerOptions options) =>
throw new NotSupportedException();
}
}
14 changes: 9 additions & 5 deletions src/Serval/test/Serval.E2ETests/ServalApiTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -481,13 +481,13 @@ public async Task GetWordAlignment()
{
string engineId = await _helperClient.CreateNewEngineAsync("Statistical", "es", "en", "STAT1");
string[] books = ["1JN.txt", "2JN.txt", "MAT.txt"];
ParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false);
ParallelCorpusConfig test_corpus = await _helperClient.MakeParallelTextCorpus(["3JN.txt"], "es", "en", false);
string train_corpusId = await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, train_corpus, false);
string corpusId = await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, test_corpus, true);
ParallelCorpusConfig trainCorpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false);
ParallelCorpusConfig testCorpus = await _helperClient.MakeParallelTextCorpus(["3JN.txt"], "es", "en", false);
string trainCorpusId = await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, trainCorpus, false);
string corpusId = await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, testCorpus, true);
_helperClient.WordAlignmentBuildConfig.TrainOn =
[
new TrainingCorpusConfig2() { ParallelCorpusId = train_corpusId }
new TrainingCorpusConfig2() { ParallelCorpusId = trainCorpusId }
];
_helperClient.WordAlignmentBuildConfig.WordAlignOn =
[
Expand All @@ -508,6 +508,10 @@ public async Task GetWordAlignment()
}
)
);

IList<Client.WordAlignment> wordAlignments =
await _helperClient.WordAlignmentEnginesClient.GetAllWordAlignmentsAsync(engineId, corpusId);
Assert.That(wordAlignments, Has.Count.EqualTo(14)); //Number of verses in 3JN
}

[TearDown]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ private static IEnumerable<Row> CollapseRanges(ParallelTextRow[] rows)

foreach (NParallelTextRow row in rows)
{
// Within each row: index 0 is the source filtered for pretranslation, index 1 is the target filtered for pretranslation, and index 2 is the target filtered for training.
if (
hasUnfinishedRange
&& (!row.IsInRange(0) || row.IsRangeStart(0))
Expand All @@ -267,7 +268,7 @@ private static IEnumerable<Row> CollapseRanges(ParallelTextRow[] rows)
}

textId = row.TextId;
refs.AddRange(row.NRefs[2]);
refs.AddRange(row.NRefs[2].Count > 0 ? row.NRefs[2] : row.NRefs[1]);
isInTrainingData = isInTrainingData || row.Text(2).Length > 0;

if (row.Text(0).Length > 0)
Expand Down

0 comments on commit 3a6c3cf

Please sign in to comment.