Skip to content

Commit

Permalink
Change parser to parse lowercase forms of uppercase words separately (#…
Browse files Browse the repository at this point in the history
…54)

* Also fix undesired behavior in Sandbox and update liblcm

---------

Co-authored-by: Jason Naylor <[email protected]>
  • Loading branch information
jtmaxwell3 and jasonleenaylor authored May 15, 2024
1 parent 15aec1d commit e0fdca0
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Build/mkall.targets
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@
<ChorusNugetVersion>5.2.0-beta0003</ChorusNugetVersion>
<PalasoNugetVersion>13.0.0-beta0076</PalasoNugetVersion>
<ParatextNugetVersion>9.4.0.1-beta</ParatextNugetVersion>
<LcmNugetVersion>11.0.0-beta0089</LcmNugetVersion>
<LcmNugetVersion>11.0.0-beta0090</LcmNugetVersion>
<IcuNugetVersion>70.1.123</IcuNugetVersion>
<HermitCrabNugetVersion>2.5.13</HermitCrabNugetVersion>
<!--Todo: use PalasoBuildType, when it refers to somewhere that has the IPCFramework.-->
Expand Down
18 changes: 9 additions & 9 deletions Build/nuget-common/packages.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@
<package id="SIL.Core" version="8.1.0-beta0035" targetFramework="net461" />
<package id="SIL.Core" version="13.0.0-beta0076" targetFramework="net461" />
<package id="SIL.DesktopAnalytics" version="4.0.0" targetFramework="net461" />
<package id="SIL.LCModel.Build.Tasks" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel.Core.Tests" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel.Core" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel.FixData" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel.Tests" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel.Tools" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel.Utils.Tests" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel.Utils" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel" version="11.0.0-beta0089" targetFramework="net461" />
<package id="SIL.LCModel.Build.Tasks" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.LCModel.Core.Tests" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.LCModel.Core" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.LCModel.FixData" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.LCModel.Tests" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.LCModel.Tools" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.LCModel.Utils.Tests" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.LCModel.Utils" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.LCModel" version="11.0.0-beta0090" targetFramework="net461" />
<package id="SIL.Lexicon" version="13.0.0-beta0076" targetFramework="net461" />
<package id="SIL.libpalaso.l10ns" version="6.0.0" targetFramework="net461" />
<package id="SIL.Lift" version="13.0.0-beta0076" targetFramework="net461" />
Expand Down
20 changes: 19 additions & 1 deletion Src/LexText/Interlinear/InterlinDocRootSiteBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
using SIL.LCModel.Infrastructure;
using SIL.FieldWorks.FwCoreDlgControls;
using XCore;
using SIL.LCModel.Core.Text;

namespace SIL.FieldWorks.IText
{
Expand Down Expand Up @@ -1046,11 +1047,28 @@ public virtual void PropChanged(int hvo, int tag, int ivMin, int cvIns, int cvDe
break;
case WfiWordformTags.kflidAnalyses:
IWfiWordform wordform = m_cache.ServiceLocator.GetInstance<IWfiWordformRepository>().GetObject(hvo);
if (RootStText.UniqueWordforms().Contains(wordform))
var uniqueWordforms = RootStText.UniqueWordforms();
if (uniqueWordforms.Contains(wordform))
{
m_wordformsToUpdate.Add(wordform);
m_mediator.IdleQueue.Add(IdleQueuePriority.High, PostponedUpdateWordforms);
}
// Update uppercase versions of wordform.
// (When a lowercase wordform changes, it affects the best guess of its uppercase versions.)
var form = wordform.Form.VernacularDefaultWritingSystem;
var cf = new CaseFunctions(m_cache.ServiceLocator.WritingSystemManager.Get(form.get_WritingSystemAt(0)));
foreach (IWfiWordform ucWordform in uniqueWordforms)
{
var ucForm = ucWordform.Form.VernacularDefaultWritingSystem;
if (ucForm != form && ucForm != null && !string.IsNullOrEmpty(ucForm.Text))
{
if (cf.ToLower(ucForm.Text) == form.Text)
{
m_wordformsToUpdate.Add(ucWordform);
m_mediator.IdleQueue.Add(IdleQueuePriority.High, PostponedUpdateWordforms);
}
}
}
break;
}
}
Expand Down
25 changes: 24 additions & 1 deletion Src/LexText/Interlinear/SandboxBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1662,8 +1662,31 @@ private void GetDefaults(IWfiWordform wordform, ref IWfiAnalysis analysis, out I
// to prevent using data that does not exist anymore
if(!Cache.ServiceLocator.IsValidObjectId(hvoDefault))
hvoDefault = 0;
if (hvoDefault != 0 && m_fSetWordformInProgress)
{
// Verify that the guess includes the wordform set by the user.
// (The guesser may have guessed a lowercase wordform for an uppercase occurrence.)
// If it doesn't include the wordform, set hvoDefault to 0.
var obj = m_caches.MainCache.ServiceLocator.GetObject(hvoDefault);
IWfiWordform guessWf = null;
switch (obj.ClassID)
{
case WfiAnalysisTags.kClassId:
guessWf = ((IWfiAnalysis)obj).Wordform;
break;
case WfiGlossTags.kClassId:
guessWf = ((IWfiGloss)obj).Wordform;
break;
case WfiWordformTags.kClassId:
guessWf = (IWfiWordform)obj;
break;
}
if (guessWf != null && guessWf != wordform)
hvoDefault = 0;
}

}
else
if (hvoDefault == 0)
{
// Try to establish a default based on the wordform itself.
int ws = wordform.Cache.DefaultVernWs;
Expand Down
11 changes: 7 additions & 4 deletions Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -149,17 +149,20 @@ public void UpdateWordform()
parserWorker.Parser = new TestParserClass(lowerResult, null);

// SUT
// Parsing an uppercase wordform should cause the lowercase wordform to be parsed.
// The uppercase wordform doesn't get a parse.
var bVal = parserWorker.UpdateWordform(catsUpperTest, ParserPriority.Low);
ExecuteIdleQueue();
Assert.IsTrue(bVal);
CheckAnalysisSize("Cats", 1, false);
CheckAnalysisSize("cats", 0, false);
CheckAnalysisSize("Cats", 0, false);
CheckAnalysisSize("cats", 1, false);

// SUT
// The lowercase wordform has already been parsed.
bVal = parserWorker.UpdateWordform(catsLowerTest, ParserPriority.Low);
ExecuteIdleQueue();
Assert.IsTrue(bVal);
CheckAnalysisSize("Cats", 1, false);
Assert.IsFalse(bVal);
CheckAnalysisSize("Cats", 0, false);
CheckAnalysisSize("cats", 1, false);
}
#endregion // Tests
Expand Down
26 changes: 16 additions & 10 deletions Src/LexText/ParserCore/ParserWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
using SIL.LCModel.Infrastructure;
using SIL.ObjectModel;
using XCore;
using SIL.LCModel.DomainServices;

namespace SIL.FieldWorks.WordWorks.Parser
{
Expand Down Expand Up @@ -164,18 +165,23 @@ public bool UpdateWordform(IWfiWordform wordform, ParserPriority priority)
CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD)
.Normalize(form.Text.Replace(' ', '.')));

// If the parse of the original word was not successful, then try to parse the lowercase word.
if (result.Analyses.Count == 0 || result.ErrorMessage != null)
{
var cf = new CaseFunctions(m_cache.ServiceLocator.WritingSystemManager.Get(form.get_WritingSystemAt(0)));
string sLower = cf.ToLower(form.Text);
// Try parsing the lowercase word if it is different from the original word.
// Do this even if the uppercase word parsed successfully.
var cf = new CaseFunctions(m_cache.ServiceLocator.WritingSystemManager.Get(form.get_WritingSystemAt(0)));
string sLower = cf.ToLower(form.Text);

// Try parsing the lowercase word if it is different from the original word.
if (sLower != form.Text)
if (sLower != form.Text)
{
var lcResult = m_parser.ParseWord(
CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD)
.Normalize(sLower.Replace(' ', '.')));
if (lcResult.Analyses.Count > 0 && lcResult.ErrorMessage == null)
{
result = m_parser.ParseWord(
CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD)
.Normalize(sLower.Replace(' ', '.')));
var text = TsStringUtils.MakeString(sLower, form.get_WritingSystem(0));
var lcWordform = WfiWordformServices.FindOrCreateWordform(m_cache, text);
m_parseFiler.ProcessParse(lcWordform, priority, lcResult);
m_parseFiler.ProcessParse(wordform, priority, result);
return true;
}
}

Expand Down
3 changes: 2 additions & 1 deletion Src/xWorks/RecordList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1743,7 +1743,8 @@ protected virtual bool TryHandleUpdateOrMarkPendingReload(int hvo, int tag, int
return true;
}
}
else if (tag == SegmentTags.kflidAnalyses && m_publisher.OwningFieldName == "Wordforms")
// tag == WfiWordformTags.kflidAnalyses is needed for wordforms that don't appear in a segment.
else if ((tag == SegmentTags.kflidAnalyses || tag == WfiWordformTags.kflidAnalyses) && m_publisher.OwningFieldName == "Wordforms")
{
// Changing this potentially changes the list of wordforms that occur in the interesting texts.
// Hopefully we don't rebuild the list every time; usually this can only be changed in another view.
Expand Down

0 comments on commit e0fdca0

Please sign in to comment.