From e0fdca0de68add2a7af227986a058e5140364cef Mon Sep 17 00:00:00 2001
From: John T Maxwell III <maxwellparc@gmail.com>
Date: Wed, 15 May 2024 12:25:23 -0700
Subject: [PATCH] Change parser to parse lowercase forms of uppercase words
 separately (#54)

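* Also fix undesired behavior in Sandbox (a default guess that belongs to a
  different wordform than the one being set is now discarded) and update
  liblcm to 11.0.0-beta0090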

---------

Co-authored-by: Jason Naylor <jasonleenaylor@users.noreply.github.com>
---
 Build/mkall.targets                           |  2 +-
 Build/nuget-common/packages.config            | 18 ++++++-------
 .../Interlinear/InterlinDocRootSiteBase.cs    | 20 +++++++++++++-
 Src/LexText/Interlinear/SandboxBase.cs        | 25 +++++++++++++++++-
 .../ParserCoreTests/ParseWorkerTests.cs       | 11 +++++---
 Src/LexText/ParserCore/ParserWorker.cs        | 26 ++++++++++++-------
 Src/xWorks/RecordList.cs                      |  3 ++-
 7 files changed, 78 insertions(+), 27 deletions(-)
diff --git a/Build/mkall.targets b/Build/mkall.targets
index 298ba3c581..fe4a0ea566 100644
--- a/Build/mkall.targets
+++ b/Build/mkall.targets
@@ -285,7 +285,7 @@
 		<ChorusNugetVersion>5.2.0-beta0003</ChorusNugetVersion>
 		<PalasoNugetVersion>13.0.0-beta0076</PalasoNugetVersion>
 		<ParatextNugetVersion>9.4.0.1-beta</ParatextNugetVersion>
-		<LcmNugetVersion>11.0.0-beta0089</LcmNugetVersion>
+		<LcmNugetVersion>11.0.0-beta0090</LcmNugetVersion>
 		<IcuNugetVersion>70.1.123</IcuNugetVersion>
 		<HermitCrabNugetVersion>2.5.13</HermitCrabNugetVersion>
 		<!--Todo: use PalasoBuildType, when it refers to somewhere that has the IPCFramework.-->
diff --git a/Build/nuget-common/packages.config b/Build/nuget-common/packages.config
index e0e5d9a534..ac1ec0bc4b 100644
--- a/Build/nuget-common/packages.config
+++ b/Build/nuget-common/packages.config
@@ -52,15 +52,15 @@
   <package id="SIL.Core" version="8.1.0-beta0035"  targetFramework="net461" />
   <package id="SIL.Core" version="13.0.0-beta0076"  targetFramework="net461" />
   <package id="SIL.DesktopAnalytics" version="4.0.0" targetFramework="net461" />
-  <package id="SIL.LCModel.Build.Tasks" version="11.0.0-beta0089"  targetFramework="net461" />
-  <package id="SIL.LCModel.Core.Tests" version="11.0.0-beta0089"  targetFramework="net461" />
-  <package id="SIL.LCModel.Core" version="11.0.0-beta0089"  targetFramework="net461" />
-  <package id="SIL.LCModel.FixData" version="11.0.0-beta0089"  targetFramework="net461" />
-  <package id="SIL.LCModel.Tests" version="11.0.0-beta0089"  targetFramework="net461" />
-  <package id="SIL.LCModel.Tools" version="11.0.0-beta0089"  targetFramework="net461" />
-  <package id="SIL.LCModel.Utils.Tests" version="11.0.0-beta0089"  targetFramework="net461" />
-  <package id="SIL.LCModel.Utils" version="11.0.0-beta0089"  targetFramework="net461" />
-  <package id="SIL.LCModel" version="11.0.0-beta0089"  targetFramework="net461" />
+  <package id="SIL.LCModel.Build.Tasks" version="11.0.0-beta0090"  targetFramework="net461" />
+  <package id="SIL.LCModel.Core.Tests" version="11.0.0-beta0090"  targetFramework="net461" />
+  <package id="SIL.LCModel.Core" version="11.0.0-beta0090"  targetFramework="net461" />
+  <package id="SIL.LCModel.FixData" version="11.0.0-beta0090"  targetFramework="net461" />
+  <package id="SIL.LCModel.Tests" version="11.0.0-beta0090"  targetFramework="net461" />
+  <package id="SIL.LCModel.Tools" version="11.0.0-beta0090"  targetFramework="net461" />
+  <package id="SIL.LCModel.Utils.Tests" version="11.0.0-beta0090"  targetFramework="net461" />
+  <package id="SIL.LCModel.Utils" version="11.0.0-beta0090"  targetFramework="net461" />
+  <package id="SIL.LCModel" version="11.0.0-beta0090"  targetFramework="net461" />
   <package id="SIL.Lexicon" version="13.0.0-beta0076"  targetFramework="net461" />
   <package id="SIL.libpalaso.l10ns" version="6.0.0" targetFramework="net461" />
   <package id="SIL.Lift" version="13.0.0-beta0076"  targetFramework="net461" />
diff --git a/Src/LexText/Interlinear/InterlinDocRootSiteBase.cs b/Src/LexText/Interlinear/InterlinDocRootSiteBase.cs
index 2f749a8314..7649ebdab3 100644
--- a/Src/LexText/Interlinear/InterlinDocRootSiteBase.cs
+++ b/Src/LexText/Interlinear/InterlinDocRootSiteBase.cs
@@ -18,6 +18,7 @@
 using SIL.LCModel.Infrastructure;
 using SIL.FieldWorks.FwCoreDlgControls;
 using XCore;
+using SIL.LCModel.Core.Text;
 
 namespace SIL.FieldWorks.IText
 {
@@ -1046,11 +1047,28 @@ public virtual void PropChanged(int hvo, int tag, int ivMin, int cvIns, int cvDe
 					break;
 				case WfiWordformTags.kflidAnalyses:
 					IWfiWordform wordform = m_cache.ServiceLocator.GetInstance<IWfiWordformRepository>().GetObject(hvo);
-					if (RootStText.UniqueWordforms().Contains(wordform))
+					var uniqueWordforms = RootStText.UniqueWordforms();
+					if (uniqueWordforms.Contains(wordform))
 					{
 						m_wordformsToUpdate.Add(wordform);
 						m_mediator.IdleQueue.Add(IdleQueuePriority.High, PostponedUpdateWordforms);
 					}
+					// Update uppercase versions of wordform.
+					// (When a lowercase wordform changes, it affects the best guess of its uppercase versions.)
+					var form = wordform.Form.VernacularDefaultWritingSystem;
+					var cf = new CaseFunctions(m_cache.ServiceLocator.WritingSystemManager.Get(form.get_WritingSystemAt(0)));
+					foreach (IWfiWordform ucWordform in uniqueWordforms)
+					{
+						var ucForm = ucWordform.Form.VernacularDefaultWritingSystem;
+						if (ucForm != form && ucForm != null && !string.IsNullOrEmpty(ucForm.Text))
+						{
+							if (cf.ToLower(ucForm.Text) == form.Text)
+							{
+								m_wordformsToUpdate.Add(ucWordform);
+								m_mediator.IdleQueue.Add(IdleQueuePriority.High, PostponedUpdateWordforms);
+							}
+						}
+					}
 					break;
 			}
 		}
diff --git a/Src/LexText/Interlinear/SandboxBase.cs b/Src/LexText/Interlinear/SandboxBase.cs
index 7e4f10cf53..a19c272b9e 100644
--- a/Src/LexText/Interlinear/SandboxBase.cs
+++ b/Src/LexText/Interlinear/SandboxBase.cs
@@ -1662,8 +1662,31 @@ private void GetDefaults(IWfiWordform wordform, ref IWfiAnalysis analysis, out I
 				// to prevent using data that does not exist anymore
 				if(!Cache.ServiceLocator.IsValidObjectId(hvoDefault))
 					hvoDefault = 0;
+				if (hvoDefault != 0 && m_fSetWordformInProgress)
+				{
+					// Verify that the guess includes the wordform set by the user.
+					// (The guesser may have guessed a lowercase wordform for an uppercase occurrence.)
+					// If it doesn't include the wordform, set hvoDefault to 0.
+					var obj = m_caches.MainCache.ServiceLocator.GetObject(hvoDefault);
+					IWfiWordform guessWf = null;
+					switch (obj.ClassID)
+					{
+						case WfiAnalysisTags.kClassId:
+							guessWf = ((IWfiAnalysis)obj).Wordform;
+							break;
+						case WfiGlossTags.kClassId:
+							guessWf = ((IWfiGloss)obj).Wordform;
+							break;
+						case WfiWordformTags.kClassId:
+							guessWf = (IWfiWordform)obj;
+							break;
+					}
+					if (guessWf != null && guessWf != wordform)
+						hvoDefault = 0;
+				}
+
 			}
-			else
+			if (hvoDefault == 0)
 			{
 				// Try to establish a default based on the wordform itself.
 				int ws = wordform.Cache.DefaultVernWs;
diff --git a/Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs b/Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs
index 46c86ce6a4..37383fb515 100644
--- a/Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs
+++ b/Src/LexText/ParserCore/ParserCoreTests/ParseWorkerTests.cs
@@ -149,17 +149,20 @@ public void UpdateWordform()
 			parserWorker.Parser = new TestParserClass(lowerResult, null);
 
 			// SUT
+			// Parsing an uppercase wordform should cause the lowercase wordform to be parsed.
+			// The uppercase wordform doesn't get a parse.
 			var bVal = parserWorker.UpdateWordform(catsUpperTest, ParserPriority.Low);
 			ExecuteIdleQueue();
 			Assert.IsTrue(bVal);
-			CheckAnalysisSize("Cats", 1, false);
-			CheckAnalysisSize("cats", 0, false);
+			CheckAnalysisSize("Cats", 0, false);
+			CheckAnalysisSize("cats", 1, false);
 
 			// SUT
+			// The lowercase wordform has already been parsed.
 			bVal = parserWorker.UpdateWordform(catsLowerTest, ParserPriority.Low);
 			ExecuteIdleQueue();
-			Assert.IsTrue(bVal);
-			CheckAnalysisSize("Cats", 1, false);
+			Assert.IsFalse(bVal);
+			CheckAnalysisSize("Cats", 0, false);
 			CheckAnalysisSize("cats", 1, false);
 		}
 		#endregion // Tests
diff --git a/Src/LexText/ParserCore/ParserWorker.cs b/Src/LexText/ParserCore/ParserWorker.cs
index 7015212615..e2eb769b62 100644
--- a/Src/LexText/ParserCore/ParserWorker.cs
+++ b/Src/LexText/ParserCore/ParserWorker.cs
@@ -32,6 +32,7 @@
 using SIL.LCModel.Infrastructure;
 using SIL.ObjectModel;
 using XCore;
+using SIL.LCModel.DomainServices;
 
 namespace SIL.FieldWorks.WordWorks.Parser
 {
@@ -164,18 +165,23 @@ public bool UpdateWordform(IWfiWordform wordform, ParserPriority priority)
 				CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD)
 				.Normalize(form.Text.Replace(' ', '.')));
 
-			// If the parse of the original word was not successful,then try to parse the lowercase word.
-			if (result.Analyses.Count == 0 || result.ErrorMessage != null)
-			{
-				var cf = new CaseFunctions(m_cache.ServiceLocator.WritingSystemManager.Get(form.get_WritingSystemAt(0)));
-				string sLower = cf.ToLower(form.Text);
+			// Try parsing the lowercase word if it is different from the original word.
+			// Do this even if the uppercase word parsed successfully.
+			var cf = new CaseFunctions(m_cache.ServiceLocator.WritingSystemManager.Get(form.get_WritingSystemAt(0)));
+			string sLower = cf.ToLower(form.Text);
 
-				// Try parsing the lowercase word if it is different from the original word.
-				if (sLower != form.Text)
+			if (sLower != form.Text)
+			{
+				var lcResult = m_parser.ParseWord(
+					CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD)
+					.Normalize(sLower.Replace(' ', '.')));
+				if (lcResult.Analyses.Count > 0 && lcResult.ErrorMessage == null)
 				{
-					result = m_parser.ParseWord(
-						CustomIcu.GetIcuNormalizer(FwNormalizationMode.knmNFD)
-						.Normalize(sLower.Replace(' ', '.')));
+					var text = TsStringUtils.MakeString(sLower, form.get_WritingSystem(0));
+					var lcWordform = WfiWordformServices.FindOrCreateWordform(m_cache, text);
+					m_parseFiler.ProcessParse(lcWordform, priority, lcResult);
+					m_parseFiler.ProcessParse(wordform, priority, result);
+					return true;
 				}
 			}
 
diff --git a/Src/xWorks/RecordList.cs b/Src/xWorks/RecordList.cs
index d36307882b..7a5ed518c5 100644
--- a/Src/xWorks/RecordList.cs
+++ b/Src/xWorks/RecordList.cs
@@ -1743,7 +1743,8 @@ protected virtual bool TryHandleUpdateOrMarkPendingReload(int hvo, int tag, int
 					return true;
 				}
 			}
-			else if (tag == SegmentTags.kflidAnalyses && m_publisher.OwningFieldName == "Wordforms")
+			// tag == WfiWordformTags.kflidAnalyses is needed for wordforms that don't appear in a segment.
+			else if ((tag == SegmentTags.kflidAnalyses || tag == WfiWordformTags.kflidAnalyses) && m_publisher.OwningFieldName == "Wordforms")
 			{
 				// Changing this potentially changes the list of wordforms that occur in the interesting texts.
 				// Hopefully we don't rebuild the list every time; usually this can only be changed in another view.