Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Lt 21911: Novel Root Guesser #172

Merged
merged 11 commits into from
Oct 9, 2024
6 changes: 3 additions & 3 deletions Build/mkall.targets
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@
<ParatextNugetVersion>9.4.0.1-beta</ParatextNugetVersion>
<LcmNugetVersion>11.0.0-beta0104</LcmNugetVersion>
<IcuNugetVersion>70.1.123</IcuNugetVersion>
<HermitCrabNugetVersion>2.5.13</HermitCrabNugetVersion>
<HermitCrabNugetVersion>3.3.0</HermitCrabNugetVersion>
<IPCFrameworkVersion>1.1.1-beta0001</IPCFrameworkVersion>
<!-- bt393 is the master branch build of ExCss for Windows development. Update when appropriate. -->
<ExCssBuildType Condition="'$(OS)'=='Windows_NT'">bt393</ExCssBuildType>
Expand Down Expand Up @@ -531,8 +531,8 @@
<SILNugetPackages Include="Autofac"><Version>4.9.4</Version><Path>lib/net45/*.*</Path><NoSymbols>true</NoSymbols></SILNugetPackages>
<SILNugetPackages Include="Vulcan.Uczniowie.HelpProvider"><Version>1.0.16</Version><Path>lib/net461/*.*</Path></SILNugetPackages>
<!-- HermitCrab and related packages -->
<SILNugetPackages Include="SIL.Machine.Morphology.HermitCrab"><Version>$(HermitCrabNugetVersion)</Version><Path>lib/net461/*.*</Path><NoSymbols>true</NoSymbols></SILNugetPackages>
<SILNugetPackages Include="SIL.Machine"><Version>$(HermitCrabNugetVersion)</Version><Path>lib/net461/*.*</Path><NoSymbols>true</NoSymbols></SILNugetPackages>
<SILNugetPackages Include="SIL.Machine.Morphology.HermitCrab"><Version>$(HermitCrabNugetVersion)</Version><Path>lib/netstandard2.0/*.*</Path><NoSymbols>true</NoSymbols></SILNugetPackages>
<SILNugetPackages Include="SIL.Machine"><Version>$(HermitCrabNugetVersion)</Version><Path>lib/netstandard2.0/*.*</Path><NoSymbols>true</NoSymbols></SILNugetPackages>
<SILNugetPackages Include="Sandwych.Quickgraph.Core"><Version>1.0.0</Version><Path>lib/net45/*.*</Path><NoSymbols>true</NoSymbols></SILNugetPackages>
</ItemGroup>
</Target>
Expand Down
4 changes: 2 additions & 2 deletions Build/nuget-common/packages.config
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@
<package id="SIL.libpalaso.l10ns" version="6.0.0" targetFramework="net461" />
<package id="SIL.Lift" version="13.0.0-beta0076" targetFramework="net461" />
<package id="SIL.Media" version="13.0.0-beta0076" targetFramework="net461" />
<package id="SIL.Machine" version="2.5.13" targetFramework="net461" />
<package id="SIL.Machine.Morphology.HermitCrab" version="2.5.13" targetFramework="net461" />
<package id="SIL.Machine" version="3.3.0" targetFramework="netstandard2.0" />
<package id="SIL.Machine.Morphology.HermitCrab" version="3.3.0" targetFramework="netstandard2.0" />
<package id="SIL.ParatextShared" version="7.4.0.1" targetFramework="net40" /> <!-- REVIEW (Hasso) 2023.05: do we still integrate with PT 7? -->
<package id="SIL.Scripture" version="13.0.0-beta0076" targetFramework="net461" />
<package id="SIL.TestUtilities" version="13.0.0-beta0076" targetFramework="net461" />
Expand Down
4 changes: 3 additions & 1 deletion Src/LexText/Interlinear/InterlinVc.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1356,9 +1356,10 @@ private void DisplayMorphBundle(IVwEnv vwenv, int hvo)
{
vwenv.AddString(m_tssMissingVernacular);
}
else if (mf == null)
else if (mf == null || SandboxBase.IsLexicalPattern(mf.Form))
{
// If no morph, use the form of the morph bundle (and the entry is of course missing)
// If mf.Form is a lexical pattern then the form of the morph bundle is the guessed root.
var ws = GetRealWsOrBestWsForContext(wmb.Hvo, spec);
vwenv.AddStringAltMember(WfiMorphBundleTags.kflidForm, ws, this);
}
Expand Down Expand Up @@ -2637,4 +2638,5 @@ protected override void SetInt(int hvo, int flid, int newValue)
}

}

}
5 changes: 4 additions & 1 deletion Src/LexText/Interlinear/SandboxBase.ComboHandlers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,10 @@ private void AddAnalysesOf(IWfiWordform wordform, bool fBaseWordIsPhrase)
IMoForm morph = mb.MorphRA;
if (morph != null)
{
ITsString tss = morph.Form.get_String(m_sandbox.RawWordformWs);
// If morph.Form is a lexical pattern then mb.Form is the guessed root.
ITsString tss = IsLexicalPattern(morph.Form)
? mb.Form.get_String(m_sandbox.RawWordformWs)
: morph.Form.get_String(m_sandbox.RawWordformWs);
var morphType = morph.MorphTypeRA;
string sPrefix = morphType.Prefix;
string sPostfix = morphType.Postfix;
Expand Down
6 changes: 6 additions & 0 deletions Src/LexText/Interlinear/SandboxBase.GetRealyAnalysisMethod.cs
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,12 @@ private IAnalysis FinishItOff()
else
{
mb.MorphRA = mfRepository.GetObject(m_analysisMorphs[imorph]);
if (mb.MorphRA != null && IsLexicalPattern(mb.MorphRA.Form))
{
// If mb.MorphRA.Form is a lexical pattern then set mb.Form to the guessed root.
int hvoSbMorph = m_sda.get_VecItem(m_hvoSbWord, ktagSbWordMorphs, imorph);
mb.Form.set_String(wsVern, m_sandbox.GetFullMorphForm(hvoSbMorph));
}
}
// Set the MSA if we have one. Note that it is (pathologically) possible that the user has done
// something in another window to destroy the MSA we remember, so don't try to set it if so.
Expand Down
38 changes: 35 additions & 3 deletions Src/LexText/Interlinear/SandboxBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1325,9 +1325,14 @@ private bool LoadRealDataIntoSec1(int hvoSbWord, bool fLookForDefaults, bool fAd
}
else
{
// Create the secondary object corresponding to the MoForm in the usual way from the form object.
hvoMorphForm = CreateSecondaryAndCopyStrings(InterlinLineChoices.kflidMorphemes, mf.Hvo,
MoFormTags.kflidForm, hvoSbWord, sdaMain, cda);
hvoMorphForm = m_caches.FindOrCreateSec(mf.Hvo, kclsidSbNamedObj, hvoSbWord, ktagSbWordDummy);
if (IsLexicalPattern(mf.Form))
// If mf.Form is a lexical pattern then mb.Form is the guessed root.
CopyStringsToSecondary(InterlinLineChoices.kflidMorphemes, sdaMain, mb.Hvo,
WfiMorphBundleTags.kflidForm, cda, hvoMorphForm, ktagSbNamedObjName);
else
CopyStringsToSecondary(InterlinLineChoices.kflidMorphemes, sdaMain, mf.Hvo,
MoFormTags.kflidForm, cda, hvoMorphForm, ktagSbNamedObjName);
// Store the prefix and postfix markers from the MoMorphType object.
int hvoMorphType = sdaMain.get_ObjectProp(mf.Hvo,
MoFormTags.kflidMorphType);
Expand Down Expand Up @@ -1467,6 +1472,22 @@ private bool LoadRealDataIntoSec1(int hvoSbWord, bool fLookForDefaults, bool fAd
return fGuessing != 0;
}

/// <summary>
/// Does multiString contain a lexical pattern (e.g. [Seg]*)?
/// </summary>
/// <param name="multiString">The multi-writing-system string to inspect; null is treated as "no pattern".</param>
/// <returns>
/// True if any stored alternative contains both "[" and "]"; otherwise false.
/// </returns>
public static bool IsLexicalPattern(IMultiUnicode multiString)
{
    if (multiString == null)
        return false;
    // This assumes that "[" and "]" are not part of any phonemes.
    for (var i = 0; i < multiString.StringCount; i++)
    {
        int ws;
        var alternative = multiString.GetStringFromIndex(i, out ws);
        // Text may be null (e.g. for an empty alternative); such an alternative cannot hold a pattern.
        string text = alternative == null ? null : alternative.Text;
        if (text == null)
            continue;
        if (text.Contains("[") && text.Contains("]"))
            return true;
    }
    return false;
}

public static bool GetHasMultipleRelevantAnalyses(IWfiWordform analysis)
{
int humanCount = analysis.HumanApprovedAnalyses.Count();
Expand Down Expand Up @@ -2007,6 +2028,17 @@ where icuCollator.Compare(mf.Form.get_String(ws).Text, form) == 0 && mf.MorphTyp
&& (mf.MorphTypeRA == mmt || mf.MorphTypeRA.IsAmbiguousWith(mmt))
select mf).ToList();

if (morphs.Count == 0)
{
// Look for morphs in matching morph bundles with lexical patterns.
// If morph is a lexical pattern then the morph bundle's Form is the guessed root.
morphs = (from mb in Cache.ServiceLocator.GetInstance<IWfiMorphBundleRepository>().AllInstances()
where IsLexicalPattern(mb.MorphRA.Form)
&& icuCollator.Compare(mb.Form.get_String(ws).Text, form) == 0
&& mb.MorphRA.MorphTypeRA != null
&& (mb.MorphRA.MorphTypeRA == mmt || mb.MorphRA.MorphTypeRA.IsAmbiguousWith(mmt))
select mb.MorphRA).ToList();
}
if (morphs.Count == 1)
return morphs.First(); // special case: we can avoid the cost of figuring ReferringObjects.
IMoForm bestMorph = null;
Expand Down
3 changes: 2 additions & 1 deletion Src/LexText/ParserCore/FwXmlTraceManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,8 @@ private XElement CreateAllomorphElement(Allomorph allomorph)
if (inflTypeID != 0 && !m_cache.ServiceLocator.GetInstance<ILexEntryInflTypeRepository>().TryGetObject(inflTypeID, out inflType))
return null;

return HCParser.CreateAllomorphElement("Allomorph", form, msa, inflType, formID2 != 0);
string guessedString = allomorph.Guessed ? allomorph.Morpheme.Gloss : null;
return HCParser.CreateAllomorphElement("Allomorph", form, msa, inflType, formID2 != 0, guessedString);
}
}
}
28 changes: 24 additions & 4 deletions Src/LexText/ParserCore/HCLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1854,7 +1854,7 @@ private void LoadAllomorphCoOccurrenceRules(IMoAlloAdhocProhib alloAdhocProhib)
{
var rule = new AllomorphCoOccurrenceRule(ConstraintType.Exclude, others, adjacency);
firstAllo.AllomorphCoOccurrenceRules.Add(rule);
m_language.AllomorphCoOccurrenceRules.Add(rule);
m_language.AllomorphCoOccurrenceRules.Add((firstAllo, rule));
}
}
}
Expand Down Expand Up @@ -1904,7 +1904,7 @@ private void LoadMorphemeCoOccurrenceRules(IMoMorphAdhocProhib morphAdhocProhib)
{
var rule = new MorphemeCoOccurrenceRule(ConstraintType.Exclude, others, adjacency);
firstMorpheme.MorphemeCoOccurrenceRules.Add(rule);
m_language.MorphemeCoOccurrenceRules.Add(rule);
m_language.MorphemeCoOccurrenceRules.Add((firstMorpheme, rule));
}
}
}
Expand Down Expand Up @@ -2198,7 +2198,7 @@ private FeatureStruct LoadFeatureStruct(IFsFeatStruc fs, FeatureSystem featSys)
private Shape Segment(string str)
{
Shape shape;
if (m_acceptUnspecifiedGraphemes)
if (m_acceptUnspecifiedGraphemes && !IsLexicalPattern(str))
{
int[] baseCharPositions = null;
do
Expand All @@ -2222,11 +2222,20 @@ private Shape Segment(string str)
}
else
{
shape = m_table.Segment(str);
shape = m_table.Segment(str, true);
}
return shape;
}

/// <summary>
/// Does form contain a lexical pattern (e.g. [Seg]*)?
/// </summary>
/// <param name="form">The form string to inspect; null is treated as "no pattern".</param>
/// <returns>True if form contains both "[" and "]"; otherwise false.</returns>
public static bool IsLexicalPattern(string form)
{
    // A missing form cannot be a pattern; answer false instead of throwing.
    if (form == null)
        return false;
    // This assumes that "[" and "]" are not part of any phonemes.
    return form.Contains("[") && form.Contains("]");
}

private static string FormatForm(string formStr)
{
return formStr.Trim().Replace(' ', '.');
Expand Down Expand Up @@ -2383,6 +2392,17 @@ private void LoadCharacterDefinitionTable(IPhPhonemeSet phonemeSet)
m_table.AddBoundary(otherChar);
}
}
// Add natural classes to table for lexical patterns.
foreach(NaturalClass hcNaturalClass in m_language.NaturalClasses)
{
m_table.AddNaturalClass(hcNaturalClass);
}
foreach (string ncName in m_naturalClassLookup.Keys)
{
NaturalClass hcNaturalClass;
if (TryLoadNaturalClass(m_naturalClassLookup[ncName], out hcNaturalClass))
m_table.AddNaturalClass(hcNaturalClass);
}
m_language.CharacterDefinitionTables.Add(m_table);
}

Expand Down
17 changes: 8 additions & 9 deletions Src/LexText/ParserCore/HCParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public ParseResult ParseWord(string word)
IEnumerable<Word> wordAnalyses;
try
{
wordAnalyses = m_morpher.ParseWord(word);
wordAnalyses = m_morpher.ParseWord(word, out _, true);
}
catch (Exception e)
{
Expand All @@ -103,7 +103,7 @@ public ParseResult ParseWord(string word)
if (GetMorphs(wordAnalysis, out morphs))
{
analyses.Add(new ParseAnalysis(morphs.Select(mi =>
new ParseMorph(mi.Form, mi.Msa, mi.InflType))));
new ParseMorph(mi.Form, mi.Msa, mi.InflType, mi.GuessedString))));
}
}
result = new ParseResult(analyses);
Expand Down Expand Up @@ -189,11 +189,11 @@ private XDocument ParseToXml(string form, bool tracing, IEnumerable<int> selectT
try
{
object trace;
foreach (Word wordAnalysis in m_morpher.ParseWord(form, out trace))
foreach (Word wordAnalysis in m_morpher.ParseWord(form, out trace, true))
{
List<MorphInfo> morphs;
if (GetMorphs(wordAnalysis, out morphs))
wordformElem.Add(new XElement("Analysis", morphs.Select(mi => CreateAllomorphElement("Morph", mi.Form, mi.Msa, mi.InflType, mi.IsCircumfix))));
wordformElem.Add(new XElement("Analysis", morphs.Select(mi => CreateAllomorphElement("Morph", mi.Form, mi.Msa, mi.InflType, mi.IsCircumfix, mi.GuessedString))));
}
if (tracing)
wordformElem.Add(new XElement("Trace", trace));
Expand Down Expand Up @@ -364,7 +364,6 @@ private bool GetMorphs(Word ws, out List<MorphInfo> result)
}
else
{
morphInfo.String += formStr;
continue;
}

Expand Down Expand Up @@ -394,7 +393,7 @@ private bool GetMorphs(Word ws, out List<MorphInfo> result)
morphInfo = new MorphInfo
{
Form = form,
String = formStr,
GuessedString = allomorph.Guessed ? formStr : null,
Msa = msa,
InflType = inflType,
IsCircumfix = formID2 > 0
Expand Down Expand Up @@ -466,11 +465,11 @@ private static string GetMorphTypeString(Guid typeGuid)
return "unknown";
}

internal static XElement CreateAllomorphElement(string name, IMoForm form, IMoMorphSynAnalysis msa, ILexEntryInflType inflType, bool circumfix)
internal static XElement CreateAllomorphElement(string name, IMoForm form, IMoMorphSynAnalysis msa, ILexEntryInflType inflType, bool circumfix, string guessedString)
{
Guid morphTypeGuid = circumfix ? MoMorphTypeTags.kguidMorphCircumfix : (form.MorphTypeRA == null ? Guid.Empty : form.MorphTypeRA.Guid);
var elem = new XElement(name, new XAttribute("id", form.Hvo), new XAttribute("type", GetMorphTypeString(morphTypeGuid)),
new XElement("Form", circumfix ? form.OwnerOfClass<ILexEntry>().HeadWord.Text : form.GetFormWithMarkers(form.Cache.DefaultVernWs)),
new XElement("Form", circumfix ? form.OwnerOfClass<ILexEntry>().HeadWord.Text : guessedString ?? form.GetFormWithMarkers(form.Cache.DefaultVernWs)),
new XElement("LongName", form.LongName));
elem.Add(CreateMorphemeElement(msa, inflType));
return elem;
Expand Down Expand Up @@ -567,7 +566,7 @@ private string ProcessParseException(Exception e)
class MorphInfo
{
public IMoForm Form { get; set; }
public string String { get; set; }
public string GuessedString { get; set; }
public IMoMorphSynAnalysis Msa { get; set; }
public ILexEntryInflType InflType { get; set; }
public bool IsCircumfix { get; set; }
Expand Down
7 changes: 7 additions & 0 deletions Src/LexText/ParserCore/ParseFiler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Linq;
using SIL.LCModel;
using SIL.LCModel.Application;
using SIL.LCModel.Core.Text;
using SIL.LCModel.Infrastructure;
using XCore;

Expand Down Expand Up @@ -250,6 +251,12 @@ private void ProcessAnalysis(IWfiWordform wordform, ParseAnalysis analysis)
mb.MsaRA = morph.Msa;
if (morph.InflType != null)
mb.InflTypeRA = morph.InflType;
if (morph.GuessedString != null)
{
// Override default Form with GuessedString.
int vernWS = m_cache.DefaultVernWs;
mb.Form.set_String(vernWS, TsStringUtils.MakeString(morph.GuessedString, vernWS));
}
}
matches.Add(newAnal);
}
Expand Down
31 changes: 29 additions & 2 deletions Src/LexText/ParserCore/ParseResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ public bool MatchesIWfiAnalysis(IWfiAnalysis analysis)
foreach (IWfiMorphBundle mb in analysis.MorphBundlesOS)
{
var current = this.Morphs[i++];
if (mb.MorphRA == current.Form && mb.MsaRA == current.Msa && mb.InflTypeRA == current.InflType)
if (mb.MorphRA == current.Form && mb.MsaRA == current.Msa && mb.InflTypeRA == current.InflType &&
(current.GuessedString == null || EquivalentFormString(mb.Form, current.GuessedString)))
{
// Possibly matches condition (2), above.
mbMatch = true;
Expand All @@ -131,6 +132,16 @@ public bool MatchesIWfiAnalysis(IWfiAnalysis analysis)
return false;
}

/// <summary>
/// Checks whether any available writing-system alternative of multiString
/// has text exactly equal to formString.
/// </summary>
private bool EquivalentFormString(IMultiString multiString, string formString)
{
    bool found = false;
    foreach (int wsId in multiString.AvailableWritingSystemIds)
    {
        string alternativeText = multiString.get_String(wsId).Text;
        if (string.Equals(alternativeText, formString))
        {
            // One matching alternative is enough.
            found = true;
            break;
        }
    }
    return found;
}

public override int GetHashCode()
{
int code = 23;
Expand All @@ -145,17 +156,24 @@ public class ParseMorph : IEquatable<ParseMorph>
private readonly IMoForm m_form;
private readonly IMoMorphSynAnalysis m_msa;
private readonly ILexEntryInflType m_inflType;
private readonly string m_guessedString;

/// <summary>
/// Creates a morph from a form and an MSA; the inflection type is passed on as null.
/// </summary>
public ParseMorph(IMoForm form, IMoMorphSynAnalysis msa)
: this(form, msa, null)
{
}

/// <summary>
/// Creates a morph from a form, an MSA and an inflection type; the guessed string is passed on as null.
/// </summary>
public ParseMorph(IMoForm form, IMoMorphSynAnalysis msa, ILexEntryInflType inflType)
: this(form, msa, inflType, null)
{
}

/// <summary>
/// Creates a morph, storing all four values as given.
/// </summary>
/// <param name="form">The form of the morph.</param>
/// <param name="msa">The morphosyntactic analysis of the morph.</param>
/// <param name="inflType">The inflection type; the shorter overloads pass null.</param>
/// <param name="guessedString">The guessed string; the shorter overloads pass null.</param>
public ParseMorph(IMoForm form, IMoMorphSynAnalysis msa, ILexEntryInflType inflType, string guessedString)
{
m_form = form;
m_msa = msa;
m_inflType = inflType;
m_guessedString = guessedString;
}

public IMoForm Form
Expand All @@ -173,14 +191,22 @@ public ILexEntryInflType InflType
get { return m_inflType; }
}

/// <summary>
/// Gets the guessed string supplied at construction (null when none was supplied).
/// </summary>
public string GuessedString => m_guessedString;

/// <summary>
/// Gets whether the form and MSA are valid objects and the inflection type,
/// if there is one, is a valid object as well.
/// </summary>
public bool IsValid
{
    get
    {
        if (!Form.IsValidObject || !Msa.IsValidObject)
            return false;
        // A missing inflection type does not invalidate the morph.
        return m_inflType == null || m_inflType.IsValidObject;
    }
}

/// <summary>
/// Two morphs are equal when they have the same form, MSA, inflection type
/// and guessed string.
/// </summary>
/// <param name="other">The morph to compare with; null is never equal.</param>
/// <returns>True if all four components match; otherwise false.</returns>
public bool Equals(ParseMorph other)
{
    // IEquatable<T>.Equals should answer false for null rather than throw.
    if (ReferenceEquals(other, null))
        return false;
    return m_form == other.m_form
        && m_msa == other.m_msa
        && m_inflType == other.m_inflType
        && m_guessedString == other.m_guessedString;
}

public override bool Equals(object obj)
Expand All @@ -195,6 +221,7 @@ public override int GetHashCode()
code = code * 31 + m_form.Guid.GetHashCode();
code = code * 31 + m_msa.Guid.GetHashCode();
code = code * 31 + (m_inflType == null ? 0 : m_inflType.Guid.GetHashCode());
code = code * 31 + (m_guessedString == null ? 0 : m_guessedString.GetHashCode());
return code;
}
}
Expand Down
Loading
Loading