From 365e335c4b0d9dc9760aa7539ddb7351463275c4 Mon Sep 17 00:00:00 2001 From: dehilsterlexis Date: Wed, 10 Jul 2024 21:31:44 -0400 Subject: [PATCH] NLP-ENGINE-422 Added ambig and meaning logic Signed-off-by: dehilsterlexis --- cs/libconsh/cg.cpp | 78 +++++++++++++++++++++++++-------- include/Api/consh/cg.h | 8 ++-- include/Api/lite/Arun.h | 1 + include/Api/lite/io.h | 95 +++++++++++++++++++++++++++++++++++++++++ lite/dicttok.cpp | 20 ++++++++- lite/dicttok.h | 5 +++ lite/fn.cpp | 74 ++++++++++++++++++++++++++++++++ lite/fn.h | 1 + lite/fnrun.cpp | 27 ++++++++++++ lite/func_defs.h | 1 + lite/funcs.h | 1 + lite/ivar.cpp | 46 ++++++++++++++++++++ lite/ivar.h | 2 + lite/var.cpp | 67 +++++++++++++++++++++++++++++ lite/var.h | 11 ++++- nlp/main.cpp | 2 +- 16 files changed, 415 insertions(+), 24 deletions(-) create mode 100644 include/Api/lite/io.h diff --git a/cs/libconsh/cg.cpp b/cs/libconsh/cg.cpp index f0396c3a..ed9e7ffb 100644 --- a/cs/libconsh/cg.cpp +++ b/cs/libconsh/cg.cpp @@ -53,6 +53,7 @@ All rights reserved. #include "cmd.h" #include "dyn.h" // 06/29/00 AM. #include "lite/dir.h" +#include "lite/io.h" #include "prim/unicu.h" using namespace unicu; @@ -628,24 +629,15 @@ _stprintf(path, _T("%s%c%s%c%s"), getAppdir(),DIR_CH, kbdir,DIR_CH, dir); _TCHAR infile[MAXPATH*2]; _TCHAR *suff; suff = _T("kb"); // Kb file suffix. -std::vector files; +std::vector kbfiles; +std::vector dictfiles; bool kbLoaded = false; bool bound = false; -if (openDict(files)) { - _stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff); - if (!readFile(infile)) - return false; - con_add_root(this); - bind_sys(this); - bound = true; - readDicts(files); - outputTime(_T("[READ dict files time="),s_time); - s_time = clock(); - kbLoaded = true; -} +openKBB(kbfiles); +openDict(dictfiles); -if (openKBB(files)) { +if (kbfiles.size() > 0) { if (!bound) { _stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff); if (!readFile(infile)) @@ -653,12 +645,25 @@ if (openKBB(files)) { bind_sys(this); con_add_root(this); } - readKBBs(files); + readKBBs(kbfiles); outputTime(_T("[READ kbb files time="),s_time); s_time = clock(); kbLoaded = true; } +if (dictfiles.size() > 0) { + _stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff); + if (!readFile(infile)) + return false; + con_add_root(this); + bind_sys(this); + bound = true; + readDicts(dictfiles,kbfiles); + outputTime(_T("[READ dict files time="),s_time); + s_time = clock(); + kbLoaded = true; +} + if (!kbLoaded) { // Using a master take file for readin kb. // 07/01/03 AM. @@ -3481,6 +3486,31 @@ if (!(word = kbm_->dict_find_word(str)) ) return word; } +CONCEPT *CG::matchDictKB(std::string dictFilename, std::vector kbfiles) { + if (kbfiles.size() == 0) return NULL; + std::vector::iterator ptr; + + _TCHAR buff[MAXSTR], buffkb[MAXSTR]; + _TCHAR *head, *headkb; + CONCEPT *con, *dictcon; + _tcscpy(buff, dictFilename.c_str()); + file_head(buff, head); + + for (ptr = kbfiles.begin(); ptr < kbfiles.end(); ptr++) { + _tcscpy(buffkb, ptr->string().c_str()); + file_head(buff, headkb); + + if (!_tcscmp(head,headkb)) { + con = findRoot(); + dictcon = findConcept(con,"dictionary"); + if (dictcon) { + return dictcon; + } + } + } + return NULL; +} + bool CG::openDict(std::vector& files) { bool found = false; files.clear(); @@ -3501,16 +3531,16 @@ bool CG::openDict(std::vector& files) { return found; } -bool CG::readDicts(std::vector files) { +bool CG::readDicts(std::vector files, std::vector kbfiles) { std::vector::iterator ptr; if (files.size() == 0) return false; for (ptr = files.begin(); ptr < files.end(); ptr++) { - readDict(ptr->string()); + readDict(ptr->string(), kbfiles); } return true; } -bool CG::readDict(std::string file) { +bool CG::readDict(std::string file, std::vector kbfiles) { bool dirty; CONCEPT *wordCon, *parentCon; _TCHAR buf[MAXMSG]; @@ -3520,6 +3550,8 @@ bool CG::readDict(std::string file) { int lineCount = 0; DICT_CALL caller; + CONCEPT *ambigKB = matchDictKB(file, kbfiles); + // For error printouts std::size_t botDirPos = file.find_last_of(DIR_CH); std::string filename = file.substr(botDirPos+1, file.length()-2); @@ -3705,6 +3737,16 @@ bool CG::readDict(std::string file) { } else { parentCon = wordCon; } + // add ambigous concept to word concept + if (ambigKB) { + CONCEPT *kbcon = findConcept(ambigKB,token); + if (kbcon) { + CONCEPT *ambigCon = NULL; + if (!findVal(wordCon,"meaning",ambigCon)) { + addVal(wordCon,_T("meaning"),kbcon); + } + } + } } else if (cc == '=') { int donothing = 1; diff --git a/include/Api/consh/cg.h b/include/Api/consh/cg.h index d7168a4d..d5771ae6 100644 --- a/include/Api/consh/cg.h +++ b/include/Api/consh/cg.h @@ -25,6 +25,7 @@ All rights reserved. #include #include +#include "lite/global.h" #define FNAMESIZ 256 #define NAMESIZ 256 @@ -212,9 +213,10 @@ class LIBCONSH_API CG // Find dictionary concept for given string. // 06/29/03 AM. CONCEPT *findWordConcept(_TCHAR*); // 06/29/03 AM. + CONCEPT *matchDictKB(std::string dictFilename, std::vector kbfiles); bool openDict(std::vector& files); - bool readDicts(std::vector files); - bool readDict(std::string file); + bool readDicts(std::vector files, std::vector kbfiles); + bool readDict(std::string file, std::vector kbfiles); bool openKBB(std::vector& files); bool readKBBs(std::vector files); @@ -608,7 +610,7 @@ class LIBCONSH_API CG private: _TCHAR appdir_[FNAMESIZ]; // Base directory path. - _TCHAR kbdir_[FNAMESIZ]; + _TCHAR kbdir_[MAXPATH]; std::_t_ifstream allDictStream_; diff --git a/include/Api/lite/Arun.h b/include/Api/lite/Arun.h index 9825f542..be42883c 100644 --- a/include/Api/lite/Arun.h +++ b/include/Api/lite/Arun.h @@ -1283,6 +1283,7 @@ class LITE_API Arun static RFASem *pnprev(Nlppp*,NODE*); // 01/08/01 AM. static RFASem *pnprev(Nlppp*,RFASem*); // 04/29/01 AM. + static bool pnremoveval(Nlppp*,NODE*,_TCHAR*); static _TCHAR *pnrename(Nlppp*,NODE*,_TCHAR*); // 01/08/01 AM. static _TCHAR *pnrename(Nlppp*,RFASem*,_TCHAR*); // 04/28/01 AM. static _TCHAR *pnrename(Nlppp*,NODE*,RFASem*); // 04/28/01 AM. diff --git a/include/Api/lite/io.h b/include/Api/lite/io.h new file mode 100644 index 00000000..09fed755 --- /dev/null +++ b/include/Api/lite/io.h @@ -0,0 +1,95 @@ +/******************************************************************************* +Copyright (c) 2001-2010 by Text Analysis International, Inc. +All rights reserved. +******************************************************************************** +* +* NAME: IO.H +* FILE: lite\io.h +* CR: 10/06/98 AM. +* SUBJ: Declares for I/O functions. +* +*******************************************************************************/ + +//#include "fstream.h" + +void copy_file(const _TCHAR *, const _TCHAR *); +bool file_exists(const _TCHAR *iname); // 12/14/98 AM. +void file_to_buffer(const _TCHAR *, _TCHAR *, + /*UP*/ long &len // 05/28/00 /AM. + ); +_TCHAR *pretty_char(_TCHAR); +_TCHAR *pretty_str( + _TCHAR *str, // String to be prettified. + _TCHAR *buf, // Buffer for placing prettified string. + long size = -1 // Buffer size. (-1 means count not used.) + ); +_TCHAR *c_char(_TCHAR,_TCHAR*); // 05/10/00 AM. +_TCHAR *c_str( // 05/10/00 AM. + _TCHAR *str, // String to be prettified. + _TCHAR *buf, // Buffer for placing prettified string. + long size = -1 // Buffer size. (-1 means count not used.) + ); + + +void read_file( + _TCHAR *fname, // The filename + /*UP*/ + long &len, // Length of file. + _TCHAR* &buf // Buffer to create. + ) + ; + +_TCHAR *next_token(/*DU*/ _TCHAR* &buf, bool &eol, _TCHAR *comment); + +bool fix_file_name( + _TCHAR *file, // Buffer big enough to hold extension. + _TCHAR *suff // File name extension needed. + ) + ; + +bool eq_str_range(_TCHAR *str, _TCHAR *ptr, long start, long end); + +_TCHAR *make_str(_TCHAR *str); +_TCHAR *make_str(_TCHAR *str, long len); + +bool strcat_e( + /*DU*/ _TCHAR* &ptr, + /*DN*/ _TCHAR *str, + /*DU*/ long &count + ); + + +bool strncat_e( + /*DU*/ _TCHAR* &ptr, + /*DN*/ _TCHAR *str, + /*DN*/ long len, + /*DU*/ long &count + ); + +std::_t_filebuf *directOutput(_TCHAR *fname); + +bool file_name( // 12/24/99 AM. + _TCHAR *file, // Buffer with full file string. + /*UP*/ + _TCHAR* &fname // Pointer to the name in buffer. + ); +bool file_path( // 12/24/99 AM. + _TCHAR *file, // Buffer with full file string. + /*UP*/ + _TCHAR* &fpath // Pointer to the path in buffer. + ); +bool file_parent( + _TCHAR *file, // Buffer with full file string. + /*UP*/ + _TCHAR* &fpath // Pointer to the path in buffer. + ); +bool file_head( // 12/24/99 AM. + _TCHAR *file, // Buffer with full file string. + /*UP*/ + _TCHAR* &fhead // Pointer to the file head in buffer. + ); +bool file_tail( // 12/24/99 AM. + _TCHAR *file, // Buffer with full file string. + /*UP*/ + _TCHAR* &ftail // Pointer to tail in buffer. + ); diff --git a/lite/dicttok.cpp b/lite/dicttok.cpp index ced6181a..b736a6ac 100644 --- a/lite/dicttok.cpp +++ b/lite/dicttok.cpp @@ -919,7 +919,11 @@ inline bool DICTTok::findAttrs(Node *node, CONCEPT *con, _TCHAR *str, bool i } else if (cg_->isValNum(vals)) { long long num = 0L; cg_->popVal(vals,num); - replaceNum(node,strattr,num); + replaceNum(node,strattr,num); + } else if (cg_->isValCon(vals)) { + CONCEPT *con = NULL; + cg_->popVal(vals,con); + replaceCon(node,strattr,con); } else cg_->nextVal(vals); } @@ -991,6 +995,20 @@ return Ivar::nodeReplaceval(pn, name, val); } +inline bool DICTTok::replaceCon( + Node *node, + _TCHAR *name, // variable name. + CONCEPT *con + ) +{ +if (!node) + return false; + +Pn *pn = node->getData(); + +return Ivar::nodeReplaceval(pn, name, con); +} + /******************************************** * FN: POPSVAL * CR: 08/01/11 AM. diff --git a/lite/dicttok.h b/lite/dicttok.h index 2db58f88..c73ff05f 100644 --- a/lite/dicttok.h +++ b/lite/dicttok.h @@ -122,6 +122,11 @@ class DICTTok : public Algo _TCHAR *name, // variable name. _TCHAR *str ); + inline bool replaceCon( + Node *node, + _TCHAR *name, + CONCEPT *con + ); inline _TCHAR *popsval( VAL *val ); diff --git a/lite/fn.cpp b/lite/fn.cpp index be3bf5ae..40b626de 100644 --- a/lite/fn.cpp +++ b/lite/fn.cpp @@ -480,6 +480,8 @@ switch (fnid) // 12/21/01 AM. return fnPnprev(args,nlppp,/*UP*/sem); // 10/18/00 AM. case FNpnpushval: return fnPnpushval(args,nlppp,/*UP*/sem); // 12/12/14 AM. + case FNpnremoveval: + return fnPnremoveval(args,nlppp,/*UP*/sem); case FNpnreplaceval: return fnPnreplaceval(args,nlppp,/*UP*/sem); // 06/27/01 AM. case FNpnroot: @@ -9819,6 +9821,78 @@ return false; // 12/15/14 AM. } +/******************************************** +* FN: FNPNREMOVEVAL +* CR: 07/09/24 +* SUBJ: Remove value of a PNODE variable. +* RET: True if ok, else false. +* FORMS: pnremoveval(pnode, var_str, int_val/str_val/sem_val) +********************************************/ + +bool Fn::fnPnremoveval( + Delt *args, + Nlppp *nlppp, + /*UP*/ + RFASem* &sem + ) +{ +sem = 0; +Parse *parse = nlppp->parse_; + +enum Iargtype typ; + +RFASem *sem1; +_TCHAR *name1=0; + +if (!Arg::sem1(_T("pnremoveval"),nlppp,(DELTS*&)args,sem1)) + return false; +if (!Arg::str1(_T("pnremoveval"), /*UP*/ (DELTS*&)args, name1)) + return false; +if (!Arg::done((DELTS*)args, _T("pnremoveval"),parse)) + return false; + +if (!sem1) + { + _stprintf(Errbuf,_T("[pnremoveval: Warning. Given no pnode.]")); + return parse->errOut(true); // UNFIXED + } +if (!name1) + { + _stprintf(Errbuf,_T("[pnremoveval: Warning. Given no name.]")); + return parse->errOut(true); // UNFIXED + } + +// Get object from sem. +if (sem1->getType() != RSNODE) + { + _stprintf(Errbuf,_T("[pnremoveval: Bad semantic arg.]")); + return parse->errOut(false); // UNFIXED + } + +Node *node = sem1->getNode(); + +if (!node) + { + _stprintf(Errbuf,_T("[pnremoveval: Couldn't fetch node.]")); + return parse->errOut(true); // UNFIXED + } + +if (*name1 == '$') // Get special var. + { + _stprintf(Errbuf,_T("[pnremoveval: Variable name can't start with '$'.]")); + return parse->errOut(true); // UNFIXED + } + +Pn *pn = node->getData(); + +_TCHAR *name2=0; +nlppp->parse_->internStr(name1, /*UP*/name2); // Intern str. + +Ivar::nodeRemoveval(pn, name2); +return true; +} + + /******************************************** * FN: FNPNREPLACEVAL * CR: 06/27/01 AM. diff --git a/lite/fn.h b/lite/fn.h index 843bd018..318f58b0 100644 --- a/lite/fn.h +++ b/lite/fn.h @@ -948,6 +948,7 @@ static bool fnStruniquechars( RFASem* &sem ); static bool fnPnmakevar(Delt*,Nlppp*,/*UP*/RFASem*&); // 06/26/01 AM. + static bool fnPnremoveval(Delt*,Nlppp*,/*UP*/RFASem*&); static bool fnPnreplaceval(Delt*,Nlppp*,/*UP*/RFASem*&); // 06/27/01 AM. static bool fnPnpushval(Delt*,Nlppp*,/*UP*/RFASem*&); // 12/12/14 AM. static bool fnPnrpushval(Delt*,Nlppp*,/*UP*/RFASem*&); // 12/12/14 AM. diff --git a/lite/fnrun.cpp b/lite/fnrun.cpp index 6a5e9612..699a0fa5 100644 --- a/lite/fnrun.cpp +++ b/lite/fnrun.cpp @@ -8569,6 +8569,33 @@ return pnprev(nlppp,pnode); } +bool Arun::pnremoveval( + Nlppp *nlppp, + NODE *nd, + _TCHAR *name1 + ) +{ +if (!nd || !name1 || !*name1) + return false; + +Node *node = (Node *) nd; + +if (*name1 == '$') // Get special var. + { + _stprintf(Errbuf,_T("[pnremoveval: Variable name can't start with '$'.]")); + return errOut(false); // UNFIXED + } + +_TCHAR *name2=0; // 07/24/07 AM. +nlppp->parse_->internStr(name1, /*UP*/name2); // Intern str. // 07/24/07 AM. + +Pn *pn = node->getData(); +Ivar::nodeRemoveval(pn,name2); + +return true; +} + + /******************************************** * FN: PNRENAME * CR: 01/08/01 AM. diff --git a/lite/func_defs.h b/lite/func_defs.h index 1269f986..f6bd76cc 100644 --- a/lite/func_defs.h +++ b/lite/func_defs.h @@ -168,6 +168,7 @@ enum funcDef FNpnnext, FNpnprev, FNpnpushval, // 12/12/14 AM. + FNpnremoveval, FNpnrename, FNpnreplaceval, FNpnroot, diff --git a/lite/funcs.h b/lite/funcs.h index cfc005da..e3ba18e0 100644 --- a/lite/funcs.h +++ b/lite/funcs.h @@ -162,6 +162,7 @@ _T("pnname"), _T("pnnext"), _T("pnprev"), _T("pnpushval"), // 12/12/14 AM. +_T("pnremoveval"), _T("pnrename"), _T("pnreplaceval"), _T("pnroot"), diff --git a/lite/ivar.cpp b/lite/ivar.cpp index fe09a286..3733388b 100644 --- a/lite/ivar.cpp +++ b/lite/ivar.cpp @@ -2330,6 +2330,52 @@ return false; } +bool Ivar::nodeRemoveval( + Pn *pn, + _TCHAR *name + ) +{ +if (!pn || !name || !*name) + return false; + +Dlist *dlist = pn->getDsem(); +if (!dlist) + dlist = new Dlist(); // Empty list. + +int argCount = 0; +if (!Var::rmVal(name, dlist, argCount)) + return false; + +pn->setDsem(dlist); +return true; +} + + +bool Ivar::nodeReplaceval( + Pn *pn, + _TCHAR *name, + CONCEPT *con, + bool bRM, + bool bPUSH + ) +{ +if (!pn || !name || !*name || !con) + return false; + +Dlist *dlist = pn->getDsem(); +if (!dlist) + dlist = new Dlist(); // Empty list. + +RFASem *semval = new RFASem(con,RS_KBCONCEPT); + +if (!Var::setVal(name, semval,bRM,bPUSH,dlist)) + return false; + +pn->setDsem(dlist); +return true; +} + + /******************************************** * FN: NODEREPLACEVAL * CR: 06/26/01 AM. diff --git a/lite/ivar.h b/lite/ivar.h index 78d44f6f..e4e85faf 100644 --- a/lite/ivar.h +++ b/lite/ivar.h @@ -206,7 +206,9 @@ class Ivar static bool nodeVarEQ(Pn*,_TCHAR*,_TCHAR*); // 06/16/05 AM. static bool nodeVarEQ(Pn*,_TCHAR*,long long); // 06/16/05 AM. static bool nodeVarGTLT(Pn*,_TCHAR*,long long,bool=false); + static bool nodeRemoveval(Pn*,_TCHAR*); static bool nodeReplaceval(Pn*,_TCHAR*,RFASem*,bool=true,bool=false); // 06/26/01 AM. + static bool nodeReplaceval(Pn*,_TCHAR*,CONCEPT*,bool=true,bool=false); static bool nodeReplaceval(Pn*,_TCHAR*,Dlist*); // 11/14/02 AM. static bool nodeReplaceval(Pn*,_TCHAR*,_TCHAR*,bool=true,bool=false); // 06/26/01 AM. static bool nodeReplaceval(Pn*,_TCHAR*,long long,bool=true,bool=false); // 06/26/01 AM. diff --git a/lite/var.cpp b/lite/var.cpp index 77a7e844..d05a4c64 100644 --- a/lite/var.cpp +++ b/lite/var.cpp @@ -1351,6 +1351,29 @@ else return true; } + +/******************************************** +* FN: RMVAL +* CR: 07/09/24 Dd. +* SUBJ: Remove a value +* NOTE: +********************************************/ + +bool Var::rmVal( + _TCHAR *name, + Dlist* &dlist, + int &argCount + ) +{ +if (!name) + return false; + +Ipair *pairx = 0; +Var::remove(name, dlist); + +return true; +} + ////// /******************************************** @@ -1689,6 +1712,50 @@ return true; // Didn't find it. That's ok too. } +/******************************************** +* FN: remove +* CR: 07/10/24 Dd. +* SUBJ: Rmove variable from a list. +* RET: True if ok, else false. +* NOTE: +********************************************/ + +bool Var::remove( + _TCHAR *name, // Variable name to find. + Dlist *dlist // List of variable-value pairs. + ) +{ +if (!name) + { + std::_t_strstream gerrStr; + gerrStr << _T("[Var::find: given null name.]") << std::ends; + errOut(&gerrStr,false); + return false; + } + +Delt *delt = 0; +Ipair *pr = 0; +if (dlist) + { + for (delt = dlist->getFirst(); delt; delt = delt->Right()) + { + pr = delt->getData(); + if (!_tcscmp(name, pr->getKey())) + { + Delt *left = delt->Left(); + Delt *right = delt->Right(); + left->setRight(delt->Right()); + right->setLeft(delt->Left()); + delete delt; + return true; + } + } + } + +return true; // Didn't find it. That's ok too. +} + + /******************************************** * FN: VAL * CR: 12/08/98 AM. diff --git a/lite/var.h b/lite/var.h index afaefcf5..a9fa10fb 100644 --- a/lite/var.h +++ b/lite/var.h @@ -97,9 +97,15 @@ class Var /*DU*/ Dlist* &dlist ); + static bool delVar( // 02/22/00 AM. + _TCHAR *name, + Dlist* &dlist + ); + static bool setVal(_TCHAR*,long long,bool,bool,/*DU*/Dlist*&); // 06/26/01 AM. static bool setVal(_TCHAR*,float,bool,bool,/*DU*/Dlist*&); // 08/18/01 AM. + static bool rmVal(_TCHAR*,Dlist*&,int&); static bool setVal(Ipair*,long long); // 11/15/99 AM. static bool setVal(Ipair*,float); // 08/18/01 AM. @@ -108,7 +114,6 @@ class Var static bool setVal(Ipair*,RFASem*); // 02/22/00 AM. static bool setVal(Ipair*,Dlist*); // 08/10/02 AM. - static bool pushVal( // 12/14/14 AM. _TCHAR *name, _TCHAR *str, @@ -127,6 +132,10 @@ class Var /*DU*/ Ipair* &pair // Pair found or null. ); + static bool remove( + _TCHAR *name, // Variable name to find. + Dlist *dlist // List of variable-value pairs. + ); static bool val( // 08/03/99 AM. _TCHAR *name, Dlist *dlist, /*DU*/ _TCHAR *str); static bool val( // 08/06/99 AM. diff --git a/nlp/main.cpp b/nlp/main.cpp index b038cf3b..c5d5a2dc 100644 --- a/nlp/main.cpp +++ b/nlp/main.cpp @@ -15,7 +15,7 @@ All rights reserved. #include "lite/nlp_engine.h" #include "version.h" -#define NLP_ENGINE_VERSION "2.11.9" +#define NLP_ENGINE_VERSION "2.12.0" bool cmdReadArgs(int,_TCHAR*argv[],_TCHAR*&,_TCHAR*&,_TCHAR*&,_TCHAR*&,bool&,bool&,bool&); void cmdHelpargs(_TCHAR*);