Skip to content

Commit

Permalink
NLP-ENGINE-422 Added ambig and meaning logic
Browse files Browse the repository at this point in the history
Signed-off-by: dehilsterlexis <[email protected]>
  • Loading branch information
dehilsterlexis committed Jul 11, 2024
1 parent 0ed18f6 commit 365e335
Show file tree
Hide file tree
Showing 16 changed files with 415 additions and 24 deletions.
78 changes: 60 additions & 18 deletions cs/libconsh/cg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ All rights reserved.
#include "cmd.h"
#include "dyn.h" // 06/29/00 AM.
#include "lite/dir.h"
#include "lite/io.h"

#include "prim/unicu.h"
using namespace unicu;
Expand Down Expand Up @@ -628,37 +629,41 @@ _stprintf(path, _T("%s%c%s%c%s"), getAppdir(),DIR_CH, kbdir,DIR_CH, dir);
_TCHAR infile[MAXPATH*2];
_TCHAR *suff;
suff = _T("kb"); // Kb file suffix.
std::vector<std::filesystem::path> files;
std::vector<std::filesystem::path> kbfiles;
std::vector<std::filesystem::path> dictfiles;
bool kbLoaded = false;
bool bound = false;

if (openDict(files)) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
con_add_root(this);
bind_sys(this);
bound = true;
readDicts(files);
outputTime(_T("[READ dict files time="),s_time);
s_time = clock();
kbLoaded = true;
}
openKBB(kbfiles);
openDict(dictfiles);

if (openKBB(files)) {
if (kbfiles.size() > 0) {
if (!bound) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
bind_sys(this);
con_add_root(this);
}
readKBBs(files);
readKBBs(kbfiles);
outputTime(_T("[READ kbb files time="),s_time);
s_time = clock();
kbLoaded = true;
}

if (dictfiles.size() > 0) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
con_add_root(this);
bind_sys(this);
bound = true;
readDicts(dictfiles,kbfiles);
outputTime(_T("[READ dict files time="),s_time);
s_time = clock();
kbLoaded = true;
}

if (!kbLoaded) {

// Using a master take file for readin kb. // 07/01/03 AM.
Expand Down Expand Up @@ -3481,6 +3486,31 @@ if (!(word = kbm_->dict_find_word(str)) )
return word;
}

/********************************************
* FN:		MATCHDICTKB
* SUBJ:	Find the KB "dictionary" concept for a dict file that has a same-named KB file.
* RET:	The concept named "dictionary" under the root concept if some entry of
*			kbfiles has the same file head as dictFilename and that concept exists;
*			else NULL.
* NOTE:	file_head() receives private copies of the names because it is handed a
*			writable buffer and returns a pointer into it.
*			Names that do not fit in a MAXSTR buffer are treated as "no match"
*			rather than being copied past the end of the buffer.
********************************************/
CONCEPT *CG::matchDictKB(std::string dictFilename, std::vector<std::filesystem::path> kbfiles) {
	if (kbfiles.empty()) return NULL;

	const std::size_t bufCapacity = static_cast<std::size_t>(MAXSTR);

	// The dictionary name must fit (with terminator) in the work buffer.
	if (dictFilename.length() >= bufCapacity) return NULL;

	_TCHAR buff[MAXSTR], buffkb[MAXSTR];
	_TCHAR *head = NULL, *headkb = NULL;

	_tcscpy(buff, dictFilename.c_str());
	if (!file_head(buff, head) || !head)
		return NULL;

	for (const std::filesystem::path &kbfile : kbfiles) {
		const std::string kbname = kbfile.string();
		if (kbname.length() >= bufCapacity)
			continue;		// Too long for the work buffer: cannot be compared safely.

		_tcscpy(buffkb, kbname.c_str());
		headkb = NULL;
		// Take the head of the KB file name (buffkb), not of the dictionary
		// buffer, so the comparison below really is dict-vs-kb.
		if (!file_head(buffkb, headkb) || !headkb)
			continue;

		if (!_tcscmp(head, headkb)) {
			CONCEPT *root = findRoot();
			CONCEPT *dictcon = findConcept(root, _T("dictionary"));
			if (dictcon)
				return dictcon;
		}
	}
	return NULL;
}

bool CG::openDict(std::vector<std::filesystem::path>& files) {
bool found = false;
files.clear();
Expand All @@ -3501,16 +3531,16 @@ bool CG::openDict(std::vector<std::filesystem::path>& files) {
return found;
}

bool CG::readDicts(std::vector<std::filesystem::path> files) {
bool CG::readDicts(std::vector<std::filesystem::path> files, std::vector<std::filesystem::path> kbfiles) {
std::vector<std::filesystem::path>::iterator ptr;
if (files.size() == 0) return false;
for (ptr = files.begin(); ptr < files.end(); ptr++) {
readDict(ptr->string());
readDict(ptr->string(), kbfiles);
}
return true;
}

bool CG::readDict(std::string file) {
bool CG::readDict(std::string file, std::vector<std::filesystem::path> kbfiles) {
bool dirty;
CONCEPT *wordCon, *parentCon;
_TCHAR buf[MAXMSG];
Expand All @@ -3520,6 +3550,8 @@ bool CG::readDict(std::string file) {
int lineCount = 0;
DICT_CALL caller;

CONCEPT *ambigKB = matchDictKB(file, kbfiles);

// For error printouts
std::size_t botDirPos = file.find_last_of(DIR_CH);
std::string filename = file.substr(botDirPos+1, file.length()-2);
Expand Down Expand Up @@ -3705,6 +3737,16 @@ bool CG::readDict(std::string file) {
} else {
parentCon = wordCon;
}
// add ambiguous concept to word concept
if (ambigKB) {
CONCEPT *kbcon = findConcept(ambigKB,token);
if (kbcon) {
CONCEPT *ambigCon = NULL;
if (!findVal(wordCon,"meaning",ambigCon)) {
addVal(wordCon,_T("meaning"),kbcon);
}
}
}

} else if (cc == '=') {
int donothing = 1;
Expand Down
8 changes: 5 additions & 3 deletions include/Api/consh/cg.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ All rights reserved.

#include <vector>
#include <filesystem>
#include "lite/global.h"

#define FNAMESIZ 256
#define NAMESIZ 256
Expand Down Expand Up @@ -212,9 +213,10 @@ class LIBCONSH_API CG
// Find dictionary concept for given string. // 06/29/03 AM.
CONCEPT *findWordConcept(_TCHAR*); // 06/29/03 AM.

CONCEPT *matchDictKB(std::string dictFilename, std::vector<std::filesystem::path> kbfiles);
bool openDict(std::vector<std::filesystem::path>& files);
bool readDicts(std::vector<std::filesystem::path> files);
bool readDict(std::string file);
bool readDicts(std::vector<std::filesystem::path> files, std::vector<std::filesystem::path> kbfiles);
bool readDict(std::string file, std::vector<std::filesystem::path> kbfiles);

bool openKBB(std::vector<std::filesystem::path>& files);
bool readKBBs(std::vector<std::filesystem::path> files);
Expand Down Expand Up @@ -608,7 +610,7 @@ class LIBCONSH_API CG

private:
_TCHAR appdir_[FNAMESIZ]; // Base directory path.
_TCHAR kbdir_[FNAMESIZ];
_TCHAR kbdir_[MAXPATH];

std::_t_ifstream allDictStream_;

Expand Down
1 change: 1 addition & 0 deletions include/Api/lite/Arun.h
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,7 @@ class LITE_API Arun
static RFASem *pnprev(Nlppp*,NODE*); // 01/08/01 AM.
static RFASem *pnprev(Nlppp*,RFASem*); // 04/29/01 AM.

static bool pnremoveval(Nlppp*,NODE*,_TCHAR*);
static _TCHAR *pnrename(Nlppp*,NODE*,_TCHAR*); // 01/08/01 AM.
static _TCHAR *pnrename(Nlppp*,RFASem*,_TCHAR*); // 04/28/01 AM.
static _TCHAR *pnrename(Nlppp*,NODE*,RFASem*); // 04/28/01 AM.
Expand Down
95 changes: 95 additions & 0 deletions include/Api/lite/io.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*******************************************************************************
Copyright (c) 2001-2010 by Text Analysis International, Inc.
All rights reserved.
********************************************************************************
*
* NAME: IO.H
* FILE: lite\io.h
* CR: 10/06/98 AM.
* SUBJ: Declarations for I/O functions.
*
*******************************************************************************/

//#include "fstream.h"

#ifndef LITE_IO_H_
#define LITE_IO_H_

// Include guard: pretty_str() and c_str() below declare default arguments,
// and a default argument may not be specified again in a later declaration in
// the same scope.  Without the guard, including this header twice in one
// translation unit would not compile.
//
// NOTE: This header includes nothing itself.  _TCHAR and std::_t_filebuf must
// already be declared by the including file before this header is seen.
//
// Declarations only; the definitions are not in this header.

// ---- Whole-file helpers ----------------------------------------------------
void copy_file(const _TCHAR *, const _TCHAR *);
bool file_exists(const _TCHAR *iname); // 12/14/98 AM.
void file_to_buffer(const _TCHAR *, _TCHAR *,
/*UP*/ long &len // 05/28/00 /AM.
);

// ---- Character / string prettifying ----------------------------------------
_TCHAR *pretty_char(_TCHAR);
_TCHAR *pretty_str(
_TCHAR *str, // String to be prettified.
_TCHAR *buf, // Buffer for placing prettified string.
long size = -1 // Buffer size. (-1 means count not used.)
);
// NOTE(review): the parameter comments on c_str() match pretty_str() word for
// word; presumably c_char()/c_str() produce a C-source form -- confirm against
// the implementation.
_TCHAR *c_char(_TCHAR,_TCHAR*); // 05/10/00 AM.
_TCHAR *c_str( // 05/10/00 AM.
_TCHAR *str, // String to be prettified.
_TCHAR *buf, // Buffer for placing prettified string.
long size = -1 // Buffer size. (-1 means count not used.)
);


// Read a file by name; the length and a created buffer come back through the
// two reference parameters.
// NOTE(review): who frees the created buffer is not stated here -- confirm.
void read_file(
_TCHAR *fname, // The filename
/*UP*/
long &len, // Length of file.
_TCHAR* &buf // Buffer to create.
)
;

// ---- Tokenizing and string utilities ---------------------------------------
_TCHAR *next_token(/*DU*/ _TCHAR* &buf, bool &eol, _TCHAR *comment);

bool fix_file_name(
_TCHAR *file, // Buffer big enough to hold extension.
_TCHAR *suff // File name extension needed.
)
;

bool eq_str_range(_TCHAR *str, _TCHAR *ptr, long start, long end);

_TCHAR *make_str(_TCHAR *str);
_TCHAR *make_str(_TCHAR *str, long len);

// Concatenation helpers; ptr and count are passed by reference (marked DU).
bool strcat_e(
/*DU*/ _TCHAR* &ptr,
/*DN*/ _TCHAR *str,
/*DU*/ long &count
);


bool strncat_e(
/*DU*/ _TCHAR* &ptr,
/*DN*/ _TCHAR *str,
/*DN*/ long len,
/*DU*/ long &count
);

std::_t_filebuf *directOutput(_TCHAR *fname);

// ---- File-path decomposition -----------------------------------------------
// Each function takes a buffer holding a full file string and passes back,
// through its second parameter, a pointer to one part "in buffer".
// NOTE(review): callers should assume the buffer may be written to (the
// parameter is non-const) -- confirm against the implementation.
bool file_name( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fname // Pointer to the name in buffer.
);
bool file_path( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fpath // Pointer to the path in buffer.
);
bool file_parent(
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fpath // Pointer to the path in buffer.
);
bool file_head( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fhead // Pointer to the file head in buffer.
);
bool file_tail( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &ftail // Pointer to tail in buffer.
);

#endif // LITE_IO_H_
20 changes: 19 additions & 1 deletion lite/dicttok.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,11 @@ inline bool DICTTok::findAttrs(Node<Pn> *node, CONCEPT *con, _TCHAR *str, bool i
} else if (cg_->isValNum(vals)) {
long long num = 0L;
cg_->popVal(vals,num);
replaceNum(node,strattr,num);
replaceNum(node,strattr,num);
} else if (cg_->isValCon(vals)) {
CONCEPT *con = NULL;
cg_->popVal(vals,con);
replaceCon(node,strattr,con);
} else
cg_->nextVal(vals);
}
Expand Down Expand Up @@ -991,6 +995,20 @@ return Ivar::nodeReplaceval(pn, name, val);
}


/********************************************
* FN:		REPLACECON
* SUBJ:	Set a concept value on a node's variable.
* RET:	False if no node is given; otherwise whatever
*			Ivar::nodeReplaceval returns for the node's data.
********************************************/
inline bool DICTTok::replaceCon(
	Node<Pn> *node,
	_TCHAR *name,		// variable name.
	CONCEPT *con
	)
{
if (node)
	{
	// Hand the node's Pn data, the variable name and the concept to Ivar.
	Pn *data = node->getData();
	return Ivar::nodeReplaceval(data, name, con);
	}

return false;
}

/********************************************
* FN: POPSVAL
* CR: 08/01/11 AM.
Expand Down
5 changes: 5 additions & 0 deletions lite/dicttok.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ class DICTTok : public Algo
_TCHAR *name, // variable name.
_TCHAR *str
);
inline bool replaceCon(
Node<Pn> *node,
_TCHAR *name,
CONCEPT *con
);
inline _TCHAR *popsval(
VAL *val
);
Expand Down
Loading

0 comments on commit 365e335

Please sign in to comment.