Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NLP-ENGINE-422 Added ambig and meaning logic #499

Merged
merged 1 commit into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
"version": "0.2.0",
"configurations": [
{
"name": "Corp Test (linux)",
"name": "Ambig (linux)",
"type": "cppdbg",
"request": "launch",
"program": "/home/dehilster/nlp-engine/bin/nlp",
"args": ["-ANA","/home/dehilster/analyzers/Test","-WORK","/home/dehilster/nlp-engine/","/home/dehilster/analyzers/Test/input/após.txt","-DEV"],
"args": ["-ANA","/home/dehilster/analyzers/Ambig","-WORK","/home/dehilster/nlp-engine/","/home/dehilster/analyzers/Ambig/input/após.txt","-DEV"],
"stopAtEntry": false,
"cwd": "/home/dehilster/nlp-engine/",
"environment": [],
Expand All @@ -23,6 +23,25 @@
}
]
},
{
"name": "Test (win)",
"type": "cppvsdbg",
"request": "launch",
"program": "C:\\dev\\nlp-engine\\bin\\Debug\\nlp.exe",
"args": ["-ANA","C:\\dev\\analyzers\\Test","-WORK","C:\\dev\\nlp-engine","C:\\dev\\analyzers\\Test\\input\\test.txt","-DEV"],
"stopAtEntry": false,
"cwd": "C:\\dev\\nlp-engine",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
]
},
{
"name": "Parse EN-US (win)",
"type": "cppvsdbg",
Expand Down
78 changes: 60 additions & 18 deletions cs/libconsh/cg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ All rights reserved.
#include "cmd.h"
#include "dyn.h" // 06/29/00 AM.
#include "lite/dir.h"
#include "lite/io.h"

#include "prim/unicu.h"
using namespace unicu;
Expand Down Expand Up @@ -628,37 +629,41 @@ _stprintf(path, _T("%s%c%s%c%s"), getAppdir(),DIR_CH, kbdir,DIR_CH, dir);
_TCHAR infile[MAXPATH*2];
_TCHAR *suff;
suff = _T("kb"); // Kb file suffix.
std::vector<std::filesystem::path> files;
std::vector<std::filesystem::path> kbfiles;
std::vector<std::filesystem::path> dictfiles;
bool kbLoaded = false;
bool bound = false;

if (openDict(files)) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
con_add_root(this);
bind_sys(this);
bound = true;
readDicts(files);
outputTime(_T("[READ dict files time="),s_time);
s_time = clock();
kbLoaded = true;
}
openKBB(kbfiles);
openDict(dictfiles);

if (openKBB(files)) {
if (kbfiles.size() > 0) {
if (!bound) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
bind_sys(this);
con_add_root(this);
}
readKBBs(files);
readKBBs(kbfiles);
outputTime(_T("[READ kbb files time="),s_time);
s_time = clock();
kbLoaded = true;
}

if (dictfiles.size() > 0) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
con_add_root(this);
bind_sys(this);
bound = true;
readDicts(dictfiles,kbfiles);
outputTime(_T("[READ dict files time="),s_time);
s_time = clock();
kbLoaded = true;
}

if (!kbLoaded) {

// Using a master take file for readin kb. // 07/01/03 AM.
Expand Down Expand Up @@ -3481,6 +3486,31 @@ if (!(word = kbm_->dict_find_word(str)) )
return word;
}

/********************************************
* FN:		MATCHDICTKB
* SUBJ:	Look up the KB "dictionary" concept for a dict file that has a
*			kbb file with the same file head.
* PARAMS:	dictFilename - full file string of the dictionary file.
*			kbfiles      - kbb files to compare against.
* RET:		The concept named "dictionary" found under the root concept when
*			some kbb file shares its file head with dictFilename; NULL when
*			kbfiles is empty, no file head matches, a file string does not
*			fit the MAXSTR scratch buffers, or the concept is not found.
* NOTE:	file_head() returns a pointer into the buffer it is handed, so the
*			dict name and each kb name must live in SEPARATE buffers.
*			NOTE(review): file_head may also modify its buffer - it is only
*			ever given scratch copies here.
********************************************/
CONCEPT *CG::matchDictKB(std::string dictFilename, std::vector<std::filesystem::path> kbfiles) {
	if (kbfiles.empty()) return NULL;

	// _tcscpy does no bounds checking; refuse names that cannot fit.
	if (dictFilename.length() >= static_cast<std::size_t>(MAXSTR)) return NULL;

	_TCHAR buff[MAXSTR], buffkb[MAXSTR];
	_TCHAR *head = NULL, *headkb = NULL;

	_tcscpy(buff, dictFilename.c_str());
	if (!file_head(buff, head) || !head) return NULL;

	for (const std::filesystem::path &kbfile : kbfiles) {
		const std::string kbname = kbfile.string();
		if (kbname.length() >= static_cast<std::size_t>(MAXSTR)) continue;

		_tcscpy(buffkb, kbname.c_str());
		headkb = NULL;
		// Take the head of the KB file name (buffkb), not of the dict name
		// again - otherwise the comparison below never looks at the kb file.
		if (!file_head(buffkb, headkb) || !headkb) continue;

		if (!_tcscmp(head, headkb)) {
			CONCEPT *con = findRoot();
			CONCEPT *dictcon = findConcept(con,"dictionary");
			if (dictcon) {
				return dictcon;
			}
		}
	}
	return NULL;
}

bool CG::openDict(std::vector<std::filesystem::path>& files) {
bool found = false;
files.clear();
Expand All @@ -3501,16 +3531,16 @@ bool CG::openDict(std::vector<std::filesystem::path>& files) {
return found;
}

bool CG::readDicts(std::vector<std::filesystem::path> files) {
bool CG::readDicts(std::vector<std::filesystem::path> files, std::vector<std::filesystem::path> kbfiles) {
std::vector<std::filesystem::path>::iterator ptr;
if (files.size() == 0) return false;
for (ptr = files.begin(); ptr < files.end(); ptr++) {
readDict(ptr->string());
readDict(ptr->string(), kbfiles);
}
return true;
}

bool CG::readDict(std::string file) {
bool CG::readDict(std::string file, std::vector<std::filesystem::path> kbfiles) {
bool dirty;
CONCEPT *wordCon, *parentCon;
_TCHAR buf[MAXMSG];
Expand All @@ -3520,6 +3550,8 @@ bool CG::readDict(std::string file) {
int lineCount = 0;
DICT_CALL caller;

CONCEPT *ambigKB = matchDictKB(file, kbfiles);

// For error printouts
std::size_t botDirPos = file.find_last_of(DIR_CH);
std::string filename = file.substr(botDirPos+1, file.length()-2);
Expand Down Expand Up @@ -3705,6 +3737,16 @@ bool CG::readDict(std::string file) {
} else {
parentCon = wordCon;
}
// add ambiguous concept to word concept
if (ambigKB) {
CONCEPT *kbcon = findConcept(ambigKB,token);
if (kbcon) {
CONCEPT *ambigCon = NULL;
if (!findVal(wordCon,"meaning",ambigCon)) {
addVal(wordCon,_T("meaning"),kbcon);
}
}
}

} else if (cc == '=') {
int donothing = 1;
Expand Down
8 changes: 5 additions & 3 deletions include/Api/consh/cg.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ All rights reserved.

#define FNAMESIZ 256
#define NAMESIZ 256
#define MAXPATH 2048
//#define PATHSIZ 512
// SIZES // 03/25/13 AM.
// Messes up concept display in Attribute Editor. // 09/14/13 AM.
Expand Down Expand Up @@ -212,9 +213,10 @@ class LIBCONSH_API CG
// Find dictionary concept for given string. // 06/29/03 AM.
CONCEPT *findWordConcept(_TCHAR*); // 06/29/03 AM.

CONCEPT *matchDictKB(std::string dictFilename, std::vector<std::filesystem::path> kbfiles);
bool openDict(std::vector<std::filesystem::path>& files);
bool readDicts(std::vector<std::filesystem::path> files);
bool readDict(std::string file);
bool readDicts(std::vector<std::filesystem::path> files, std::vector<std::filesystem::path> kbfiles);
bool readDict(std::string file, std::vector<std::filesystem::path> kbfiles);

bool openKBB(std::vector<std::filesystem::path>& files);
bool readKBBs(std::vector<std::filesystem::path> files);
Expand Down Expand Up @@ -608,7 +610,7 @@ class LIBCONSH_API CG

private:
_TCHAR appdir_[FNAMESIZ]; // Base directory path.
_TCHAR kbdir_[FNAMESIZ];
_TCHAR kbdir_[MAXPATH];

std::_t_ifstream allDictStream_;

Expand Down
1 change: 1 addition & 0 deletions include/Api/lite/Arun.h
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,7 @@ class LITE_API Arun
static RFASem *pnprev(Nlppp*,NODE*); // 01/08/01 AM.
static RFASem *pnprev(Nlppp*,RFASem*); // 04/29/01 AM.

static bool pnremoveval(Nlppp*,NODE*,_TCHAR*);
static _TCHAR *pnrename(Nlppp*,NODE*,_TCHAR*); // 01/08/01 AM.
static _TCHAR *pnrename(Nlppp*,RFASem*,_TCHAR*); // 04/28/01 AM.
static _TCHAR *pnrename(Nlppp*,NODE*,RFASem*); // 04/28/01 AM.
Expand Down
5 changes: 0 additions & 5 deletions include/Api/lite/dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ All rights reserved.
#include <filesystem>
#include <regex>

LITE_API bool plain_file(struct _tfinddata_t &fileinfo);
LITE_API bool is_dir(struct _tfinddata_t &fileinfo);
LITE_API bool is_file(_TCHAR *name);
LITE_API bool path_exists(_TCHAR *name);

LITE_API void rm_path(
_TCHAR *infile, // The path (file, directory, or directory tree).
bool tree // True if removing subdirectories recursively.
Expand Down
94 changes: 94 additions & 0 deletions include/Api/lite/io.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*******************************************************************************
Copyright (c) 2001-2010 by Text Analysis International, Inc.
All rights reserved.
********************************************************************************
*
* NAME: IO.H
* FILE: lite\io.h
* CR: 10/06/98 AM.
* SUBJ: Declares for I/O functions.
*
*******************************************************************************/

//#include "fstream.h"

// I/O, string and file-name helper declarations.
// NOTE(review): the definitions are not in this header. The descriptions added
// below are inferred from the names and the existing parameter comments only -
// confirm against the implementation before relying on them.
// The /*UP*/, /*DU*/ and /*DN*/ markers presumably tag output, in/out and
// input parameters respectively - TODO confirm.

// Takes two file names; presumably (source, destination) - TODO confirm order.
void copy_file(const _TCHAR *, const _TCHAR *);
// Presumably true when the named file exists.
bool file_exists(const _TCHAR *iname); // 12/14/98 AM.
// Takes a file name and a caller-supplied buffer; len is an UP parameter.
void file_to_buffer(const _TCHAR *, _TCHAR *,
/*UP*/ long &len // 05/28/00 /AM.
);
// Single-character counterpart of pretty_str (presumably).
_TCHAR *pretty_char(_TCHAR);
_TCHAR *pretty_str(
_TCHAR *str, // String to be prettified.
_TCHAR *buf, // Buffer for placing prettified string.
long size
);
// c_char / c_str share the shape of pretty_char / pretty_str; the parameter
// comments on c_str repeat those of pretty_str word for word.
_TCHAR *c_char(_TCHAR,_TCHAR*); // 05/10/00 AM.
_TCHAR *c_str( // 05/10/00 AM.
_TCHAR *str, // String to be prettified.
_TCHAR *buf, // Buffer for placing prettified string.
long size // Buffer size. (-1 means count not used.)
);


// Per the parameter comments: a buffer is created for the named file and
// handed back through buf, with the file length in len.
// NOTE(review): ownership/release of the created buffer is not stated here.
void read_file(
_TCHAR *fname, // The filename
/*UP*/
long &len, // Length of file.
_TCHAR* &buf // Buffer to create.
)
;

// buf is a DU parameter (pointer passed by reference); eol is passed by reference too.
_TCHAR *next_token(/*DU*/ _TCHAR* &buf, bool &eol, _TCHAR *comment);

// The caller's buffer must have room for the extension (see parameter comment).
bool fix_file_name(
_TCHAR *file, // Buffer big enough to hold extension.
_TCHAR *suff // File name extension needed.
)
;

// Presumably compares str against the [start,end] range of ptr - TODO confirm bounds.
bool eq_str_range(_TCHAR *str, _TCHAR *ptr, long start, long end);

// Two overloads: with and without an explicit length.
_TCHAR *make_str(_TCHAR *str);
_TCHAR *make_str(_TCHAR *str, long len);

// ptr and count are DU parameters, str is a DN parameter.
bool strcat_e(
/*DU*/ _TCHAR* &ptr,
/*DN*/ _TCHAR *str,
/*DU*/ long &count
);


// Same shape as strcat_e with an extra DN length parameter.
bool strncat_e(
/*DU*/ _TCHAR* &ptr,
/*DN*/ _TCHAR *str,
/*DN*/ long len,
/*DU*/ long &count
);


// File-name decomposition helpers. Each takes a buffer holding the full file
// string and sets its UP reference parameter to a pointer INTO that buffer
// (per the parameter comments), so the result is only usable while the
// buffer is.
// NOTE(review): whether these functions modify the buffer is not visible
// here - pass a scratch copy if the original string must be preserved.
bool file_name( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fname // Pointer to the name in buffer.
);
bool file_path( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fpath // Pointer to the path in buffer.
);
bool file_parent(
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fpath // Pointer to the path in buffer.
);
bool file_head( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fhead // Pointer to the file head in buffer.
);
bool file_tail( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &ftail // Pointer to tail in buffer.
);
20 changes: 19 additions & 1 deletion lite/dicttok.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,11 @@ inline bool DICTTok::findAttrs(Node<Pn> *node, CONCEPT *con, _TCHAR *str, bool i
} else if (cg_->isValNum(vals)) {
long long num = 0L;
cg_->popVal(vals,num);
replaceNum(node,strattr,num);
replaceNum(node,strattr,num);
} else if (cg_->isValCon(vals)) {
CONCEPT *con = NULL;
cg_->popVal(vals,con);
replaceCon(node,strattr,con);
} else
cg_->nextVal(vals);
}
Expand Down Expand Up @@ -991,6 +995,20 @@ return Ivar::nodeReplaceval(pn, name, val);
}


/********************************************
* FN:		REPLACECON
* SUBJ:	Hand a concept value for a named variable to Ivar::nodeReplaceval.
* RET:		False when no node is given; otherwise whatever
*			Ivar::nodeReplaceval returns for the node's data.
********************************************/

inline bool DICTTok::replaceCon(
	Node<Pn> *node,
	_TCHAR *name,		// variable name.
	CONCEPT *con
	)
{
if (node)
	{
	// Typed local keeps the same Ivar::nodeReplaceval overload selected.
	Pn *data = node->getData();
	return Ivar::nodeReplaceval(data, name, con);
	}

return false;
}

/********************************************
* FN: POPSVAL
* CR: 08/01/11 AM.
Expand Down
5 changes: 5 additions & 0 deletions lite/dicttok.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ class DICTTok : public Algo
_TCHAR *name, // variable name.
_TCHAR *str
);
inline bool replaceCon(
Node<Pn> *node,
_TCHAR *name,
CONCEPT *con
);
inline _TCHAR *popsval(
VAL *val
);
Expand Down
Loading
Loading