Skip to content

Commit

Permalink
Merge pull request #499 from dehilsterlexis/NLP-ENGINE-422
Browse files Browse the repository at this point in the history
NLP-ENGINE-422 Added ambig and meaning logic
  • Loading branch information
dehilsterlexis authored Jul 11, 2024
2 parents dcefb6d + e610897 commit 0ec9e4d
Show file tree
Hide file tree
Showing 22 changed files with 454 additions and 325 deletions.
23 changes: 21 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
"version": "0.2.0",
"configurations": [
{
"name": "Corp Test (linux)",
"name": "Ambig (linux)",
"type": "cppdbg",
"request": "launch",
"program": "/home/dehilster/nlp-engine/bin/nlp",
"args": ["-ANA","/home/dehilster/analyzers/Test","-WORK","/home/dehilster/nlp-engine/","/home/dehilster/analyzers/Test/input/após.txt","-DEV"],
"args": ["-ANA","/home/dehilster/analyzers/Ambig","-WORK","/home/dehilster/nlp-engine/","/home/dehilster/analyzers/Ambig/input/após.txt","-DEV"],
"stopAtEntry": false,
"cwd": "/home/dehilster/nlp-engine/",
"environment": [],
Expand All @@ -23,6 +23,25 @@
}
]
},
{
"name": "Test (win)",
"type": "cppvsdbg",
"request": "launch",
"program": "C:\\dev\\nlp-engine\\bin\\Debug\\nlp.exe",
"args": ["-ANA","C:\\dev\\analyzers\\Test","-WORK","C:\\dev\\nlp-engine","C:\\dev\\analyzers\\Test\\input\\test.txt","-DEV"],
"stopAtEntry": false,
"cwd": "C:\\dev\\nlp-engine",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
]
},
{
"name": "Parse EN-US (win)",
"type": "cppvsdbg",
Expand Down
78 changes: 60 additions & 18 deletions cs/libconsh/cg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ All rights reserved.
#include "cmd.h"
#include "dyn.h" // 06/29/00 AM.
#include "lite/dir.h"
#include "lite/io.h"

#include "prim/unicu.h"
using namespace unicu;
Expand Down Expand Up @@ -628,37 +629,41 @@ _stprintf(path, _T("%s%c%s%c%s"), getAppdir(),DIR_CH, kbdir,DIR_CH, dir);
_TCHAR infile[MAXPATH*2];
_TCHAR *suff;
suff = _T("kb"); // Kb file suffix.
std::vector<std::filesystem::path> files;
std::vector<std::filesystem::path> kbfiles;
std::vector<std::filesystem::path> dictfiles;
bool kbLoaded = false;
bool bound = false;

if (openDict(files)) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
con_add_root(this);
bind_sys(this);
bound = true;
readDicts(files);
outputTime(_T("[READ dict files time="),s_time);
s_time = clock();
kbLoaded = true;
}
openKBB(kbfiles);
openDict(dictfiles);

if (openKBB(files)) {
if (kbfiles.size() > 0) {
if (!bound) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
bind_sys(this);
con_add_root(this);
}
readKBBs(files);
readKBBs(kbfiles);
outputTime(_T("[READ kbb files time="),s_time);
s_time = clock();
kbLoaded = true;
}

if (dictfiles.size() > 0) {
_stprintf(infile, _T("%s%chier.%s"), path,DIR_CH, suff);
if (!readFile(infile))
return false;
con_add_root(this);
bind_sys(this);
bound = true;
readDicts(dictfiles,kbfiles);
outputTime(_T("[READ dict files time="),s_time);
s_time = clock();
kbLoaded = true;
}

if (!kbLoaded) {

// Using a master take file for readin kb. // 07/01/03 AM.
Expand Down Expand Up @@ -3481,6 +3486,31 @@ if (!(word = kbm_->dict_find_word(str)) )
return word;
}

/********************************************
* FN:		MATCHDICTKB
* SUBJ:	Find the KB concept that accompanies a dictionary file.
* PARAMS:	dictFilename - path of the dictionary file being read.
*			kbfiles      - paths of the KB files found for this analyzer.
* RET:	The "dictionary" concept under the KB root when some KB file has
*			the same file head (as computed by file_head) as the dictionary
*			file; NULL when there are no KB files, no head matches, a path
*			does not fit the working buffers, or the concept does not exist.
* NOTE:	Each KB path is parsed in its own buffer (buffkb). Parsing the
*			dictionary buffer again would compare the dictionary head with
*			itself and ignore the KB file names entirely.
********************************************/
CONCEPT *CG::matchDictKB(std::string dictFilename, std::vector<std::filesystem::path> kbfiles) {
	if (kbfiles.empty()) return NULL;

	// Paths are copied into fixed MAXSTR buffers; refuse anything that would
	// not fit (including the terminator) rather than overrun the stack.
	const size_t maxlen = static_cast<size_t>(MAXSTR);
	if (dictFilename.length() >= maxlen) return NULL;

	_TCHAR buff[MAXSTR], buffkb[MAXSTR];
	_TCHAR *head = NULL, *headkb = NULL;

	// head points into buff, so buff must not be touched again below.
	_tcscpy(buff, dictFilename.c_str());
	file_head(buff, head);
	if (!head) return NULL;

	for (const std::filesystem::path &kbfile : kbfiles) {
		const std::string kbpath = kbfile.string();
		if (kbpath.length() >= maxlen)
			continue;	// Too long for the buffer; cannot be compared safely.

		_tcscpy(buffkb, kbpath.c_str());
		headkb = NULL;
		file_head(buffkb, headkb);	// Parse the KB path, not the dictionary path.
		if (!headkb)
			continue;

		if (!_tcscmp(head, headkb)) {
			CONCEPT *con = findRoot();
			CONCEPT *dictcon = findConcept(con,"dictionary");
			if (dictcon) {
				return dictcon;
			}
		}
	}
	return NULL;
}

bool CG::openDict(std::vector<std::filesystem::path>& files) {
bool found = false;
files.clear();
Expand All @@ -3501,16 +3531,16 @@ bool CG::openDict(std::vector<std::filesystem::path>& files) {
return found;
}

bool CG::readDicts(std::vector<std::filesystem::path> files) {
bool CG::readDicts(std::vector<std::filesystem::path> files, std::vector<std::filesystem::path> kbfiles) {
std::vector<std::filesystem::path>::iterator ptr;
if (files.size() == 0) return false;
for (ptr = files.begin(); ptr < files.end(); ptr++) {
readDict(ptr->string());
readDict(ptr->string(), kbfiles);
}
return true;
}

bool CG::readDict(std::string file) {
bool CG::readDict(std::string file, std::vector<std::filesystem::path> kbfiles) {
bool dirty;
CONCEPT *wordCon, *parentCon;
_TCHAR buf[MAXMSG];
Expand All @@ -3520,6 +3550,8 @@ bool CG::readDict(std::string file) {
int lineCount = 0;
DICT_CALL caller;

CONCEPT *ambigKB = matchDictKB(file, kbfiles);

// For error printouts
std::size_t botDirPos = file.find_last_of(DIR_CH);
std::string filename = file.substr(botDirPos+1, file.length()-2);
Expand Down Expand Up @@ -3705,6 +3737,16 @@ bool CG::readDict(std::string file) {
} else {
parentCon = wordCon;
}
// Add the ambiguous concept to the word concept.
if (ambigKB) {
CONCEPT *kbcon = findConcept(ambigKB,token);
if (kbcon) {
CONCEPT *ambigCon = NULL;
if (!findVal(wordCon,"meaning",ambigCon)) {
addVal(wordCon,_T("meaning"),kbcon);
}
}
}

} else if (cc == '=') {
int donothing = 1;
Expand Down
8 changes: 5 additions & 3 deletions include/Api/consh/cg.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ All rights reserved.

#define FNAMESIZ 256
#define NAMESIZ 256
#define MAXPATH 2048
//#define PATHSIZ 512
// SIZES // 03/25/13 AM.
// Messes up concept display in Attribute Editor. // 09/14/13 AM.
Expand Down Expand Up @@ -212,9 +213,10 @@ class LIBCONSH_API CG
// Find dictionary concept for given string. // 06/29/03 AM.
CONCEPT *findWordConcept(_TCHAR*); // 06/29/03 AM.

CONCEPT *matchDictKB(std::string dictFilename, std::vector<std::filesystem::path> kbfiles);
bool openDict(std::vector<std::filesystem::path>& files);
bool readDicts(std::vector<std::filesystem::path> files);
bool readDict(std::string file);
bool readDicts(std::vector<std::filesystem::path> files, std::vector<std::filesystem::path> kbfiles);
bool readDict(std::string file, std::vector<std::filesystem::path> kbfiles);

bool openKBB(std::vector<std::filesystem::path>& files);
bool readKBBs(std::vector<std::filesystem::path> files);
Expand Down Expand Up @@ -608,7 +610,7 @@ class LIBCONSH_API CG

private:
_TCHAR appdir_[FNAMESIZ]; // Base directory path.
_TCHAR kbdir_[FNAMESIZ];
_TCHAR kbdir_[MAXPATH];

std::_t_ifstream allDictStream_;

Expand Down
1 change: 1 addition & 0 deletions include/Api/lite/Arun.h
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,7 @@ class LITE_API Arun
static RFASem *pnprev(Nlppp*,NODE*); // 01/08/01 AM.
static RFASem *pnprev(Nlppp*,RFASem*); // 04/29/01 AM.

static bool pnremoveval(Nlppp*,NODE*,_TCHAR*);
static _TCHAR *pnrename(Nlppp*,NODE*,_TCHAR*); // 01/08/01 AM.
static _TCHAR *pnrename(Nlppp*,RFASem*,_TCHAR*); // 04/28/01 AM.
static _TCHAR *pnrename(Nlppp*,NODE*,RFASem*); // 04/28/01 AM.
Expand Down
5 changes: 0 additions & 5 deletions include/Api/lite/dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ All rights reserved.
#include <filesystem>
#include <regex>

LITE_API bool plain_file(struct _tfinddata_t &fileinfo);
LITE_API bool is_dir(struct _tfinddata_t &fileinfo);
LITE_API bool is_file(_TCHAR *name);
LITE_API bool path_exists(_TCHAR *name);

LITE_API void rm_path(
_TCHAR *infile, // The path (file, directory, or directory tree).
bool tree // True if removing subdirectories recursively.
Expand Down
94 changes: 94 additions & 0 deletions include/Api/lite/io.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*******************************************************************************
Copyright (c) 2001-2010 by Text Analysis International, Inc.
All rights reserved.
********************************************************************************
*
* NAME: IO.H
* FILE: lite\io.h
* CR: 10/06/98 AM.
* SUBJ: Declares for I/O functions.
*
*******************************************************************************/

//#include "fstream.h"

void copy_file(const _TCHAR *, const _TCHAR *);
bool file_exists(const _TCHAR *iname); // 12/14/98 AM.
void file_to_buffer(const _TCHAR *, _TCHAR *,
/*UP*/ long &len // 05/28/00 /AM.
);
_TCHAR *pretty_char(_TCHAR);
_TCHAR *pretty_str(
_TCHAR *str, // String to be prettified.
_TCHAR *buf, // Buffer for placing prettified string.
long size
);
_TCHAR *c_char(_TCHAR,_TCHAR*); // 05/10/00 AM.
_TCHAR *c_str( // 05/10/00 AM.
_TCHAR *str, // String to be prettified.
_TCHAR *buf, // Buffer for placing prettified string.
long size // Buffer size. (-1 means count not used.)
);


void read_file(
_TCHAR *fname, // The filename
/*UP*/
long &len, // Length of file.
_TCHAR* &buf // Buffer to create.
)
;

_TCHAR *next_token(/*DU*/ _TCHAR* &buf, bool &eol, _TCHAR *comment);

bool fix_file_name(
_TCHAR *file, // Buffer big enough to hold extension.
_TCHAR *suff // File name extension needed.
)
;

bool eq_str_range(_TCHAR *str, _TCHAR *ptr, long start, long end);

_TCHAR *make_str(_TCHAR *str);
_TCHAR *make_str(_TCHAR *str, long len);

bool strcat_e(
/*DU*/ _TCHAR* &ptr,
/*DN*/ _TCHAR *str,
/*DU*/ long &count
);


bool strncat_e(
/*DU*/ _TCHAR* &ptr,
/*DN*/ _TCHAR *str,
/*DN*/ long len,
/*DU*/ long &count
);


// Path-splitting helpers.
// Each one takes a buffer holding a full file string and hands back, through
// the reference parameter, a pointer INTO THAT SAME BUFFER (see the parameter
// comments below). The caller's buffer must therefore stay alive while the
// returned pointer is in use.
// NOTE(review): whether these helpers modify `file` in place, and what the
// bool result signifies, is not visible from this header -- confirm against
// the implementation before reusing a buffer for a second call.

// Locates the name portion of the file string.
bool file_name( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fname // Pointer to the name in buffer.
);
// Locates the path portion of the file string.
bool file_path( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fpath // Pointer to the path in buffer.
);
// NOTE(review): parameter comments are identical to file_path's; presumably
// this yields the parent directory instead -- confirm.
bool file_parent(
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fpath // Pointer to the path in buffer.
);
// Locates the file head in the file string.
bool file_head( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &fhead // Pointer to the file head in buffer.
);
// Locates the tail in the file string.
bool file_tail( // 12/24/99 AM.
_TCHAR *file, // Buffer with full file string.
/*UP*/
_TCHAR* &ftail // Pointer to tail in buffer.
);
20 changes: 19 additions & 1 deletion lite/dicttok.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,11 @@ inline bool DICTTok::findAttrs(Node<Pn> *node, CONCEPT *con, _TCHAR *str, bool i
} else if (cg_->isValNum(vals)) {
long long num = 0L;
cg_->popVal(vals,num);
replaceNum(node,strattr,num);
replaceNum(node,strattr,num);
} else if (cg_->isValCon(vals)) {
CONCEPT *con = NULL;
cg_->popVal(vals,con);
replaceCon(node,strattr,con);
} else
cg_->nextVal(vals);
}
Expand Down Expand Up @@ -991,6 +995,20 @@ return Ivar::nodeReplaceval(pn, name, val);
}


/********************************************
* FN:		REPLACECON
* SUBJ:	Replace a node variable's value with a concept.
* RET:	false when no node is given; otherwise whatever
*			Ivar::nodeReplaceval reports for the node's data.
********************************************/
inline bool DICTTok::replaceCon(
	Node<Pn> *node,
	_TCHAR *name,		// variable name.
	CONCEPT *con
	)
{
	// Without a node there is no data to attach the variable to.
	if (node == NULL)
		return false;

	Pn *data = node->getData();
	return Ivar::nodeReplaceval(data, name, con);
}

/********************************************
* FN: POPSVAL
* CR: 08/01/11 AM.
Expand Down
5 changes: 5 additions & 0 deletions lite/dicttok.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ class DICTTok : public Algo
_TCHAR *name, // variable name.
_TCHAR *str
);
inline bool replaceCon(
Node<Pn> *node,
_TCHAR *name,
CONCEPT *con
);
inline _TCHAR *popsval(
VAL *val
);
Expand Down
Loading

0 comments on commit 0ec9e4d

Please sign in to comment.