Fix few warnings #2

mingodad · 2021-04-08T10:56:10Z

When compiling on Ubuntu 18.04 I fixed a few compiler warnings, see below:

------------------------------- EbnfAnalyzer.cpp -------------------------------
index 5bbb7a5..fde2d13 100644
@@ -556,7 +556,7 @@ void EbnfAnalyzer::findAmbiguousAlternatives(Ast::Node* node, FirstFollowSet* se
 
             // TODO: each alternative might have a different predicate type LL or LA
             // currently just assume everything is ok if an LA predicate is present
-            if( predA && !predA->getLa().isEmpty() || predB && !predB->getLa().isEmpty() )
+            if( (predA && !predA->getLa().isEmpty()) || (predB && !predB->getLa().isEmpty()) )
                 continue;
 
             if( ll > 0 )

-------------------------------- EbnfErrors.cpp --------------------------------
index 02c27c6..23e15ea 100644
@@ -20,7 +20,7 @@
 #include "EbnfErrors.h"
 #include <QtDebug>
 
-EbnfErrors::EbnfErrors(QObject *parent) : QObject(parent),d_reportToConsole(false),d_errCounter(0)
+EbnfErrors::EbnfErrors(QObject *parent) : QObject(parent),d_errCounter(0),d_reportToConsole(false)
 {
     d_eventLatency.setSingleShot(true);
     connect(&d_eventLatency, SIGNAL(timeout()), this, SIGNAL(sigChanged()));

-------------------------------- EbnfLexer.cpp --------------------------------
index ccad2e1..67541a4 100644
@@ -24,7 +24,7 @@
 #include <QtDebug>
 
 EbnfLexer::EbnfLexer(QObject *parent) : QObject(parent),
-    d_lastToken(EbnfToken::Invalid),d_lineNr(0),d_colNr(0),d_in(0)
+    d_in(0), d_lineNr(0),d_colNr(0),d_lastToken(EbnfToken::Invalid)
 {
 
 }

-------------------------------- EbnfSyntax.cpp --------------------------------
index 121b9e3..3a408f0 100644
@@ -89,7 +89,7 @@ Ast::NodeSet EbnfSyntax::collectNodes(const Ast::NodeRefSet& pattern, const Ast:
     return res;
 }
 
-EbnfSyntax::EbnfSyntax(EbnfErrors* errs):d_finished(false),d_errs(errs)
+EbnfSyntax::EbnfSyntax(EbnfErrors* errs):d_errs(errs), d_finished(false)
 {
 
 }

--------------------------------- EbnfSyntax.h ---------------------------------
index 3d88de3..93f5e07 100644
@@ -84,9 +84,9 @@ namespace Ast
         Definition* d_def; // resolved nonterminal
         Node* d_parent; // TODO: ev. unnötig; man kann damit bottom up über Sequence hinweg schauen
         Node(Type t, Definition* d, const EbnfToken& tok = EbnfToken()):Symbol(tok),d_type(t),
-            d_quant(One),d_owner(d),d_def(0),d_parent(0),d_leftRecursive(false){}
+            d_quant(One),d_leftRecursive(false),d_owner(d),d_def(0),d_parent(0){}
         Node(Type t, Node* parent, const EbnfToken& tok = EbnfToken()):Symbol(tok),d_type(t),
-            d_quant(One),d_owner(parent->d_owner),d_def(0),d_parent(parent),d_leftRecursive(false){ parent->d_subs.append(this); }
+            d_quant(One),d_leftRecursive(false),d_owner(parent->d_owner),d_def(0),d_parent(parent){ parent->d_subs.append(this); }
         ~Node();
         bool doIgnore() const;
         bool isNullable() const;

--------------------------------- EbnfToken.h ---------------------------------
index 3ed23f9..de61938 100644
@@ -31,6 +31,7 @@ struct EbnfToken
         Sym(const Sym& rhs ):d_str(rhs.d_str){}
         Sym():d_str(0){}
 
+        Sym& operator=(const Sym &rhs) = default;
         operator QByteArray() const { return toBa(); }
         QByteArray toBa() const;
         QString toStr() const;
@@ -67,7 +68,7 @@ struct EbnfToken
     quint32 d_lineNr;
     Sym d_val; // utf-8
     EbnfToken(TokenType t = Invalid, quint32 line = 0,quint16 col = 0, quint16 len = 0, const QByteArray& val = QByteArray() ):
-        d_type(t),d_lineNr(line),d_colNr(col),d_len(len),d_op(Normal){ d_val = getSym(val);}
+        d_type(t),d_op(Normal),d_len(len),d_colNr(col),d_lineNr(line){ d_val = getSym(val);}
     QString toString(bool labeled = true) const;
     bool isValid() const { return d_type != Eof && d_type != Invalid; }
     bool isErr() const { return d_type == Invalid; }

--------------------------------- GenUtils.cpp ---------------------------------
index 12af2e4..b92eb99 100644
@@ -160,7 +160,7 @@ static bool lessThan( const QString& lhs, const QString& rhs )
 {
     const bool lhsAlnum = GenUtils::containsAlnum(lhs);
     const bool rhsAlnum = GenUtils::containsAlnum(rhs);
-    if( lhsAlnum && rhsAlnum || !lhsAlnum && !rhsAlnum )
+    if( (lhsAlnum && rhsAlnum) || (!lhsAlnum && !rhsAlnum) )
         return lhs < rhs;
     else
         return !lhsAlnum && rhsAlnum;

--------------------------------- LaParser.cpp ---------------------------------
index cf349c6..48757a0 100644
@@ -282,7 +282,7 @@ LaLexer::Tok LaLexer::nextTokenImp()
     return Tok(Tok::Eof);
 }
 
-int LaLexer::skipWhiteSpace()
+void LaLexer::skipWhiteSpace()
 {
     while( d_pos < d_str.size() && ::isspace(d_str[d_pos]) )
         d_pos++;

---------------------------------- LaParser.h ----------------------------------
index 3f1df44..c4b97bd 100644
@@ -40,7 +40,7 @@ public:
     Tok peekToken(quint8 lookAhead = 1);
 protected:
     Tok nextTokenImp();
-    int skipWhiteSpace();
+    void skipWhiteSpace();
     Tok ident();
     Tok literal();
     Tok index();

------------------------------- SyntaxTreeMdl.h -------------------------------
index aa84594..9ddce10 100644
@@ -49,7 +49,7 @@ private:
         const Ast::Symbol* d_sym;
         QList<Slot*> d_children;
         Slot* d_parent;
-        Slot(Slot* p = 0):d_parent(p){ if( p ) p->d_children.append(this); }
+        Slot(Slot* p = 0):d_sym(NULL), d_parent(p){ if( p ) p->d_children.append(this); }
         ~Slot() { foreach( Slot* s, d_children ) delete s; }
     };
     void fill(Slot* super, const Ast::Node* sym);

The text was updated successfully, but these errors were encountered:

mingodad · 2021-04-08T12:10:11Z

Also, this prevents segfaults caused by syntax errors when attempting to generate anything from the Generate menu:

-------------------------------- MainWindow.cpp --------------------------------
index ee8156f..5976cc1 100644
@@ -396,10 +396,16 @@ void MainWindow::onExpandSelected()
 
 void MainWindow::onGenSynTree()
 {
+    const QString title = tr("Generate Syntax Tree");
     ENABLED_IF( !d_edit->getPath().isEmpty() );
 
     loadTokMap();
-    SynTreeGen::generateTree( d_edit->getPath(), d_edit->getSyntax() );
+    EbnfSyntax* syn = d_edit->getSyntax();
+    if(!syn) {
+        QMessageBox::critical(this,title,tr("Cannot generate syntax tree, fix the issues first !") );
+        return;
+    }
+    SynTreeGen::generateTree( d_edit->getPath(), syn );
 //    QSet<QByteArray> res = EbnfAnalyzer::collectAllTerminalStrings(d_edit->getSyntax());
 //    for( QSet<QByteArray>::const_iterator i = res.begin(); i != res.end(); ++i )
     //        qDebug() << (*i) << SynTreeGen::symToString((*i));
@@ -407,28 +413,45 @@ void MainWindow::onGenSynTree()
 
 void MainWindow::onGenTt()
 {
+    const QString title = tr("Generate Token Types");
     ENABLED_IF( !d_edit->getPath().isEmpty() );
 
     loadTokMap();
-    SynTreeGen::generateTt( d_edit->getPath(), d_edit->getSyntax(), true, true );
+    EbnfSyntax* syn = d_edit->getSyntax();
+    if(!syn) {
+        QMessageBox::critical(this,title,tr("Cannot generate token types, fix the issues first !") );
+        return;
+    }
+    SynTreeGen::generateTt( d_edit->getPath(), syn, true, true );
 }
 
 void MainWindow::onGenHtml()
 {
+    const QString title = tr("Generate Html");
     ENABLED_IF( !d_edit->getPath().isEmpty() );
 
+    EbnfSyntax* syn = d_edit->getSyntax();
+    if(!syn) {
+        QMessageBox::critical(this,title,tr("Cannot generate html, fix the issues first !") );
+        return;
+    }
     HtmlSyntax gen;
-    gen.generateHtml( d_edit->getPath(), d_edit->getSyntax() );
+    gen.generateHtml( d_edit->getPath(), syn );
 }
 
 void MainWindow::onGenCoco()
 {
+    const QString title = tr("Generate Coco/R");
     ENABLED_IF( !d_edit->getPath().isEmpty() );
 
     loadTokMap();
     CocoGen gen;
-    QFileInfo info(d_edit->getPath());
     EbnfSyntax* syn = d_edit->getSyntax();
+    if(!syn) {
+        QMessageBox::critical(this,title,tr("Cannot generate Coco/R, fix the issues first !") );
+        return;
+    }
+    QFileInfo info(d_edit->getPath());
     gen.generate( info.absoluteDir().absoluteFilePath( info.completeBaseName() + ".atg"), syn, d_tbl, true );
     SynTreeGen::generateTt( d_edit->getPath(), syn, true, false );
     SynTreeGen::generateTree( d_edit->getPath(), syn, true );
@@ -436,16 +459,28 @@ void MainWindow::onGenCoco()
 
 void MainWindow::onGenAntlr()
 {
+    const QString title = tr("Generate Antlr");
     ENABLED_IF( !d_edit->getPath().isEmpty() );
+    EbnfSyntax* syn = d_edit->getSyntax();
+    if(!syn) {
+        QMessageBox::critical(this,title,tr("Cannot generate Antlr, fix the issues first !") );
+        return;
+    }
     QFileInfo info(d_edit->getPath());
-    AntlrGen::generate( info.absoluteDir().absoluteFilePath( info.completeBaseName() + ".g"), d_edit->getSyntax() );
+    AntlrGen::generate( info.absoluteDir().absoluteFilePath( info.completeBaseName() + ".g"), syn );
 }
 
 void MainWindow::onGenLlgen()
 {
+    const QString title = tr("Generate Llgen");
     ENABLED_IF( !d_edit->getPath().isEmpty() );
+    EbnfSyntax* syn = d_edit->getSyntax();
+    if(!syn) {
+        QMessageBox::critical(this,title,tr("Cannot generate Llgen, fix the issues first !") );
+        return;
+    }
     QFileInfo info(d_edit->getPath());
-    LlgenGen::generate( info.absoluteDir().absoluteFilePath( info.completeBaseName() + ".g"), d_edit->getSyntax(), d_tbl );
+    LlgenGen::generate( info.absoluteDir().absoluteFilePath( info.completeBaseName() + ".g"), syn, d_tbl );
 }
 
 void MainWindow::onOutputFirstSet()

-------------------------------- SynTreeGen.cpp --------------------------------
index 65d0ea0..b587c8d 100644
@@ -29,6 +29,8 @@ bool SynTreeGen::generateTree(const QString& ebnfPath, EbnfSyntax* syn, bool inc
 {
     Q_ASSERT( syn != 0 );
 
+    if(!syn)
+        return false;
     const QByteArray nameSpace = syn->getPragmaFirst("%namespace");
     const QByteArray nameSpace2 = nameSpace.isEmpty() ? nameSpace : ( nameSpace + "::" );
     QByteArray module = syn->getPragmaFirst("%module");

rochus-keller · 2021-04-09T09:34:32Z

Thanks. Looks like rather cosmetic warnings by the compiler which I usually ignore or suppress (e.g. -Wno-reorder). The issues causing a segfault should be fixed of course.

mingodad · 2021-04-09T11:21:00Z

I removed all the warning-suppression flags to see what's there and then fixed several of them.

mingodad · 2021-04-09T11:33:10Z

Looking at the EBNF syntax used by this project I noticed that you have some extensions that isn't documented, like this from LjTools/syntax/LjAsm.ebnf:

desig ::= 
	[ \LL:2\ fname '.' ] // function name to uniquely identify source of upvalue
	vname 
...
From Oberon/syntax/Oberon.ebnf:

ArrayType ::= ( ARRAY
#ifdef OBNX
| CARRAY
#endif
)
#ifdef BBOX
[ SysFlag ]
#endif
#ifdef OBN2
[ LengthList ]
#else
LengthList
#endif
OF type
#ifdef OBNX
| '[' [ LengthList ] ']' type
#endif


RET_ ::= RET [ \LA: 1:ident & 2:!':' \ desig [ posint ] ] // number of returns, leave out if 1

The \LL:k\ prefix is briefly mentioned in the README; could you expand the description of the accepted EBNF syntax and, if possible, add a syntax.ebnf file describing it?

Cheers !

mingodad · 2021-04-09T11:53:17Z

There is any reason to not allow nonterminals identifiers to start with '_' ?

In my clone I'm allowing it with the change shown below:

-------------------------------- EbnfLexer.cpp --------------------------------
index ccad2e1..9765991 100644
@@ -63,7 +63,7 @@ EbnfToken EbnfLexer::nextTokenImp()
         if( d_colNr == 0 && ch == '#' )
         {
             return ppsym();
-        }else if( ch.isLetterOrNumber() || ch == '$' || ch == '%' )
+        }else if( ch.isLetterOrNumber() || ch == '$' || ch == '%' || ch == '_')
         {
             // Identifier oder Reserved Word
             EbnfToken t = ident();

rochus-keller · 2021-04-09T12:25:32Z

that you have some extensions that isn't documented

Well, I wouldn't consider the project to be a complete project ready for everyone's use; it's a work in progress and I add features as I need them; there are also still bugs in the analyzer which I will debug and fix some day; and of course I also should write some documentation; currently there is no other way than to look in the source code; the syntax of the LA prefix is in LaParser.h.

EDIT: actually the analyzer/generator are not yet as intelligent as they could be; a lot of LA prefixes could be generated automatically from simple LL:k prefixes; some day I will improve it, but until then some of my grammars use rather lengthy LA prefixes as a workaround.

There is any reason to not allow nonterminals identifiers to start with '_' ?

I have to check, don't remember. Maybe there could be some ambiguity in generated code. If you don't find any issues with underscore prefixed names, then there is likely no reason to not do so. Please note that I'm only using the Coco/R generator; all other generators are remains from earlier attempts with parser generators and don't even support all syntax features.

mingodad · 2021-04-09T12:35:39Z

Thank you for the reply!

I'm looking into using this project to also generate tree-sitter grammars (see tree-sitter/tree-sitter#1013 and https://github.com/eatkins/tree-sitter-ebnf-generator), and also to export grammars for viewing with https://www.bottlecaps.de/rr/ui .

It would be nice if we could generate a Lua parser with Coco/R; with your knowledge of Lua we could then have direct evaluation of the grammar in EbnfStudio.

rochus-keller · 2021-04-09T12:43:42Z

Welcome. Note that I added an edit to my answer.

this project to also generate tree-sitter grammars

Not sure whether this works without a full redesign, because EbnfStudio is designed for LL(k). Tree sitter does LR which requires completely different analyzers/generators.

rochus-keller · 2021-04-09T12:45:14Z

Would be nice if we could generate a Lua parser with CocoR

That's actually already implemented. Have a look at https://github.com/rochus-keller/LjTools/blob/master/LuaLexer.cpp and https://github.com/rochus-keller/LjTools/blob/master/LuaParser.cpp.

mingodad · 2021-04-09T13:02:31Z

I'm looking at it right now and noticed that LjBcViewer.pro references ../LuaJIT/src/LuaJit.pri, which doesn't exist in the standard LuaJIT. Do you have any customization that you forgot to mention?

include( ../LuaJIT/src/LuaJit.pri ){
    LIBS += -ldl
} else {
    LIBS += -lluajit
}

rochus-keller · 2021-04-09T14:05:07Z

../LuaJIT/src/LuaJit.pri

You can ignore that. The code means: "if you find LuaJit.pri, then use it and link with dl, otherwise ignore it and link with the full luajit so".

Here is the pri file in case you also want to add LuaJIT in source form to the project: rochus-keller/LjTools#1 (comment)

mingodad · 2021-04-24T10:52:07Z

After playing a bit with CocoR I've got a simple/naive converter from the CocoR syntax to the EBNF accepted by https://www.bottlecaps.de/rr/ui and applied it to a slightly edited Lua.atg from LjTools (you can copy and paste it in the Edit Grammar tab and then switch to the View Diagram tab), and found that you didn't handle precedence in that grammar (probably because you don't need to validate the code).

//"--" lf cr  '+' lf  '+' tab
Lua ::= chunk
chunk ::=  ( stat  ( T_Semi  )?  )*  ( laststat  ( T_Semi  )?  )?
block ::= chunk
stat ::= assigOrCall_  | dostat_  | whilestat_  | repeatstat_  | ifstat_  | forstat_  | gfuncdecl_  | localdecl_
dostat_ ::= T_do block T_end
whilestat_ ::= T_while exp T_do block T_end
repeatstat_ ::= T_repeat block T_until exp
ifstat_ ::= T_if exp T_then block  ( T_elseif exp T_then block  )*  ( T_else block  )? T_end
forstat_ ::= T_for T_Name  ( T_Eq exp T_Comma exp  ( T_Comma exp  )?  |  ( T_Comma T_Name  )* T_in explist  ) T_do block T_end
gfuncdecl_ ::= T_function funcname funcbody
localdecl_ ::= T_local  ( lfuncdecl_  | lvardecl_  )
lfuncdecl_ ::= T_function T_Name funcbody
lvardecl_ ::= namelist  ( T_Eq explist  )?
assigOrCall_ ::= prefixexp  ( assignment_  )?
call_ ::=  ( T_Colon T_Name  )? args
assignment_ ::=  ( T_Comma prefixexp  )* T_Eq explist
laststat ::= T_return  ( explist  )?  | T_break
funcname ::= T_Name  ( desig_  )*  ( T_Colon T_Name  )?
namelist ::= T_Name  ( T_Comma T_Name  )*
explist ::= exp  ( T_Comma exp  )*
exp ::= T_nil exp_nlr_  | T_false exp_nlr_  | T_true exp_nlr_  | T_Number exp_nlr_  | T_String exp_nlr_  | T_3Dot exp_nlr_  | lambdecl_ exp_nlr_  | prefixexp exp_nlr_  | tableconstructor exp_nlr_  | unop exp exp_nlr_
exp_nlr_ ::=  ( binop exp exp_nlr_  )?
prefixexp ::=  ( T_Name  | T_Lpar exp T_Rpar  )  ( index_  | desig_  | call_  )*
index_ ::= T_Lbrack exp T_Rbrack
desig_ ::= T_Dot T_Name
args ::= T_Lpar  ( explist  )? T_Rpar  | tableconstructor  | T_String
lambdecl_ ::= T_function funcbody
funcbody ::= T_Lpar  ( parlist  )? T_Rpar block T_end
parlist ::= namelist  ( T_Comma T_3Dot  )?  | T_3Dot
tableconstructor ::= T_Lbrace  ( fieldlist  )? T_Rbrace
fieldlist ::= field  ( fieldsep field  )*  ( fieldsep  )?
field ::= index_ T_Eq exp  | T_Name T_Eq exp  | exp
fieldsep ::= T_Comma  | T_Semi
binop ::= T_Plus  | T_Minus  | T_Star  | T_Slash  | T_Hat  | T_Percent  | T_2Dot  | T_Lt  | T_Leq  | T_Gt  | T_Geq  | T_2Eq  | T_TildeEq  | T_and  | T_or
unop ::= T_Minus  | T_not  | T_Hash

letter ::= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"
digit ::= "0123456789"
cr ::= '\r'
lf ::= '\n'
tab ::= '\t'
stringCh ::= ANY  '-' '"'  '-' '\'  '-' cr  '-' lf
charCh ::= ANY  '-' "'"  '-' '\'  '-' cr  '-' lf
printable ::= '\u0020'  .. '\u007e'
hex ::= "0123456789abcdef"
T_Name  ::= letter  ( letter  | digit  )*
T_Number  ::= digit  ( digit  )*  ( '.' digit  ( digit  )*  )?
T_String  ::= '"'  ( stringCh  | '\' printable  )* '"'  | "'"  ( charCh  | '\' printable  )* "'"
badString  ::= '"'  ( stringCh  | '\' printable  )*  ( cr  | lf  )  | "'"  ( charCh  | '\' printable  )*  ( cr  | lf  )
T_Hash  ::= '#'
T_Percent  ::= '%'
T_Lpar  ::= '('
T_Rpar  ::= ')'
T_Star  ::= '*'
T_Plus  ::= '+'
T_Comma  ::= ','
T_Minus  ::= '-'
T_2Minus  ::= "--"
T_2MinusLbrack  ::= "--["
T_Dot  ::= '.'
T_2Dot  ::= ".."
T_3Dot  ::= "..."
T_Slash  ::= '/'
T_Colon  ::= ':'
T_Semi  ::= ';'
T_Lt  ::= '<'
T_Leq  ::= "<="
T_Eq  ::= '='
T_2Eq  ::= "=="
T_Gt  ::= '>'
T_Geq  ::= ">="
T_Lbrack  ::= '['
T_Rbrack  ::= ']'
T_Rbrack2Minus  ::= "]--"
T_Hat  ::= '^'
T_Lbrace  ::= '{'
T_Rbrace  ::= '}'
T_TildeEq  ::= "~="
T_and  ::= "and"
T_break  ::= "break"
T_do  ::= "do"
T_else  ::= "else"
T_elseif  ::= "elseif"
T_end  ::= "end"
T_false  ::= "false"
T_for  ::= "for"
T_function  ::= "function"
T_if  ::= "if"
T_in  ::= "in"
T_local  ::= "local"
T_nil  ::= "nil"
T_not  ::= "not"
T_or  ::= "or"
T_repeat  ::= "repeat"
T_return  ::= "return"
T_then  ::= "then"
T_true  ::= "true"
T_until  ::= "until"
T_while  ::= "while"

rochus-keller · 2021-04-24T11:54:51Z

I used the original grammar from https://www.lua.org/manual/5.1/manual.html#8 which declares all binary operations in the same production and terms/factors/primaries are all combined in the 'exp' production. That didn't bother me because I don't use it as a compiler frontend. If your goal is to have a tree-sitter parser then it actually shouldn't bother you either. Otherwise the additional productions could easily be added to the grammar, or alternatively the precedence rules can be handled directly by the parser. There is no grammar suited for every purpose. If you need it to understand the language and draw a syntax diagram you don't have to bother whether it's LL(1). Grammars optimized for a specific parser generator are rarely beautiful.

mingodad · 2021-04-24T11:59:12Z

Thank you for the reply!
I'm not looking at beauty with the railroad diagram, I'm looking for help to visualize/understand the grammar.
And I'm also trying to create a validating Lua grammar in CocoR, that's why I did the comment about precedence.

rochus-keller · 2021-04-24T12:07:01Z

The idea of EbnfStudio is to have a "pure" grammar (i.e. not polluted with implementation specific parser code). That's why all my prefixes are implementation independent. The output of the parser is a non-abstract syntax tree; only this tree is subject to validation and AST construction. If you don't care for a pure grammar or need more powerful features directly in the parser (to save an additional phase and tree) you better directly work with Coco/R (or the more powerful ANTLR); EbnfStudio doesn't help you in this case.

mingodad · 2021-05-10T07:49:09Z

I just found this variation of CocoR http://cocos-parsergen.sourceforge.net/index.html it seems to achieve a similar goal to your "pure" grammar.

rochus-keller · 2021-05-10T10:08:50Z

Looks interesting, thanks. The philosophy is a bit different, and I couldn't make use of it because I need C++. Tight integration with an IDE and automatic generation of a generic syntax tree are yet two other features I would miss.

mingodad · 2021-05-22T18:46:44Z

I've got a conversion from CocoR CSharp to Lua using https://github.com/yanghuan/CSharp.lua and the result is here https://github.com/yanghuan/CSharp.lua/files/6521486/Coco-lua.zip , it needs going through it and fix/simplify several things but overall the big picture is there in Lua.

rochus-keller · 2021-05-23T19:55:44Z

Cool.

mingodad · 2021-05-28T08:28:12Z

I made a fix for left recursion detection here SSW-CocoR/CocoR-CPP#2; you'll probably be interested in it.

mingodad · 2021-06-03T16:56:49Z

I added AST generation based on your implementation (see SSW-CocoR/CocoR-CPP#1 (comment)) to this repository https://github.com/mingodad/CocoR-CPP and it would be nice if someone could test it and give feedback.

rochus-keller · 2021-06-06T10:12:17Z

Do I get this right: you re-engineered the Coco/R code generator so it not only generates a parser but also a syntax tree generator (likely a concrete syntax tree, not an abstract one)? How does this get along with the semantic actions if specified with the grammar?

mingodad · 2021-06-06T11:14:11Z

Yes, you are right, it's a concrete syntax tree, but we can filter/remove/join nodes with some custom code. It is activated by defining the macro PARSER_WITH_AST; all the info is then stored in an array Parser::ast_root and it's up to the user to use it or not, independently of the semantic actions (it's mimicking your SynTree), see one snippet below. We can also compile CocoR and the generated parsers with or without wchar_t by defining the macro WITHOUT_WCHAR.

I also relaxed the 2-character limit for long comment delimiters, so a quick-and-dirty handling of Lua comments can be implemented like this:

/* The order is important code generation for comments start from the last backwards */
COMMENTS FROM "--" TO lf
COMMENTS FROM "--[[" TO "]]" NESTED
COMMENTS FROM "--[=[" TO "]=]" NESTED
COMMENTS FROM "--[==[" TO "]==]" NESTED
COMMENTS FROM "--[===[" TO "]===]" NESTED
COMMENTS FROM "--[====[" TO "]====]" NESTED

And also added a limited semantic action for token declarations like the pragmas and then we can also with a custom Scanner method parse Lua long strings like this:

TK_LONG_STRING = '[' '=' {'='} '[' . (. parseLongString(); .)
...
		case 25:
			case_25:
			{t->kind = 5 /* TK_LONG_STRING */; loopState = false;parseLongString();  break;}
		case 26:
...
// Consumes the rest of a long string once its opening bracket sequence is
// already held in tval, stopping after a closing delimiter that is followed
// by the same number of '=' characters and another closing delimiter.
// On EOF the current token's kind is set to eofSym and scanning stops.
// Presumably AddCh() appends ch to the token text and advances to the next
// input character -- its definition is not visible here, TODO confirm.
void Scanner::parseLongString() {
	int cdelim1, cdelim2;
	// The first character of the token text selects the closing delimiter.
	cdelim1 = tval[0];
	switch(cdelim1) {
		case '[': cdelim2 = ']'; break;
		default:
			// Any other opening character is reported and terminates the process.
			wprintf(L"Unexpected long string delimiter %lc\n", cdelim1);
			exit(1);
	}
	int innerCount = 0;
	// Count the consecutive '=' characters that follow the opening delimiter.
	// NOTE(review): the bound is innerCount < tlen but the read is at
	// innerCount+1, so the index can reach tlen if no non-'=' is met earlier --
	// confirm tval always holds a terminating character there.
	for(int imax = tlen;  innerCount < imax; ++innerCount) {
		if(tval[innerCount+1] != L'=') break;
	}
	// Starts at 1 and is only ever decremented in this function.
	int nested = 1;
	//print("==", line, col, innerCount);
	for(;;) {
		if(ch == cdelim2) {
			// Possible start of the closing sequence: count the '=' that follow.
			AddCh();
			int eqCount = 0;
			while(ch == L'=') {
				if(++eqCount == innerCount) {
					AddCh();
					// Same number of '=' as the opener and a closing delimiter next.
					if(ch == cdelim2 && (--nested == 0)) {
						AddCh();
						//print("=0=" + tval.tostring() + "=1=");
						return; //done
					}
				}
				// NOTE(review): when the branch above was entered but did not
				// return, this call runs without ch having been examined, so a
				// closing delimiter or EOF at this position may be passed over --
				// confirm against AddCh()'s behavior.
				AddCh();
			}
			continue;
		} else if (ch == EOF) {
			// Unterminated long string: mark the token as end-of-file and stop.
			t->kind = eofSym;
			break;
		 }
		AddCh();
	}
}

Also I'm experimenting with showing a kind of naive TreeView for LL1 errors/warnings:

LL1 warning in Statement:213:0: TK_STRUCT is start of several alternatives
	=> ClassStatement:227:4:
	-> ClassStatement:319:0:
	  = TK_CLASS:320:3:
	=> CommaExpr:240:4:
	-> CommaExpr:419:0:
	  -> Expression:422:0:
	    -> LogicalOrExp:429:0:
	      -> LogicalAndExp:432:0:
	        -> BitwiseOrExp:435:0:
	          -> BitwiseXorExp:438:0:
	            -> BitwiseAndExp:441:0:
	              -> EqExp:444:0:
	                -> CompExp:447:0:
	                  -> ShiftExp:450:0:
	                    -> PlusExp:453:0:
	                      -> MultExp:456:0:
	                        -> PrefixedExpr:459:0:
	                          -> Factor:468:0:
	                            = TK_CLASS:486:17:

// Parser routine for the GMPL production: one Statement, then further
// Statements while StartOf(1) holds, then an optional "end" followed by a
// semicolon, and finally EOF.
// When PARSER_WITH_AST is defined, it also creates the SynTree root for this
// nonterminal, resets ast_stack to hold only that root, records each accepted
// terminal via AstAddTerminal(), and pops the nonterminal at the end.
void Parser::GMPL() {
#ifdef PARSER_WITH_AST
		Token *ntTok = new Token(); ntTok->kind = eNonTerminals::_GMPL; ntTok->line = 0; ntTok->val = coco_string_create(_SC("GMPL"));ast_root = new SynTree( ntTok ); ast_stack.Clear(); ast_stack.Add(ast_root);
#endif
		Statement();
		while (StartOf(1 /* nt   */)) {
			Statement();
		}
		// Optional trailing "end" ";" pair.
		if (la->kind == 50 /* "end" */) {
			Get();
#ifdef PARSER_WITH_AST
	AstAddTerminal();
#endif
			Expect(_T_SEMICOLON);
#ifdef PARSER_WITH_AST
	AstAddTerminal();
#endif
		}
		Expect(_EOF);
#ifdef PARSER_WITH_AST
	AstAddTerminal();
#endif
#ifdef PARSER_WITH_AST
		AstPopNonTerminal();
#endif
}

// Parser routine for the Statement production: dispatches on the lookahead
// token to model_statement(), data_statement() or simple_statement(), and
// reports syntax error 123 when none of the alternatives applies.
// When PARSER_WITH_AST is defined, a Statement nonterminal node is opened
// first and popped again afterwards only if AstAddNonTerminal() returned true.
void Parser::Statement() {
#ifdef PARSER_WITH_AST
		bool ntAdded = AstAddNonTerminal(eNonTerminals::_Statement, _SC("Statement"), la->line);
#endif
		if (la->kind == 51 /* "model" */) {
			model_statement();
		} else if (la->kind == 52 /* "data" */) {
			data_statement();
		} else if (StartOf(2 /* nt   */)) {
			simple_statement();
		} else SynErr(123);
#ifdef PARSER_WITH_AST
		if(ntAdded) AstPopNonTerminal();
#endif
}

Can you try it and give feedback?

rochus-keller · 2021-06-06T15:26:52Z

Interesting approach; personally, I tend not to change existing libraries as far as possible and instead implement the desired functions through separate components. Handling unicode was also possible with this approach without modifying Coco/R using a lexer which can handle unicode and a special token mapping, see e.g. https://github.com/rochus-keller/Simula/.

mingodad · 2021-06-14T12:32:04Z

I've made some more improvements to CocoR; one of them is dumping a pruned syntax tree based on a simple algorithm, and I'm thinking of applying it to the syntax tree construction as an option to save memory.
What do you think, based on your experience?

C++ code at https://github.com/mingodad/CocoR-CPP/blob/master/src/Parser.cpp#L1321

Simple Lua script:

local function fib(n)
    if (n < 2) then 
	return 1
    else
	return fib(n-2) + fib(n-1)
    end
end

print(fib(32))

Full syntax tree:

2	0	0	Lua
    2	1	1	statlist
        1	1	2	statement
            2	1	9	localstat
                = 	1	1	36	local
                3	1	30	localfunc
                    = 	1	7	25	function
                    1	1	19	str_checkname
                        = 	1	16	1	fib
                    5	1	26	body
                        = 	1	19	37	(
                        1	1	29	parlist
                            = 	1	20	1	n
                        = 	1	21	52	)
                        1	2	1	statlist
                            1	2	2	statement
                                5	2	3	ifstat
                                    = 	2	5	31	if
                                    3	2	15	test_then_block
                                        1	2	17	expr
                                            1	2	44	subexpr
                                                1	2	46	simpleexp
                                                    1	2	33	suffixedexp
                                                        3	2	35	primaryexp
                                                            = 	2	8	37	(
                                                            1	2	17	expr
                                                                3	2	44	subexpr
                                                                    1	2	46	simpleexp
                                                                        1	2	33	suffixedexp
                                                                            1	2	35	primaryexp
                                                                                1	2	27	singlevar
                                                                                    1	2	19	str_checkname
                                                                                        = 	2	9	1	n
                                                                    1	2	47	getbinopr
                                                                        = 	2	11	38	<
                                                                    1	2	44	subexpr
                                                                        1	2	46	simpleexp
                                                                            = 	2	13	3	2
                                                            = 	2	14	52	)
                                        = 	2	16	56	then
                                        1	3	1	statlist
                                            1	3	2	statement
                                                2	3	11	retstat
                                                    = 	3	2	51	return
                                                    1	3	24	explist
                                                        1	3	17	expr
                                                            1	3	44	subexpr
                                                                1	3	46	simpleexp
                                                                    = 	3	9	3	1
                                    = 	4	5	18	else
                                    1	5	16	block
                                        1	5	1	statlist
                                            1	5	2	statement
                                                2	5	11	retstat
                                                    = 	5	2	51	return
                                                    1	5	24	explist
                                                        1	5	17	expr
                                                            3	5	44	subexpr
                                                                1	5	46	simpleexp
                                                                    2	5	33	suffixedexp
                                                                        1	5	35	primaryexp
                                                                            1	5	27	singlevar
                                                                                1	5	19	str_checkname
                                                                                    = 	5	9	1	fib
                                                                        3	5	38	funcargs
                                                                            = 	5	12	37	(
                                                                            1	5	24	explist
                                                                                1	5	17	expr
                                                                                    3	5	44	subexpr
                                                                                        1	5	46	simpleexp
                                                                                            1	5	33	suffixedexp
                                                                                                1	5	35	primaryexp
                                                                                                    1	5	27	singlevar
                                                                                                        1	5	19	str_checkname
                                                                                                            = 	5	13	1	n
                                                                                        1	5	47	getbinopr
                                                                                            = 	5	14	39	-
                                                                                        1	5	44	subexpr
                                                                                            1	5	46	simpleexp
                                                                                                = 	5	15	3	2
                                                                            = 	5	16	52	)
                                                                1	5	47	getbinopr
                                                                    = 	5	18	46	+
                                                                1	5	44	subexpr
                                                                    1	5	46	simpleexp
                                                                        2	5	33	suffixedexp
                                                                            1	5	35	primaryexp
                                                                                1	5	27	singlevar
                                                                                    1	5	19	str_checkname
                                                                                        = 	5	20	1	fib
                                                                            3	5	38	funcargs
                                                                                = 	5	23	37	(
                                                                                1	5	24	explist
                                                                                    1	5	17	expr
                                                                                        3	5	44	subexpr
                                                                                            1	5	46	simpleexp
                                                                                                1	5	33	suffixedexp
                                                                                                    1	5	35	primaryexp
                                                                                                        1	5	27	singlevar
                                                                                                            1	5	19	str_checkname
                                                                                                                = 	5	24	1	n
                                                                                            1	5	47	getbinopr
                                                                                                = 	5	25	39	-
                                                                                            1	5	44	subexpr
                                                                                                1	5	46	simpleexp
                                                                                                    = 	5	26	3	1
                                                                                = 	5	27	52	)
                                    = 	6	5	20	end
                        = 	7	1	20	end
        1	9	2	statement
            1	9	14	exprstat
                2	9	33	suffixedexp
                    1	9	35	primaryexp
                        1	9	27	singlevar
                            1	9	19	str_checkname
                                = 	9	1	1	print
                    3	9	38	funcargs
                        = 	9	6	37	(
                        1	9	24	explist
                            1	9	17	expr
                                1	9	44	subexpr
                                    1	9	46	simpleexp
                                        2	9	33	suffixedexp
                                            1	9	35	primaryexp
                                                1	9	27	singlevar
                                                    1	9	19	str_checkname
                                                        = 	9	7	1	fib
                                            3	9	38	funcargs
                                                = 	9	10	37	(
                                                1	9	24	explist
                                                    1	9	17	expr
                                                        1	9	44	subexpr
                                                            1	9	46	simpleexp
                                                                = 	9	11	3	32
                                                = 	9	13	52	)
                        = 	9	14	52	)
    = 	10	1	0

Pruned syntax tree:

2	0	0	Lua
    2	1	1	statlist
        2	1	9	localstat
            = 	1	1	36	local
            3	1	30	localfunc
                = 	1	7	25	function
                1	1	19	str_checkname
                    = 	1	16	1	fib
                5	1	26	body
                    = 	1	19	37	(
                    1	1	29	parlist
                        = 	1	20	1	n
                    = 	1	21	52	)
                    5	2	3	ifstat
                        = 	2	5	31	if
                        3	2	15	test_then_block
                            3	2	35	primaryexp
                                = 	2	8	37	(
                                3	2	44	subexpr
                                    1	2	19	str_checkname
                                        = 	2	9	1	n
                                    1	2	47	getbinopr
                                        = 	2	11	38	<
                                    1	2	46	simpleexp
                                        = 	2	13	3	2
                                = 	2	14	52	)
                            = 	2	16	56	then
                            2	3	11	retstat
                                = 	3	2	51	return
                                1	3	46	simpleexp
                                    = 	3	9	3	1
                        = 	4	5	18	else
                        2	5	11	retstat
                            = 	5	2	51	return
                            3	5	44	subexpr
                                2	5	33	suffixedexp
                                    1	5	19	str_checkname
                                        = 	5	9	1	fib
                                    3	5	38	funcargs
                                        = 	5	12	37	(
                                        3	5	44	subexpr
                                            1	5	19	str_checkname
                                                = 	5	13	1	n
                                            1	5	47	getbinopr
                                                = 	5	14	39	-
                                            1	5	46	simpleexp
                                                = 	5	15	3	2
                                        = 	5	16	52	)
                                1	5	47	getbinopr
                                    = 	5	18	46	+
                                2	5	33	suffixedexp
                                    1	5	19	str_checkname
                                        = 	5	20	1	fib
                                    3	5	38	funcargs
                                        = 	5	23	37	(
                                        3	5	44	subexpr
                                            1	5	19	str_checkname
                                                = 	5	24	1	n
                                            1	5	47	getbinopr
                                                = 	5	25	39	-
                                            1	5	46	simpleexp
                                                = 	5	26	3	1
                                        = 	5	27	52	)
                        = 	6	5	20	end
                    = 	7	1	20	end
        2	9	33	suffixedexp
            1	9	19	str_checkname
                = 	9	1	1	print
            3	9	38	funcargs
                = 	9	6	37	(
                2	9	33	suffixedexp
                    1	9	19	str_checkname
                        = 	9	7	1	fib
                    3	9	38	funcargs
                        = 	9	10	37	(
                        1	9	46	simpleexp
                            = 	9	11	3	32
                        = 	9	13	52	)
                = 	9	14	52	)
    = 	10	1	0

mingodad mentioned this issue Jun 6, 2021

Cooperating on improve CocoR parsers sebdeveloper6952/rust-cocor#1

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix few warnings #2

Fix few warnings #2

mingodad commented Apr 8, 2021

mingodad commented Apr 8, 2021

rochus-keller commented Apr 9, 2021

mingodad commented Apr 9, 2021

mingodad commented Apr 9, 2021

mingodad commented Apr 9, 2021

rochus-keller commented Apr 9, 2021 •

edited

Loading

mingodad commented Apr 9, 2021

rochus-keller commented Apr 9, 2021

rochus-keller commented Apr 9, 2021

mingodad commented Apr 9, 2021

rochus-keller commented Apr 9, 2021 •

edited

Loading

mingodad commented Apr 24, 2021

rochus-keller commented Apr 24, 2021

mingodad commented Apr 24, 2021

rochus-keller commented Apr 24, 2021

mingodad commented May 10, 2021

rochus-keller commented May 10, 2021

mingodad commented May 22, 2021

rochus-keller commented May 23, 2021

mingodad commented May 28, 2021

mingodad commented Jun 3, 2021

rochus-keller commented Jun 6, 2021

mingodad commented Jun 6, 2021

rochus-keller commented Jun 6, 2021

mingodad commented Jun 14, 2021

Fix few warnings #2

Fix few warnings #2

Comments

mingodad commented Apr 8, 2021

mingodad commented Apr 8, 2021

rochus-keller commented Apr 9, 2021

mingodad commented Apr 9, 2021

mingodad commented Apr 9, 2021

mingodad commented Apr 9, 2021

rochus-keller commented Apr 9, 2021 • edited Loading

mingodad commented Apr 9, 2021

rochus-keller commented Apr 9, 2021

rochus-keller commented Apr 9, 2021

mingodad commented Apr 9, 2021

rochus-keller commented Apr 9, 2021 • edited Loading

mingodad commented Apr 24, 2021

rochus-keller commented Apr 24, 2021

mingodad commented Apr 24, 2021

rochus-keller commented Apr 24, 2021

mingodad commented May 10, 2021

rochus-keller commented May 10, 2021

mingodad commented May 22, 2021

rochus-keller commented May 23, 2021

mingodad commented May 28, 2021

mingodad commented Jun 3, 2021

rochus-keller commented Jun 6, 2021

mingodad commented Jun 6, 2021

rochus-keller commented Jun 6, 2021

mingodad commented Jun 14, 2021

rochus-keller commented Apr 9, 2021 •

edited

Loading

rochus-keller commented Apr 9, 2021 •

edited

Loading