diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml
index 3cea67f..f1e0848 100644
--- a/.github/workflows/pip.yml
+++ b/.github/workflows/pip.yml
@@ -15,7 +15,7 @@ jobs:
fail-fast: false
matrix:
platform: [windows-latest, macos-latest, ubuntu-latest]
- python-version: ["3.9", "3.10", "3.11", "3.12"]
+ python-version: ["3.12"]
steps:
- uses: actions/checkout@v4
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 3f5056a..25ab335 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -10,9 +10,17 @@ on:
types:
- published
+concurrency:  # GitHub Actions runs at most one run per concurrency group at a time
+ group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow name + git ref
+ cancel-in-progress: ${{ github.event.action == 'published' }}  # true only when the triggering event's action is 'published'
+
+defaults:
+ run:
+ shell: bash -l {0}  # default for every `run:` step: bash started as a login shell (-l)
+
jobs:
- build_sdist_and_wheel:
- name: Build SDist and Wheel
+ build_sdist:
+ name: Build Source
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@@ -21,32 +29,85 @@ jobs:
submodules: true
- name: Build SDist
- run: pipx run build
+ run: pipx run build --sdist  # source distribution only; wheels are built by the cibuildwheel job
- name: Check metadata
run: pipx run twine check dist/*
- uses: actions/upload-artifact@v4
with:
- path: dist/*
+ name: dist  # artifact name for the sdist upload
+ path: dist/*.tar.gz  # upload only the source tarball
+
+  cibuildwheel:  # builds binary wheels, one matrix entry per platform tag / runner image
+    name: "${{ matrix.variant.platform }} on ${{ matrix.variant.os }}"
+    runs-on: ${{ matrix.variant.os }}
+    strategy:
+      fail-fast: false  # let the other variants finish when one fails
+      matrix:
+        variant:
+          - { os: ubuntu-22.04, platform: 'manylinux' }
+          - { os: ubuntu-22.04, platform: 'musllinux' }
+          - { os: macos-13, platform: 'macosx' }  # Intel support
+          - { os: macos-14, platform: 'macosx' }  # Apple silicon support
+          - { os: windows-2022, platform: 'win' }
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history; NOTE(review): presumably needed for version detection from tags - confirm
+
+      - uses: actions/setup-python@v5  # NOTE(review): presumably the native arm64 CPython 3.8 cibuildwheel needs for cp38 wheels - confirm
+        with:
+          python-version: "3.8"  # quoted so YAML keeps the version a string instead of a float
+        if: runner.os == 'macOS' && runner.arch == 'ARM64'
+
+      - uses: pypa/cibuildwheel@v2.20.0
+        env:
+          CIBW_BUILD: "*-${{ matrix.variant.platform }}*"  # restrict the build to this variant's platform tag
+          MACOSX_DEPLOYMENT_TARGET: "10.15"
+
+      - name: Verify clean directory
+        run: git diff --exit-code  # non-zero exit (step fails) if the build modified tracked files
+        shell: bash  # overrides the workflow-level default `bash -l {0}`
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: "cibw-wheels-${{ matrix.variant.platform }}-${{ matrix.variant.os }}"  # unique per matrix entry
+          path: wheelhouse/*.whl
upload_all:
name: Upload if release
- needs: [build_sdist_and_wheel]
+ needs: [build_sdist, cibuildwheel]  # wait for the sdist job and every wheel-building matrix entry
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/mccode_antlr
permissions:
id-token: write
- if: github.event_name == 'release' && github.event.action == 'published'
+ contents: write  # NOTE(review): presumably required by the release-action step below to modify the release - confirm
+ if: github.event_name == 'release'  # job is skipped for every other triggering event
steps:
- uses: actions/setup-python@v5
- uses: actions/download-artifact@v4
+ id: download  # referenced below as steps.download.outputs.download-path
+ with:
+ path: artifacts
+
+ - name: Move artifacts into a single folder  # collects every *.tar.gz / *.whl under the download path into dist/
+ run: |
+ mkdir dist
+ find ${{ steps.download.outputs.download-path }} -type f -regex ".*\.\(tar\.gz\|whl\)" -exec mv {} dist/. \;
+
+ - name: Attach artifacts to GitHub tagged draft release
+ uses: ncipollo/release-action@v1
with:
- name: artifact
- path: dist
+ allowUpdates: true
+ draft: ${{ github.event.action != 'published' }}  # draft unless the event action is 'published'
+ artifacts: "dist/*.whl,dist/*.tar.gz"
- - uses: pypa/gh-action-pypi-publish@release/v1
+ - name: Publish artifacts to PyPI
+ if: ${{ github.event.action == 'published' }}  # upload to PyPI only when the release is published
+ uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.gitignore b/.gitignore
index 7e2978b..b46cec8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@
*build/
*.egg-info/
*.c
+*.so
+*.dll
+*.dylib
diff --git a/lib/antlr4-cpp-runtime/ANTLRErrorListener.cpp b/lib/antlr4-cpp-runtime/ANTLRErrorListener.cpp
new file mode 100644
index 0000000..6ceadb8
--- /dev/null
+++ b/lib/antlr4-cpp-runtime/ANTLRErrorListener.cpp
@@ -0,0 +1,10 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "ANTLRErrorListener.h"
+
+antlr4::ANTLRErrorListener::~ANTLRErrorListener()  // out-of-line definition of the virtual destructor declared in ANTLRErrorListener.h; the body is empty
+{
+}
diff --git a/lib/antlr4-cpp-runtime/ANTLRErrorListener.h b/lib/antlr4-cpp-runtime/ANTLRErrorListener.h
new file mode 100755
index 0000000..d6efad1
--- /dev/null
+++ b/lib/antlr4-cpp-runtime/ANTLRErrorListener.h
@@ -0,0 +1,167 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "RecognitionException.h"
+
+namespace antlrcpp {
+ class BitSet;
+}
+
+namespace antlr4 {
+
+ /// How to emit recognition errors (an interface in Java).
+ class ANTLR4CPP_PUBLIC ANTLRErrorListener {
+ public:
+ virtual ~ANTLRErrorListener();
+
+ ///
Each full-context prediction which does not result in a syntax error + * will call either {@link #reportContextSensitivity} or + * {@link #reportAmbiguity}.
+ * + *When {@code ambigAlts} is not null, it contains the set of potentially + * viable alternatives identified by the prediction algorithm. When + * {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the + * represented alternatives from the {@code configs} argument.
+ * + *When {@code exact} is {@code true}, all of the potentially + * viable alternatives are truly viable, i.e. this is reporting an exact + * ambiguity. When {@code exact} is {@code false}, at least two of + * the potentially viable alternatives are viable for the current input, but + * the prediction algorithm terminated as soon as it determined that at + * least the minimum potentially viable alternative is truly + * viable.
+ * + *When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction + * mode is used, the parser is required to identify exact ambiguities so + * {@code exact} will always be {@code true}.
+ * + *This method is not used by lexers.
+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input input where the ambiguity was identified + * @param exact {@code true} if the ambiguity is exactly known, otherwise + * {@code false}. This is always {@code true} when + * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used. + * @param ambigAlts the potentially ambiguous alternatives, or {@code null} + * to indicate that the potentially ambiguous alternatives are the complete + * set of represented alternatives in {@code configs} + * @param configs the ATN configuration set where the ambiguity was + * identified + */ + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) = 0; + + /** + * This method is called when an SLL conflict occurs and the parser is about + * to use the full context information to make an LL decision. + * + *If one or more configurations in {@code configs} contains a semantic + * predicate, the predicates are evaluated before this method is called. The + * subset of alternatives which are still viable after predicates are + * evaluated is reported in {@code conflictingAlts}.
+ * + *This method is not used by lexers.
+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the SLL conflict occurred + * @param conflictingAlts The specific conflicting alternatives. If this is + * {@code null}, the conflicting alternatives are all alternatives + * represented in {@code configs}. At the moment, conflictingAlts is non-null + * (for the reference implementation, but Sam's optimized version can see this + * as null). + * @param configs the ATN configuration set where the SLL conflict was + * detected + */ + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) = 0; + + /** + * This method is called by the parser when a full-context prediction has a + * unique result. + * + *Each full-context prediction which does not result in a syntax error + * will call either {@link #reportContextSensitivity} or + * {@link #reportAmbiguity}.
+ * + *For prediction implementations that only evaluate full-context + * predictions when an SLL conflict is found (including the default + * {@link ParserATNSimulator} implementation), this method reports cases + * where SLL conflicts were resolved to unique full-context predictions, + * i.e. the decision was context-sensitive. This report does not necessarily + * indicate a problem, and it may appear even in completely unambiguous + * grammars.
+ * + *{@code configs} may have more than one represented alternative if the + * full-context prediction algorithm does not evaluate predicates before + * beginning the full-context prediction. In all cases, the final prediction + * is passed as the {@code prediction} argument.
+ * + *Note that the definition of "context sensitivity" in this method + * differs from the concept in {@link DecisionInfo#contextSensitivities}. + * This method reports all instances where an SLL conflict occurred but LL + * parsing produced a unique result, whether or not that unique result + * matches the minimum alternative in the SLL conflicting set.
+ * + *This method is not used by lexers.
+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the context sensitivity was + * finally determined + * @param prediction the unambiguous result of the full-context prediction + * @param configs the ATN configuration set where the unambiguous prediction + * was determined + */ + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.cpp b/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.cpp new file mode 100644 index 0000000..1655a57 --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.cpp @@ -0,0 +1,10 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ANTLRErrorStrategy.h" + +antlr4::ANTLRErrorStrategy::~ANTLRErrorStrategy() +{ +} diff --git a/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.h b/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.h new file mode 100755 index 0000000..a3eecd1 --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.h @@ -0,0 +1,121 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" + +namespace antlr4 { + + ///This method handles the consumption of any tokens - the caller should + * not call {@link Parser#consume} after a successful recovery.
+ * + *Note that the calling code will not report an error if this method + * returns successfully. The error strategy implementation is responsible + * for calling {@link Parser#notifyErrorListeners} as appropriate.
+ * + * @param recognizer the parser instance + * @throws RecognitionException if the error strategy was not able to + * recover from the unexpected input symbol + */ + virtual Token* recoverInline(Parser *recognizer) = 0; + + ///+ * This error strategy is useful in the following scenarios.
+ * + *+ * {@code myparser.setErrorHandler(new BailErrorStrategy());}
+ * + * @see Parser#setErrorHandler(ANTLRErrorStrategy) + */ + class ANTLR4CPP_PUBLIC BailErrorStrategy : public DefaultErrorStrategy { + ///+ * This token stream ignores the value of {@link Token#getChannel}. If your + * parser requires the token stream filter tokens to only those on a particular + * channel, such as {@link Token#DEFAULT_CHANNEL} or + * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a + * {@link CommonTokenStream}.
+ */ + class ANTLR4CPP_PUBLIC BufferedTokenStream : public TokenStream { + public: + BufferedTokenStream(TokenSource *tokenSource); + BufferedTokenStream(const BufferedTokenStream& other) = delete; + + BufferedTokenStream& operator = (const BufferedTokenStream& other) = delete; + + virtual TokenSource* getTokenSource() const override; + virtual size_t index() override; + virtual ssize_t mark() override; + + virtual void release(ssize_t marker) override; + virtual void reset(); + virtual void seek(size_t index) override; + + virtual size_t size() override; + virtual void consume() override; + + virtual Token* get(size_t i) const override; + + /// Get all tokens from start..stop inclusively. + virtual std::vectorThis field is set to -1 when the stream is first constructed or when + * {@link #setTokenSource} is called, indicating that the first token has + * not yet been fetched from the token source. For additional information, + * see the documentation of {@link IntStream} for a description of + * Initializing Methods.
+ */ + // ml: since -1 requires to make this member signed for just this single aspect we use a member _needSetup instead. + // Use bool isInitialized() to find out if this stream has started reading. + size_t _p; + + /** + * Indicates whether the {@link Token#EOF} token has been fetched from + * {@link #tokenSource} and added to {@link #tokens}. This field improves + * performance for the following cases: + * + *+ * If {@code i} specifies an index at or after the EOF token, the EOF token + * index is returned. This is due to the fact that the EOF token is treated + * as though it were on every channel.
+ */ + virtual ssize_t previousTokenOnChannel(size_t i, size_t channel); + + virtual std::vector+ * These properties share a field to reduce the memory footprint of + * {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from + * the same source and input stream share a reference to the same + * {@link Pair} containing these values.
+ */ + + std::pair+ * If {@code oldToken} is also a {@link CommonToken} instance, the newly + * constructed token will share a reference to the {@link #text} field and + * the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will + * be assigned the result of calling {@link #getText}, and {@link #source} + * will be constructed from the result of {@link Token#getTokenSource} and + * {@link Token#getInputStream}.
+ * + * @param oldToken The token to copy. + */ + CommonToken(Token *oldToken); + + virtual size_t getType() const override; + + /** + * Explicitly set the text for this token. If {code text} is not + * {@code null}, then {@link #getText} will return this value rather than + * extracting the text from the input. + * + * @param text The explicit text of the token, or {@code null} if the text + * should be obtained from the input along with the start and stop indexes + * of the token. + */ + virtual void setText(const std::string &text) override; + virtual std::string getText() const override; + + virtual void setLine(size_t line) override; + virtual size_t getLine() const override; + + virtual size_t getCharPositionInLine() const override; + virtual void setCharPositionInLine(size_t charPositionInLine) override; + + virtual size_t getChannel() const override; + virtual void setChannel(size_t channel) override; + + virtual void setType(size_t type) override; + + virtual size_t getStartIndex() const override; + virtual void setStartIndex(size_t start); + + virtual size_t getStopIndex() const override; + virtual void setStopIndex(size_t stop); + + virtual size_t getTokenIndex() const override; + virtual void setTokenIndex(size_t index) override; + + virtual TokenSource *getTokenSource() const override; + virtual CharStream *getInputStream() const override; + + virtual std::string toString() const override; + + virtual std::string toString(Recognizer *r) const; + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/CommonTokenFactory.cpp b/lib/antlr4-cpp-runtime/CommonTokenFactory.cpp new file mode 100755 index 0000000..b04d68f --- /dev/null +++ b/lib/antlr4-cpp-runtime/CommonTokenFactory.cpp @@ -0,0 +1,39 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "misc/Interval.h" +#include "CommonToken.h" +#include "CharStream.h" + +#include "CommonTokenFactory.h" + +using namespace antlr4; + +const std::unique_ptr+ * This token factory does not explicitly copy token text when constructing + * tokens.
+ */ + static const std::unique_ptr+ * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.
+ */ + const bool copyText; + + public: + /** + * Constructs a {@link CommonTokenFactory} with the specified value for + * {@link #copyText}. + * + *+ * When {@code copyText} is {@code false}, the {@link #DEFAULT} instance + * should be used instead of constructing a new instance.
+ * + * @param copyText The value for {@link #copyText}. + */ + CommonTokenFactory(bool copyText); + + /** + * Constructs a {@link CommonTokenFactory} with {@link #copyText} set to + * {@code false}. + * + *+ * The {@link #DEFAULT} instance should be used instead of calling this + * directly.
+ */ + CommonTokenFactory(); + + virtual std::unique_ptr+ * This token stream provides access to all tokens by index or when calling + * methods like {@link #getText}. The channel filtering is only used for code + * accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and + * {@link #LB}.
+ * + *+ * By default, tokens are placed on the default channel + * ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the + * {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to + * call {@link Lexer#setChannel}. + *
+ * + *+ * Note: lexer rules which use the {@code ->skip} lexer command or call + * {@link Lexer#skip} do not produce tokens at all, so input text matched by + * such a rule will not be available as part of the token stream, regardless of + * channel.
+ */ + class ANTLR4CPP_PUBLIC CommonTokenStream : public BufferedTokenStream { + public: + /** + * Constructs a new {@link CommonTokenStream} using the specified token + * source and the default token channel ({@link Token#DEFAULT_CHANNEL}). + * + * @param tokenSource The token source. + */ + CommonTokenStream(TokenSource *tokenSource); + + /** + * Constructs a new {@link CommonTokenStream} using the specified token + * source and filtering tokens to the specified channel. Only tokens whose + * {@link Token#getChannel} matches {@code channel} or have the + * {@link Token#getType} equal to {@link Token#EOF} will be returned by the + * token stream lookahead methods. + * + * @param tokenSource The token source. + * @param channel The channel to use for filtering tokens. + */ + CommonTokenStream(TokenSource *tokenSource, size_t channel); + + virtual Token* LT(ssize_t k) override; + + /// Count EOF just once. + virtual int getNumberOfOnChannelTokens(); + + protected: + /** + * Specifies the channel to use for filtering tokens. + * + *+ * The default value is {@link Token#DEFAULT_CHANNEL}, which matches the + * default channel assigned to tokens created by the lexer.
+ */ + size_t channel; + + virtual ssize_t adjustSeekIndex(size_t i) override; + + virtual Token* LB(size_t k) override; + + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/ConsoleErrorListener.cpp b/lib/antlr4-cpp-runtime/ConsoleErrorListener.cpp new file mode 100755 index 0000000..c7925e4 --- /dev/null +++ b/lib/antlr4-cpp-runtime/ConsoleErrorListener.cpp @@ -0,0 +1,15 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ConsoleErrorListener.h" + +using namespace antlr4; + +ConsoleErrorListener ConsoleErrorListener::INSTANCE; + +void ConsoleErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, + size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr /*e*/) { + std::cerr << "line " << line << ":" << charPositionInLine << " " << msg << std::endl; +} diff --git a/lib/antlr4-cpp-runtime/ConsoleErrorListener.h b/lib/antlr4-cpp-runtime/ConsoleErrorListener.h new file mode 100755 index 0000000..65c6f8c --- /dev/null +++ b/lib/antlr4-cpp-runtime/ConsoleErrorListener.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "BaseErrorListener.h" + +namespace antlr4 { + + class ANTLR4CPP_PUBLIC ConsoleErrorListener : public BaseErrorListener { + public: + /** + * Provides a default instance of {@link ConsoleErrorListener}. + */ + static ConsoleErrorListener INSTANCE; + + /** + * {@inheritDoc} + * + *+ * This implementation prints messages to {@link System#err} containing the + * values of {@code line}, {@code charPositionInLine}, and {@code msg} using + * the following format.
+ * + *+ * line line:charPositionInLine msg + *+ */ + virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/DefaultErrorStrategy.cpp b/lib/antlr4-cpp-runtime/DefaultErrorStrategy.cpp new file mode 100755 index 0000000..54d35ee --- /dev/null +++ b/lib/antlr4-cpp-runtime/DefaultErrorStrategy.cpp @@ -0,0 +1,336 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "NoViableAltException.h" +#include "misc/IntervalSet.h" +#include "atn/ParserATNSimulator.h" +#include "InputMismatchException.h" +#include "FailedPredicateException.h" +#include "ParserRuleContext.h" +#include "atn/RuleTransition.h" +#include "atn/ATN.h" +#include "atn/ATNState.h" +#include "support/StringUtils.h" +#include "support/Casts.h" +#include "Parser.h" +#include "CommonToken.h" +#include "Vocabulary.h" + +#include "DefaultErrorStrategy.h" + +using namespace antlr4; +using namespace antlr4::atn; + +using namespace antlrcpp; + +DefaultErrorStrategy::DefaultErrorStrategy() { + InitializeInstanceFields(); +} + +DefaultErrorStrategy::~DefaultErrorStrategy() { +} + +void DefaultErrorStrategy::reset(Parser *recognizer) { + _errorSymbols.clear(); + endErrorCondition(recognizer); +} + +void DefaultErrorStrategy::beginErrorCondition(Parser * /*recognizer*/) { + errorRecoveryMode = true; +} + +bool DefaultErrorStrategy::inErrorRecoveryMode(Parser * /*recognizer*/) { + return errorRecoveryMode; +} + +void DefaultErrorStrategy::endErrorCondition(Parser * /*recognizer*/) { + errorRecoveryMode = false; + lastErrorIndex = -1; +} + +void DefaultErrorStrategy::reportMatch(Parser *recognizer) { + endErrorCondition(recognizer); +} + +void 
DefaultErrorStrategy::reportError(Parser *recognizer, const RecognitionException &e) { + // If we've already reported an error and have not matched a token + // yet successfully, don't report any errors. + if (inErrorRecoveryMode(recognizer)) { + return; // don't report spurious errors + } + + beginErrorCondition(recognizer); + if (is
Implements Jim Idle's magic sync mechanism in closures and optional + * subrules. E.g.,
+ * + *+ * a : sync ( stuff sync )* ; + * sync : {consume to what can follow sync} ; + *+ * + * At the start of a sub rule upon error, {@link #sync} performs single + * token deletion, if possible. If it can't do that, it bails on the current + * rule and uses the default error recovery, which consumes until the + * resynchronization set of the current rule. + * + *
If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block + * with an empty alternative), then the expected set includes what follows + * the subrule.
+ * + *During loop iteration, it consumes until it sees a token that can start a + * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to + * stay in the loop as long as possible.
+ * + *ORIGINS
+ * + *Previous versions of ANTLR did a poor job of their recovery within loops. + * A single mismatch token or missing token would force the parser to bail + * out of the entire rules surrounding the loop. So, for rule
+ * + *+ * classDef : 'class' ID '{' member* '}' + *+ * + * input with an extra token between members would force the parser to + * consume until it found the next class definition rather than the next + * member definition of the current class. + * + *
This functionality cost a little bit of effort because the parser has to + * compare token set at the start of the loop and at each iteration. If for + * some reason speed is suffering for you, you can turn off this + * functionality by simply overriding this method as a blank { }.
+ */ + virtual void sync(Parser *recognizer) override; + + ///This method is called when {@link #singleTokenDeletion} identifies + * single-token deletion as a viable recovery strategy for a mismatched + * input error.
+ * + *The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}.
+ * + * @param recognizer the parser instance + */ + virtual void reportUnwantedToken(Parser *recognizer); + + /** + * This method is called to report a syntax error which requires the + * insertion of a missing token into the input stream. At the time this + * method is called, the missing token has not yet been inserted. When this + * method returns, {@code recognizer} is in error recovery mode. + * + *This method is called when {@link #singleTokenInsertion} identifies + * single-token insertion as a viable recovery strategy for a mismatched + * input error.
+ * + *The default implementation simply returns if the handler is already in + * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to + * enter error recovery mode, followed by calling + * {@link Parser#notifyErrorListeners}.
+ * + * @param recognizer the parser instance + */ + virtual void reportMissingToken(Parser *recognizer); + + public: + /** + * {@inheritDoc} + * + *The default implementation attempts to recover from the mismatched input + * by using single token insertion and deletion as described below. If the + * recovery attempt fails, this method throws an + * {@link InputMismatchException}.
+ * + *EXTRA TOKEN (single token deletion)
+ * + *{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the + * right token, however, then assume {@code LA(1)} is some extra spurious + * token and delete it. Then consume and return the next token (which was + * the {@code LA(2)} token) as the successful result of the match operation.
+ * + *This recovery strategy is implemented by {@link #singleTokenDeletion}.
+ * + *MISSING TOKEN (single token insertion)
+ * + *If current token (at {@code LA(1)}) is consistent with what could come + * after the expected {@code LA(1)} token, then assume the token is missing + * and use the parser's {@link TokenFactory} to create it on the fly. The + * "insertion" is performed by returning the created token as the successful + * result of the match operation.
+ * + *This recovery strategy is implemented by {@link #singleTokenInsertion}.
+ * + *EXAMPLE
+ * + *For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When + * the parser returns from the nested call to {@code expr}, it will have + * call chain:
+ * + *+ * stat → expr → atom + *+ * + * and it will be trying to match the {@code ')'} at this point in the + * derivation: + * + *
+ * => ID '=' '(' INT ')' ('+' atom)* ';' + * ^ + *+ * + * The attempt to match {@code ')'} will fail when it sees {@code ';'} and + * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} + * is in the set of tokens that can follow the {@code ')'} token reference + * in rule {@code atom}. It can assume that you forgot the {@code ')'}. + */ + virtual Token* recoverInline(Parser *recognizer) override; + + ///
+ /// IntStream stream = ...; + /// int index = -1; + /// int mark = stream.mark(); + /// try { + /// index = stream.index(); + /// // perform work here... + /// } finally { + /// if (index != -1) { + /// stream.seek(index); + /// } + /// stream.release(mark); + /// } + ///+ ///
+ * {@link ParserRuleContext} does not include field storage for the rule index + * since the context classes created by the code generator override the + * {@link #getRuleIndex} method to return the correct value for that context. + * Since the parser interpreter does not use the context classes generated for a + * parser, this class (with slightly more memory overhead per node) is used to + * provide equivalent functionality.
+ */ + class ANTLR4CPP_PUBLIC InterpreterRuleContext : public ParserRuleContext { + public: + InterpreterRuleContext(); + + /** + * Constructs a new {@link InterpreterRuleContext} with the specified + * parent, invoking state, and rule index. + * + * @param parent The parent context. + * @param invokingStateNumber The invoking state number. + * @param ruleIndex The rule index for the current context. + */ + InterpreterRuleContext(ParserRuleContext *parent, size_t invokingStateNumber, size_t ruleIndex); + + virtual size_t getRuleIndex() const override; + + protected: + /** This is the backing field for {@link #getRuleIndex}. */ + const size_t _ruleIndex = INVALID_INDEX; +}; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/Lexer.cpp b/lib/antlr4-cpp-runtime/Lexer.cpp new file mode 100755 index 0000000..b0385c5 --- /dev/null +++ b/lib/antlr4-cpp-runtime/Lexer.cpp @@ -0,0 +1,294 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +#include "atn/LexerATNSimulator.h" +#include "Exceptions.h" +#include "misc/Interval.h" +#include "CommonTokenFactory.h" +#include "LexerNoViableAltException.h" +#include "ANTLRErrorListener.h" +#include "support/CPPUtils.h" +#include "CommonToken.h" + +#include "Lexer.h" + +#define DEBUG_LEXER 0 + +using namespace antlrcpp; +using namespace antlr4; + +Lexer::Lexer() : Recognizer() { + InitializeInstanceFields(); + _input = nullptr; +} + +Lexer::Lexer(CharStream *input) : Recognizer(), _input(input) { + InitializeInstanceFields(); +} + +void Lexer::reset() { + // wack Lexer state variables + _input->seek(0); // rewind the input + + _syntaxErrors = 0; + token.reset(); + type = Token::INVALID_TYPE; + channel = Token::DEFAULT_CHANNEL; + tokenStartCharIndex = INVALID_INDEX; + tokenStartCharPositionInLine = 0; + tokenStartLine = 0; + type = 0; + _text = ""; + + hitEOF = false; + mode = Lexer::DEFAULT_MODE; + modeStack.clear(); + + getInterpreter+ /// ParseTree t = parser.expr(); + /// ParseTreePattern p = parser.compileParseTreePattern("+ ///+0", MyParser.RULE_expr); + /// ParseTreeMatch m = p.match(t); + /// String id = m.get("ID"); + ///
+ /// A B + /// ^ + ///+ /// + /// If the parser is not in error recovery mode, the consumed symbol is added + /// to the parse tree using
+ /// return getExpectedTokens().contains(symbol); + ///+ ///