diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 3cea67f..f1e0848 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: platform: [windows-latest, macos-latest, ubuntu-latest] - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.12"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 3f5056a..25ab335 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -10,9 +10,17 @@ on: types: - published +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event.action == 'published' }} + +defaults: + run: + shell: bash -l {0} + jobs: - build_sdist_and_wheel: - name: Build SDist and Wheel + build_sdist: + name: Build Source runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -21,32 +29,85 @@ jobs: submodules: true - name: Build SDist - run: pipx run build + run: pipx run build --sdist - name: Check metadata run: pipx run twine check dist/* - uses: actions/upload-artifact@v4 with: - path: dist/* + name: dist + path: dist/*.tar.gz + + cibuildwheel: + name: "${{ matrix.variant.platform }} on ${{ matrix.variant.os }}" + runs-on: ${{ matrix.variant.os }} + strategy: + fail-fast: false + matrix: + variant: + - { os: ubuntu-22.04, platform: 'manylinux' } + - { os: ubuntu-22.04, platform: 'musllinux' } + - { os: macos-13, platform: 'macosx' } # Intel support + - { os: macos-14, platform: 'macosx' } # Apple silicon support + - { os: windows-2022, platform: "win" } + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: 3.8 + if: runner.os == 'macOS' && runner.arch == 'ARM64' + + - uses: pypa/cibuildwheel@v2.20.0 + env: + CIBW_BUILD: "*-${{ matrix.variant.platform }}*" + MACOSX_DEPLOYMENT_TARGET: "10.15" + + - name: Verify clean directory + run: git diff --exit-code + shell: bash + + - uses: actions/upload-artifact@v4 + with: + name: "cibw-wheels-${{ matrix.variant.platform }}-${{ matrix.variant.os }}" + path: wheelhouse/*.whl upload_all: name: Upload if release - needs: [build_sdist_and_wheel] + needs: [build_sdist, cibuildwheel] runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/p/mccode_antlr permissions: id-token: write - if: github.event_name == 'release' && github.event.action == 'published' + contents: write + if: github.event_name == 'release' steps: - uses: actions/setup-python@v5 - uses: actions/download-artifact@v4 + id: download + with: + path: artifacts + + - name: Move artifacts into a single folder + run: | + mkdir dist + find ${{ steps.download.outputs.download-path }} -type f -regex ".*\.\(tar\.gz\|whl\)" -exec mv {} dist/. \; + + - name: Attach artifacts to GitHub tagged draft release + uses: ncipollo/release-action@v1 with: - name: artifact - path: dist + allowUpdates: true + draft: ${{ github.event.action != 'published' }} + artifacts: "dist/*.whl,dist/*.tar.gz" - - uses: pypa/gh-action-pypi-publish@release/v1 + - name: Publish artifacts to PyPI + if: ${{ github.event.action == 'published' }} + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index 7e2978b..b46cec8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ *build/ *.egg-info/ *.c +*.so +*.dll +*.dynlib diff --git a/lib/antlr4-cpp-runtime/ANTLRErrorListener.cpp b/lib/antlr4-cpp-runtime/ANTLRErrorListener.cpp new file mode 100644 index 0000000..6ceadb8 --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRErrorListener.cpp @@ -0,0 +1,10 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ANTLRErrorListener.h" + +antlr4::ANTLRErrorListener::~ANTLRErrorListener() +{ +} diff --git a/lib/antlr4-cpp-runtime/ANTLRErrorListener.h b/lib/antlr4-cpp-runtime/ANTLRErrorListener.h new file mode 100755 index 0000000..d6efad1 --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRErrorListener.h @@ -0,0 +1,167 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "RecognitionException.h" + +namespace antlrcpp { + class BitSet; +} + +namespace antlr4 { + + /// How to emit recognition errors (an interface in Java). + class ANTLR4CPP_PUBLIC ANTLRErrorListener { + public: + virtual ~ANTLRErrorListener(); + + /// + /// Upon syntax error, notify any interested parties. This is not how to + /// recover from errors or compute error messages. + /// specifies how to recover from syntax errors and how to compute error + /// messages. This listener's job is simply to emit a computed message, + /// though it has enough information to create its own message in many cases. + ///

+ /// The is non-null for all syntax errors except + /// when we discover mismatched token errors that we can recover from + /// in-line, without returning from the surrounding rule (via the single + /// token insertion and deletion mechanism). + ///

+ /// + /// What parser got the error. From this + /// object, you can access the context as well + /// as the input stream. + /// + /// The offending token in the input token + /// stream, unless recognizer is a lexer (then it's null). If + /// no viable alternative error, {@code e} has token at which we + /// started production for the decision. + /// + /// The line number in the input where the error occurred. + /// + /// The character position within that line where the error occurred. + /// + /// The message to emit. + /// + /// The exception generated by the parser that led to + /// the reporting of an error. It is null in the case where + /// the parser was able to recover in line without exiting the + /// surrounding rule. + virtual void syntaxError(Recognizer *recognizer, Token *offendingSymbol, size_t line, + size_t charPositionInLine, const std::string &msg, std::exception_ptr e) = 0; + + /** + * This method is called by the parser when a full-context prediction + * results in an ambiguity. + * + *

Each full-context prediction which does not result in a syntax error + * will call either {@link #reportContextSensitivity} or + * {@link #reportAmbiguity}.

+ * + *

When {@code ambigAlts} is not null, it contains the set of potentially + * viable alternatives identified by the prediction algorithm. When + * {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the + * represented alternatives from the {@code configs} argument.

+ * + *

When {@code exact} is {@code true}, all of the potentially + * viable alternatives are truly viable, i.e. this is reporting an exact + * ambiguity. When {@code exact} is {@code false}, at least two of + * the potentially viable alternatives are viable for the current input, but + * the prediction algorithm terminated as soon as it determined that at + * least the minimum potentially viable alternative is truly + * viable.

+ * + *

When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction + * mode is used, the parser is required to identify exact ambiguities so + * {@code exact} will always be {@code true}.

+ * + *

This method is not used by lexers.

+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input input where the ambiguity was identified + * @param exact {@code true} if the ambiguity is exactly known, otherwise + * {@code false}. This is always {@code true} when + * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used. + * @param ambigAlts the potentially ambiguous alternatives, or {@code null} + * to indicate that the potentially ambiguous alternatives are the complete + * set of represented alternatives in {@code configs} + * @param configs the ATN configuration set where the ambiguity was + * identified + */ + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) = 0; + + /** + * This method is called when an SLL conflict occurs and the parser is about + * to use the full context information to make an LL decision. + * + *

If one or more configurations in {@code configs} contains a semantic + * predicate, the predicates are evaluated before this method is called. The + * subset of alternatives which are still viable after predicates are + * evaluated is reported in {@code conflictingAlts}.

+ * + *

This method is not used by lexers.

+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the SLL conflict occurred + * @param conflictingAlts The specific conflicting alternatives. If this is + * {@code null}, the conflicting alternatives are all alternatives + * represented in {@code configs}. At the moment, conflictingAlts is non-null + * (for the reference implementation, but Sam's optimized version can see this + * as null). + * @param configs the ATN configuration set where the SLL conflict was + * detected + */ + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) = 0; + + /** + * This method is called by the parser when a full-context prediction has a + * unique result. + * + *

Each full-context prediction which does not result in a syntax error + * will call either {@link #reportContextSensitivity} or + * {@link #reportAmbiguity}.

+ * + *

For prediction implementations that only evaluate full-context + * predictions when an SLL conflict is found (including the default + * {@link ParserATNSimulator} implementation), this method reports cases + * where SLL conflicts were resolved to unique full-context predictions, + * i.e. the decision was context-sensitive. This report does not necessarily + * indicate a problem, and it may appear even in completely unambiguous + * grammars.

+ * + *

{@code configs} may have more than one represented alternative if the + * full-context prediction algorithm does not evaluate predicates before + * beginning the full-context prediction. In all cases, the final prediction + * is passed as the {@code prediction} argument.

+ * + *

Note that the definition of "context sensitivity" in this method + * differs from the concept in {@link DecisionInfo#contextSensitivities}. + * This method reports all instances where an SLL conflict occurred but LL + * parsing produced a unique result, whether or not that unique result + * matches the minimum alternative in the SLL conflicting set.

+ * + *

This method is not used by lexers.

+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the context sensitivity was + * finally determined + * @param prediction the unambiguous result of the full-context prediction + * @param configs the ATN configuration set where the unambiguous prediction + * was determined + */ + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.cpp b/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.cpp new file mode 100644 index 0000000..1655a57 --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.cpp @@ -0,0 +1,10 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ANTLRErrorStrategy.h" + +antlr4::ANTLRErrorStrategy::~ANTLRErrorStrategy() +{ +} diff --git a/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.h b/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.h new file mode 100755 index 0000000..a3eecd1 --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRErrorStrategy.h @@ -0,0 +1,121 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Token.h" + +namespace antlr4 { + + /// + /// The interface for defining strategies to deal with syntax errors encountered + /// during a parse by ANTLR-generated parsers. We distinguish between three + /// different kinds of errors: + /// + /// + /// + /// Implementations of this interface report syntax errors by calling + /// . + ///

+ /// TODO: what to do about lexers + ///

+ class ANTLR4CPP_PUBLIC ANTLRErrorStrategy { + public: + + /// + /// Reset the error handler state for the specified {@code recognizer}. + /// the parser instance + virtual ~ANTLRErrorStrategy(); + + virtual void reset(Parser *recognizer) = 0; + + /** + * This method is called when an unexpected symbol is encountered during an + * inline match operation, such as {@link Parser#match}. If the error + * strategy successfully recovers from the match failure, this method + * returns the {@link Token} instance which should be treated as the + * successful result of the match. + * + *

This method handles the consumption of any tokens - the caller should + * not call {@link Parser#consume} after a successful recovery.

+ * + *

Note that the calling code will not report an error if this method + * returns successfully. The error strategy implementation is responsible + * for calling {@link Parser#notifyErrorListeners} as appropriate.

+ * + * @param recognizer the parser instance + * @throws RecognitionException if the error strategy was not able to + * recover from the unexpected input symbol + */ + virtual Token* recoverInline(Parser *recognizer) = 0; + + /// + /// This method is called to recover from exception {@code e}. This method is + /// called after by the default exception handler + /// generated for a rule method. + /// + /// + /// the parser instance + /// the recognition exception to recover from + /// if the error strategy could not recover from + /// the recognition exception + virtual void recover(Parser *recognizer, std::exception_ptr e) = 0; + + /// + /// This method provides the error handler with an opportunity to handle + /// syntactic or semantic errors in the input stream before they result in a + /// . + ///

+ /// The generated code currently contains calls to after + /// entering the decision state of a closure block ({@code (...)*} or + /// {@code (...)+}). + ///

+ /// For an implementation based on Jim Idle's "magic sync" mechanism, see + /// . + ///

+ /// + /// the parser instance + /// if an error is detected by the error + /// strategy but cannot be automatically recovered at the current state in + /// the parsing process + virtual void sync(Parser *recognizer) = 0; + + /// + /// Tests whether or not {@code recognizer} is in the process of recovering + /// from an error. In error recovery mode, adds + /// symbols to the parse tree by calling + /// {@link Parser#createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)} instead of + /// {@link Parser#createTerminalNode(ParserRuleContext, Token)}. + /// + /// the parser instance + /// {@code true} if the parser is currently recovering from a parse + /// error, otherwise {@code false} + virtual bool inErrorRecoveryMode(Parser *recognizer) = 0; + + /// + /// This method is called by when the parser successfully matches an input + /// symbol. + /// + /// the parser instance + virtual void reportMatch(Parser *recognizer) = 0; + + /// + /// Report any kind of . This method is called by + /// the default exception handler generated for a rule method. + /// + /// the parser instance + /// the recognition exception to report + virtual void reportError(Parser *recognizer, const RecognitionException &e) = 0; + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/ANTLRFileStream.cpp b/lib/antlr4-cpp-runtime/ANTLRFileStream.cpp new file mode 100755 index 0000000..674817a --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRFileStream.cpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "ANTLRFileStream.h" + +using namespace antlr4; + +void ANTLRFileStream::loadFromFile(const std::string &fileName) { + _fileName = fileName; + if (_fileName.empty()) { + return; + } + + std::ifstream stream(fileName, std::ios::binary); + + ANTLRInputStream::load(stream); +} + +std::string ANTLRFileStream::getSourceName() const { + return _fileName; +} diff --git a/lib/antlr4-cpp-runtime/ANTLRFileStream.h b/lib/antlr4-cpp-runtime/ANTLRFileStream.h new file mode 100755 index 0000000..6c7d619 --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRFileStream.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRInputStream.h" + +namespace antlr4 { + + /// This is an ANTLRInputStream that is loaded from a file all at once + /// when you construct the object (or call load()). + // TODO: this class needs testing. + class ANTLR4CPP_PUBLIC ANTLRFileStream : public ANTLRInputStream { + public: + ANTLRFileStream() = default; + ANTLRFileStream(const std::string &) = delete; + ANTLRFileStream(const char *data, size_t length) = delete; + ANTLRFileStream(std::istream &stream) = delete; + + // Assumes a file name encoded in UTF-8 and file content in the same encoding (with or w/o BOM). + virtual void loadFromFile(const std::string &fileName); + virtual std::string getSourceName() const override; + + private: + std::string _fileName; // UTF-8 encoded file name. + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/ANTLRInputStream.cpp b/lib/antlr4-cpp-runtime/ANTLRInputStream.cpp new file mode 100755 index 0000000..b6470af --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRInputStream.cpp @@ -0,0 +1,180 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include + +#include "Exceptions.h" +#include "misc/Interval.h" +#include "IntStream.h" + +#include "support/Utf8.h" +#include "support/CPPUtils.h" + +#include "ANTLRInputStream.h" + +using namespace antlr4; +using namespace antlrcpp; + +using misc::Interval; + +ANTLRInputStream::ANTLRInputStream() { + InitializeInstanceFields(); +} + +ANTLRInputStream::ANTLRInputStream(std::string_view input): ANTLRInputStream() { + load(input.data(), input.length()); +} + +ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) { + load(data, length); +} + +ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() { + load(stream); +} + +void ANTLRInputStream::load(const std::string &input, bool lenient) { + load(input.data(), input.size(), lenient); +} + +void ANTLRInputStream::load(const char *data, size_t length, bool lenient) { + // Remove the UTF-8 BOM if present. + const char *bom = "\xef\xbb\xbf"; + if (length >= 3 && strncmp(data, bom, 3) == 0) { + data += 3; + length -= 3; + } + if (lenient) { + _data = Utf8::lenientDecode(std::string_view(data, length)); + } else { + auto maybe_utf32 = Utf8::strictDecode(std::string_view(data, length)); + if (!maybe_utf32.has_value()) { + throw IllegalArgumentException("UTF-8 string contains an illegal byte sequence"); + } + _data = std::move(maybe_utf32).value(); + } + p = 0; +} + +void ANTLRInputStream::load(std::istream &stream, bool lenient) { + if (!stream.good() || stream.eof()) // No fail, bad or EOF. + return; + + _data.clear(); + + std::string s((std::istreambuf_iterator(stream)), std::istreambuf_iterator()); + load(s.data(), s.length(), lenient); +} + +void ANTLRInputStream::reset() { + p = 0; +} + +void ANTLRInputStream::consume() { + if (p >= _data.size()) { + assert(LA(1) == IntStream::EOF); + throw IllegalStateException("cannot consume EOF"); + } + + if (p < _data.size()) { + p++; + } +} + +size_t ANTLRInputStream::LA(ssize_t i) { + if (i == 0) { + return 0; // undefined + } + + ssize_t position = static_cast(p); + if (i < 0) { + i++; // e.g., translate LA(-1) to use offset i=0; then _data[p+0-1] + if ((position + i - 1) < 0) { + return IntStream::EOF; // invalid; no char before first char + } + } + + if ((position + i - 1) >= static_cast(_data.size())) { + return IntStream::EOF; + } + + return _data[static_cast((position + i - 1))]; +} + +size_t ANTLRInputStream::LT(ssize_t i) { + return LA(i); +} + +size_t ANTLRInputStream::index() { + return p; +} + +size_t ANTLRInputStream::size() { + return _data.size(); +} + +// Mark/release do nothing. We have entire buffer. +ssize_t ANTLRInputStream::mark() { + return -1; +} + +void ANTLRInputStream::release(ssize_t /* marker */) { +} + +void ANTLRInputStream::seek(size_t index) { + if (index <= p) { + p = index; // just jump; don't update stream state (line, ...) + return; + } + // seek forward, consume until p hits index or n (whichever comes first) + index = std::min(index, _data.size()); + while (p < index) { + consume(); + } +} + +std::string ANTLRInputStream::getText(const Interval &interval) { + if (interval.a < 0 || interval.b < 0) { + return ""; + } + + size_t start = static_cast(interval.a); + size_t stop = static_cast(interval.b); + + + if (stop >= _data.size()) { + stop = _data.size() - 1; + } + + size_t count = stop - start + 1; + if (start >= _data.size()) { + return ""; + } + + auto maybeUtf8 = Utf8::strictEncode(std::u32string_view(_data).substr(start, count)); + if (!maybeUtf8.has_value()) { + throw IllegalArgumentException("Input stream contains invalid Unicode code points"); + } + return std::move(maybeUtf8).value(); +} + +std::string ANTLRInputStream::getSourceName() const { + if (name.empty()) { + return IntStream::UNKNOWN_SOURCE_NAME; + } + return name; +} + +std::string ANTLRInputStream::toString() const { + auto maybeUtf8 = Utf8::strictEncode(_data); + if (!maybeUtf8.has_value()) { + throw IllegalArgumentException("Input stream contains invalid Unicode code points"); + } + return std::move(maybeUtf8).value(); +} + +void ANTLRInputStream::InitializeInstanceFields() { + p = 0; +} diff --git a/lib/antlr4-cpp-runtime/ANTLRInputStream.h b/lib/antlr4-cpp-runtime/ANTLRInputStream.h new file mode 100755 index 0000000..413eade --- /dev/null +++ b/lib/antlr4-cpp-runtime/ANTLRInputStream.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include + +#include "CharStream.h" + +namespace antlr4 { + + // Vacuum all input from a stream and then treat it + // like a string. Can also pass in a string or char[] to use. + // Input is expected to be encoded in UTF-8 and converted to UTF-32 internally. + class ANTLR4CPP_PUBLIC ANTLRInputStream : public CharStream { + protected: + /// The data being scanned. + // UTF-32 + std::u32string _data; + + /// 0..n-1 index into string of next char + size_t p; + + public: + /// What is name or source of this char stream? + std::string name; + + ANTLRInputStream(); + + ANTLRInputStream(std::string_view input); + + ANTLRInputStream(const char *data, size_t length); + ANTLRInputStream(std::istream &stream); + + virtual void load(const std::string &input, bool lenient); + virtual void load(const char *data, size_t length, bool lenient); + virtual void load(std::istream &stream, bool lenient); + + virtual void load(const std::string &input) { load(input, false); } + virtual void load(const char *data, size_t length) { load(data, length, false); } + virtual void load(std::istream &stream) { load(stream, false); } + + /// Reset the stream so that it's in the same state it was + /// when the object was created *except* the data array is not + /// touched. + virtual void reset(); + virtual void consume() override; + virtual size_t LA(ssize_t i) override; + virtual size_t LT(ssize_t i); + + /// + /// Return the current input symbol index 0..n where n indicates the + /// last symbol has been read. The index is the index of char to + /// be returned from LA(1). + /// + virtual size_t index() override; + virtual size_t size() override; + + /// + /// mark/release do nothing; we have entire buffer + virtual ssize_t mark() override; + virtual void release(ssize_t marker) override; + + /// + /// consume() ahead until p==index; can't just set p=index as we must + /// update line and charPositionInLine. If we seek backwards, just set p + /// + virtual void seek(size_t index) override; + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getSourceName() const override; + virtual std::string toString() const override; + + private: + void InitializeInstanceFields(); + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/BailErrorStrategy.cpp b/lib/antlr4-cpp-runtime/BailErrorStrategy.cpp new file mode 100755 index 0000000..5fbc011 --- /dev/null +++ b/lib/antlr4-cpp-runtime/BailErrorStrategy.cpp @@ -0,0 +1,61 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Exceptions.h" +#include "ParserRuleContext.h" +#include "InputMismatchException.h" +#include "Parser.h" + +#include "BailErrorStrategy.h" + +using namespace antlr4; + +void BailErrorStrategy::recover(Parser *recognizer, std::exception_ptr e) { + ParserRuleContext *context = recognizer->getContext(); + do { + context->exception = e; + if (context->parent == nullptr) + break; + context = static_cast(context->parent); + } while (true); + + try { + std::rethrow_exception(e); // Throw the exception to be able to catch and rethrow nested. +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + } catch (RecognitionException &inner) { + throw ParseCancellationException(inner.what()); +#else + } catch (RecognitionException & /*inner*/) { + std::throw_with_nested(ParseCancellationException()); +#endif + } +} + +Token* BailErrorStrategy::recoverInline(Parser *recognizer) { + InputMismatchException e(recognizer); + std::exception_ptr exception = std::make_exception_ptr(e); + + ParserRuleContext *context = recognizer->getContext(); + do { + context->exception = exception; + if (context->parent == nullptr) + break; + context = static_cast(context->parent); + } while (true); + + try { + throw e; +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER < 190023026 + } catch (InputMismatchException &inner) { + throw ParseCancellationException(inner.what()); +#else + } catch (InputMismatchException & /*inner*/) { + std::throw_with_nested(ParseCancellationException()); +#endif + } +} + +void BailErrorStrategy::sync(Parser * /*recognizer*/) { +} diff --git a/lib/antlr4-cpp-runtime/BailErrorStrategy.h b/lib/antlr4-cpp-runtime/BailErrorStrategy.h new file mode 100755 index 0000000..2a8c36f --- /dev/null +++ b/lib/antlr4-cpp-runtime/BailErrorStrategy.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "DefaultErrorStrategy.h" + +namespace antlr4 { + + /** + * This implementation of {@link ANTLRErrorStrategy} responds to syntax errors + * by immediately canceling the parse operation with a + * {@link ParseCancellationException}. The implementation ensures that the + * {@link ParserRuleContext#exception} field is set for all parse tree nodes + * that were not completed prior to encountering the error. + * + *

+ * This error strategy is useful in the following scenarios.

+ * + *
    + *
  • Two-stage parsing: This error strategy allows the first + * stage of two-stage parsing to immediately terminate if an error is + * encountered, and immediately fall back to the second stage. In addition to + * avoiding wasted work by attempting to recover from errors here, the empty + * implementation of {@link BailErrorStrategy#sync} improves the performance of + * the first stage.
  • + *
  • Silent validation: When syntax errors are not being + * reported or logged, and the parse result is simply ignored if errors occur, + * the {@link BailErrorStrategy} avoids wasting work on recovering from errors + * when the result will be ignored either way.
  • + *
+ * + *

+ * {@code myparser.setErrorHandler(new BailErrorStrategy());}

+ * + * @see Parser#setErrorHandler(ANTLRErrorStrategy) + */ + class ANTLR4CPP_PUBLIC BailErrorStrategy : public DefaultErrorStrategy { + /// + /// Instead of recovering from exception {@code e}, re-throw it wrapped + /// in a so it is not caught by the + /// rule function catches. Use to get the + /// original . + /// + public: + virtual void recover(Parser *recognizer, std::exception_ptr e) override; + + /// Make sure we don't attempt to recover inline; if the parser + /// successfully recovers, it won't throw an exception. + virtual Token* recoverInline(Parser *recognizer) override; + + /// + /// Make sure we don't attempt to recover from problems in subrules. + virtual void sync(Parser *recognizer) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/BaseErrorListener.cpp b/lib/antlr4-cpp-runtime/BaseErrorListener.cpp new file mode 100755 index 0000000..c035f09 --- /dev/null +++ b/lib/antlr4-cpp-runtime/BaseErrorListener.cpp @@ -0,0 +1,25 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "BaseErrorListener.h" +#include "RecognitionException.h" + +using namespace antlr4; + +void BaseErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/, size_t /*line*/, + size_t /*charPositionInLine*/, const std::string &/*msg*/, std::exception_ptr /*e*/) { +} + +void BaseErrorListener::reportAmbiguity(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/, + size_t /*stopIndex*/, bool /*exact*/, const antlrcpp::BitSet &/*ambigAlts*/, atn::ATNConfigSet * /*configs*/) { +} + +void BaseErrorListener::reportAttemptingFullContext(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/, + size_t /*stopIndex*/, const antlrcpp::BitSet &/*conflictingAlts*/, atn::ATNConfigSet * /*configs*/) { +} + +void BaseErrorListener::reportContextSensitivity(Parser * /*recognizer*/, const dfa::DFA &/*dfa*/, size_t /*startIndex*/, + size_t /*stopIndex*/, size_t /*prediction*/, atn::ATNConfigSet * /*configs*/) { +} diff --git a/lib/antlr4-cpp-runtime/BaseErrorListener.h b/lib/antlr4-cpp-runtime/BaseErrorListener.h new file mode 100755 index 0000000..aad2e5d --- /dev/null +++ b/lib/antlr4-cpp-runtime/BaseErrorListener.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "ANTLRErrorListener.h" + +namespace antlrcpp { + class BitSet; +} + +namespace antlr4 { + + /** + * Provides an empty default implementation of {@link ANTLRErrorListener}. The + * default implementation of each method does nothing, but can be overridden as + * necessary. + */ + class ANTLR4CPP_PUBLIC BaseErrorListener : public ANTLRErrorListener { + + virtual void syntaxError(Recognizer *recognizer, Token * offendingSymbol, size_t line, size_t charPositionInLine, + const std::string &msg, std::exception_ptr e) override; + + virtual void reportAmbiguity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, + const antlrcpp::BitSet &ambigAlts, atn::ATNConfigSet *configs) override; + + virtual void reportAttemptingFullContext(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + const antlrcpp::BitSet &conflictingAlts, atn::ATNConfigSet *configs) override; + + virtual void reportContextSensitivity(Parser *recognizer, const dfa::DFA &dfa, size_t startIndex, size_t stopIndex, + size_t prediction, atn::ATNConfigSet *configs) override; + }; + +} // namespace antlr4 diff --git a/lib/antlr4-cpp-runtime/BufferedTokenStream.cpp b/lib/antlr4-cpp-runtime/BufferedTokenStream.cpp new file mode 100755 index 0000000..241dfe5 --- /dev/null +++ b/lib/antlr4-cpp-runtime/BufferedTokenStream.cpp @@ -0,0 +1,414 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "WritableToken.h" +#include "Lexer.h" +#include "RuleContext.h" +#include "misc/Interval.h" +#include "Exceptions.h" +#include "support/CPPUtils.h" + +#include "BufferedTokenStream.h" + +using namespace antlr4; +using namespace antlrcpp; + +BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource) : _tokenSource(tokenSource){ + InitializeInstanceFields(); +} + +TokenSource* BufferedTokenStream::getTokenSource() const { + return _tokenSource; +} + +size_t BufferedTokenStream::index() { + return _p; +} + +ssize_t BufferedTokenStream::mark() { + return 0; +} + +void BufferedTokenStream::release(ssize_t /*marker*/) { + // no resources to release +} + +void BufferedTokenStream::reset() { + seek(0); +} + +void BufferedTokenStream::seek(size_t index) { + lazyInit(); + _p = adjustSeekIndex(index); +} + +size_t BufferedTokenStream::size() { + return _tokens.size(); +} + +void BufferedTokenStream::consume() { + bool skipEofCheck = false; + if (!_needSetup) { + if (_fetchedEOF) { + // the last token in tokens is EOF. skip check if p indexes any + // fetched token except the last. + skipEofCheck = _p < _tokens.size() - 1; + } else { + // no EOF token in tokens. skip check if p indexes a fetched token. + skipEofCheck = _p < _tokens.size(); + } + } else { + // not yet initialized + skipEofCheck = false; + } + + if (!skipEofCheck && LA(1) == Token::EOF) { + throw IllegalStateException("cannot consume EOF"); + } + + if (sync(_p + 1)) { + _p = adjustSeekIndex(_p + 1); + } +} + +bool BufferedTokenStream::sync(size_t i) { + if (i + 1 < _tokens.size()) + return true; + size_t n = i - _tokens.size() + 1; // how many more elements we need? + + if (n > 0) { + size_t fetched = fetch(n); + return fetched >= n; + } + + return true; +} + +size_t BufferedTokenStream::fetch(size_t n) { + if (_fetchedEOF) { + return 0; + } + + size_t i = 0; + while (i < n) { + std::unique_ptr t(_tokenSource->nextToken()); + + if (is(t.get())) { + (static_cast(t.get()))->setTokenIndex(_tokens.size()); + } + + _tokens.push_back(std::move(t)); + ++i; + + if (_tokens.back()->getType() == Token::EOF) { + _fetchedEOF = true; + break; + } + } + + return i; +} + +Token* BufferedTokenStream::get(size_t i) const { + if (i >= _tokens.size()) { + throw IndexOutOfBoundsException(std::string("token index ") + + std::to_string(i) + + std::string(" out of range 0..") + + std::to_string(_tokens.size() - 1)); + } + return _tokens[i].get(); +} + +std::vector BufferedTokenStream::get(size_t start, size_t stop) { + std::vector subset; + + lazyInit(); + + if (_tokens.empty()) { + return subset; + } + + if (stop >= _tokens.size()) { + stop = _tokens.size() - 1; + } + for (size_t i = start; i <= stop; i++) { + Token *t = _tokens[i].get(); + if (t->getType() == Token::EOF) { + break; + } + subset.push_back(t); + } + return subset; +} + +size_t BufferedTokenStream::LA(ssize_t i) { + return LT(i)->getType(); +} + +Token* BufferedTokenStream::LB(size_t k) { + if (k > _p) { + return nullptr; + } + return _tokens[_p - k].get(); +} + +Token* BufferedTokenStream::LT(ssize_t k) { + lazyInit(); + if (k == 0) { + return nullptr; + } + if (k < 0) { + return LB(-k); + } + + size_t i = _p + k - 1; + sync(i); + if (i >= _tokens.size()) { // return EOF token + // EOF must be last token + return _tokens.back().get(); + } + + return _tokens[i].get(); +} + +ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) { + return i; +} + +void BufferedTokenStream::lazyInit() { + if (_needSetup) { + setup(); + } +} + +void BufferedTokenStream::setup() { + _needSetup = false; + sync(0); + _p = adjustSeekIndex(0); +} + +void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) { + _tokenSource = tokenSource; + _tokens.clear(); + _fetchedEOF = false; + _needSetup = true; +} + +std::vector BufferedTokenStream::getTokens() { + std::vector result; + for (auto &t : _tokens) + result.push_back(t.get()); + return result; +} + +std::vector BufferedTokenStream::getTokens(size_t start, size_t stop) { + return getTokens(start, stop, std::vector()); +} + +std::vector BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector &types) { + lazyInit(); + if (stop >= _tokens.size() || start >= _tokens.size()) { + throw IndexOutOfBoundsException(std::string("start ") + + std::to_string(start) + + std::string(" or stop ") + + std::to_string(stop) + + std::string(" not in 0..") + + std::to_string(_tokens.size() - 1)); + } + + std::vector filteredTokens; + + if (start > stop) { + return filteredTokens; + } + + for (size_t i = start; i <= stop; i++) { + Token *tok = _tokens[i].get(); + + if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) { + filteredTokens.push_back(tok); + } + } + return filteredTokens; +} + +std::vector BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) { + std::vector s; + s.push_back(ttype); + return getTokens(start, stop, s); +} + +ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) { + sync(i); + if (i >= size()) { + return size() - 1; + } + + Token *token = _tokens[i].get(); + while (token->getChannel() != channel) { + if (token->getType() == Token::EOF) { + return i; + } + i++; + sync(i); + token = _tokens[i].get(); + } + return i; +} + +ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) { + sync(i); + if (i >= size()) { + // the EOF token is on every channel + return size() - 1; + } + + while (true) { + Token *token = _tokens[i].get(); + if (token->getType() == Token::EOF || token->getChannel() == channel) { + return i; + } + + if (i == 0) + return -1; + i--; + } + return i; +} + +std::vector BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) { + lazyInit(); + if (tokenIndex >= _tokens.size()) { + throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1)); + } + + ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL); + size_t to; + size_t from = tokenIndex + 1; + // if none onchannel to right, nextOnChannel=-1 so set to = last token + if (nextOnChannel == -1) { + to = static_cast(size() - 1); + } else { + to = nextOnChannel; + } + + return filterForChannel(from, to, channel); +} + +std::vector BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) { + return getHiddenTokensToRight(tokenIndex, -1); +} + +std::vector BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) { + lazyInit(); + if (tokenIndex >= _tokens.size()) { + throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1)); + } + + if (tokenIndex == 0) { + // Obviously no tokens can appear before the first token. + return { }; + } + + ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL); + if (prevOnChannel == static_cast(tokenIndex - 1)) { + return { }; + } + // if none onchannel to left, prevOnChannel=-1 then from=0 + size_t from = static_cast(prevOnChannel + 1); + size_t to = tokenIndex - 1; + + return filterForChannel(from, to, channel); +} + +std::vector BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) { + return getHiddenTokensToLeft(tokenIndex, -1); +} + +std::vector BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) { + std::vector hidden; + for (size_t i = from; i <= to; i++) { + Token *t = _tokens[i].get(); + if (channel == -1) { + if (t->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL) { + hidden.push_back(t); + } + } else { + if (t->getChannel() == static_cast(channel)) { + hidden.push_back(t); + } + } + } + + return hidden; +} + +bool BufferedTokenStream::isInitialized() const { + return !_needSetup; +} + +/** + * Get the text of all tokens in this buffer. + */ +std::string BufferedTokenStream::getSourceName() const +{ + return _tokenSource->getSourceName(); +} + +std::string BufferedTokenStream::getText() { + fill(); + return getText(misc::Interval(0U, size() - 1)); +} + +std::string BufferedTokenStream::getText(const misc::Interval &interval) { + lazyInit(); + size_t start = interval.a; + size_t stop = interval.b; + if (start == INVALID_INDEX || stop == INVALID_INDEX) { + return ""; + } + sync(stop); + if (stop >= _tokens.size()) { + stop = _tokens.size() - 1; + } + + std::stringstream ss; + for (size_t i = start; i <= stop; i++) { + Token *t = _tokens[i].get(); + if (t->getType() == Token::EOF) { + break; + } + ss << t->getText(); + } + return ss.str(); +} + +std::string BufferedTokenStream::getText(RuleContext *ctx) { + return getText(ctx->getSourceInterval()); +} + +std::string BufferedTokenStream::getText(Token *start, Token *stop) { + if (start != nullptr && stop != nullptr) { + return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex())); + } + + return ""; +} + +void BufferedTokenStream::fill() { + lazyInit(); + const size_t blockSize = 1000; + while (true) { + size_t fetched = fetch(blockSize); + if (fetched < blockSize) { + return; + } + } +} + +void BufferedTokenStream::InitializeInstanceFields() { + _needSetup = true; + _fetchedEOF = false; +} diff --git a/lib/antlr4-cpp-runtime/BufferedTokenStream.h b/lib/antlr4-cpp-runtime/BufferedTokenStream.h new file mode 100755 index 0000000..fab74d2 --- /dev/null +++ b/lib/antlr4-cpp-runtime/BufferedTokenStream.h @@ -0,0 +1,200 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "TokenStream.h" + +namespace antlr4 { + + /** + * This implementation of {@link TokenStream} loads tokens from a + * {@link TokenSource} on-demand, and places the tokens in a buffer to provide + * access to any previous token by index. + * + *

+ * This token stream ignores the value of {@link Token#getChannel}. If your + * parser requires the token stream filter tokens to only those on a particular + * channel, such as {@link Token#DEFAULT_CHANNEL} or + * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a + * {@link CommonTokenStream}.

+ */ + class ANTLR4CPP_PUBLIC BufferedTokenStream : public TokenStream { + public: + BufferedTokenStream(TokenSource *tokenSource); + BufferedTokenStream(const BufferedTokenStream& other) = delete; + + BufferedTokenStream& operator = (const BufferedTokenStream& other) = delete; + + virtual TokenSource* getTokenSource() const override; + virtual size_t index() override; + virtual ssize_t mark() override; + + virtual void release(ssize_t marker) override; + virtual void reset(); + virtual void seek(size_t index) override; + + virtual size_t size() override; + virtual void consume() override; + + virtual Token* get(size_t i) const override; + + /// Get all tokens from start..stop inclusively. + virtual std::vector get(size_t start, size_t stop); + + virtual size_t LA(ssize_t i) override; + virtual Token* LT(ssize_t k) override; + + /// Reset this token stream by setting its token source. + virtual void setTokenSource(TokenSource *tokenSource); + virtual std::vector getTokens(); + virtual std::vector getTokens(size_t start, size_t stop); + + /// + /// Given a start and stop index, return a List of all tokens in + /// the token type BitSet. Return null if no tokens were found. This + /// method looks at both on and off channel tokens. + /// + virtual std::vector getTokens(size_t start, size_t stop, const std::vector &types); + virtual std::vector getTokens(size_t start, size_t stop, size_t ttype); + + /// Collect all tokens on specified channel to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or + /// EOF. If channel is -1, find any non default channel token. + virtual std::vector getHiddenTokensToRight(size_t tokenIndex, ssize_t channel); + + /// + /// Collect all hidden tokens (any off-default channel) to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL + /// or EOF. + /// + virtual std::vector getHiddenTokensToRight(size_t tokenIndex); + + /// + /// Collect all tokens on specified channel to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// If channel is -1, find any non default channel token. + /// + virtual std::vector getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel); + + /// + /// Collect all hidden tokens (any off-default channel) to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// + virtual std::vector getHiddenTokensToLeft(size_t tokenIndex); + + virtual std::string getSourceName() const override; + virtual std::string getText() override; + virtual std::string getText(const misc::Interval &interval) override; + virtual std::string getText(RuleContext *ctx) override; + virtual std::string getText(Token *start, Token *stop) override; + + /// Get all tokens from lexer until EOF. + virtual void fill(); + + protected: + /** + * The {@link TokenSource} from which tokens for this stream are fetched. + */ + TokenSource *_tokenSource; + + /** + * A collection of all tokens fetched from the token source. The list is + * considered a complete view of the input once {@link #fetchedEOF} is set + * to {@code true}. + */ + std::vector> _tokens; + + /** + * The index into {@link #tokens} of the current token (next token to + * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be + * {@link #LT LT(1)}. + * + *

This field is set to -1 when the stream is first constructed or when + * {@link #setTokenSource} is called, indicating that the first token has + * not yet been fetched from the token source. For additional information, + * see the documentation of {@link IntStream} for a description of + * Initializing Methods.

+ */ + // ml: since -1 requires to make this member signed for just this single aspect we use a member _needSetup instead. + // Use bool isInitialized() to find out if this stream has started reading. + size_t _p; + + /** + * Indicates whether the {@link Token#EOF} token has been fetched from + * {@link #tokenSource} and added to {@link #tokens}. This field improves + * performance for the following cases: + * + *