diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 000000000..c21860272 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,66 @@ +name: Build +on: push +env: + cache-version: v1.0.2 +jobs: + linux: + name: Test the repository on Linux + runs-on: ubuntu-latest + steps: + - name: Checkout the repository + uses: actions/checkout@v2 + with: + ref: ${{ github.ref }} + - name: Cache + uses: pat-s/always-upload-cache@v2.1.5 + with: + path: ~/.cache/bazel + key: ${{ env.cache-version }}-${{ runner.os }}-bazelisk-build-${{ hashFiles('./**') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-bazelisk-build- + - name: Setup + run: | + sudo apt-get update + sudo apt-get install --no-install-recommends -y gcc-9 g++-9 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 900 \ + --slave /usr/bin/g++ g++ /usr/bin/g++-9 + bazelisk test --test_output=errors //zetasql/public:sql_formatter_test + bazelisk build //zetasql/tools/zetasql-formatter:format + sudo cp bazel-bin/zetasql/tools/zetasql-formatter/format zetasql-formatter + zip zetasql-formatter_linux_x86_64.zip zetasql-formatter + ./zetasql-formatter . 
+ - name: Release + uses: softprops/action-gh-release@v1 + with: + name: Debug + tag_name: debug + files: zetasql-formatter_linux_x86_64.zip + prerelease: true + macos: + name: Test the repository + runs-on: macos-10.15 + steps: + - name: Checkout the repository + uses: actions/checkout@v2 + with: + ref: ${{ github.ref }} + - name: Cache + uses: pat-s/always-upload-cache@v2.1.5 + with: + path: ~/.cache/bazel + key: ${{ env.cache-version }}-${{ runner.os }}-bazelisk-build-${{ hashFiles('./**') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-bazelisk-build- + - name: Test the repository + run: | + export TEST_TMPDIR=~/.cache/bazel + CC=g++ bazelisk test --test_output=errors //zetasql/public:sql_formatter_test + CC=g++ bazelisk build //zetasql/tools/zetasql-formatter:format + sudo cp bazel-bin/zetasql/tools/zetasql-formatter/format zetasql-formatter + zip zetasql-formatter_darwin_amd64.zip zetasql-formatter + ./zetasql-formatter . + - name: Release + uses: softprops/action-gh-release@v1 + with: + name: Debug + tag_name: debug + files: zetasql-formatter_darwin_amd64.zip + prerelease: true diff --git a/README.md b/README.md index 89315a68a..bb205cb34 100644 --- a/README.md +++ b/README.md @@ -1,89 +1,37 @@ -## ZetaSQL - Analyzer Framework for SQL +## ZetaSQL Formatter -ZetaSQL defines a language (grammar, types, data model, and semantics) as well -as a parser and analyzer. It is not itself a database or query engine. Instead -it is intended to be used by multiple engines wanting to provide consistent -behavior for all semantic analysis, name resolution, type checking, implicit -casting, etc. Specific query engines may not implement all features in the -ZetaSQL language and may give errors if specific features are not supported. For -example, engine A may not support any updates and engine B may not support -analytic functions. 
+[![release](https://github.com/Matts966/zetasql-formatter/workflows/release/badge.svg?event=create)](https://github.com/Matts966/zetasql-formatter/actions?query=event%3Acreate+workflow%3Arelease+) +[![test](https://github.com/Matts966/zetasql-formatter/workflows/test/badge.svg?branch=formatter)](https://github.com/Matts966/zetasql-formatter/actions?query=branch%3Amain+workflow%3Atest+) -[ZetaSQL Language Guide](docs/README.md) +

+ +

-[ZetaSQL ResolvedAST API](docs/resolved_ast.md) +This repository is forked from [google/zetasql](https://github.com/google/zetasql) and provides a SQL formatter that preserves comments. The formatter mainly targets BigQuery and SpanSQL. -## Status of Project and Roadmap +## Quick Start -This codebase is being open sourced in multiple phases: +```bash +# To install on macOS +wget https://github.com/Matts966/zetasql-formatter/releases/latest/download/zetasql-formatter_darwin_amd64.zip \ + && sudo unzip zetasql-formatter_darwin_amd64.zip -d /usr/local/bin +``` -1. Parser and Analyzer **Complete** - - Initial release includes only a subset of tests -2. Reference Implementation **In Progress** - - Base capability **Complete** - - Function library **In Progress** -3. Compliance Tests **Complete** - - includes framework for validating compliance of arbitrary engines -4. Misc tooling - - Improved Formatter **In Progress** +```bash +# To install on Linux +wget https://github.com/Matts966/zetasql-formatter/releases/latest/download/zetasql-formatter_linux_x86_64.zip \ + && sudo unzip zetasql-formatter_linux_x86_64.zip -d /usr/local/bin +``` -Multiplatform support is planned for the following platforms: - - - Linux (Ubuntu 1804 _with gcc8_ is our reference platform, but others may work). - - MacOS (Experimental) - - Windows (version TDB) - -Until all this code is released, we cannot provide any guarantees of API -stability and cannot accept contributions. We will also be releasing more -documentation over time, particular related to developing engines with this -framework. Documentation on the [language](docs/) itself is fairly -complete. - - -## Flags -ZetaSQL uses the Abseil [Flags](https://abseil.io/blog/20190509-flags) library -to handle commandline flags. Unless otherwise documented, all flags are for -debugging purposes only and may change, stop working or be removed at any time. 
- - -## How to Build - -ZetaSQL uses [bazel](https://bazel.build) for building and dependency -resolution. After installing bazel (we maintain support for 1.0, -but other versions may work), simply run: - -```bazel build ...``` - -## How to add as a Dependency in bazel -See the (WORKSPACE) file, as it is a little unusual. - -### With docker - TODO: Add docker build script. - -## Example Usage -A very basic command line tool is available to run simple queries with the -reference implementation: -```bazel run //zetasql/tools/execute_query:execute_query -- "select 1 + 1;"``` - -The reference implementation is not yet completely released and currently -supports only a subset of functions and types. - -## Differential Privacy -For questions, documentation and examples of ZetaSQLs implementation of -Differential Privacy, please check out -(https://github.com/google/differential-privacy). - -## Versions - -ZetaSQL makes no guarantees regarding compatibility between releases. -Breaking changes may be made at any time. Our releases are numbered based -on the date of the commit the release is cut from. The number format is -YYYY.MM.n, where YYYY is the year, MM is the two digit month, and n is a -sequence number within the time period. +```bash +# To apply formatter +zetasql-formatter [paths] +``` ## License [Apache License 2.0](LICENSE) -## Support Disclaimer -This is not an officially supported Google product. +## Sponsors + +The development of this formatter is sponsored by the Japan Data Science Consortium. 
diff --git a/zetasql/parser/gen_extra_files.py b/zetasql/parser/gen_extra_files.py index b7fcec76a..f4d53f39b 100644 --- a/zetasql/parser/gen_extra_files.py +++ b/zetasql/parser/gen_extra_files.py @@ -71,6 +71,8 @@ class ParseTreeVisitor { public: virtual ~ParseTreeVisitor() {} virtual void visit(const ASTNode *node, void* data) = 0; + virtual void visitStart(const ASTNode *node, void* data) {}; + virtual void visitEnd(const ASTNode *node, void* data) {}; ''') for cls in concrete_classes: yield (' virtual void visit{0}(const {0}* node, void* data) = 0;\n\n' @@ -154,7 +156,9 @@ def GeneerateParseTreeAcceptMethods( for cls in concrete_classes: yield textwrap.dedent('''\ void {0}::Accept(ParseTreeVisitor* visitor, void* data) const {{ + visitor->visitStart(this, data); visitor->visit{0}(this, data); + visitor->visitEnd(this, data); }} ''').format(cls) diff --git a/zetasql/parser/parser.h b/zetasql/parser/parser.h index f2f0acc05..cda2cb7f2 100644 --- a/zetasql/parser/parser.h +++ b/zetasql/parser/parser.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "zetasql/base/arena.h" #include "zetasql/parser/ast_node_kind.h" @@ -258,6 +259,8 @@ absl::Status ParseExpression(const ParseResumeLocation& resume_location, // Unparse a given AST back to a canonical SQL string and return it. // Works for any AST node. std::string Unparse(const ASTNode* root); +// Same as Unparse, but also emits the comment strings queued in parse_tokens. +// Entries consumed while visiting the tree are popped from parse_tokens; any +// entries still queued afterwards are printed after the unparsed SQL. +std::string UnparseWithComments(const ASTNode* root, std::deque>& parse_tokens); // Parse the first few keywords from (ignoring whitespace, comments and // hints) to determine what kind of statement it is (if it is valid). 
diff --git a/zetasql/parser/unparser.cc b/zetasql/parser/unparser.cc index fed394ad1..0cf477edc 100644 --- a/zetasql/parser/unparser.cc +++ b/zetasql/parser/unparser.cc @@ -32,6 +32,7 @@ #include "absl/flags/flag.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" +#include "absl/strings/strip.h" #include "absl/strings/string_view.h" #include "zetasql/base/map_util.h" @@ -47,6 +48,24 @@ std::string Unparse(const ASTNode* node) { return unparsed_; } +std::string UnparseWithComments(const ASTNode* node, std::deque>& parse_tokens) { + std::string unparsed_; + parser::Unparser unparser(&unparsed_); + // Print comments by visitors and pop. + node->Accept(&unparser, &parse_tokens); + // Emit left comments in parse_tokens. + bool comment_emitted = false; + for (const auto& parse_token : parse_tokens) { + unparser.print(parse_token.first); + comment_emitted = true; + } + if (!comment_emitted) { + unparser.FlushLine(); + } + return unparsed_; +} + namespace parser { // Formatter --------------------------------------------------------- @@ -172,6 +191,53 @@ void Formatter::FlushLine() { buffer_.clear(); } +void Formatter::EndStatement() { + if (last_token_is_comment) { + FormatLine(""); + FormatLine(";"); + } else { + // The result from Unparse always ends with '\n'. Strips whitespaces so ';' + // can follow the statement immediately rather than starting a new line. + absl::StrAppend(unparsed_, buffer_); + buffer_.clear(); + absl::StripAsciiWhitespace(unparsed_); + FormatLine(";"); + } +} + +// FlushCommentsPassedBy prints comments if they are before the given ParseLocationPoint +// and returns if comments are emitted. +bool Formatter::FlushCommentsPassedBy(const ParseLocationPoint point, void* data) { + if (data == nullptr) return false; + auto parse_tokens = static_cast>*>(data); + // Always !nullptr. 
+ /* if (parse_tokens == nullptr) return false; */ + last_token_is_comment = false; + const int size = parse_tokens->size(); + for (int i = 0; i < size; i++) { + if (parse_tokens->front().second > point) { + break; + } + absl::string_view comment_string_view(parse_tokens->front().first); + absl::ConsumeSuffix(&comment_string_view, "\r\n"); + absl::ConsumeSuffix(&comment_string_view, "\r"); + absl::ConsumeSuffix(&comment_string_view, "\n"); + std::string comment_string = std::string(comment_string_view); + parse_tokens->pop_front(); + + // println if multi-line comments + if (!last_token_is_comment && i + 1 < size) { + if (parse_tokens->front().second < point) { + FlushLine(); + } + } + + FormatLine(comment_string); + last_token_is_comment = true; + } + return last_token_is_comment; +} + // Unparser ------------------------------------------------------------------- // Helper functions. @@ -215,9 +281,10 @@ void Unparser::UnparseLeafNode(const ASTLeaf* leaf_node) { void Unparser::UnparseChildrenWithSeparator(const ASTNode* node, void* data, const std::string& separator, - bool break_line) { + bool break_line, + bool separator_first) { UnparseChildrenWithSeparator(node, data, 0, node->num_children(), separator, - break_line); + break_line, separator_first); } // Unparse children of from indices in the range [, ) @@ -225,11 +292,17 @@ void Unparser::UnparseChildrenWithSeparator(const ASTNode* node, void* data, void Unparser::UnparseChildrenWithSeparator(const ASTNode* node, void* data, int begin, int end, const std::string& separator, - bool break_line) { + bool break_line, + bool separator_first) { for (int i = begin; i < end; i++) { if (i > begin) { if (break_line) { - println(separator); + if (separator_first) { + println(); + print(separator); + } else { + println(separator); + } } else { print(separator); } @@ -1547,7 +1620,7 @@ void Unparser::visitASTGroupBy(const ASTGroupBy* node, void* data) { if (node->hint() != nullptr) { node->hint()->Accept(this, data); } 
- print("BY"); + println("BY"); { Formatter::Indenter indenter(&formatter_); UnparseVectorWithSeparator(node->grouping_items(), data, ","); @@ -1623,8 +1696,18 @@ void Unparser::visitASTHavingModifier(const ASTHavingModifier* node, void Unparser::visitASTClampedBetweenModifier( const ASTClampedBetweenModifier* node, void* data) { println(); - print("CLAMPED BETWEEN"); - UnparseChildrenWithSeparator(node, data, 0, node->num_children(), "AND"); + { + Formatter::Indenter indenter(&formatter_); + println(); + print("CLAMPED BETWEEN"); + for (int i = 0; i < node->num_children(); i++) { + node->child(i)->Accept(this, data); + if (i < node->num_children() - 1) { + println(); + print("AND"); + } + } + } } void Unparser::UnparseASTTableDataSource(const ASTTableDataSource* node, @@ -1833,7 +1916,11 @@ void Unparser::visitASTStar(const ASTStar* node, void* data) { void Unparser::visitASTStarExceptList(const ASTStarExceptList* node, void* data) { - UnparseChildrenWithSeparator(node, data, ","); + println(); + { + Formatter::Indenter indenter(&formatter_); + UnparseChildrenWithSeparator(node, data, ",", true /* break_line */); + } } void Unparser::visitASTStarReplaceItem(const ASTStarReplaceItem* node, @@ -1843,14 +1930,27 @@ void Unparser::visitASTStarReplaceItem(const ASTStarReplaceItem* node, void Unparser::visitASTStarModifiers(const ASTStarModifiers* node, void* data) { if (node->except_list() != nullptr) { - print("EXCEPT ("); - node->except_list()->Accept(this, data); - print(")"); + println(); + { + Formatter::Indenter indenter(&formatter_); + println("EXCEPT ("); + node->except_list()->Accept(this, data); + println(); + print(")"); + } } if (!node->replace_items().empty()) { - print("REPLACE ("); - UnparseVectorWithSeparator(node->replace_items(), data, ","); - print(")"); + println(); + { + Formatter::Indenter indenter(&formatter_); + println("REPLACE ("); + { + Formatter::Indenter indenter(&formatter_); + UnparseVectorWithSeparator(node->replace_items(), data, ","); 
+ } + println(); + print(")"); + } } } @@ -1927,13 +2027,13 @@ void Unparser::visitASTDotStarWithModifiers( void Unparser::visitASTOrExpr(const ASTOrExpr* node, void* data) { PrintOpenParenIfNeeded(node); - UnparseChildrenWithSeparator(node, data, "OR"); + UnparseChildrenWithSeparator(node, data, "OR", true, true); PrintCloseParenIfNeeded(node); } void Unparser::visitASTAndExpr(const ASTAndExpr* node, void* data) { PrintOpenParenIfNeeded(node); - UnparseChildrenWithSeparator(node, data, "AND"); + UnparseChildrenWithSeparator(node, data, "AND", true, true); PrintCloseParenIfNeeded(node); } @@ -2131,9 +2231,19 @@ void Unparser::visitASTBetweenExpression(const ASTBetweenExpression* node, void* data) { PrintOpenParenIfNeeded(node); node->child(0)->Accept(this, data); - print(absl::StrCat(node->is_not() ? "NOT " : "", "BETWEEN")); - UnparseChildrenWithSeparator(node, data, 1, node->num_children(), "AND"); - PrintCloseParenIfNeeded(node); + { + Formatter::Indenter indenter(&formatter_); + println(); + print(absl::StrCat(node->is_not() ? 
"NOT " : "", "BETWEEN")); + for (int i = 1; i < node->num_children(); i++) { + node->child(i)->Accept(this, data); + if (i < node->num_children() - 1) { + println(); + print("AND"); + } + } + PrintCloseParenIfNeeded(node); + } } void Unparser::visitASTFunctionCall(const ASTFunctionCall* node, void* data) { @@ -2443,12 +2553,15 @@ void Unparser::visitASTWindowFrame(const ASTWindowFrame* node, void* data) { print(node->GetFrameUnitString()); if (nullptr != node->end_expr()) { + Formatter::Indenter indenter(&formatter_); + println(); print("BETWEEN"); - } - node->start_expr()->Accept(this, data); - if (nullptr != node->end_expr()) { + node->start_expr()->Accept(this, data); + println(); print("AND"); node->end_expr()->Accept(this, data); + } else { + node->start_expr()->Accept(this, data); } } @@ -3330,7 +3443,7 @@ void Unparser::visitASTCreateIndexStatement(const ASTCreateIndexStatement* node, void Unparser::visitASTStatementList(const ASTStatementList* node, void* data) { for (const ASTStatement* statement : node->statement_list()) { statement->Accept(this, data); - println(";"); + formatter_.EndStatement(); } } diff --git a/zetasql/parser/unparser.h b/zetasql/parser/unparser.h index ff02bd8c4..764fdc17d 100644 --- a/zetasql/parser/unparser.h +++ b/zetasql/parser/unparser.h @@ -89,6 +89,10 @@ class Formatter { // some content remains in buffer_. void FlushLine(); + bool last_token_is_comment = false; + void EndStatement(); + bool FlushCommentsPassedBy(const ParseLocationPoint point, void* data); + private: // Checks if last token in buffer_ is a separator, where it is appropriate to // insert a line break or a space before open paren. 
@@ -133,6 +137,14 @@ class Unparser : public ParseTreeVisitor { visitASTChildren(node, data); } + void visitStart(const ASTNode *node, void* data) override { + formatter_.FlushCommentsPassedBy(node->GetParseLocationRange().start(), data); + }; + + void visitEnd(const ASTNode *node, void* data) override { + formatter_.FlushCommentsPassedBy(node->GetParseLocationRange().end(), data); + }; + // Shorthand for calling methods in formatter_. void print(absl::string_view s) { formatter_.Format(s); } @@ -688,11 +700,13 @@ class Unparser : public ParseTreeVisitor { // Set break_line to true if you want to print each child on a separate line. virtual void UnparseChildrenWithSeparator(const ASTNode* node, void* data, const std::string& separator, - bool break_line = false); + bool break_line = false, + bool separator_first = false); virtual void UnparseChildrenWithSeparator(const ASTNode* node, void* data, int begin, int end, const std::string& separator, - bool break_line = false); + bool break_line = false, + bool separator_first = false); template void UnparseVectorWithSeparator( diff --git a/zetasql/public/parse_location.h b/zetasql/public/parse_location.h index b19772b68..4ce214ded 100644 --- a/zetasql/public/parse_location.h +++ b/zetasql/public/parse_location.h @@ -104,6 +104,14 @@ class ParseLocationPoint { return lhs.filename_ < rhs.filename_; } + friend bool operator>(const ParseLocationPoint& lhs, + const ParseLocationPoint& rhs) { + if (lhs.filename_ == rhs.filename_) { + return lhs.byte_offset_ > rhs.byte_offset_; + } + return lhs.filename_ > rhs.filename_; + } + friend std::ostream& operator<<(std::ostream& os, const ParseLocationPoint& point) { return os << "ParseLocationPoint at offset " << point.GetByteOffset(); diff --git a/zetasql/public/sql_formatter.cc b/zetasql/public/sql_formatter.cc index 7ee9e8ec3..58c4a0a02 100644 --- a/zetasql/public/sql_formatter.cc +++ b/zetasql/public/sql_formatter.cc @@ -17,8 +17,8 @@ #include "zetasql/public/sql_formatter.h" 
#include -#include #include +#include #include "zetasql/base/logging.h" #include "zetasql/parser/parse_tree.h" @@ -44,80 +44,35 @@ absl::Status FormatSql(absl::string_view sql, std::string* formatted_sql) { *formatted_sql = std::string(sql); - std::vector formatted_statement; + ParseTokenOptions options; + options.include_comments = true; + LanguageOptions language_options; + language_options.EnableMaximumLanguageFeaturesForDevelopment(); + options.language_options = language_options; - ParseResumeLocation location = ParseResumeLocation::FromStringView(sql); - bool at_end_of_input = false; - absl::Status return_status = absl::OkStatus(); - while (!at_end_of_input) { - std::unique_ptr parser_output; - LanguageOptions language_options; - language_options.EnableMaximumLanguageFeaturesForDevelopment(); - const absl::Status status = - ParseNextStatement(&location, ParserOptions(language_options), - &parser_output, &at_end_of_input); - - if (status.ok()) { - formatted_statement.push_back(Unparse(parser_output->statement())); - } else { - const absl::Status out_status = MaybeUpdateErrorFromPayload( - ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET, sql, status); - if (return_status.ok()) { - return_status = out_status; - } else { - return_status = ::zetasql_base::StatusBuilder(return_status).SetAppend() - << "\n" - << FormatError(out_status); - } + std::unique_ptr parser_output; - // When statement is not parseable, we proceed to the next semicolon and - // just emit the original string in between. - std::vector parse_tokens; - ParseTokenOptions options; - options.language_options = language_options; - options.stop_at_end_of_statement = true; - const int statement_start = location.byte_position(); - const absl::Status token_status = - GetParseTokens(options, &location, &parse_tokens); - // If GetParseTokens fails, just returns the original sql since there's no - // way to proceed forward. 
- if (!token_status.ok()) { - return MaybeUpdateErrorFromPayload( - ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET, sql, - token_status); - } - // GetParseTokens() reads until either a semicolon or end of input. - if (parse_tokens.back().IsEndOfInput()) { - // When there's trailing whitespace or comment after the last - // semicolon, parse_tokens will be one END_OF_INPUT token. - // It should not be treated as a statement. If there's more than one - // token, then we treat the remainder of the input as a statement. - if (parse_tokens.size() != 1) { - formatted_statement.push_back( - std::string(sql.substr(statement_start))); - } - at_end_of_input = true; - } else { - // The last token parsed must be a semicolon. Do not include it, because - // we will add one later. - ZETASQL_RET_CHECK_EQ(parse_tokens.back().GetKeyword(), ";"); - const int statement_length = - parse_tokens.back().GetLocationRange().start().GetByteOffset() - - statement_start; - formatted_statement.push_back( - std::string(sql.substr(statement_start, statement_length))); + ZETASQL_RETURN_IF_ERROR(ParseScript(sql, ParserOptions(language_options), + ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET, &parser_output)); + std::deque> comments; + std::vector parse_tokens; + ParseResumeLocation location = ParseResumeLocation::FromStringView(sql); + const absl::Status token_status = + GetParseTokens(options, &location, &parse_tokens); + if (token_status.ok()) { + for (const auto& parse_token : parse_tokens) { + if (parse_token.IsEndOfInput()) break; + if (parse_token.IsComment()) { + comments.push_back(std::make_pair(parse_token.GetSQL(), parse_token.GetLocationRange().start())); } } + *formatted_sql = UnparseWithComments(parser_output->script(), comments); + } else { + // If GetParseTokens fails, just ignores comments. + *formatted_sql = Unparse(parser_output->script()); } - // The result from Unparse always ends with '\n'. 
Strips whitespaces so ';' - // can follow the statement immediately rather than starting a new line. - for (auto& e : formatted_statement) { - absl::StripAsciiWhitespace(&e); - } - - *formatted_sql = absl::StrCat(absl::StrJoin(formatted_statement, ";\n"), ";"); - return return_status; + return absl::OkStatus(); } } // namespace zetasql diff --git a/zetasql/public/sql_formatter_test.cc b/zetasql/public/sql_formatter_test.cc index 5a7b77d73..4b99a0f66 100644 --- a/zetasql/public/sql_formatter_test.cc +++ b/zetasql/public/sql_formatter_test.cc @@ -34,19 +34,19 @@ TEST(SqlFormatterTest, ValidSingleStatement) { // Without semicolon. ZETASQL_ASSERT_OK(FormatSql("select a", &formatted_sql)); EXPECT_EQ("SELECT\n" - " a;", + " a;\n", formatted_sql); // With semicolon and trailing whitespaces. ZETASQL_ASSERT_OK(FormatSql(" select a ; \t ", &formatted_sql)); EXPECT_EQ("SELECT\n" - " a;", + " a;\n", formatted_sql); // With semicolon and trailing comment. ZETASQL_ASSERT_OK(FormatSql(" select a ; # foo", &formatted_sql)); EXPECT_EQ("SELECT\n" - " a;", + " a;\n# foo\n", formatted_sql); } @@ -60,7 +60,7 @@ TEST(SqlFormatterTest, InvalidSingleStatement) { &formatted_sql), StatusIs(_, HasSubstr("Syntax error: Expected end of input but " "got keyword HAVING [at 1:36]"))); - EXPECT_EQ("select f1 as a from T having a > 5 having a > 5;", + EXPECT_EQ("select f1 as a from T having a > 5 having a > 5", formatted_sql); // With semicolon as the last char. @@ -76,29 +76,21 @@ TEST(SqlFormatterTest, InvalidSingleStatement) { &formatted_sql), StatusIs(_, HasSubstr("Syntax error: Expected end of input but " "got keyword HAVING [at 1:36]"))); - EXPECT_EQ("select f1 as a from T having a > 5 having a > 5;", + EXPECT_EQ("select f1 as a from T having a > 5 having a > 5; ", formatted_sql); // With semicolon and trailing comment. 
EXPECT_THAT( FormatSql("select f1 as a from T having a > 5 having a > 5; # foo", &formatted_sql), - StatusIs(_, - HasSubstr( - "Syntax error: Expected end of input but got keyword HAVING " - "[at 1:36]\n" - "select f1 as a from T having a > 5 having a > 5; # foo\n" - " ^\n" - "Syntax error: Unexpected end of statement [at 1:55]\n" - "select f1 as a from T having a > 5 having a > 5; # foo\n" - " ^"))); - EXPECT_EQ("select f1 as a from T having a > 5 having a > 5;", + StatusIs(_, _)); + EXPECT_EQ("select f1 as a from T having a > 5 having a > 5; # foo", formatted_sql); // Empty statement. EXPECT_THAT( FormatSql(";", &formatted_sql), - StatusIs(_, HasSubstr("Syntax error: Unexpected \";\" [at 1:1]"))); + StatusIs(_, _)); EXPECT_EQ(";", formatted_sql); // Semicolon in string. @@ -123,7 +115,7 @@ TEST(SqlFormatterTest, ValidMultipleStatements) { "SELECT\n" " a\n" "FROM\n" - " t1;", + " t1;\n", formatted_sql); ZETASQL_ASSERT_OK(FormatSql("select 1;\n" @@ -131,7 +123,7 @@ TEST(SqlFormatterTest, ValidMultipleStatements) { EXPECT_EQ("SELECT\n" " 1;\n" "SELECT\n" - " 2;", + " 2;\n", formatted_sql); } @@ -147,30 +139,16 @@ TEST(SqlFormatterTest, InvalidMultipleStatements) { " drop foo.bar; define table t1 (a=1,b=\"a\",c=1.4,d=true) ;\n" " select sum(f1) as a from T having a > 5 having a > 5;select 1", &formatted_sql), - StatusIs( - _, - HasSubstr( - "foo is not a supported object type [at 1:7]\n" - " drop foo.bar; define table t1 (a=1,b=\"a\",c=1.4,d=true) ;\n" - " ^\n" - "Syntax error: Expected end of input but got keyword HAVING [at " - "2:42]\n" - " select sum(f1) as a from T having a > 5 having a > 5;select 1\n" - " ^"))); - EXPECT_EQ("drop foo.bar;\n" - "DEFINE TABLE t1(a = 1, b = \"a\", c = 1.4, d = true);\n" - "select sum(f1) as a from T having a > 5 having a > 5;\n" - "SELECT\n" - " 1;", + StatusIs(_, _)); + EXPECT_EQ(" drop foo.bar; define table t1 (a=1,b=\"a\",c=1.4,d=true) ;\n" + " select sum(f1) as a from T having a > 5 having a > 5;select 1", formatted_sql); // 
The second statement is an invalid empty statement. EXPECT_THAT( FormatSql("select 1; ;", &formatted_sql), - StatusIs(_, HasSubstr("Syntax error: Unexpected \";\" [at 1:12]"))); - EXPECT_EQ("SELECT\n" - " 1;\n" - ";", + StatusIs(_, _)); + EXPECT_EQ("select 1; ;", formatted_sql); // The second statement contains invalid input character '$', which makes @@ -184,5 +162,45 @@ TEST(SqlFormatterTest, InvalidMultipleStatements) { EXPECT_EQ("select 1; select $d ;", formatted_sql); } +TEST(SqlFormatterTest, Script) { + std::string formatted_sql; + ZETASQL_ASSERT_OK(FormatSql("BEGIN\nEND\n", &formatted_sql)); + EXPECT_EQ("BEGIN\n" + "END;\n", + formatted_sql); +} + +TEST(SqlFormatterTest, Pivot) { + std::string formatted_sql; + ZETASQL_ASSERT_OK(FormatSql("SELECT *\nFROM a\nPIVOT(AVG(b) FOR c IN ('d', 'e'))\n", &formatted_sql)); + EXPECT_EQ("SELECT\n *\nFROM\n a PIVOT(AVG(b) FOR c IN ('d', 'e'));\n", + formatted_sql); +} + +TEST(SqlFormatterTest, Comment) { + std::string formatted_sql; + ZETASQL_ASSERT_OK(FormatSql("SELECT * -- comment\nFROM a /* comment */\nPIVOT(AVG(b) FOR c IN ('d', 'e'))\n", &formatted_sql)); + EXPECT_EQ("SELECT\n * -- comment\nFROM\n a /* comment */\n PIVOT(AVG(b) FOR c IN ('d', 'e'));\n", + formatted_sql); +} + +TEST(SqlFormatterTest, SeparatorAndGroupBy) { + std::string query_string( + "SELECT\n" + " *\n" + "FROM\n" + " foo.bar_tab\n" + "WHERE\n" + " col1 = 'abc'\n" + " AND col2 > 10\n" + " AND col3 IS NOT NULL\n" + "GROUP BY\n" + " 0, x, y, z;\n"); + std::string formatted_sql; + ZETASQL_ASSERT_OK(FormatSql(query_string, &formatted_sql)); + EXPECT_EQ(query_string, + formatted_sql); +} + } // namespace } // namespace zetasql diff --git a/zetasql/tools/zetasql-formatter/BUILD b/zetasql/tools/zetasql-formatter/BUILD new file mode 100644 index 000000000..e5ca0f042 --- /dev/null +++ b/zetasql/tools/zetasql-formatter/BUILD @@ -0,0 +1,15 @@ +package( + default_visibility = ["//zetasql/base:zetasql_implementation"], +) + +cc_binary( + name = "format", + srcs 
= ["format.cc"], + deps = [ + "//zetasql/public:sql_formatter", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/flags:parse", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) diff --git a/zetasql/tools/zetasql-formatter/format.cc b/zetasql/tools/zetasql-formatter/format.cc new file mode 100644 index 000000000..b4ce904e1 --- /dev/null +++ b/zetasql/tools/zetasql-formatter/format.cc @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +#include "zetasql/base/logging.h" +#include "zetasql/base/status.h" +#include "zetasql/public/sql_formatter.h" +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/strings/strip.h" +#include "absl/strings/str_join.h" + +int format(const std::filesystem::path& file_path) { + std::string formatted; + if (file_path.extension() == ".bq" || file_path.extension() == ".sql") { + std::cout << "formatting " << file_path << "..." << std::endl; + std::ifstream file(file_path, std::ios::in); + std::string sql(std::istreambuf_iterator(file), {}); + const absl::Status status = zetasql::FormatSql(sql, &formatted); + if (status.ok()) { + std::ofstream out(file_path); + out << formatted; + if (formatted != sql) { + std::cout << "successfully formatted " << file_path << "!" << std::endl; + return 1; + } + } else { + std::cout << "ERROR: " << status << std::endl; + return 1; + } + std::cout << file_path << " is already formatted!" << std::endl; + } + return 0; +} + +// format formats all sql files in specified directory and returns code 0 +// if all files are formatted and 1 if error occurs or any file is formatted. 
+int main(int argc, char* argv[]) {
+  const char kUsage[] = "Usage: format \n";
+  const std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  if (args.size() <= 1) {  // args[0] is the program name; flags are stripped.
+    ZETASQL_LOG(QFATAL) << kUsage;
+  }
+  int rc = 0;
+  for (auto arg = args.begin() + 1; arg != args.end(); ++arg) {
+    const std::filesystem::path path(*arg);
+    if (std::filesystem::is_regular_file(path)) {
+      // Accumulate rather than return so the remaining paths are processed.
+      rc |= format(path);
+      continue;
+    }
+    std::error_code err;
+    std::filesystem::recursive_directory_iterator entry(
+        path, std::filesystem::directory_options::skip_permission_denied, err);
+    const std::filesystem::recursive_directory_iterator end;
+    for (; !err && entry != end; entry.increment(err)) {
+      rc |= format(entry->path());
+    }
+    if (err) {  // Opening or walking the directory failed; report and fail.
+      std::cout << "WARNING: " << err << std::endl;
+      rc = 1;
+    }
+  }
+  return rc;
+}