diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 000000000..c21860272
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,66 @@
+name: Build
+on: push
+env:
+ cache-version: v1.0.2
+jobs:
+ linux:
+ name: Test the repository on Linux
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout the repository
+ uses: actions/checkout@v2
+ with:
+ ref: ${{ github.ref }}
+ - name: Cache
+ uses: pat-s/always-upload-cache@v2.1.5
+ with:
+ path: ~/.cache/bazel
+ key: ${{ env.cache-version }}-${{ runner.os }}-bazelisk-build-${{ hashFiles('./**') }}
+ restore-keys: ${{ env.cache-version }}-${{ runner.os }}-bazelisk-build-
+ - name: Setup
+ run: |
+ sudo apt-get update
+ sudo apt-get install --no-install-recommends -y gcc-9 g++-9
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 900 \
+ --slave /usr/bin/g++ g++ /usr/bin/g++-9
+ bazelisk test --test_output=errors //zetasql/public:sql_formatter_test
+ bazelisk build //zetasql/tools/zetasql-formatter:format
+ sudo cp bazel-bin/zetasql/tools/zetasql-formatter/format zetasql-formatter
+ zip zetasql-formatter_linux_x86_64.zip zetasql-formatter
+ ./zetasql-formatter .
+ - name: Release
+ uses: softprops/action-gh-release@v1
+ with:
+ name: Debug
+ tag_name: debug
+ files: zetasql-formatter_linux_x86_64.zip
+ prerelease: true
+ macos:
+ name: Test the repository
+ runs-on: macos-10.15
+ steps:
+ - name: Checkout the repository
+ uses: actions/checkout@v2
+ with:
+ ref: ${{ github.ref }}
+ - name: Cache
+ uses: pat-s/always-upload-cache@v2.1.5
+ with:
+ path: ~/.cache/bazel
+ key: ${{ env.cache-version }}-${{ runner.os }}-bazelisk-build-${{ hashFiles('./**') }}
+ restore-keys: ${{ env.cache-version }}-${{ runner.os }}-bazelisk-build-
+ - name: Test the repository
+ run: |
+ export TEST_TMPDIR=~/.cache/bazel
+ CC=g++ bazelisk test --test_output=errors //zetasql/public:sql_formatter_test
+ CC=g++ bazelisk build //zetasql/tools/zetasql-formatter:format
+ sudo cp bazel-bin/zetasql/tools/zetasql-formatter/format zetasql-formatter
+ zip zetasql-formatter_darwin_amd64.zip zetasql-formatter
+ ./zetasql-formatter .
+ - name: Release
+ uses: softprops/action-gh-release@v1
+ with:
+ name: Debug
+ tag_name: debug
+ files: zetasql-formatter_darwin_amd64.zip
+ prerelease: true
diff --git a/README.md b/README.md
index 89315a68a..bb205cb34 100644
--- a/README.md
+++ b/README.md
@@ -1,89 +1,37 @@
-## ZetaSQL - Analyzer Framework for SQL
+## ZetaSQL Formatter
-ZetaSQL defines a language (grammar, types, data model, and semantics) as well
-as a parser and analyzer. It is not itself a database or query engine. Instead
-it is intended to be used by multiple engines wanting to provide consistent
-behavior for all semantic analysis, name resolution, type checking, implicit
-casting, etc. Specific query engines may not implement all features in the
-ZetaSQL language and may give errors if specific features are not supported. For
-example, engine A may not support any updates and engine B may not support
-analytic functions.
+[![release](https://github.com/Matts966/zetasql-formatter/workflows/release/badge.svg?event=create)](https://github.com/Matts966/zetasql-formatter/actions?query=event%3Acreate+workflow%3Arelease+)
+[![test](https://github.com/Matts966/zetasql-formatter/workflows/test/badge.svg?branch=formatter)](https://github.com/Matts966/zetasql-formatter/actions?query=branch%3Aformatter+workflow%3Atest+)
-[ZetaSQL Language Guide](docs/README.md)
+
+
+
-[ZetaSQL ResolvedAST API](docs/resolved_ast.md)
+This repository is a fork of [google/zetasql](https://github.com/google/zetasql) that provides a SQL formatter which preserves comments. The formatter is mainly intended for BigQuery and SpanSQL.
-## Status of Project and Roadmap
+## Quick Start
-This codebase is being open sourced in multiple phases:
+```bash
+# To install on macOS
+wget https://github.com/Matts966/zetasql-formatter/releases/latest/download/zetasql-formatter_darwin_amd64.zip \
+ && sudo unzip zetasql-formatter_darwin_amd64.zip -d /usr/local/bin
+```
-1. Parser and Analyzer **Complete**
- - Initial release includes only a subset of tests
-2. Reference Implementation **In Progress**
- - Base capability **Complete**
- - Function library **In Progress**
-3. Compliance Tests **Complete**
- - includes framework for validating compliance of arbitrary engines
-4. Misc tooling
- - Improved Formatter **In Progress**
+```bash
+# To install for Linux
+wget https://github.com/Matts966/zetasql-formatter/releases/latest/download/zetasql-formatter_linux_x86_64.zip \
+ && sudo unzip zetasql-formatter_linux_x86_64.zip -d /usr/local/bin
+```
-Multiplatform support is planned for the following platforms:
-
- - Linux (Ubuntu 1804 _with gcc8_ is our reference platform, but others may work).
- - MacOS (Experimental)
- - Windows (version TDB)
-
-Until all this code is released, we cannot provide any guarantees of API
-stability and cannot accept contributions. We will also be releasing more
-documentation over time, particular related to developing engines with this
-framework. Documentation on the [language](docs/) itself is fairly
-complete.
-
-
-## Flags
-ZetaSQL uses the Abseil [Flags](https://abseil.io/blog/20190509-flags) library
-to handle commandline flags. Unless otherwise documented, all flags are for
-debugging purposes only and may change, stop working or be removed at any time.
-
-
-## How to Build
-
-ZetaSQL uses [bazel](https://bazel.build) for building and dependency
-resolution. After installing bazel (we maintain support for 1.0,
-but other versions may work), simply run:
-
-```bazel build ...```
-
-## How to add as a Dependency in bazel
-See the (WORKSPACE) file, as it is a little unusual.
-
-### With docker
- TODO: Add docker build script.
-
-## Example Usage
-A very basic command line tool is available to run simple queries with the
-reference implementation:
-```bazel run //zetasql/tools/execute_query:execute_query -- "select 1 + 1;"```
-
-The reference implementation is not yet completely released and currently
-supports only a subset of functions and types.
-
-## Differential Privacy
-For questions, documentation and examples of ZetaSQLs implementation of
-Differential Privacy, please check out
-(https://github.com/google/differential-privacy).
-
-## Versions
-
-ZetaSQL makes no guarantees regarding compatibility between releases.
-Breaking changes may be made at any time. Our releases are numbered based
-on the date of the commit the release is cut from. The number format is
-YYYY.MM.n, where YYYY is the year, MM is the two digit month, and n is a
-sequence number within the time period.
+```bash
+# To apply formatter
+zetasql-formatter [paths]
+```
## License
[Apache License 2.0](LICENSE)
-## Support Disclaimer
-This is not an officially supported Google product.
+## Sponsors
+
+The development of this formatter is sponsored by the Japan Data Science Consortium.
diff --git a/zetasql/parser/gen_extra_files.py b/zetasql/parser/gen_extra_files.py
index b7fcec76a..f4d53f39b 100644
--- a/zetasql/parser/gen_extra_files.py
+++ b/zetasql/parser/gen_extra_files.py
@@ -71,6 +71,8 @@ class ParseTreeVisitor {
public:
virtual ~ParseTreeVisitor() {}
virtual void visit(const ASTNode *node, void* data) = 0;
+ virtual void visitStart(const ASTNode *node, void* data) {};
+ virtual void visitEnd(const ASTNode *node, void* data) {};
''')
for cls in concrete_classes:
yield (' virtual void visit{0}(const {0}* node, void* data) = 0;\n\n'
@@ -154,7 +156,9 @@ def GeneerateParseTreeAcceptMethods(
for cls in concrete_classes:
yield textwrap.dedent('''\
void {0}::Accept(ParseTreeVisitor* visitor, void* data) const {{
+ visitor->visitStart(this, data);
visitor->visit{0}(this, data);
+ visitor->visitEnd(this, data);
}}
''').format(cls)
diff --git a/zetasql/parser/parser.h b/zetasql/parser/parser.h
index f2f0acc05..cda2cb7f2 100644
--- a/zetasql/parser/parser.h
+++ b/zetasql/parser/parser.h
@@ -22,6 +22,7 @@
#include
#include
#include
+#include
#include "zetasql/base/arena.h"
#include "zetasql/parser/ast_node_kind.h"
@@ -258,6 +259,8 @@ absl::Status ParseExpression(const ParseResumeLocation& resume_location,
// Unparse a given AST back to a canonical SQL string and return it.
// Works for any AST node.
std::string Unparse(const ASTNode* root);
+std::string UnparseWithComments(const ASTNode* root, std::deque>& parse_tokens);
// Parse the first few keywords from (ignoring whitespace, comments and
// hints) to determine what kind of statement it is (if it is valid).
diff --git a/zetasql/parser/unparser.cc b/zetasql/parser/unparser.cc
index fed394ad1..0cf477edc 100644
--- a/zetasql/parser/unparser.cc
+++ b/zetasql/parser/unparser.cc
@@ -32,6 +32,7 @@
#include "absl/flags/flag.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
+#include "absl/strings/strip.h"
#include "absl/strings/string_view.h"
#include "zetasql/base/map_util.h"
@@ -47,6 +48,24 @@ std::string Unparse(const ASTNode* node) {
return unparsed_;
}
+// Unparses `node`, passing `parse_tokens` to the visitor as its `data` so that
+// comments are printed (and popped from the queue) while the tree is walked.
+std::string UnparseWithComments(
+    const ASTNode* node,
+    std::deque<std::pair<std::string, ParseLocationPoint>>& parse_tokens) {
+  std::string unparsed_;
+  parser::Unparser unparser(&unparsed_);
+  node->Accept(&unparser, &parse_tokens);
+  // Print the comments that are still queued after the walk.
+  for (const auto& leftover : parse_tokens) {
+    unparser.print(leftover.first);
+  }
+  // The closing flush happens only when no leftover comment was printed.
+  if (parse_tokens.empty()) unparser.FlushLine();
+  return unparsed_;
+}
+
namespace parser {
// Formatter ---------------------------------------------------------
@@ -172,6 +191,53 @@ void Formatter::FlushLine() {
buffer_.clear();
}
+// Closes the current statement with ";". When the last token was a comment,
+// FormatLine("") is issued first; otherwise pending output is trimmed so that
+// ";" follows the statement directly instead of starting a new line.
+void Formatter::EndStatement() {
+  if (!last_token_is_comment) {
+    absl::StrAppend(unparsed_, buffer_);
+    buffer_.clear();
+    absl::StripAsciiWhitespace(unparsed_);
+  } else {
+    FormatLine("");
+  }
+  FormatLine(";");
+}
+
+// Prints the queued comments that are not located after `point`, removing
+// each one from the queue. `data` is null or points to the
+// std::deque<std::pair<std::string, ParseLocationPoint>> of pending comments.
+// Returns whether any comment was printed.
+bool Formatter::FlushCommentsPassedBy(const ParseLocationPoint point,
+                                      void* data) {
+  using CommentQueue = std::deque<std::pair<std::string, ParseLocationPoint>>;
+  if (data == nullptr) return false;
+  auto* pending = static_cast<CommentQueue*>(data);
+  last_token_is_comment = false;
+  while (!pending->empty()) {
+    if (pending->front().second > point) break;
+    // Trim the line terminator: "\r\n", then "\r", then "\n", once each.
+    absl::string_view text(pending->front().first);
+    absl::ConsumeSuffix(&text, "\r\n");
+    absl::ConsumeSuffix(&text, "\r");
+    absl::ConsumeSuffix(&text, "\n");
+    std::string comment(text);
+    pending->pop_front();
+
+    // For the first comment printed, flush the line being built when another
+    // queued comment still precedes `point`.
+    const bool another_before_point =
+        !pending->empty() && pending->front().second < point;
+    if (!last_token_is_comment && another_before_point) {
+      FlushLine();
+    }
+    FormatLine(comment);
+    last_token_is_comment = true;
+  }
+  return last_token_is_comment;
+}
+
// Unparser -------------------------------------------------------------------
// Helper functions.
@@ -215,9 +281,10 @@ void Unparser::UnparseLeafNode(const ASTLeaf* leaf_node) {
void Unparser::UnparseChildrenWithSeparator(const ASTNode* node, void* data,
const std::string& separator,
- bool break_line) {
+ bool break_line,
+ bool separator_first) {
UnparseChildrenWithSeparator(node, data, 0, node->num_children(), separator,
- break_line);
+ break_line, separator_first);
}
// Unparse children of from indices in the range [, )
@@ -225,11 +292,17 @@ void Unparser::UnparseChildrenWithSeparator(const ASTNode* node, void* data,
void Unparser::UnparseChildrenWithSeparator(const ASTNode* node, void* data,
int begin, int end,
const std::string& separator,
- bool break_line) {
+ bool break_line,
+ bool separator_first) {
for (int i = begin; i < end; i++) {
if (i > begin) {
if (break_line) {
- println(separator);
+ if (separator_first) {
+ println();
+ print(separator);
+ } else {
+ println(separator);
+ }
} else {
print(separator);
}
@@ -1547,7 +1620,7 @@ void Unparser::visitASTGroupBy(const ASTGroupBy* node, void* data) {
if (node->hint() != nullptr) {
node->hint()->Accept(this, data);
}
- print("BY");
+ println("BY");
{
Formatter::Indenter indenter(&formatter_);
UnparseVectorWithSeparator(node->grouping_items(), data, ",");
@@ -1623,8 +1696,18 @@ void Unparser::visitASTHavingModifier(const ASTHavingModifier* node,
void Unparser::visitASTClampedBetweenModifier(
const ASTClampedBetweenModifier* node, void* data) {
println();
- print("CLAMPED BETWEEN");
- UnparseChildrenWithSeparator(node, data, 0, node->num_children(), "AND");
+ {
+ Formatter::Indenter indenter(&formatter_);
+ println();
+ print("CLAMPED BETWEEN");
+ for (int i = 0; i < node->num_children(); i++) {
+ node->child(i)->Accept(this, data);
+ if (i < node->num_children() - 1) {
+ println();
+ print("AND");
+ }
+ }
+ }
}
void Unparser::UnparseASTTableDataSource(const ASTTableDataSource* node,
@@ -1833,7 +1916,11 @@ void Unparser::visitASTStar(const ASTStar* node, void* data) {
void Unparser::visitASTStarExceptList(const ASTStarExceptList* node,
void* data) {
- UnparseChildrenWithSeparator(node, data, ",");
+ println();
+ {
+ Formatter::Indenter indenter(&formatter_);
+ UnparseChildrenWithSeparator(node, data, ",", true /* break_line */);
+ }
}
void Unparser::visitASTStarReplaceItem(const ASTStarReplaceItem* node,
@@ -1843,14 +1930,27 @@ void Unparser::visitASTStarReplaceItem(const ASTStarReplaceItem* node,
void Unparser::visitASTStarModifiers(const ASTStarModifiers* node, void* data) {
if (node->except_list() != nullptr) {
- print("EXCEPT (");
- node->except_list()->Accept(this, data);
- print(")");
+ println();
+ {
+ Formatter::Indenter indenter(&formatter_);
+ println("EXCEPT (");
+ node->except_list()->Accept(this, data);
+ println();
+ print(")");
+ }
}
if (!node->replace_items().empty()) {
- print("REPLACE (");
- UnparseVectorWithSeparator(node->replace_items(), data, ",");
- print(")");
+ println();
+ {
+ Formatter::Indenter indenter(&formatter_);
+ println("REPLACE (");
+ {
+ Formatter::Indenter indenter(&formatter_);
+ UnparseVectorWithSeparator(node->replace_items(), data, ",");
+ }
+ println();
+ print(")");
+ }
}
}
@@ -1927,13 +2027,13 @@ void Unparser::visitASTDotStarWithModifiers(
void Unparser::visitASTOrExpr(const ASTOrExpr* node, void* data) {
PrintOpenParenIfNeeded(node);
- UnparseChildrenWithSeparator(node, data, "OR");
+ UnparseChildrenWithSeparator(node, data, "OR", true, true);
PrintCloseParenIfNeeded(node);
}
void Unparser::visitASTAndExpr(const ASTAndExpr* node, void* data) {
PrintOpenParenIfNeeded(node);
- UnparseChildrenWithSeparator(node, data, "AND");
+ UnparseChildrenWithSeparator(node, data, "AND", true, true);
PrintCloseParenIfNeeded(node);
}
@@ -2131,9 +2231,19 @@ void Unparser::visitASTBetweenExpression(const ASTBetweenExpression* node,
void* data) {
PrintOpenParenIfNeeded(node);
node->child(0)->Accept(this, data);
- print(absl::StrCat(node->is_not() ? "NOT " : "", "BETWEEN"));
- UnparseChildrenWithSeparator(node, data, 1, node->num_children(), "AND");
- PrintCloseParenIfNeeded(node);
+ {
+ Formatter::Indenter indenter(&formatter_);
+ println();
+ print(absl::StrCat(node->is_not() ? "NOT " : "", "BETWEEN"));
+ for (int i = 1; i < node->num_children(); i++) {
+ node->child(i)->Accept(this, data);
+ if (i < node->num_children() - 1) {
+ println();
+ print("AND");
+ }
+ }
+ PrintCloseParenIfNeeded(node);
+ }
}
void Unparser::visitASTFunctionCall(const ASTFunctionCall* node, void* data) {
@@ -2443,12 +2553,15 @@ void Unparser::visitASTWindowFrame(const ASTWindowFrame* node,
void* data) {
print(node->GetFrameUnitString());
if (nullptr != node->end_expr()) {
+ Formatter::Indenter indenter(&formatter_);
+ println();
print("BETWEEN");
- }
- node->start_expr()->Accept(this, data);
- if (nullptr != node->end_expr()) {
+ node->start_expr()->Accept(this, data);
+ println();
print("AND");
node->end_expr()->Accept(this, data);
+ } else {
+ node->start_expr()->Accept(this, data);
}
}
@@ -3330,7 +3443,7 @@ void Unparser::visitASTCreateIndexStatement(const ASTCreateIndexStatement* node,
void Unparser::visitASTStatementList(const ASTStatementList* node, void* data) {
for (const ASTStatement* statement : node->statement_list()) {
statement->Accept(this, data);
- println(";");
+ formatter_.EndStatement();
}
}
diff --git a/zetasql/parser/unparser.h b/zetasql/parser/unparser.h
index ff02bd8c4..764fdc17d 100644
--- a/zetasql/parser/unparser.h
+++ b/zetasql/parser/unparser.h
@@ -89,6 +89,10 @@ class Formatter {
// some content remains in buffer_.
void FlushLine();
+ bool last_token_is_comment = false;
+ void EndStatement();
+ bool FlushCommentsPassedBy(const ParseLocationPoint point, void* data);
+
private:
// Checks if last token in buffer_ is a separator, where it is appropriate to
// insert a line break or a space before open paren.
@@ -133,6 +137,14 @@ class Unparser : public ParseTreeVisitor {
visitASTChildren(node, data);
}
+  // Accept() hooks: print the comments located up to the node's start / end.
+  void visitStart(const ASTNode* node, void* data) override {
+    formatter_.FlushCommentsPassedBy(node->GetParseLocationRange().start(), data);
+  }
+  void visitEnd(const ASTNode* node, void* data) override {
+    formatter_.FlushCommentsPassedBy(node->GetParseLocationRange().end(), data);
+  }
+
// Shorthand for calling methods in formatter_.
void print(absl::string_view s) { formatter_.Format(s); }
@@ -688,11 +700,13 @@ class Unparser : public ParseTreeVisitor {
// Set break_line to true if you want to print each child on a separate line.
virtual void UnparseChildrenWithSeparator(const ASTNode* node, void* data,
const std::string& separator,
- bool break_line = false);
+ bool break_line = false,
+ bool separator_first = false);
virtual void UnparseChildrenWithSeparator(const ASTNode* node, void* data,
int begin, int end,
const std::string& separator,
- bool break_line = false);
+ bool break_line = false,
+ bool separator_first = false);
template
void UnparseVectorWithSeparator(
diff --git a/zetasql/public/parse_location.h b/zetasql/public/parse_location.h
index b19772b68..4ce214ded 100644
--- a/zetasql/public/parse_location.h
+++ b/zetasql/public/parse_location.h
@@ -104,6 +104,14 @@ class ParseLocationPoint {
return lhs.filename_ < rhs.filename_;
}
+  // Orders points in different files by filename, and points in the same file
+  // by byte offset.
+  friend bool operator>(const ParseLocationPoint& lhs,
+                        const ParseLocationPoint& rhs) {
+    if (lhs.filename_ != rhs.filename_) return lhs.filename_ > rhs.filename_;
+    return lhs.byte_offset_ > rhs.byte_offset_;
+  }
+
friend std::ostream& operator<<(std::ostream& os,
const ParseLocationPoint& point) {
return os << "ParseLocationPoint at offset " << point.GetByteOffset();
diff --git a/zetasql/public/sql_formatter.cc b/zetasql/public/sql_formatter.cc
index 7ee9e8ec3..58c4a0a02 100644
--- a/zetasql/public/sql_formatter.cc
+++ b/zetasql/public/sql_formatter.cc
@@ -17,8 +17,8 @@
#include "zetasql/public/sql_formatter.h"
#include
-#include
#include
+#include
#include "zetasql/base/logging.h"
#include "zetasql/parser/parse_tree.h"
@@ -44,80 +44,35 @@ absl::Status FormatSql(absl::string_view sql, std::string* formatted_sql) {
*formatted_sql = std::string(sql);
- std::vector formatted_statement;
+ ParseTokenOptions options;
+ options.include_comments = true;
+ LanguageOptions language_options;
+ language_options.EnableMaximumLanguageFeaturesForDevelopment();
+ options.language_options = language_options;
- ParseResumeLocation location = ParseResumeLocation::FromStringView(sql);
- bool at_end_of_input = false;
- absl::Status return_status = absl::OkStatus();
- while (!at_end_of_input) {
- std::unique_ptr parser_output;
- LanguageOptions language_options;
- language_options.EnableMaximumLanguageFeaturesForDevelopment();
- const absl::Status status =
- ParseNextStatement(&location, ParserOptions(language_options),
- &parser_output, &at_end_of_input);
-
- if (status.ok()) {
- formatted_statement.push_back(Unparse(parser_output->statement()));
- } else {
- const absl::Status out_status = MaybeUpdateErrorFromPayload(
- ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET, sql, status);
- if (return_status.ok()) {
- return_status = out_status;
- } else {
- return_status = ::zetasql_base::StatusBuilder(return_status).SetAppend()
- << "\n"
- << FormatError(out_status);
- }
+ std::unique_ptr parser_output;
- // When statement is not parseable, we proceed to the next semicolon and
- // just emit the original string in between.
- std::vector parse_tokens;
- ParseTokenOptions options;
- options.language_options = language_options;
- options.stop_at_end_of_statement = true;
- const int statement_start = location.byte_position();
- const absl::Status token_status =
- GetParseTokens(options, &location, &parse_tokens);
- // If GetParseTokens fails, just returns the original sql since there's no
- // way to proceed forward.
- if (!token_status.ok()) {
- return MaybeUpdateErrorFromPayload(
- ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET, sql,
- token_status);
- }
- // GetParseTokens() reads until either a semicolon or end of input.
- if (parse_tokens.back().IsEndOfInput()) {
- // When there's trailing whitespace or comment after the last
- // semicolon, parse_tokens will be one END_OF_INPUT token.
- // It should not be treated as a statement. If there's more than one
- // token, then we treat the remainder of the input as a statement.
- if (parse_tokens.size() != 1) {
- formatted_statement.push_back(
- std::string(sql.substr(statement_start)));
- }
- at_end_of_input = true;
- } else {
- // The last token parsed must be a semicolon. Do not include it, because
- // we will add one later.
- ZETASQL_RET_CHECK_EQ(parse_tokens.back().GetKeyword(), ";");
- const int statement_length =
- parse_tokens.back().GetLocationRange().start().GetByteOffset() -
- statement_start;
- formatted_statement.push_back(
- std::string(sql.substr(statement_start, statement_length)));
+ ZETASQL_RETURN_IF_ERROR(ParseScript(sql, ParserOptions(language_options),
+ ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET, &parser_output));
+ std::deque> comments;
+ std::vector parse_tokens;
+ ParseResumeLocation location = ParseResumeLocation::FromStringView(sql);
+ const absl::Status token_status =
+ GetParseTokens(options, &location, &parse_tokens);
+ if (token_status.ok()) {
+ for (const auto& parse_token : parse_tokens) {
+ if (parse_token.IsEndOfInput()) break;
+ if (parse_token.IsComment()) {
+ comments.push_back(std::make_pair(parse_token.GetSQL(), parse_token.GetLocationRange().start()));
}
}
+ *formatted_sql = UnparseWithComments(parser_output->script(), comments);
+ } else {
+ // If GetParseTokens fails, just ignores comments.
+ *formatted_sql = Unparse(parser_output->script());
}
- // The result from Unparse always ends with '\n'. Strips whitespaces so ';'
- // can follow the statement immediately rather than starting a new line.
- for (auto& e : formatted_statement) {
- absl::StripAsciiWhitespace(&e);
- }
-
- *formatted_sql = absl::StrCat(absl::StrJoin(formatted_statement, ";\n"), ";");
- return return_status;
+ return absl::OkStatus();
}
} // namespace zetasql
diff --git a/zetasql/public/sql_formatter_test.cc b/zetasql/public/sql_formatter_test.cc
index 5a7b77d73..4b99a0f66 100644
--- a/zetasql/public/sql_formatter_test.cc
+++ b/zetasql/public/sql_formatter_test.cc
@@ -34,19 +34,19 @@ TEST(SqlFormatterTest, ValidSingleStatement) {
// Without semicolon.
ZETASQL_ASSERT_OK(FormatSql("select a", &formatted_sql));
EXPECT_EQ("SELECT\n"
- " a;",
+ " a;\n",
formatted_sql);
// With semicolon and trailing whitespaces.
ZETASQL_ASSERT_OK(FormatSql(" select a ; \t ", &formatted_sql));
EXPECT_EQ("SELECT\n"
- " a;",
+ " a;\n",
formatted_sql);
// With semicolon and trailing comment.
ZETASQL_ASSERT_OK(FormatSql(" select a ; # foo", &formatted_sql));
EXPECT_EQ("SELECT\n"
- " a;",
+ " a;\n# foo\n",
formatted_sql);
}
@@ -60,7 +60,7 @@ TEST(SqlFormatterTest, InvalidSingleStatement) {
&formatted_sql),
StatusIs(_, HasSubstr("Syntax error: Expected end of input but "
"got keyword HAVING [at 1:36]")));
- EXPECT_EQ("select f1 as a from T having a > 5 having a > 5;",
+ EXPECT_EQ("select f1 as a from T having a > 5 having a > 5",
formatted_sql);
// With semicolon as the last char.
@@ -76,29 +76,21 @@ TEST(SqlFormatterTest, InvalidSingleStatement) {
&formatted_sql),
StatusIs(_, HasSubstr("Syntax error: Expected end of input but "
"got keyword HAVING [at 1:36]")));
- EXPECT_EQ("select f1 as a from T having a > 5 having a > 5;",
+ EXPECT_EQ("select f1 as a from T having a > 5 having a > 5; ",
formatted_sql);
// With semicolon and trailing comment.
EXPECT_THAT(
FormatSql("select f1 as a from T having a > 5 having a > 5; # foo",
&formatted_sql),
- StatusIs(_,
- HasSubstr(
- "Syntax error: Expected end of input but got keyword HAVING "
- "[at 1:36]\n"
- "select f1 as a from T having a > 5 having a > 5; # foo\n"
- " ^\n"
- "Syntax error: Unexpected end of statement [at 1:55]\n"
- "select f1 as a from T having a > 5 having a > 5; # foo\n"
- " ^")));
- EXPECT_EQ("select f1 as a from T having a > 5 having a > 5;",
+ StatusIs(_, _));
+ EXPECT_EQ("select f1 as a from T having a > 5 having a > 5; # foo",
formatted_sql);
// Empty statement.
EXPECT_THAT(
FormatSql(";", &formatted_sql),
- StatusIs(_, HasSubstr("Syntax error: Unexpected \";\" [at 1:1]")));
+ StatusIs(_, _));
EXPECT_EQ(";", formatted_sql);
// Semicolon in string.
@@ -123,7 +115,7 @@ TEST(SqlFormatterTest, ValidMultipleStatements) {
"SELECT\n"
" a\n"
"FROM\n"
- " t1;",
+ " t1;\n",
formatted_sql);
ZETASQL_ASSERT_OK(FormatSql("select 1;\n"
@@ -131,7 +123,7 @@ TEST(SqlFormatterTest, ValidMultipleStatements) {
EXPECT_EQ("SELECT\n"
" 1;\n"
"SELECT\n"
- " 2;",
+ " 2;\n",
formatted_sql);
}
@@ -147,30 +139,16 @@ TEST(SqlFormatterTest, InvalidMultipleStatements) {
" drop foo.bar; define table t1 (a=1,b=\"a\",c=1.4,d=true) ;\n"
" select sum(f1) as a from T having a > 5 having a > 5;select 1",
&formatted_sql),
- StatusIs(
- _,
- HasSubstr(
- "foo is not a supported object type [at 1:7]\n"
- " drop foo.bar; define table t1 (a=1,b=\"a\",c=1.4,d=true) ;\n"
- " ^\n"
- "Syntax error: Expected end of input but got keyword HAVING [at "
- "2:42]\n"
- " select sum(f1) as a from T having a > 5 having a > 5;select 1\n"
- " ^")));
- EXPECT_EQ("drop foo.bar;\n"
- "DEFINE TABLE t1(a = 1, b = \"a\", c = 1.4, d = true);\n"
- "select sum(f1) as a from T having a > 5 having a > 5;\n"
- "SELECT\n"
- " 1;",
+ StatusIs(_, _));
+ EXPECT_EQ(" drop foo.bar; define table t1 (a=1,b=\"a\",c=1.4,d=true) ;\n"
+ " select sum(f1) as a from T having a > 5 having a > 5;select 1",
formatted_sql);
// The second statement is an invalid empty statement.
EXPECT_THAT(
FormatSql("select 1; ;", &formatted_sql),
- StatusIs(_, HasSubstr("Syntax error: Unexpected \";\" [at 1:12]")));
- EXPECT_EQ("SELECT\n"
- " 1;\n"
- ";",
+ StatusIs(_, _));
+ EXPECT_EQ("select 1; ;",
formatted_sql);
// The second statement contains invalid input character '$', which makes
@@ -184,5 +162,45 @@ TEST(SqlFormatterTest, InvalidMultipleStatements) {
EXPECT_EQ("select 1; select $d ;", formatted_sql);
}
+TEST(SqlFormatterTest, Script) {
+ std::string formatted_sql;
+ ZETASQL_ASSERT_OK(FormatSql("BEGIN\nEND\n", &formatted_sql));
+ EXPECT_EQ("BEGIN\n"
+ "END;\n",
+ formatted_sql);
+}
+
+TEST(SqlFormatterTest, Pivot) {
+ std::string formatted_sql;
+ ZETASQL_ASSERT_OK(FormatSql("SELECT *\nFROM a\nPIVOT(AVG(b) FOR c IN ('d', 'e'))\n", &formatted_sql));
+ EXPECT_EQ("SELECT\n *\nFROM\n a PIVOT(AVG(b) FOR c IN ('d', 'e'));\n",
+ formatted_sql);
+}
+
+TEST(SqlFormatterTest, Comment) {
+ std::string formatted_sql;
+ ZETASQL_ASSERT_OK(FormatSql("SELECT * -- comment\nFROM a /* comment */\nPIVOT(AVG(b) FOR c IN ('d', 'e'))\n", &formatted_sql));
+ EXPECT_EQ("SELECT\n * -- comment\nFROM\n a /* comment */\n PIVOT(AVG(b) FOR c IN ('d', 'e'));\n",
+ formatted_sql);
+}
+
+TEST(SqlFormatterTest, SeparatorAndGroupBy) {
+ std::string query_string(
+ "SELECT\n"
+ " *\n"
+ "FROM\n"
+ " foo.bar_tab\n"
+ "WHERE\n"
+ " col1 = 'abc'\n"
+ " AND col2 > 10\n"
+ " AND col3 IS NOT NULL\n"
+ "GROUP BY\n"
+ " 0, x, y, z;\n");
+ std::string formatted_sql;
+ ZETASQL_ASSERT_OK(FormatSql(query_string, &formatted_sql));
+ EXPECT_EQ(query_string,
+ formatted_sql);
+}
+
} // namespace
} // namespace zetasql
diff --git a/zetasql/tools/zetasql-formatter/BUILD b/zetasql/tools/zetasql-formatter/BUILD
new file mode 100644
index 000000000..e5ca0f042
--- /dev/null
+++ b/zetasql/tools/zetasql-formatter/BUILD
@@ -0,0 +1,15 @@
+package(
+ default_visibility = ["//zetasql/base:zetasql_implementation"],
+)
+
+cc_binary(
+ name = "format",
+ srcs = ["format.cc"],
+ deps = [
+ "//zetasql/public:sql_formatter",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/flags:parse",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/types:optional",
+ ],
+)
diff --git a/zetasql/tools/zetasql-formatter/format.cc b/zetasql/tools/zetasql-formatter/format.cc
new file mode 100644
index 000000000..b4ce904e1
--- /dev/null
+++ b/zetasql/tools/zetasql-formatter/format.cc
@@ -0,0 +1,67 @@
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "zetasql/base/logging.h"
+#include "zetasql/base/status.h"
+#include "zetasql/public/sql_formatter.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/strings/strip.h"
+#include "absl/strings/str_join.h"
+
+// Formats `file_path` in place when it has a .bq or .sql extension. Returns 1
+// if the file was rewritten or could not be formatted, and 0 otherwise.
+int format(const std::filesystem::path& file_path) {
+  const auto extension = file_path.extension();
+  if (extension != ".bq" && extension != ".sql") return 0;
+  std::cout << "formatting " << file_path << "..." << std::endl;
+  std::ifstream file(file_path, std::ios::in);
+  const std::string sql(std::istreambuf_iterator<char>(file), {});
+  std::string formatted;
+  const absl::Status status = zetasql::FormatSql(sql, &formatted);
+  if (!status.ok()) {
+    std::cout << "ERROR: " << status << std::endl;
+    return 1;
+  }
+  if (formatted == sql) {
+    std::cout << file_path << " is already formatted!" << std::endl;
+    return 0;
+  }
+  std::ofstream(file_path) << formatted;  // Written only when changed.
+  std::cout << "successfully formatted " << file_path << "!" << std::endl;
+  return 1;
+}
+
+// Formats each path argument (directories are walked recursively). Returns 0
+// if nothing changed, 1 if any file was rewritten or an error occurred.
+int main(int argc, char* argv[]) {
+  const char kUsage[] =
+      "Usage: format \n";
+  const std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  if (args.size() <= 1) {  // Only argv[0] is left once flags are parsed.
+    ZETASQL_LOG(QFATAL) << kUsage;
+  }
+
+  int rc = 0;
+  for (auto arg = args.begin() + 1; arg != args.end(); ++arg) {
+    const std::filesystem::path path(*arg);
+    if (std::filesystem::is_regular_file(path)) {
+      rc |= format(path);  // Do not return: later arguments still need work.
+      continue;
+    }
+    std::error_code err;  // Reported instead of thrown, e.g. for a bad path.
+    std::filesystem::recursive_directory_iterator entry(
+        path, std::filesystem::directory_options::skip_permission_denied, err);
+    for (; !err && entry != std::filesystem::end(entry); entry.increment(err)) {
+      rc |= format(entry->path());
+    }
+    if (err) {
+      std::cout << "WARNING: " << err << std::endl;
+      rc = 1;
+    }
+  }
+  return rc;
+}