Split the DAG helpers out of alphasql/dag.cc into a header-only Bazel library
so that the unit test depends on :dag_lib instead of textually including
dag.cc.

  * alphasql/BUILD: :dag keeps dag.cc as its only source and gets its own deps
    list ending in :identifier_resolver and :dag_lib; the former deps list now
    belongs to the new cc_library :dag_lib (hdrs = dag_lib.h); the cc_test is
    renamed dag_test -> dag_lib_test and depends on :dag_lib.
  * alphasql/dag.cc: the 122 lines between the leading system #include and
    main() are removed and replaced by #include "alphasql/dag_lib.h".
  * alphasql/dag_lib.h: new file; license header, the system #include, and the
    same 122 lines.
  * alphasql/dag_lib_test.cc: renamed from dag_test.cc; includes the header
    instead of dag.cc.

NOTE(review): the copy of this patch that was reviewed had its line structure
collapsed and every non-empty <...> run stripped. Template arguments and the
<options>/<copy> placeholders in the GetOptionsWithArenas comment were restored
from how the values are used in the visible code. The system header is assumed
to be <filesystem> (the code uses std::filesystem::path) and the map key type
is assumed to be ResolvedNodeKind - TODO confirm both against the repository.
Leading whitespace inside the hunks was re-created conventionally, so
regenerate the patch from the repository before running `git apply`.

NOTE(review): dag_lib.h as added has no include guard and defines ABSL_FLAGs
and non-inline functions at namespace scope; that is only safe while a single
translation unit per binary includes it.

diff --git a/alphasql/BUILD b/alphasql/BUILD
index 9e3868f4..a09f22b8 100644
--- a/alphasql/BUILD
+++ b/alphasql/BUILD
@@ -78,9 +78,23 @@ cc_binary(
 
 cc_binary(
     name = "dag",
-    srcs = [
-        "dag.cc",
+    srcs = ["dag.cc"],
+    deps = [
+        "@com_google_zetasql//zetasql/analyzer",
+        "@com_google_zetasql//zetasql/resolved_ast",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/flags:parse",
+        "@com_google_absl//absl/strings",
+        "@boost//:graph",
+        ":identifier_resolver",
+        ":dag_lib",
     ],
+)
+
+
+cc_library(
+    name = "dag_lib",
+    hdrs = ["dag_lib.h"],
     deps = [
         "@com_google_zetasql//zetasql/analyzer",
         "@com_google_zetasql//zetasql/resolved_ast",
@@ -93,10 +107,10 @@ cc_binary(
 )
 
 cc_test(
-    name = "dag_test",
-    srcs = ["dag_test.cc"],
+    name = "dag_lib_test",
+    srcs = ["dag_lib_test.cc"],
     deps = [
-        ":dag",
+        ":dag_lib",
         "@com_google_googletest//:gtest_main",
         "@boost//:graph",
     ],
diff --git a/alphasql/dag.cc b/alphasql/dag.cc
index 451c9d85..67cfb633 100644
--- a/alphasql/dag.cc
+++ b/alphasql/dag.cc
@@ -15,128 +15,7 @@
 //
 
 #include <filesystem>
-
-#include "zetasql/resolved_ast/resolved_ast.h"
-#include "zetasql/base/logging.h"
-#include "zetasql/base/status.h"
-#include "zetasql/base/statusor.h"
-#include "zetasql/public/analyzer.h"
-#include "alphasql/identifier_resolver.h"
-#include "absl/flags/flag.h"
-#include "absl/flags/parse.h"
-#include "boost/graph/graphviz.hpp"
-#include "boost/graph/depth_first_search.hpp"
-#include "absl/strings/str_join.h"
-
-typedef std::pair<std::string, std::string> Edge;
-
-ABSL_FLAG(std::string, output_path, "",
-          "Output path for DAG.");
-ABSL_FLAG(std::string, external_required_tables_output_path, "",
-          "Output path for external required tables.");
-
-struct table_queries {
-  std::vector<std::string> create;
-  std::vector<std::string> others;
-};
-
-namespace alphasql {
-
-  using namespace zetasql;
-
-  // Returns <options> if it already has all arenas initialized, or otherwise
-  // populates <copy> as a copy for <options>, creates arenas in <copy> and
-  // returns it. This avoids unnecessary duplication of AnalyzerOptions, which
-  // might be expensive.
-  const AnalyzerOptions& GetOptionsWithArenas(
-      const AnalyzerOptions* options, std::unique_ptr<AnalyzerOptions>* copy) {
-    if (options->AllArenasAreInitialized()) {
-      return *options;
-    }
-    *copy = absl::make_unique<AnalyzerOptions>(*options);
-    (*copy)->CreateDefaultArenasIfNotSet();
-    return **copy;
-  }
-
-  zetasql_base::StatusOr<std::map<ResolvedNodeKind, TableNamesSet>> ExtractTableNamesFromSQL(const std::string& sql_file_path,
-      TableNamesSet* table_names) {
-    LanguageOptions language_options;
-    language_options.EnableMaximumLanguageFeaturesForDevelopment();
-    language_options.SetEnabledLanguageFeatures({FEATURE_V_1_3_ALLOW_DASHES_IN_TABLE_NAME});
-    language_options.SetSupportsAllStatementKinds();
-    AnalyzerOptions options(language_options);
-    options.mutable_language()->EnableMaximumLanguageFeaturesForDevelopment();
-    options.CreateDefaultArenasIfNotSet();
-
-    return identifier_resolver::GetNodeKindToTableNamesMap(
-      sql_file_path, options, table_names);
-  }
-
-  absl::Status UpdateTableQueriesMapAndVertices(const std::filesystem::path& file_path,
-      std::map<std::string, table_queries>& table_queries_map,
-      std::set<std::string>& vertices) {
-    if (file_path.extension() != ".bq" && file_path.extension() != ".sql") {
-      // std::cout << "not a sql file " << file_path << "!" << std::endl;
-      // Skip if not SQL.
-      return absl::OkStatus();
-    }
-    std::cout << "Reading " << file_path << std::endl;
-
-    TableNamesSet table_names;
-    auto node_kind_to_table_names_or_status = ExtractTableNamesFromSQL(file_path.string(), &table_names);
-    if (!node_kind_to_table_names_or_status.ok()) {
-      return node_kind_to_table_names_or_status.status();
-    }
-    std::map<ResolvedNodeKind, TableNamesSet> node_kind_to_table_names = node_kind_to_table_names_or_status.value();
-
-    // Resolve file dependency from DML on DDL.
-    for (auto const& table_name : node_kind_to_table_names[RESOLVED_CREATE_TABLE_STMT]) {
-      const std::string table_string = absl::StrJoin(table_name, ".");
-      table_queries_map[table_string].create.push_back(file_path);
-    }
-    for (auto const& table_name : node_kind_to_table_names[RESOLVED_CREATE_TABLE_AS_SELECT_STMT]) {
-      const std::string table_string = absl::StrJoin(table_name, ".");
-      table_queries_map[table_string].create.push_back(file_path);
-    }
-
-    for (auto const& table_name : table_names) {
-      const std::string table_string = absl::StrJoin(table_name, ".");
-      table_queries_map[table_string].others.push_back(file_path);
-    }
-
-    vertices.insert(file_path);
-
-    return absl::OkStatus();
-  }
-
-  void UpdateEdges(std::vector<Edge>& depends_on,
-      std::vector<std::string> dependents, std::vector<std::vector<std::string>> parents) {
-    if (!dependents.size()) return;
-    for (const auto& parent : parents) {
-      if (!parent.size()) continue;
-      for (const std::string& p : parent) {
-        for (const std::string& dep : dependents) {
-          if (dep != p) {
-            depends_on.push_back(std::make_pair(dep, p));
-          }
-        }
-      }
-      return;
-    }
-  }
-}
-
-struct cycle_detector : public boost::dfs_visitor<> {
-  cycle_detector( bool& has_cycle)
-    : _has_cycle(has_cycle) { }
-
-  template <class Edge, class Graph>
-  void back_edge(Edge, Graph&) {
-    _has_cycle = true;
-  }
-protected:
-  bool& _has_cycle;
-};
+#include "alphasql/dag_lib.h"
 
 int main(int argc, char* argv[]) {
   const char kUsage[] =
diff --git a/alphasql/dag_lib.h b/alphasql/dag_lib.h
new file mode 100644
index 00000000..e1c65bf7
--- /dev/null
+++ b/alphasql/dag_lib.h
@@ -0,0 +1,139 @@
+//
+// Copyright 2020 Matts966
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <filesystem>
+
+#include "zetasql/resolved_ast/resolved_ast.h"
+#include "zetasql/base/logging.h"
+#include "zetasql/base/status.h"
+#include "zetasql/base/statusor.h"
+#include "zetasql/public/analyzer.h"
+#include "alphasql/identifier_resolver.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "boost/graph/graphviz.hpp"
+#include "boost/graph/depth_first_search.hpp"
+#include "absl/strings/str_join.h"
+
+typedef std::pair<std::string, std::string> Edge;
+
+ABSL_FLAG(std::string, output_path, "",
+          "Output path for DAG.");
+ABSL_FLAG(std::string, external_required_tables_output_path, "",
+          "Output path for external required tables.");
+
+struct table_queries {
+  std::vector<std::string> create;
+  std::vector<std::string> others;
+};
+
+namespace alphasql {
+
+  using namespace zetasql;
+
+  // Returns <options> if it already has all arenas initialized, or otherwise
+  // populates <copy> as a copy for <options>, creates arenas in <copy> and
+  // returns it. This avoids unnecessary duplication of AnalyzerOptions, which
+  // might be expensive.
+  const AnalyzerOptions& GetOptionsWithArenas(
+      const AnalyzerOptions* options, std::unique_ptr<AnalyzerOptions>* copy) {
+    if (options->AllArenasAreInitialized()) {
+      return *options;
+    }
+    *copy = absl::make_unique<AnalyzerOptions>(*options);
+    (*copy)->CreateDefaultArenasIfNotSet();
+    return **copy;
+  }
+
+  zetasql_base::StatusOr<std::map<ResolvedNodeKind, TableNamesSet>> ExtractTableNamesFromSQL(const std::string& sql_file_path,
+      TableNamesSet* table_names) {
+    LanguageOptions language_options;
+    language_options.EnableMaximumLanguageFeaturesForDevelopment();
+    language_options.SetEnabledLanguageFeatures({FEATURE_V_1_3_ALLOW_DASHES_IN_TABLE_NAME});
+    language_options.SetSupportsAllStatementKinds();
+    AnalyzerOptions options(language_options);
+    options.mutable_language()->EnableMaximumLanguageFeaturesForDevelopment();
+    options.CreateDefaultArenasIfNotSet();
+
+    return identifier_resolver::GetNodeKindToTableNamesMap(
+      sql_file_path, options, table_names);
+  }
+
+  absl::Status UpdateTableQueriesMapAndVertices(const std::filesystem::path& file_path,
+      std::map<std::string, table_queries>& table_queries_map,
+      std::set<std::string>& vertices) {
+    if (file_path.extension() != ".bq" && file_path.extension() != ".sql") {
+      // std::cout << "not a sql file " << file_path << "!" << std::endl;
+      // Skip if not SQL.
+      return absl::OkStatus();
+    }
+    std::cout << "Reading " << file_path << std::endl;
+
+    TableNamesSet table_names;
+    auto node_kind_to_table_names_or_status = ExtractTableNamesFromSQL(file_path.string(), &table_names);
+    if (!node_kind_to_table_names_or_status.ok()) {
+      return node_kind_to_table_names_or_status.status();
+    }
+    std::map<ResolvedNodeKind, TableNamesSet> node_kind_to_table_names = node_kind_to_table_names_or_status.value();
+
+    // Resolve file dependency from DML on DDL.
+    for (auto const& table_name : node_kind_to_table_names[RESOLVED_CREATE_TABLE_STMT]) {
+      const std::string table_string = absl::StrJoin(table_name, ".");
+      table_queries_map[table_string].create.push_back(file_path);
+    }
+    for (auto const& table_name : node_kind_to_table_names[RESOLVED_CREATE_TABLE_AS_SELECT_STMT]) {
+      const std::string table_string = absl::StrJoin(table_name, ".");
+      table_queries_map[table_string].create.push_back(file_path);
+    }
+
+    for (auto const& table_name : table_names) {
+      const std::string table_string = absl::StrJoin(table_name, ".");
+      table_queries_map[table_string].others.push_back(file_path);
+    }
+
+    vertices.insert(file_path);
+
+    return absl::OkStatus();
+  }
+
+  void UpdateEdges(std::vector<Edge>& depends_on,
+      std::vector<std::string> dependents, std::vector<std::vector<std::string>> parents) {
+    if (!dependents.size()) return;
+    for (const auto& parent : parents) {
+      if (!parent.size()) continue;
+      for (const std::string& p : parent) {
+        for (const std::string& dep : dependents) {
+          if (dep != p) {
+            depends_on.push_back(std::make_pair(dep, p));
+          }
+        }
+      }
+      return;
+    }
+  }
+}
+
+struct cycle_detector : public boost::dfs_visitor<> {
+  cycle_detector( bool& has_cycle)
+    : _has_cycle(has_cycle) { }
+
+  template <class Edge, class Graph>
+  void back_edge(Edge, Graph&) {
+    _has_cycle = true;
+  }
+protected:
+  bool& _has_cycle;
+};
diff --git a/alphasql/dag_test.cc b/alphasql/dag_lib_test.cc
similarity index 98%
rename from alphasql/dag_test.cc
rename to alphasql/dag_lib_test.cc
index fc6c8889..bd8506a7 100644
--- a/alphasql/dag_test.cc
+++ b/alphasql/dag_lib_test.cc
@@ -14,7 +14,7 @@
 // limitations under the License.
 //
 
-#include "alphasql/dag.cc"
+#include "alphasql/dag_lib.h"
 #include "boost/graph/depth_first_search.hpp"
 #include "gtest/gtest.h"
 