dbt-labs · karunpoudel · Mar 13, 2022 · Mar 16, 2022 · Mar 20, 2022 · Mar 25, 2022
@@ -0,0 +1,8 @@
+kind: Features
+body: Add `--selected-schema-cache` flag to cache schema objects of selected models
+  only.
+time: 2022-03-16T00:38:47.8468296-05:00
+custom:
+  Author: karunpoudel
+  Issue: "4688"
+  PR: "4860"
@@ -337,11 +337,12 @@ def _get_catalog_schemas(self, manifest: Manifest) -> SchemaSearchMap:
         # databases
         return info_schema_name_map
 
-    def _relations_cache_for_schemas(self, manifest: Manifest) -> None:
+    def _relations_cache_for_schemas(self, manifest: Manifest, cache_schemas: Set[BaseRelation] = None) -> None:
 def _relations_cache_for_schemas(self, manifest): 
     super()._relations_cache_for_schemas(manifest) 
     self._link_cached_relations(manifest) 
 def _relations_cache_for_schemas(self, manifest): 
     super()._relations_cache_for_schemas(manifest) 
     self._link_cached_relations(manifest) 
         """Populate the relations cache for the given schemas. Returns an
         iterable of the schemas populated, as strings.
         """
-        cache_schemas = self._get_cache_schemas(manifest)
+        if not cache_schemas:
+            cache_schemas = self._get_cache_schemas(manifest)
         with executor(self.config) as tpe:
             futures: List[Future[List[BaseRelation]]] = []
             for cache_schema in cache_schemas:
@@ -367,14 +368,15 @@ def _relations_cache_for_schemas(self, manifest: Manifest) -> None:
             cache_update.add((relation.database, relation.schema))
         self.cache.update_schemas(cache_update)
 
-    def set_relations_cache(self, manifest: Manifest, clear: bool = False) -> None:
+    def set_relations_cache(self, manifest: Manifest, clear: bool = False,
+                            required_schemas: Set[BaseRelation] = None) -> None:
         """Run a query that gets a populated cache of the relations in the
         database and set the cache on this adapter.
         """
         with self.cache.lock:
             if clear:
                 self.cache.clear()
-            self._relations_cache_for_schemas(manifest)
+            self._relations_cache_for_schemas(manifest, required_schemas)
 
     @available
     def cache_added(self, relation: Optional[BaseRelation]) -> str:

@@ -253,6 +253,7 @@ class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
     use_experimental_parser: Optional[bool] = None
     static_parser: Optional[bool] = None
     indirect_selection: Optional[str] = None
+    selected_schema_cache: Optional[bool] = None
 
 
 @dataclass

@@ -35,6 +35,7 @@
 LOG_CACHE_EVENTS = None
 EVENT_BUFFER_SIZE = 100000
 QUIET = None
+SELECTED_SCHEMA_CACHE = None
 
 # Global CLI defaults. These flags are set from three places:
 # CLI args, environment variables, and user_config (profiles.yml).
@@ -57,6 +58,7 @@
     "LOG_CACHE_EVENTS": False,
     "EVENT_BUFFER_SIZE": 100000,
     "QUIET": False,
+    "SELECTED_SCHEMA_CACHE": False,
 }
 
 
@@ -106,7 +108,7 @@ def set_from_args(args, user_config):
     global STRICT_MODE, FULL_REFRESH, WARN_ERROR, USE_EXPERIMENTAL_PARSER, STATIC_PARSER
     global WRITE_JSON, PARTIAL_PARSE, USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT
     global INDIRECT_SELECTION, VERSION_CHECK, FAIL_FAST, SEND_ANONYMOUS_USAGE_STATS
-    global PRINTER_WIDTH, WHICH, LOG_CACHE_EVENTS, EVENT_BUFFER_SIZE, QUIET
+    global PRINTER_WIDTH, WHICH, LOG_CACHE_EVENTS, EVENT_BUFFER_SIZE, QUIET, SELECTED_SCHEMA_CACHE
 
     STRICT_MODE = False  # backwards compatibility
     # cli args without user_config or env var option
@@ -132,6 +134,7 @@ def set_from_args(args, user_config):
     LOG_CACHE_EVENTS = get_flag_value("LOG_CACHE_EVENTS", args, user_config)
     EVENT_BUFFER_SIZE = get_flag_value("EVENT_BUFFER_SIZE", args, user_config)
     QUIET = get_flag_value("QUIET", args, user_config)
+    SELECTED_SCHEMA_CACHE = get_flag_value('SELECTED_SCHEMA_CACHE', args, user_config)
 
 
 def get_flag_value(flag, args, user_config):

@@ -1084,6 +1084,27 @@ def parse_args(args, cls=DBTArgumentParser):
         """,
     )
 
+    schema_cache_flag = p.add_mutually_exclusive_group()
+    schema_cache_flag.add_argument(
+        '--selected-schema-cache',
+        action='store_const',
+        const=True,
+        default=None,
+        dest='selected_schema_cache',
+        help='''
+        Pre cache objects of schema relevant to selected resource only.
+        '''
+    )
+    schema_cache_flag.add_argument(
+        '--no-selected-schema-cache',
+        action='store_const',
+        const=False,
+        dest='selected_schema_cache',
+        help='''
+        Pre cache objects of all schema.
+        '''
+    )
+
     subs = p.add_subparsers(title="Available sub-commands")
 
     base_subparser = _build_base_subparser()

@@ -436,8 +436,9 @@ def defer_to_manifest(self, adapter, selected_uids: AbstractSet[str]):
 
     def before_run(self, adapter, selected_uids: AbstractSet[str]):
         with adapter.connection_named("master"):
-            self.create_schemas(adapter, selected_uids)
-            self.populate_adapter_cache(adapter)
+            required_schemas = self.get_model_schemas(adapter, selected_uids)
+            self.create_schemas(adapter, required_schemas)
+            self.populate_adapter_cache(adapter, required_schemas)
             self.defer_to_manifest(adapter, selected_uids)
             self.safe_run_hooks(adapter, RunHookType.Start, {})
 

@@ -390,8 +390,11 @@ def _mark_dependent_errors(self, node_id, result, cause):
         for dep_node_id in self.graph.get_dependent_nodes(node_id):
             self._skipped_children[dep_node_id] = cause
 
-    def populate_adapter_cache(self, adapter):
-        adapter.set_relations_cache(self.manifest)
+    def populate_adapter_cache(self, adapter, required_schemas: Set[BaseRelation] = None):
+        if flags.SELECTED_SCHEMA_CACHE is True:
+            adapter.set_relations_cache(self.manifest, required_schemas=required_schemas)
+        else:
+            adapter.set_relations_cache(self.manifest)
 
     def before_hooks(self, adapter):
         pass
@@ -489,8 +492,7 @@ def get_model_schemas(self, adapter, selected_uids: Iterable[str]) -> Set[BaseRe
 
         return result
 
-    def create_schemas(self, adapter, selected_uids: Iterable[str]):
-        required_schemas = self.get_model_schemas(adapter, selected_uids)
+    def create_schemas(self, adapter, required_schemas: Set[BaseRelation]):
         # we want the string form of the information schema database
         required_databases: Set[BaseRelation] = set()
         for required in required_schemas: