diff --git a/README.md b/README.md index 8260853..41a9d85 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,22 @@ # tap-pardot Singer tap for replicating Pardot data. +## Create a Config file +``` +{ + "client_id": "secret_client_id", + "client_secret": "secret_client_secret", + "refresh_token": "abc123", + "start_date": "2017-11-02T00:00:00Z", + "api_output_type": "bulk" +} +``` +The `client_id` and `client_secret` keys are your OAuth Salesforce App secrets. The `refresh_token` is a secret created during the OAuth flow. For more info on the Pardot OAuth flow, visit the Pardot [API documentation](https://developer.salesforce.com/docs/marketing/pardot/guide/authentication.html). + +The `start_date` is used by the tap as a bound on the query request. For more information about the format, see [Singer best practices for dates](https://github.com/singer-io/getting-started/blob/master/docs/BEST_PRACTICES.md#dates). + +The `api_output_type` sets the `output` parameter sent with each API call. The default is "bulk" (for more information, see [Query the Pardot API](https://developer.salesforce.com/docs/marketing/pardot/guide/bulk-data-pull.html#query-the-pardot-api) and [Changing the api response format](https://developer.salesforce.com/docs/marketing/pardot/guide/version-3-4-overview.html#changing-the-api-response-format)). With "bulk" output the call is optimized, but the response does not include additional data (such as nested objects and custom fields). If additional data is needed, set this option to either "simple" or "full" and add the additional data explicitly to the select statement. 
+ --- Copyright © 2019 Stitch diff --git a/tap_pardot/client.py b/tap_pardot/client.py index 44acdaf..9f3a3ea 100644 --- a/tap_pardot/client.py +++ b/tap_pardot/client.py @@ -216,7 +216,7 @@ def _make_request(self, method, url, params=None): def describe(self, endpoint, **kwargs): url = (ENDPOINT_BASE + self.describe_url).format(endpoint, '{}') - params = {"format": "json", "output": "bulk", **kwargs} + params = {"format": "json", **kwargs} content = self._make_request("get", url, params) @@ -236,7 +236,7 @@ def _fetch(self, method, endpoint, format_params, **kwargs): base_formatting.extend(format_params) url = (ENDPOINT_BASE + self.get_url).format(*base_formatting) - params = {"format": "json", "output": "bulk", **kwargs} + params = {"format": "json", **kwargs} content = self._make_request(method, url, params) diff --git a/tap_pardot/streams.py b/tap_pardot/streams.py index 013dd5f..d5b48bb 100644 --- a/tap_pardot/streams.py +++ b/tap_pardot/streams.py @@ -30,6 +30,9 @@ def get_default_start(self): def get_params(self): return {} + def get_api_output_type(self): + return self.config.get("api_output_type", "bulk") + def get_bookmark(self): return ( singer.bookmarks.get_bookmark( @@ -116,6 +119,7 @@ def get_default_start(self): def get_params(self): return { "created_after": self.config["start_date"], + "output": self.get_api_output_type(), "id_greater_than": self.get_bookmark(), "sort_by": "id", "sort_order": "ascending", @@ -139,6 +143,7 @@ class UpdatedAtReplicationStream(Stream): def get_params(self): return { "updated_after": self.get_bookmark(), + "output": self.get_api_output_type(), "sort_by": "updated_at", "sort_order": "ascending", } @@ -209,6 +214,7 @@ def post_sync(self): def get_params(self): return { "created_after": self.config["start_date"], + "output": self.get_api_output_type(), "id_greater_than": self.get_bookmark("id"), "sort_by": "id", "sort_order": "ascending", @@ -265,6 +271,7 @@ def post_sync(self): def get_params(self): return { 
"id_greater_than": self.get_bookmark("id"), + "output": self.get_api_output_type(), "updated_after": self.get_bookmark("last_updated"), "sort_by": "id", "sort_order": "ascending", @@ -295,7 +302,10 @@ def post_sync(self): super(ChildStream, self).post_sync() def get_params(self): - return {"offset": self.get_bookmark("offset")} + return { + "offset": self.get_bookmark("offset"), + "output": self.get_api_output_type(), + } def get_records(self, parent_ids): params = {self.parent_id_param: parent_ids, **self.get_params()} @@ -459,6 +469,7 @@ def get_params(self): # filter by updated_after "updated_after": self.get_bookmark("updated_at") or self.config["start_date"], + "output": self.get_api_output_type(), "id_greater_than": self.get_bookmark("id") or 0, "sort_by": "id", "sort_order": "ascending",