diff --git a/README.md b/README.md
index d2b0516..45c2643 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # tap-clerk
 
-`tap-clerk` is a Singer tap for Clerk.
+`tap-clerk` is a Singer tap for Clerk.
+Initially inspired by KeragonHQ's [tap-clerk](https://github.com/KeragonHQ/tap-clerk).
 
 Built with the [Meltano Tap SDK](https://sdk.meltano.com) for Singer Taps.
 
diff --git a/meltano.yml b/meltano.yml
index 0e23696..427a4aa 100644
--- a/meltano.yml
+++ b/meltano.yml
@@ -1,13 +1,13 @@
 version: 1
 send_anonymous_usage_stats: true
-project_id: "tap-clerk"
+project_id: tap-clerk
 default_environment: test
 environments:
 - name: test
 plugins:
   extractors:
-  - name: "tap-clerk"
-    namespace: "tap_clerk"
+  - name: tap-clerk
+    namespace: tap_clerk
     pip_url: -e .
     capabilities:
     - state
@@ -18,30 +18,21 @@ plugins:
     # TODO: Declare settings and their types here:
     settings:
-    - name: username
-      label: Username
-      description: The username to use for authentication
-
-    - name: password
-      kind: password
-      label: Password
-      description: The password to use for authentication
+    - name: auth_token
+      kind: string
+      description: The token to authenticate against the Clerk API service
       sensitive: true
-
-    - name: start_date
-      kind: date_iso8601
-      label: Start Date
-      description: Initial date to start extracting data from
-
-    # TODO: Declare required settings here:
-    settings_group_validation:
-    - [username, password]
-
-    # TODO: Declare default configuration values here:
-    config:
-      start_date: '2010-01-01T00:00:00Z'
-
+    select:
+    - waitlist.*
   loaders:
   - name: target-jsonl
     variant: andyh1203
     pip_url: target-jsonl
+  - name: target-postgres
+    variant: meltanolabs
+    pip_url: meltanolabs-target-postgres
+    config:
+      host: localhost
+      port: 5432
+      user: postgres
+      database: postgres
diff --git a/plugins/loaders/target-postgres--meltanolabs.lock b/plugins/loaders/target-postgres--meltanolabs.lock
new file mode 100644
index 0000000..fc96654
--- /dev/null
+++ b/plugins/loaders/target-postgres--meltanolabs.lock
@@ -0,0 +1,288 @@
+{
+  "plugin_type": "loaders",
+  "name": "target-postgres",
+  "namespace": "target_postgres",
+  "variant": "meltanolabs",
+  "label": "Postgres",
+  "docs": "https://hub.meltano.com/loaders/target-postgres--meltanolabs",
+  "repo": "https://github.com/MeltanoLabs/target-postgres",
+  "pip_url": "meltanolabs-target-postgres",
+  "executable": "target-postgres",
+  "description": "PostgreSQL database loader",
+  "logo_url": "https://hub.meltano.com/assets/logos/loaders/postgres.png",
+  "capabilities": [
+    "about",
+    "activate-version",
+    "hard-delete",
+    "schema-flattening",
+    "stream-maps"
+  ],
+  "settings_group_validation": [
+    []
+  ],
+  "settings": [
+    {
+      "name": "activate_version",
+      "kind": "boolean",
+      "value": true,
+      "label": "Activate Version",
+      "description": "If set to false, the tap will ignore activate version messages. If set to true, add_record_metadata must be set to true as well."
+    },
+    {
+      "name": "add_record_metadata",
+      "kind": "boolean",
+      "value": true,
+      "label": "Add Record Metadata",
+      "description": "Note that this must be enabled for activate_version to work!This adds _sdc_extracted_at, _sdc_batched_at, and more to every table. See https://sdk.meltano.com/en/latest/implementation/record_metadata.html for more information."
+    },
+    {
+      "name": "batch_size_rows",
+      "kind": "integer",
+      "label": "Batch Size Rows",
+      "description": "Maximum number of rows in each batch."
+    },
+    {
+      "name": "database",
+      "kind": "string",
+      "label": "Database",
+      "description": "Database name."
+    },
+    {
+      "name": "default_target_schema",
+      "kind": "string",
+      "value": "$MELTANO_EXTRACT__LOAD_SCHEMA",
+      "label": "Default Target Schema",
+      "description": "Postgres schema to send data to, example: tap-clickup"
+    },
+    {
+      "name": "dialect+driver",
+      "kind": "string",
+      "value": "postgresql+psycopg",
+      "label": "Dialect+Driver",
+      "description": "DEPRECATED. Dialect+driver see https://docs.sqlalchemy.org/en/20/core/engines.html. Generally just leave this alone."
+    },
+    {
+      "name": "faker_config.locale",
+      "kind": "array",
+      "label": "Faker Locale",
+      "description": "One or more LCID locale strings to produce localized output for: https://faker.readthedocs.io/en/master/#localization"
+    },
+    {
+      "name": "faker_config.seed",
+      "kind": "string",
+      "label": "Faker Seed",
+      "description": "Value to seed the Faker generator for deterministic output: https://faker.readthedocs.io/en/master/#seeding-the-generator"
+    },
+    {
+      "name": "flattening_enabled",
+      "kind": "boolean",
+      "label": "Enable Schema Flattening",
+      "description": "'True' to enable schema flattening and automatically expand nested properties."
+    },
+    {
+      "name": "flattening_max_depth",
+      "kind": "integer",
+      "label": "Max Flattening Depth",
+      "description": "The max depth to flatten schemas."
+    },
+    {
+      "name": "hard_delete",
+      "kind": "boolean",
+      "value": false,
+      "label": "Hard Delete",
+      "description": "When activate version is sent from a tap this specefies if we should delete the records that don't match, or mark them with a date in the `_sdc_deleted_at` column. This config option is ignored if `activate_version` is set to false."
+    },
+    {
+      "name": "host",
+      "kind": "string",
+      "label": "Host",
+      "description": "Hostname for postgres instance."
+    },
+    {
+      "name": "interpret_content_encoding",
+      "kind": "boolean",
+      "value": false,
+      "label": "Interpret Content Encoding",
+      "description": "If set to true, the target will interpret the content encoding of the schema to determine how to store the data. Using this option may result in a more efficient storage of the data but may also result in an error if the data is not encoded as expected."
+    },
+    {
+      "name": "load_method",
+      "kind": "options",
+      "value": "append-only",
+      "label": "Load Method",
+      "description": "The method to use when loading data into the destination. `append-only` will always write all input records whether that records already exists or not. `upsert` will update existing records and insert new records. `overwrite` will delete all existing records and insert all input records.",
+      "options": [
+        {
+          "label": "Append Only",
+          "value": "append-only"
+        },
+        {
+          "label": "Upsert",
+          "value": "upsert"
+        },
+        {
+          "label": "Overwrite",
+          "value": "overwrite"
+        }
+      ]
+    },
+    {
+      "name": "password",
+      "kind": "string",
+      "label": "Password",
+      "description": "Password used to authenticate.",
+      "sensitive": true
+    },
+    {
+      "name": "port",
+      "kind": "integer",
+      "value": 5432,
+      "label": "Port",
+      "description": "The port on which postgres is awaiting connections."
+    },
+    {
+      "name": "process_activate_version_messages",
+      "kind": "boolean",
+      "value": true,
+      "label": "Process `ACTIVATE_VERSION` messages",
+      "description": "Whether to process `ACTIVATE_VERSION` messages."
+    },
+    {
+      "name": "sanitize_null_text_characters",
+      "kind": "boolean",
+      "value": false,
+      "label": "Sanitize Null Text Characters",
+      "description": "If set to true, the target will sanitize null characters in char/text/varchar fields, as they are not supported by Postgres. See [postgres documentation](https://www.postgresql.org/docs/current/functions-string.html) for more information about chr(0) not being supported."
+    },
+    {
+      "name": "sqlalchemy_url",
+      "kind": "string",
+      "label": "SQLAlchemy URL",
+      "description": "DEPRECATED. SQLAlchemy connection string. This will override using host, user, password, port, dialect, and all ssl settings. Note that you must escape password special characters properly. See https://docs.sqlalchemy.org/en/20/core/engines.html#escaping-special-characters-such-as-signs-in-passwords"
+    },
+    {
+      "name": "ssh_tunnel.enable",
+      "kind": "boolean",
+      "value": false,
+      "label": "SSH Tunnel Enable",
+      "description": "Enable an ssh tunnel (also known as bastion host), see the other ssh_tunnel.* properties for more details"
+    },
+    {
+      "name": "ssh_tunnel.host",
+      "kind": "string",
+      "label": "SSH Tunnel Host",
+      "description": "Host of the bastion host, this is the host we'll connect to via ssh"
+    },
+    {
+      "name": "ssh_tunnel.port",
+      "kind": "integer",
+      "value": 22,
+      "label": "SSH Tunnel Port",
+      "description": "Port to connect to bastion host"
+    },
+    {
+      "name": "ssh_tunnel.private_key",
+      "kind": "string",
+      "label": "SSH Tunnel Private Key",
+      "description": "Private Key for authentication to the bastion host",
+      "sensitive": true
+    },
+    {
+      "name": "ssh_tunnel.private_key_password",
+      "kind": "string",
+      "label": "SSH Tunnel Private Key Password",
+      "description": "Private Key Password, leave None if no password is set",
+      "sensitive": true
+    },
+    {
+      "name": "ssh_tunnel.username",
+      "kind": "string",
+      "label": "SSH Tunnel Username",
+      "description": "Username to connect to bastion host"
+    },
+    {
+      "name": "ssl_certificate_authority",
+      "kind": "string",
+      "value": "~/.postgresql/root.crl",
+      "label": "SSL Certificate Authority",
+      "description": "The certificate authority that should be used to verify the server's identity. Can be provided either as the certificate itself (in .env) or as a filepath to the certificate."
+    },
+    {
+      "name": "ssl_client_certificate",
+      "kind": "string",
+      "value": "~/.postgresql/postgresql.crt",
+      "label": "SSL Client Certificate",
+      "description": "The certificate that should be used to verify your identity to the server. Can be provided either as the certificate itself (in .env) or as a filepath to the certificate."
+    },
+    {
+      "name": "ssl_client_certificate_enable",
+      "kind": "boolean",
+      "value": false,
+      "label": "SSL Client Certificate Enable",
+      "description": "Whether or not to provide client-side certificates as a method of authentication to the server. Use ssl_client_certificate and ssl_client_private_key for further customization. To use SSL to verify the server's identity, use ssl_enable instead."
+    },
+    {
+      "name": "ssl_client_private_key",
+      "kind": "string",
+      "value": "~/.postgresql/postgresql.key",
+      "label": "SSL Client Private Key",
+      "description": "The private key for the certificate you provided. Can be provided either as the certificate itself (in .env) or as a filepath to the certificate.",
+      "sensitive": true
+    },
+    {
+      "name": "ssl_enable",
+      "kind": "boolean",
+      "value": false,
+      "label": "SSL Enable",
+      "description": "Whether or not to use ssl to verify the server's identity. Use ssl_certificate_authority and ssl_mode for further customization. To use a client certificate to authenticate yourself to the server, use ssl_client_certificate_enable instead."
+    },
+    {
+      "name": "ssl_mode",
+      "kind": "string",
+      "value": "verify-full",
+      "label": "SSL Mode",
+      "description": "SSL Protection method, see [postgres documentation](https://www.postgresql.org/docs/current/libpq-ssl.html#LIBPQ-SSL-PROTECTION) for more information. Must be one of disable, allow, prefer, require, verify-ca, or verify-full."
+    },
+    {
+      "name": "ssl_storage_directory",
+      "kind": "string",
+      "value": ".secrets",
+      "label": "SSL Storage Directory",
+      "description": "The folder in which to store SSL certificates provided as raw values. When a certificate/key is provided as a raw value instead of as a filepath, it must be written to a file before it can be used. This configuration option determines where that file is created."
+    },
+    {
+      "name": "stream_map_config",
+      "kind": "object",
+      "label": "User Stream Map Configuration",
+      "description": "User-defined config values to be used within map expressions."
+    },
+    {
+      "name": "stream_maps",
+      "kind": "object",
+      "label": "Stream Maps",
+      "description": "Config object for stream maps capability. For more information check out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html)."
+    },
+    {
+      "name": "use_copy",
+      "kind": "boolean",
+      "value": false,
+      "label": "Use COPY",
+      "description": "Use the COPY command to insert data. This is usually faster than INSERT statements. This option is only available for the postgresql+psycopg dialect+driver."
+    },
+    {
+      "name": "user",
+      "kind": "string",
+      "label": "User",
+      "description": "User name used to authenticate."
+    },
+    {
+      "name": "validate_records",
+      "kind": "boolean",
+      "value": true,
+      "label": "Validate Records",
+      "description": "Whether to validate the schema of the incoming streams."
+    }
+  ],
+  "dialect": "postgres",
+  "target_schema": "$TARGET_POSTGRES_SCHEMA"
+}
diff --git a/tap_clerk/client.py b/tap_clerk/client.py
index 20b5749..61898c9 100644
--- a/tap_clerk/client.py
+++ b/tap_clerk/client.py
@@ -8,7 +8,7 @@ from singer_sdk.authenticators import BearerTokenAuthenticator
 from singer_sdk.helpers.jsonpath import extract_jsonpath
-from singer_sdk.pagination import BaseAPIPaginator  # noqa: TC002
+from singer_sdk.pagination import BaseOffsetPaginator
 from singer_sdk.streams import RESTStream
 
 if t.TYPE_CHECKING:
@@ -24,16 +24,16 @@ class ClerkStream(RESTStream):
     """Clerk stream class."""
 
     # Update this value if necessary or override `parse_response`.
-    records_jsonpath = "$[*]"
+    records_jsonpath = "$.data[*]"
 
     # Update this value if necessary or override `get_new_paginator`.
     next_page_token_jsonpath = "$.next_page"  # noqa: S105
+    API_LIMIT_PAGE_SIZE = 500
 
     @property
     def url_base(self) -> str:
         """Return the API URL root, configurable via tap settings."""
-        # TODO: hardcode a value here, or retrieve it from self.config
-        return "https://api.mysample.com"
+        return "https://api.clerk.com/v1"
 
     @property
     def authenticator(self) -> BearerTokenAuthenticator:
@@ -47,72 +47,17 @@ def authenticator(self) -> BearerTokenAuthenticator:
             token=self.config.get("auth_token", ""),
         )
 
-    @property
-    def http_headers(self) -> dict:
-        """Return the http headers needed.
-
-        Returns:
-            A dictionary of HTTP headers.
-        """
-        # If not using an authenticator, you may also provide inline auth headers:
-        # headers["Private-Token"] = self.config.get("auth_token")  # noqa: ERA001
-        return {}
-
-    def get_new_paginator(self) -> BaseAPIPaginator:
-        """Create a new pagination helper instance.
-
-        If the source API can make use of the `next_page_token_jsonpath`
-        attribute, or it contains a `X-Next-Page` header in the response
-        then you can remove this method.
-
-        If you need custom pagination that uses page numbers, "next" links, or
-        other approaches, please read the guide: https://sdk.meltano.com/en/v0.25.0/guides/pagination-classes.html.
+    def get_new_paginator(self) -> BaseOffsetPaginator:
+        return BaseOffsetPaginator(start_value=0, page_size=self.API_LIMIT_PAGE_SIZE)
 
-        Returns:
-            A pagination helper instance.
-        """
-        return super().get_new_paginator()
-
-    def get_url_params(
-        self,
-        context: Context | None,  # noqa: ARG002
-        next_page_token: t.Any | None,  # noqa: ANN401
-    ) -> dict[str, t.Any]:
-        """Return a dictionary of values to be used in URL parameterization.
-
-        Args:
-            context: The stream context.
-            next_page_token: The next page index or value.
-
-        Returns:
-            A dictionary of URL query parameters.
-        """
-        params: dict = {}
+    def get_url_params(self, context: dict | None, next_page_token: t.Any | None) -> dict[str, t.Any]:
+        params: dict = {"limit": self.API_LIMIT_PAGE_SIZE}
         if next_page_token:
-            params["page"] = next_page_token
+            params["offset"] = next_page_token
         if self.replication_key:
-            params["sort"] = "asc"
-            params["order_by"] = self.replication_key
+            params["order_by"] = f'-{self.replication_key}'
         return params
 
-    def prepare_request_payload(
-        self,
-        context: Context | None,  # noqa: ARG002
-        next_page_token: t.Any | None,  # noqa: ARG002, ANN401
-    ) -> dict | None:
-        """Prepare the data payload for the REST API request.
-
-        By default, no payload will be sent (return None).
-
-        Args:
-            context: The stream context.
-            next_page_token: The next page index or value.
-
-        Returns:
-            A dictionary with the JSON body for a POST requests.
-        """
-        # TODO: Delete this method if no payload is required. (Most REST APIs.)
-        return None
 
     def parse_response(self, response: requests.Response) -> t.Iterable[dict]:
         """Parse the response and return an iterator of result records.
@@ -123,25 +68,7 @@ def parse_response(self, response: requests.Response) -> t.Iterable[dict]:
         Yields:
             Each record from the source.
         """
-        # TODO: Parse response body and return a set of records.
         yield from extract_jsonpath(
             self.records_jsonpath,
             input=response.json(parse_float=decimal.Decimal),
         )
-
-    def post_process(
-        self,
-        row: dict,
-        context: Context | None = None,  # noqa: ARG002
-    ) -> dict | None:
-        """As needed, append or transform raw data to match expected structure.
-
-        Args:
-            row: An individual record from the stream.
-            context: The stream context.
-
-        Returns:
-            The updated record dictionary, or ``None`` to skip the record.
-        """
-        # TODO: Delete this method if not needed.
-        return row
diff --git a/tap_clerk/streams.py b/tap_clerk/streams.py
index a3b5e20..78d9df2 100644
--- a/tap_clerk/streams.py
+++ b/tap_clerk/streams.py
@@ -1,66 +1,272 @@
-"""Stream type classes for tap-clerk."""
-
 from __future__ import annotations
 
+import sys
 import typing as t
-from importlib import resources
 
-from singer_sdk import typing as th  # JSON Schema typing helpers
+import requests
+from singer_sdk import typing as th
+from singer_sdk.helpers.jsonpath import extract_jsonpath
 
 from tap_clerk.client import ClerkStream
 
-# TODO: Delete this is if not using json files for schema definition
-SCHEMAS_DIR = resources.files(__package__) / "schemas"
-# TODO: - Override `UsersStream` and `GroupsStream` with your own stream definition.
-#       - Copy-paste as many times as needed to create multiple stream types.
+if sys.version_info >= (3, 9):
+    import importlib.resources as importlib_resources
+else:
+    import importlib_resources
 
-class UsersStream(ClerkStream):
-    """Define custom stream."""
-
+class OrganizationsStream(ClerkStream):
+    """Organizations stream class."""
+    name = "organizations"
+    path = "/organizations"
+    primary_keys: t.ClassVar[list[str]] = ["id"]
+    replication_key = None
+    schema = th.PropertiesList(
+        th.Property("object", th.StringType),
+        th.Property("id", th.StringType),
+        th.Property("name", th.StringType),
+        th.Property("slug", th.StringType),
+        th.Property("members_count", th.IntegerType),
+        th.Property("max_allowed_memberships", th.IntegerType),
+        th.Property("admin_delete_enabled", th.BooleanType),
+        th.Property("public_metadata", th.ObjectType(additional_properties=True)),
+        th.Property("private_metadata", th.ObjectType(additional_properties=True)),
+        th.Property("created_by", th.StringType),
+        th.Property("created_at", th.IntegerType),
+        th.Property("updated_at", th.IntegerType),
+    ).to_dict()
+
+    def get_child_context(self, record: dict, context: t.Optional[dict]) -> dict:
+        return { "organization_id": record["id"] }
+class OrganizationMembershipStream(ClerkStream):
+    """OrganizationMembership stream class."""
+    name = "organization_membership"
+    parent_stream_type = OrganizationsStream
+    ignore_parent_replication_keys = True
+    path = "/organizations/{organization_id}/memberships"
+    primary_keys = ["id"]
+    replication_key = None
+    schema = th.PropertiesList(
+        th.Property("id", th.StringType, description="The unique identifier for the membership"),
+        th.Property("object", th.StringType, description="Type of the object, should be 'organization_membership'"),
+        th.Property("role", th.StringType, description="Role within the organization"),
+        th.Property("permissions", th.ArrayType(th.StringType), description="List of permissions"),
+        th.Property("public_metadata", th.ObjectType(additional_properties=True), description="Public metadata"),
+        th.Property("private_metadata", th.ObjectType(additional_properties=True), description="Private metadata"),
+        th.Property("organization", th.ObjectType(
+            th.Property("object", th.StringType),
+            th.Property("id", th.StringType),
+            th.Property("name", th.StringType),
+            th.Property("slug", th.StringType),
+            th.Property("members_count", th.IntegerType),
+            th.Property("max_allowed_memberships", th.IntegerType),
+            th.Property("admin_delete_enabled", th.BooleanType),
+            th.Property("public_metadata", th.ObjectType(additional_properties=True)),
+            th.Property("private_metadata", th.ObjectType(additional_properties=True)),
+            th.Property("created_by", th.StringType),
+            th.Property("created_at", th.IntegerType),
+            th.Property("updated_at", th.IntegerType)
+        ), description="Embedded organization object"),
+        th.Property("public_user_data", th.ObjectType(
+            th.Property("user_id", th.StringType),
+            th.Property("first_name", th.StringType),
+            th.Property("last_name", th.StringType),
+            th.Property("profile_image_url", th.StringType),
+            th.Property("image_url", th.StringType),
+            th.Property("has_image", th.BooleanType),
+            th.Property("identifier", th.StringType)
+        ), description="Public data of the user associated with the membership"),
+        th.Property("created_at", th.IntegerType, description="Timestamp of when the membership was created"),
+        th.Property("updated_at", th.IntegerType, description="Timestamp of when the membership was last updated")
+    ).to_dict()
+
+class OrganizationInvitationsStream(ClerkStream):
+    """Organization invitations stream class."""
+    name = "organization_invitations"
+    path = "/organization_invitations"
+    primary_keys = ["id"]
+    replication_key = None
+    schema = th.PropertiesList(
+        th.Property("id", th.StringType, description="The unique identifier for the invitation"),
+        th.Property("object", th.StringType, description="Type of the object, should be 'organization_invitation'"),
+        th.Property("email_address", th.StringType, description="Email address of the invited user"),
+        th.Property("role", th.StringType, description="Role assigned to the invited user"),
+        th.Property("role_name", th.StringType, description="Name of the role assigned"),
+        th.Property("organization_id", th.StringType, description="ID of the organization"),
+        th.Property("status", th.StringType, description="Status of the invitation (pending, accepted, revoked)"),
+        th.Property("public_metadata", th.ObjectType(additional_properties=True), description="Public metadata"),
+        th.Property("private_metadata", th.ObjectType(additional_properties=True), description="Private metadata"),
+        th.Property("created_at", th.IntegerType, description="Timestamp of when the invitation was created"),
+        th.Property("updated_at", th.IntegerType, description="Timestamp of when the invitation was last updated")
+    ).to_dict()
+
+class UsersStream(ClerkStream):
+    """Users stream class."""
     name = "users"
     path = "/users"
     primary_keys: t.ClassVar[list[str]] = ["id"]
     replication_key = None
-    # Optionally, you may also use `schema_filepath` in place of `schema`:
-    # schema_filepath = SCHEMAS_DIR / "users.json"  # noqa: ERA001
     schema = th.PropertiesList(
-        th.Property("name", th.StringType),
-        th.Property(
-            "id",
-            th.StringType,
-            description="The user's system ID",
-        ),
-        th.Property(
-            "age",
-            th.IntegerType,
-            description="The user's age in years",
-        ),
-        th.Property(
-            "email",
-            th.StringType,
-            description="The user's email address",
-        ),
-        th.Property("street", th.StringType),
-        th.Property("city", th.StringType),
-        th.Property(
-            "state",
-            th.StringType,
-            description="State name in ISO 3166-2 format",
-        ),
-        th.Property("zip", th.StringType),
+        th.Property("id", th.StringType, description="The unique identifier for a user"),
+        th.Property("object", th.StringType),
+        th.Property("external_id", th.StringType),
+        th.Property("primary_email_address_id", th.StringType),
+        th.Property("primary_phone_number_id", th.StringType),
+        th.Property("primary_web3_wallet_id", th.StringType),
+        th.Property("username", th.StringType),
+        th.Property("first_name", th.StringType),
+        th.Property("last_name", th.StringType),
+        th.Property("profile_image_url", th.StringType),
+        th.Property("image_url", th.StringType),
+        th.Property("has_image", th.BooleanType),
+        th.Property("public_metadata", th.ObjectType(additional_properties=True)),
+        th.Property("private_metadata", th.ObjectType(additional_properties=True)),
+        th.Property("unsafe_metadata", th.ObjectType(additional_properties=True)),
+        th.Property("email_addresses", th.ArrayType(th.ObjectType(
+            th.Property("id", th.StringType),
+            th.Property("object", th.StringType),
+            th.Property("email_address", th.StringType),
+            th.Property("reserved", th.BooleanType),
+            th.Property("verification", th.ObjectType(
+                th.Property("status", th.StringType),
+                th.Property("strategy", th.StringType),
+                th.Property("attempts", th.IntegerType),
+                th.Property("expire_at", th.IntegerType)
+            )),
+            th.Property("linked_to", th.ArrayType(th.ObjectType(
+                th.Property("type", th.StringType),
+                th.Property("id", th.StringType)
+            ))),
+            th.Property("created_at", th.IntegerType),
+            th.Property("updated_at", th.IntegerType)
+        ))),
+        th.Property("phone_numbers", th.ArrayType(th.ObjectType(
+            th.Property("id", th.StringType),
+            th.Property("object", th.StringType),
+            th.Property("phone_number", th.StringType),
+            th.Property("reserved_for_second_factor", th.BooleanType),
+            th.Property("default_second_factor", th.BooleanType),
+            th.Property("reserved", th.BooleanType),
+            th.Property("verification", th.ObjectType(
+                th.Property("status", th.StringType),
+                th.Property("strategy", th.StringType),
+                th.Property("attempts", th.IntegerType),
+                th.Property("expire_at", th.IntegerType)
+            )),
+            th.Property("linked_to", th.ArrayType(th.ObjectType(
+                th.Property("type", th.StringType),
+                th.Property("id", th.StringType)
+            ))),
+            th.Property("backup_codes", th.ArrayType(th.StringType)),
+            th.Property("created_at", th.IntegerType),
+            th.Property("updated_at", th.IntegerType)
+        ))),
+        th.Property("web3_wallets", th.ArrayType(th.ObjectType(
+            th.Property("id", th.StringType),
+            th.Property("object", th.StringType),
+            th.Property("web3_wallet", th.StringType),
+            th.Property("verification", th.ObjectType(
+                th.Property("status", th.StringType),
+                th.Property("strategy", th.StringType),
+                th.Property("nonce", th.StringType),
+                th.Property("attempts", th.IntegerType),
+                th.Property("expire_at", th.IntegerType)
+            )),
+            th.Property("created_at", th.IntegerType),
+            th.Property("updated_at", th.IntegerType)
+        ))),
+        th.Property("passkeys", th.ArrayType(th.ObjectType(
+            th.Property("id", th.StringType),
+            th.Property("object", th.StringType),
+            th.Property("name", th.StringType),
+            th.Property("last_used_at", th.IntegerType),
+            th.Property("verification", th.ObjectType(
+                th.Property("status", th.StringType),
+                th.Property("strategy", th.StringType),
+                th.Property("nonce", th.StringType),
+                th.Property("attempts", th.IntegerType),
+                th.Property("expire_at", th.IntegerType)
+            ))
+        ))),
+        th.Property("password_enabled", th.BooleanType),
+        th.Property("two_factor_enabled", th.BooleanType),
+        th.Property("totp_enabled", th.BooleanType),
+        th.Property("backup_code_enabled", th.BooleanType),
+        th.Property("mfa_enabled_at", th.IntegerType),
+        th.Property("mfa_disabled_at", th.IntegerType),
+        th.Property("external_accounts", th.ArrayType(th.ObjectType(additional_properties=True))),
+        th.Property("saml_accounts", th.ArrayType(th.ObjectType(
+            th.Property("id", th.StringType),
+            th.Property("object", th.StringType),
+            th.Property("provider", th.StringType),
+            th.Property("active", th.BooleanType),
+            th.Property("email_address", th.StringType),
+            th.Property("first_name", th.StringType),
+            th.Property("last_name", th.StringType),
+            th.Property("provider_user_id", th.StringType),
+            th.Property("public_metadata", th.ObjectType(additional_properties=True)),
+            th.Property("verification", th.ObjectType(
+                th.Property("status", th.StringType),
+                th.Property("strategy", th.StringType),
+                th.Property("external_verification_redirect_url", th.StringType),
+                th.Property("error", th.ObjectType(
+                    th.Property("message", th.StringType),
+                    th.Property("long_message", th.StringType),
+                    th.Property("code", th.StringType),
+                    th.Property("meta", th.ObjectType(additional_properties=True)),
+                    th.Property("clerk_trace_id", th.StringType)
+                )),
+                th.Property("expire_at", th.IntegerType),
+                th.Property("attempts", th.IntegerType)
+            ))
+        ))),
+        th.Property("last_sign_in_at", th.IntegerType),
+        th.Property("banned", th.BooleanType),
+        th.Property("locked", th.BooleanType),
+        th.Property("lockout_expires_in_seconds", th.IntegerType),
+        th.Property("verification_attempts_remaining", th.IntegerType),
+        th.Property("updated_at", th.IntegerType),
+        th.Property("created_at", th.IntegerType),
+        th.Property("delete_self_enabled", th.BooleanType),
+        th.Property("create_organization_enabled", th.BooleanType),
+        th.Property("create_organizations_limit", th.IntegerType),
+        th.Property("last_active_at", th.IntegerType),
+        th.Property("legal_accepted_at", th.IntegerType)
     ).to_dict()
 
+    def get_url_params(self, context: dict | None, next_page_token: t.Any | None) -> dict[str, t.Any]:
+        params: dict = {"limit": self.API_LIMIT_PAGE_SIZE}
+        if next_page_token:
+            params["offset"] = next_page_token
+        if self.replication_key:
+            params["order_by"] = f'+{self.replication_key}'
+        self.logger.info(f"QUERY PARAMS: {params}")
+        return params
 
-class GroupsStream(ClerkStream):
-    """Define custom stream."""
-
-    name = "groups"
-    path = "/groups"
+class WaitlistEntriesStream(ClerkStream):
+    """Waitlist entries stream class."""
+    name = "waitlist"
+    path = "/waitlist_entries"
     primary_keys: t.ClassVar[list[str]] = ["id"]
-    replication_key = "modified"
+    replication_key = None
     schema = th.PropertiesList(
-        th.Property("name", th.StringType),
+        th.Property("object", th.StringType),
         th.Property("id", th.StringType),
-        th.Property("modified", th.DateTimeType),
-    ).to_dict()
+        th.Property("email_address", th.StringType),
+        th.Property("status", th.StringType),
+        th.Property("created_at", th.IntegerType),
+        th.Property("updated_at", th.IntegerType),
+        th.Property("invitation", th.ObjectType(
+            th.Property("object", th.StringType),
+            th.Property("id", th.StringType),
+            th.Property("email_address", th.StringType),
+            th.Property("public_metadata", th.ObjectType(additional_properties=True)),
+            th.Property("revoked", th.BooleanType),
+            th.Property("status", th.StringType),
+            th.Property("url", th.StringType),
+            th.Property("expires_at", th.IntegerType),
+            th.Property("created_at", th.IntegerType),
+            th.Property("updated_at", th.IntegerType)
+        ))
+    ).to_dict()
\ No newline at end of file
diff --git a/tap_clerk/tap.py b/tap_clerk/tap.py
index bbbe69e..42805ac 100644
--- a/tap_clerk/tap.py
+++ b/tap_clerk/tap.py
@@ -1,69 +1,31 @@
-"""Clerk tap class."""
-
 from __future__ import annotations
 
 from singer_sdk import Tap
-from singer_sdk import typing as th  # JSON schema typing helpers
-
-# TODO: Import your custom stream types here:
 from tap_clerk import streams
-
+from singer_sdk import typing as th
 
 class TapClerk(Tap):
     """Clerk tap class."""
-
     name = "tap-clerk"
 
-    # TODO: Update this section with the actual config values you expect:
     config_jsonschema = th.PropertiesList(
         th.Property(
             "auth_token",
             th.StringType,
             required=True,
-            secret=True,  # Flag config as protected.
-            title="Auth Token",
-            description="The token to authenticate against the API service",
-        ),
-        th.Property(
-            "project_ids",
-            th.ArrayType(th.StringType),
-            required=True,
-            title="Project IDs",
-            description="Project IDs to replicate",
-        ),
-        th.Property(
-            "start_date",
-            th.DateTimeType,
-            description="The earliest record date to sync",
-        ),
-        th.Property(
-            "api_url",
-            th.StringType,
-            title="API URL",
-            default="https://api.mysample.com",
-            description="The url for the API service",
-        ),
-        th.Property(
-            "user_agent",
-            th.StringType,
-            description=(
-                "A custom User-Agent header to send with each request. Default is "
-                "'<tap_name>/<tap_version>'"
-            ),
+            secret=True,
+            description="The token to authenticate against the Clerk API service",
         ),
     ).to_dict()
 
     def discover_streams(self) -> list[streams.ClerkStream]:
-        """Return a list of discovered streams.
-
-        Returns:
-            A list of discovered streams.
-        """
        return [
-            streams.GroupsStream(self),
+            streams.OrganizationsStream(self),
+            streams.OrganizationMembershipStream(self),
             streams.UsersStream(self),
+            streams.WaitlistEntriesStream(self),
+            streams.OrganizationInvitationsStream(self),
         ]
 
-
 if __name__ == "__main__":
     TapClerk.cli()
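
For reference, a minimal sketch of exercising the reworked tap end to end with Meltano. The `TAP_CLERK_AUTH_TOKEN` variable name assumes Meltano's standard setting-to-environment mapping for the `auth_token` setting declared above, and `sk_test_...` is a placeholder for a real Clerk secret key:

    meltano install                              # install tap-clerk and the target-jsonl / target-postgres loaders from meltano.yml
    export TAP_CLERK_AUTH_TOKEN='sk_test_...'    # Clerk secret key consumed by the BearerTokenAuthenticator
    meltano run tap-clerk target-jsonl           # extract the selected streams (waitlist.*) into local JSONL files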