From 849cbd91e5ed991c925d96a5172afd6e6f462661 Mon Sep 17 00:00:00 2001 From: Manfred Moser Date: Mon, 28 Aug 2023 12:34:36 -0700 Subject: [PATCH 01/12] Add minimal CI with Maven and GHA --- .github/workflows/ci.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..498a32e01 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: ci + +on: + push: + branches: + - main + +defaults: + run: + shell: bash --noprofile --norc -euo pipefail {0} + +env: + # Use Maven wrapper from repo with Maven version and other configs + MAVEN: ./mvnw -B + +jobs: + build: + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - name: Checkout source + uses: actions/checkout@v3 + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'temurin' + - name: Build with Maven + run: $MAVEN clean verify From d7422a076b618805ac17e3df36a98c9965d0c511 Mon Sep 17 00:00:00 2001 From: Manfred Moser Date: Wed, 23 Aug 2023 12:06:12 -0700 Subject: [PATCH 02/12] Set version to 3-SNAPSHOT --- baseapp/pom.xml | 3 +-- gateway-ha/pom.xml | 2 +- pom.xml | 2 +- proxyserver/pom.xml | 4 +--- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/baseapp/pom.xml b/baseapp/pom.xml index 52598f5b4..25ba07bae 100644 --- a/baseapp/pom.xml +++ b/baseapp/pom.xml @@ -7,7 +7,7 @@ io.trino.gateway trinogateway-parent - 1.9.5 + 3-SNAPSHOT ../ @@ -89,4 +89,3 @@ - diff --git a/gateway-ha/pom.xml b/gateway-ha/pom.xml index f541b281e..41308b3bb 100644 --- a/gateway-ha/pom.xml +++ b/gateway-ha/pom.xml @@ -8,7 +8,7 @@ io.trino.gateway trinogateway-parent - 1.9.5 + 3-SNAPSHOT ../ diff --git a/pom.xml b/pom.xml index 65f0e8260..0a9cc41ab 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ trinogateway-parent trinogateway-parent pom - 1.9.5 + 3-SNAPSHOT 17 diff --git a/proxyserver/pom.xml 
b/proxyserver/pom.xml index 7180338f4..66d5ef0c9 100644 --- a/proxyserver/pom.xml +++ b/proxyserver/pom.xml @@ -8,7 +8,7 @@ io.trino.gateway trinogateway-parent - 1.9.5 + 3-SNAPSHOT ../ @@ -20,7 +20,6 @@ UTF-8 - org.eclipse.jetty @@ -82,4 +81,3 @@ - From 0434fefb139d9bfd1460dda4af8d1ae229e39549 Mon Sep 17 00:00:00 2001 From: Manfred Moser Date: Tue, 29 Aug 2023 14:01:11 -0700 Subject: [PATCH 03/12] Improve documentation setup - Break up readme content - Move content around to multiple pages - No content modifications - Format paragraphs in source to 80 column width - Add simplistic navigation --- README.md | 675 +----------------------------------- docs/design.md | 39 ++- docs/development.md | 102 ++++++ docs/gateway-api.md | 103 ++++++ docs/operation.md | 52 +++ docs/references.md | 26 ++ docs/resource-groups-api.md | 199 +++++++++++ docs/routing-rules.md | 270 +++++++++++++++ docs/security.md | 99 ++++++ 9 files changed, 896 insertions(+), 669 deletions(-) create mode 100644 docs/development.md create mode 100644 docs/gateway-api.md create mode 100644 docs/operation.md create mode 100644 docs/references.md create mode 100644 docs/resource-groups-api.md create mode 100644 docs/routing-rules.md create mode 100644 docs/security.md diff --git a/README.md b/README.md index a15ce7fd9..0f9244478 100644 --- a/README.md +++ b/README.md @@ -1,664 +1,15 @@ # trino-gateway -A load balancer / proxy / gateway for trino compute engine. - -## How to setup a dev environment - -Step 1: setup mysql. 
Install docker with docker-compose and run the below command when setting up first time: - -#### Run the services - mysqldb, two instances of trino - -- This setup helps you develop and test any routing rules for the trino -- Both trino services would have a single `system` catalog -- Add the catalog properties files in ` bin/localdev/coordinator/` for additional catalogs - -``` -cd localdev -docker-compose up -d -``` - -#### Check the "Status' of the services by - -`docker-compose ps` - -#### Create the schema for the backends, once mysqldb becomes healthy - -`docker-compose exec mysqldb sh -c "mysql -uroot -proot123 -hmysqldb -Dtrinogateway < /etc/mysql/gateway-ha-persistence.sql"` - -#### Add the backends for mysqldb - -`docker-compose exec mysqldb sh -c "mysql -uroot -proot123 -hmysqldb -Dtrinogateway < /etc/mysql/add_backends.sql"` - -#### Create the schema for the backends, once postgres becomes healthy - -`docker-compose exec postgres sh -c 'PGPASSWORD="P0stG&es" psql -h localhost -p 5432 -U trino_gateway_db_admin -d trino_gateway_db -f /etc/postgresql/gateway-ha-persistence-postgres.sql'` - -#### Add the backends for postgres - -`docker-compose exec postgres sh -c 'PGPASSWORD="P0stG&es" psql -h localhost -p 5432 -U trino_gateway_db_admin -d trino_gateway_db -f /etc/postgresql/add_backends_postgres.sql'` - -It would add 2 trino backend records which can be used for the development and testing - -### Build and run - -Please note these steps have been verified with JDK 8 and 11. Higher versions of Java might run into unexpected issues. - -run `mvn clean install` to build `trino-gateway` - -Edit the [config file](/gateway-ha/gateway-ha-config.yml) and update the mysql db information. 
- -``` -cd gateway-ha/target/ -java -jar gateway-ha-{{VERSION}}-jar-with-dependencies.jar server ../gateway-ha-config.yml -``` - -If you encounter a `Failed to connect to JDBC URL` error, this may be due to newer versions of java disabling certain algorithms -when using SSL/TLS, in particular `TLSv1` and `TLSv1.1`. This will cause `Bad handshake` errors when connecting to the MySQL server. -To enable `TLSv1` and `TLSv1.1` open the following file in any editor (`sudo` access needed): - -``` -/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/security/java.security -``` - -Search for `jdk.tls.disabledAlgorithms`, it should look something like this: - -``` -jdk.tls.disabledAlgorithms=SSLv3, TLSv1, TLSv1.1, RC4, DES, MD5withRSA, \ - DH keySize < 1024, EC keySize < 224, 3DES_EDE_CBC, anon, NULL, \ - include jdk.disabled.namedCurves -``` - -Remove `TLSv1, TLSv1.1` and redo the above steps to build and run `trino-gateway`. - -If you see test failures while building `trino-gateway` or in an IDE, please run `mvn process-classes` to instrument javalite models -which are used by the tests . Ref [javalite-examples](https://github.com/javalite/javalite-examples/tree/master/simple-example#instrumentation) for more details. - -## Gateway API - -### Add or update a backend - -```$xslt -curl -X POST http://localhost:8080/entity?entityType=GATEWAY_BACKEND \ - -d '{ "name": "trino-3", \ - "proxyTo": "http://localhost:8083",\ - "active": true, \ - "routingGroup": "adhoc" \ - }' -``` - -If the backend URL is different from the `proxyTo` URL (for example if they are internal vs. external hostnames). You can use the optional `externalUrl` field to override the link in the Active Backends page. 
- -```$xslt -curl -X POST http://localhost:8080/entity?entityType=GATEWAY_BACKEND \ - -d '{ "name": "trino-3", \ - "proxyTo": "http://localhost:8083",\ - "active": true, \ - "routingGroup": "adhoc" \ - "externalUrl": "http://localhost:8084",\ - }' -``` - -### Get all backends behind the gateway - -```$xslt -curl -X GET http://localhost:8080/entity/GATEWAY_BACKEND -[ - { - "name": "trino-1", - "proxyTo": "http://localhost:8081", - "active": true, - "routingGroup": "adhoc", - "externalUrl": "http://localhost:8081" - }, - { - "name": "trino-2", - "proxyTo": "http://localhost:8082", - "active": true, - "routingGroup": "adhoc", - "externalUrl": "http://localhost:8082" - }, - { - "name": "trino-3", - "proxyTo": "http://localhost:8083", - "active": true, - "routingGroup": "adhoc", - "externalUrl": "http://localhost:8084" - } -] -``` - -### Delete a backend from the gateway - -```$xslt -curl -X POST -d "trino3" http://localhost:8080/gateway/backend/modify/delete -``` - -### Deactivate a backend - -```$xslt -curl -X POST http://localhost:8080/gateway/backend/deactivate/trino2 -``` - -### Get all active backend behind the Gateway - -`curl -X GET http://localhost:8080/gateway/backend/active` - -``` -[ - { - "name": "trino-1", - "proxyTo": "http://localhost:8081", - "active": true, - "routingGroup": "adhoc", - "externalUrl": "http://localhost:8081" - } -] -``` - -### Activate a backend - -`curl -X POST http://localhost:8080/gateway/backend/activate/trino2` - -### Query History UI - check query plans etc. - -trinoGateway records history of recent queries and displays links to check query details page in respective trino cluster. -![trino.gateway.io](/docs/assets/trinogateway_query_history.png) - -### Gateway Admin UI - add and modify backend information - -The Gateway admin page is used to configure the gateway to multiple backends. Existing backend information can also be modified using the same. 
-![trino.gateway.io/entity](/docs/assets/trinogateway_ha_admin.png) - -## Resource Groups API - -For resource group and selector apis, we can now specify a query parameter with the request supporting multiple trino databases for different trino backends. This allows a user to configure a db for every trino backend with their own resource groups and selector tables. To use this, just specify the query parameter ?useSchema= to the request. Example, to list all resource groups, - -```$xslt -curl -X GET http://localhost:8080/trino/resourcegroup/read/{INSERT_ID_HERE}?useSchema=newdatabasename -``` - -### Add a resource group - -To add a single resource group, specify all relevant fields in the body. Resource group id should not be specified since the database should autoincrement it. - -```$xslt -curl -X POST http://localhost:8080/trino/resourcegroup/create \ - -d '{ - "name": "resourcegroup1", \ - "softMemoryLimit": "100%", \ - "maxQueued": 100, \ - "softConcurrencyLimit": 100, \ - "hardConcurrencyLimit": 100, \ - "schedulingPolicy": null, \ - "schedulingWeight": null, \ - "jmxExport": null, \ - "softCpuLimit": null, \ - "hardCpuLimit": null, \ - "parent": null, \ - "environment": "test" \ - }' -``` - -### Get existing resource group(s) - -If no resourceGroupId (type long) is specified, then all existing resource groups are fetched. - -```$xslt -curl -X GET http://localhost:8080/trino/resourcegroup/read/{INSERT_ID_HERE} -``` - -### Update a resource group - -Specify all columns in the body, which will overwrite properties for the resource group with that specific resourceGroupId. 
- -```$xslt -curl -X POST http://localhost:8080/trino/resourcegroup/update \ - -d '{ "resourceGroupId": 1, \ - "name": "resourcegroup_updated", \ - "softMemoryLimit": "80%", \ - "maxQueued": 50, \ - "softConcurrencyLimit": 40, \ - "hardConcurrencyLimit": 60, \ - "schedulingPolicy": null, \ - "schedulingWeight": null, \ - "jmxExport": null, \ - "softCpuLimit": null, \ - "hardCpuLimit": null, \ - "parent": null, \ - "environment": "test" \ - }' -``` - -### Delete a resource group - -To delete a resource group, specify the corresponding resourceGroupId (type long). - -```$xslt -curl -X POST http://localhost:8080/trino/resourcegroup/delete/{INSERT_ID_HERE} -``` - -### Add a selector - -To add a single selector, specify all relevant fields in the body. Resource group id should not be specified since the database should autoincrement it. - -```$xslt -curl -X POST http://localhost:8080/trino/selector/create \ - -d '{ - "priority": 1, \ - "userRegex": "selector1", \ - "sourceRegex": "resourcegroup1", \ - "queryType": "insert" \ - }' -``` - -### Get existing selectors(s) - -If no resourceGroupId (type long) is specified, then all existing selectors are fetched. - -```$xslt -curl -X GET http://localhost:8080/trino/selector/read/{INSERT_ID_HERE} -``` - -### Update a selector - -To update a selector, the existing selector must be specified with all relevant fields under "current". The updated version of that selector is specified under "update", with all relevant fields included. If the selector under "current" does not exist, a new selector will be created with the details under "update". Both "current" and "update" must be included to update a selector. 
- -```$xslt -curl -X POST http://localhost:8080/trino/selector/update \ - -d '{ "current": { - "resourceGroupId": 1, \ - "priority": 1, \ - "userRegex": "selector1", \ - "sourceRegex": "resourcegroup1", \ - "queryType": "insert" \ - }, - "update": { - "resourceGroupId": 1, \ - "priority": 2, \ - "userRegex": "selector1_updated", \ - "sourceRegex": "resourcegroup1", \ - "queryType": null \ - } -}' -``` - -### Delete a selector - -To delete a selector, specify all relevant fields in the body. - -```$xslt -curl -X POST http://localhost:8080/trino/selector/delete \ - -d '{ "resourceGroupId": 1, \ - "priority": 2, \ - "userRegex": "selector1_updated", \ - "sourceRegex": "resourcegroup1", \ - "queryType": null \ - }' -``` - -### Add a global property - -To add a single global property, specify all relevant fields in the body. - -```$xslt -curl -X POST http://localhost:8080/trino/globalproperty/create \ - -d '{ - "name": "cpu_quota_period", \ - "value": "1h" \ - }' -``` - -### Get existing global properties - -If no name (type String) is specified, then all existing global properties are fetched. - -```$xslt -curl -X GET http://localhost:8080/trino/globalproperty/read/{INSERT_NAME_HERE} -``` - -### Update a global property - -Specify all columns in the body, which will overwrite properties for the global property with that specific name. - -```$xslt -curl -X POST http://localhost:8080/trino/globalproperty/update \ - -d '{ - "name": "cpu_quota_period", \ - "value": "2h" \ - }' -``` - -### Delete a global property - -To delete a global property, specify the corresponding name (type String). - -```$xslt -curl -X POST http://localhost:8080/trino/globalproperty/delete/{INSERT_NAME_HERE} -``` - -## Graceful shutdown - -trino gateway supports graceful shutdown of trino clusters. Even when a cluster is deactivated, any submitted query states can still be retrieved based on the Query ID. - -To graceful shutdown a trino cluster without query losses, the steps are: - -1. 
Set the backend to deactivate state, this prevents any new incoming queries from getting assigned to the backend. -2. Poll the trino backend coorinator URL until the queued query count and the running query count both hit 0. -3. Terminate the trino Coordinator & Worker Java process. - -To gracefully shutdown a single worker process, see [this](https://trino.io/docs/current/admin/graceful-shutdown.html) for the operations. - -## Routing Rules Engine - -By default, trino-gateway reads the `X-Trino-Routing-Group` request header to route requests. -If this header is not specified, requests are sent to default routing group (adhoc). - -The routing rules engine feature enables you to write custom logic to route requests based on the request info such as any of the [request headers](https://trino.io/docs/current/develop/client-protocol.html#client-request-headers). -Routing rules are separated from trino-gateway application code to a configuration file, allowing for dynamic rule changes. - -### Defining your routing rules - -To express and fire routing rules, we use the [easy-rules](https://github.com/j-easy/easy-rules) engine. These rules should be stored in a YAML file. -Rules consist of a name, description, condition, and list of actions. If the condition of a particular rule evaluates to true, its actions are fired. 
- -```yaml ---- -name: "airflow" -description: "if query from airflow, route to etl group" -condition: 'request.getHeader("X-Trino-Source") == "airflow"' -actions: - - 'result.put("routingGroup", "etl")' ---- -name: "airflow special" -description: "if query from airflow with special label, route to etl-special group" -condition: 'request.getHeader("X-Trino-Source") == "airflow" && request.getHeader("X-Trino-Client-Tags") contains "label=special"' -actions: - - 'result.put("routingGroup", "etl-special")' -``` - -In the condition, you can access the methods of a [HttpServletRequest](https://docs.oracle.com/javaee/6/api/javax/servlet/http/HttpServletRequest.html) object called `request`. -There should be at least one action of the form `result.put(\"routingGroup\", \"foo\")` which says that if a request satisfies the condition, it should be routed to `foo`. - -The condition and actions are written in [MVEL](http://mvel.documentnode.com/), an expression language with Java-like syntax. -In most cases, users can write their conditions/actions in Java syntax and expect it to work. There are some MVEL-specific operators that could be useful though. -For example, instead of doing a null-check before accessing the `String.contains` method like this: - -```yaml -condition: 'request.getHeader("X-Trino-Client-Tags") != null && request.getHeader("X-Trino-Client-Tags").contains("label=foo")' -``` - -You can use the `contains` operator - -```yaml -condition: 'request.getHeader("X-Trino-Client-Tags") contains "label=foo"' -``` - -If no rules match, then request is routed to adhoc. - -### Execution of Rules - -All rules whose conditions are satisfied will fire. For example, in the "airflow" and "airflow special" example rules given above, a query with source `airflow` and label `special` -will satisfy both rules. The `routingGroup` is set to `etl` and then to `etl-special` because of the order in which the rules of defined. 
-If we swap the order of the rules, then we would possibly get `etl` instead, which is undesirable. - -One could solve this by writing the rules such that they're atomic (any query will match exactly one rule). For example we can change the first rule to - -```yaml ---- -name: "airflow" -description: "if query from airflow, route to etl group" -condition: 'request.getHeader("X-Trino-Source") == "airflow" && request.getHeader("X-Trino-Client-Tags") == null' -actions: - - 'result.put("routingGroup", "etl")' ---- -``` - -This could be hard to maintain as we add more rules. To have better control over the execution of rules, we could use rule priorities and composite rules. -Overall, with priorities, composite rules, and the constructs that MVEL support, you should likely be able to express your routing logic. - -#### Rule Priority - -We can assign an integer value `priority` to a rule. The lower this integer is, the earlier it will fire. -If the priority is not specified, the priority is defaulted to INT_MAX. -We can add priorities to our airflow and airflow special rule like so: - -```yaml ---- -name: "airflow" -description: "if query from airflow, route to etl group" -priority: 0 -condition: 'request.getHeader("X-Trino-Source") == "airflow"' -actions: - - 'result.put("routingGroup", "etl")' ---- -name: "airflow special" -description: "if query from airflow with special label, route to etl-special group" -priority: 1 -condition: 'request.getHeader("X-Trino-Source") == "airflow" && request.getHeader("X-Trino-Client-Tags") contains "label=special"' -actions: - - 'result.put("routingGroup", "etl-special")' -``` - -Note that both rules will still fire. The difference is that we've guaranteed that the first rule (priority 0) is fired before the second rule (priority 1). Thus `routingGroup` -is set to `etl` and then to `etl-special`, so the `routingGroup` will always be `etl-special` in the end. 
- -Above, the more specific rules have less priority since we want them to be the last to set `routingGroup`. This is a little counterintuitive. -To further control the execution of rules, for example to have only one rule fire, we can use composite rules. - -##### Composite Rules - -First, please refer to easy-rule composite rules docs: https://github.com/j-easy/easy-rules/wiki/defining-rules#composite-rules - -Above, we saw how to control the order of rule execution using priorities. In addition to this, we could have only the first rule matched to be -fired (the highest priority one) and the rest ignored. We can use `ActivationRuleGroup` to achieve this. - -```yaml ---- -name: "airflow rule group" -description: "routing rules for query from airflow" -compositeRuleType: "ActivationRuleGroup" -composingRules: - - name: "airflow special" - description: "if query from airflow with special label, route to etl-special group" - priority: 0 - condition: 'request.getHeader("X-Trino-Source") == "airflow" && request.getHeader("X-Trino-Client-Tags") contains "label=special"' - actions: - - 'result.put("routingGroup", "etl-special")' - - name: "airflow" - description: "if query from airflow, route to etl group" - priority: 1 - condition: 'request.getHeader("X-Trino-Source") == "airflow"' - actions: - - 'result.put("routingGroup", "etl")' -``` - -Note that the priorities have switched. The more specific rule has a higher priority, since we want it to be fired first. -A query coming from airflow with special label is matched to the "airflow special" rule first, since it's higher priority, -and the second rule is ignored. A query coming from airflow with no labels does not match the first rule, and is then tested and matched to the second rule. - -We can also use `ConditionalRuleGroup` and `ActivationRuleGroup` to implement an if/else workflow. 
-The following logic in pseudocode: - -``` -if source == "airflow": - if clientTags["label"] == "foo": - return "etl-foo" - else if clientTags["label"] = "bar": - return "etl-bar" - else - return "etl" -``` - -Can be implemented with these rules: - -```yaml -name: "airflow rule group" -description: "routing rules for query from airflow" -compositeRuleType: "ConditionalRuleGroup" -composingRules: - - name: "main condition" - description: "source is airflow" - priority: 0 # rule with the highest priority acts as main condition - condition: 'request.getHeader("X-Trino-Source") == "airflow"' - actions: - - "" - - name: "airflow subrules" - compositeRuleType: "ActivationRuleGroup" # use ActivationRuleGroup to simulate if/else - composingRules: - - name: "label foo" - description: "label client tag is foo" - priority: 0 - condition: 'request.getHeader("X-Trino-Client-Tags") contains "label=foo"' - actions: - - 'result.put("routingGroup", "etl-foo")' - - name: "label bar" - description: "label client tag is bar" - priority: 0 - condition: 'request.getHeader("X-Trino-Client-Tags") contains "label=bar"' - actions: - - 'result.put("routingGroup", "etl-bar")' - - name: "airflow default" - description: "airflow queries default to etl" - condition: "true" - actions: - - 'result.put("routingGroup", "etl")' -``` - -##### If statements (MVEL Flow Control) - -Above, we saw how we can use `ConditionalRuleGroup` and `ActivationRuleGroup` to implement and `if/else` workflow. -We could also take advantage of the fact that MVEL supports `if` statements and other flow control (loops, etc). 
-The following logic in pseudocode: - -``` -if source == "airflow": - if clientTags["label"] == "foo": - return "etl-foo" - else if clientTags["label"] = "bar": - return "etl-bar" - else - return "etl" -``` - -Can be implemented with these rules: - -```yaml ---- -name: "airflow rules" -description: "if query from airflow" -condition: "request.getHeader(\"X-Trino-Source\") == \"airflow\"" -actions: - - "if (request.getHeader(\"X-Trino-Client-Tags\") contains \"label=foo\") { - result.put(\"routingGroup\", \"etl-foo\") - } - else "if (request.getHeader(\"X-Trino-Client-Tags\") contains \"label=bar\") { - result.put(\"routingGroup\", \"etl-bar\") - } - else { - result.put(\"routingGroup\", \"etl\") - }" -``` - -### Enabling routing rules engine - -To enable routing rules engine, find the following lines in `gateway-ha-config.yml`. -Set `rulesEngineEnabled` to True and `rulesConfigPath` to the path to your rules config file. - -``` -routingRules: - rulesEngineEnabled: true - rulesConfigPath: "src/test/resources/rules/routing_rules.yml" # replace with path to your rules config file -``` - -### Authentication - -The authentication would happen on https protocol only. Add the `authentication:` section in the config file. The default authentication type is set using `defaultType: "form"` -Following types of the authentications are supported. - -#### odic - -It can be configured as below - -``` - oauth: - issuer: - clientId: - clientSecret: - tokenEndpoint: - authorizationEndpoint: - jwkEndpoint: - redirectUrl: - userIdField: - scopes: - - s1 - - s2 - - s3 -``` - -#### form - -The authentication happens with the pre-defined users from the configuration file. -To define the preset user use the following section. 
- -``` -presetUsers: - user1: - password: - privileges: "lb_admin, lb_user" - user2: - password: - privileges: "lb_api" -``` - -Provide a key pair ins RSA format - -``` - form: - selfSignKeyPair: - privateKeyRSA: | - - publicKeyRSA: | - -``` - -### Authorization - -The roles supported by trino load balancer - -- admin : Allows access to the Editor tab, which can be used to configure the backends - -- user : Allows access to the rest of the website - -- api : Allows access to to rest apis to configure the backends - -Users with attributes next to the role will be giving those privileges the users. -User attributes from LDAP is supported or you can use the preset users defined in the yaml file. -Authorization is supported via LDAP user attributes - -``` -authorization: - admin: 'lb_admin' - user: 'lb_user' - api: "lb_api" - ldapHost: - ldapPort: - ldapBindDn: - ldapSearch: - ldapPassword: -``` - -## Contributing - -Want to help build trino Gateway? Check out our [contributing documentation](CONTRIBUTING.md) - -## References :sparkles: - -[Lyft](https://eng.lyft.com/trino-infrastructure-at-lyft-b10adb9db01) - -[Pinterest](https://medium.com/pinterest-engineering/trino-at-pinterest-a8bda7515e52) - -[Zomato](https://www.zomato.com/blog/powering-data-analytics-with-trino) - -[Shopify](https://shopify.engineering/faster-trino-query-execution-infrastructure) - -{{Your org here}} +A load balancer, proxy server, and configurable routing gateway for multiple +[Trino](https://trino.io) clusters. 
+ +Find out more details from our documentation: + +* [Design](./docs/design.md) +* [Development](./docs/development.md) +* [Security](./docs/security.md) +* [Operation](./docs/operation.md) +* [Gateway API](./docs/gateway-api.md) +* [Resource groups API](./docs/resource-groups-api.md) +* [Routing rules](./docs/routing-rules.md) +* [References](./docs/references.md) diff --git a/docs/design.md b/docs/design.md index 4b5c60534..055afd0d4 100644 --- a/docs/design.md +++ b/docs/design.md @@ -1,11 +1,36 @@ -Trino Gateway has 3 components -------------------------------- -1. BaseApp - It provides boilerplate code to add/remove pluggable components with config and metrics registration module. -![BaseApp Class Diagram](assets/BaseApp-classes.png) +**trino-gateway documentation** -2. ProxyServer - Its a library built on top of jetty proxy which provides a proxy server with a pluggable proxy-handler. -![ProxyServer Class Diagram](assets/ProxyServer-classes.png) + + + + + + + + + + + +
DesignDevelopmentSecurityOperationGateway APIResource groups APIRouting rulesReferences
+ +# Design + +Trino Gateway has three main components: + +1. BaseApp - It provides boilerplate code to add/remove pluggable components + with config and metrics registration module. + +![BaseApp Class Diagram](assets/BaseApp-classes.png) + +2. ProxyServer - It's a library built on top of jetty proxy which provides a + proxy server with a pluggable proxy-handler. + +![ProxyServer Class Diagram](assets/ProxyServer-classes.png) + +3. Gateway - This component acts as container for proxy-server and plugs in + ProxyHandlers to provide proxy, routing and load balancing functionalities. It + also exposes a few endpoints and UI to activate, deactivate backends and view + query history for recently submitted queries. -3. Gateway - This component acts as container for proxy-server and plugs in ProxyHanders to provide proxy, routing and load balancing functionalities. It also exposes few end points and UI to activate, deactivate backends and view query history for recently submitted queries. ![TrinoGateway Class Diagram](assets/TrinoGateway-classes.png) diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 000000000..ff780107c --- /dev/null +++ b/docs/development.md @@ -0,0 +1,102 @@ +**trino-gateway documentation** + + + + + + + + + + + + +
DesignDevelopmentSecurityOperationGateway APIResource groups APIRouting rulesReferences
+ +# Development + +## How to setup a dev environment + +Step 1: setup mysql. Install docker with docker-compose and run the below +command when setting up first time: + +#### Run the services - mysqldb, two instances of trino + +- This setup helps you develop and test any routing rules for the trino +- Both trino services would have a single `system` catalog +- Add the catalog properties files in ` bin/localdev/coordinator/` for + additional catalogs + +``` +cd localdev +docker-compose up -d +``` + +#### Check the "Status' of the services by + +`docker-compose ps` + +#### Create the schema for the backends, once mysqldb becomes healthy + +`docker-compose exec mysqldb sh -c "mysql -uroot -proot123 -hmysqldb -Dtrinogateway < /etc/mysql/gateway-ha-persistence.sql"` + +#### Add the backends for mysqldb + +`docker-compose exec mysqldb sh -c "mysql -uroot -proot123 -hmysqldb -Dtrinogateway < /etc/mysql/add_backends.sql"` + +#### Create the schema for the backends, once postgres becomes healthy + +`docker-compose exec postgres sh -c 'PGPASSWORD="P0stG&es" psql -h localhost -p 5432 -U trino_gateway_db_admin -d trino_gateway_db -f /etc/postgresql/gateway-ha-persistence-postgres.sql'` + +#### Add the backends for postgres + +`docker-compose exec postgres sh -c 'PGPASSWORD="P0stG&es" psql -h localhost -p 5432 -U trino_gateway_db_admin -d trino_gateway_db -f /etc/postgresql/add_backends_postgres.sql'` + +It would add 2 trino backend records which can be used for the development and +testing + +### Build and run + +Please note these steps have been verified with JDK 8 and 11. Higher versions of +Java might run into unexpected issues. + +run `mvn clean install` to build `trino-gateway` + +Edit the [config file](/gateway-ha/gateway-ha-config.yml) and update the mysql +db information. 
+ +``` +cd gateway-ha/target/ +java -jar gateway-ha-{{VERSION}}-jar-with-dependencies.jar server ../gateway-ha-config.yml +``` + +If you encounter a `Failed to connect to JDBC URL` error, this may be due to +newer versions of java disabling certain algorithms when using SSL/TLS, in +particular `TLSv1` and `TLSv1.1`. This will cause `Bad handshake` errors when +connecting to the MySQL server. To enable `TLSv1` and `TLSv1.1` open the +following file in any editor (`sudo` access needed): + +``` +/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/security/java.security +``` + +Search for `jdk.tls.disabledAlgorithms`, it should look something like this: + +``` +jdk.tls.disabledAlgorithms=SSLv3, TLSv1, TLSv1.1, RC4, DES, MD5withRSA, \ + DH keySize < 1024, EC keySize < 224, 3DES_EDE_CBC, anon, NULL, \ + include jdk.disabled.namedCurves +``` + +Remove `TLSv1, TLSv1.1` and redo the above steps to build and run `trino-gateway`. + +If you see test failures while building `trino-gateway` or in an IDE, please run +`mvn process-classes` to instrument javalite models which are used by the tests. +See +[javalite-examples](https://github.com/javalite/javalite-examples/tree/master/simple-example#instrumentation) +for more details. + +## Contributing + +Want to help build trino Gateway? Check out our [contributing +documentation](../CONTRIBUTING.md) diff --git a/docs/gateway-api.md b/docs/gateway-api.md new file mode 100644 index 000000000..18071315a --- /dev/null +++ b/docs/gateway-api.md @@ -0,0 +1,103 @@ +**trino-gateway documentation** + + + + + + + + + + + + +
DesignDevelopmentSecurityOperationGateway APIResource groups APIRouting rulesReferences
+ +# Gateway API + +## Add or update a backend + +```$xslt +curl -X POST http://localhost:8080/entity?entityType=GATEWAY_BACKEND \ + -d '{ "name": "trino-3", \ + "proxyTo": "http://localhost:8083",\ + "active": true, \ + "routingGroup": "adhoc" \ + }' +``` + +If the backend URL is different from the `proxyTo` URL (for example if they are +internal vs. external hostnames), you can use the optional `externalUrl` field +to override the link in the Active Backends page. + +```$xslt +curl -X POST http://localhost:8080/entity?entityType=GATEWAY_BACKEND \ + -d '{ "name": "trino-3", \ + "proxyTo": "http://localhost:8083",\ + "active": true, \ + "routingGroup": "adhoc", \ + "externalUrl": "http://localhost:8084" \ + }' +``` + +## Get all backends behind the gateway + +```$xslt +curl -X GET http://localhost:8080/entity/GATEWAY_BACKEND +[ + { + "name": "trino-1", + "proxyTo": "http://localhost:8081", + "active": true, + "routingGroup": "adhoc", + "externalUrl": "http://localhost:8081" + }, + { + "name": "trino-2", + "proxyTo": "http://localhost:8082", + "active": true, + "routingGroup": "adhoc", + "externalUrl": "http://localhost:8082" + }, + { + "name": "trino-3", + "proxyTo": "http://localhost:8083", + "active": true, + "routingGroup": "adhoc", + "externalUrl": "http://localhost:8084" + } +] +``` + +## Delete a backend from the gateway + +```$xslt +curl -X POST -d "trino3" http://localhost:8080/gateway/backend/modify/delete +``` + +## Deactivate a backend + +```$xslt +curl -X POST http://localhost:8080/gateway/backend/deactivate/trino2 +``` + +## Get all active backends behind the Gateway + +`curl -X GET http://localhost:8080/gateway/backend/active` + +``` +[ + { + "name": "trino-1", + "proxyTo": "http://localhost:8081", + "active": true, + "routingGroup": "adhoc", + "externalUrl": "http://localhost:8081" + } +] +``` + +## Activate a backend + +`curl -X POST http://localhost:8080/gateway/backend/activate/trino2` + diff --git a/docs/operation.md b/docs/operation.md new file
mode 100644 index 000000000..61480327a --- /dev/null +++ b/docs/operation.md @@ -0,0 +1,52 @@ +**trino-gateway documentation** + + + + + + + + + + + + +
DesignDevelopmentSecurityOperationGateway APIResource groups APIRouting rulesReferences
+ +# Operation + +The following aspects apply to managing Trino Gateway and the connected Trino +clusters. + +## Query History UI - check query plans etc. + +trinoGateway records history of recent queries and displays links to check query +details page in respective trino cluster. +![trino.gateway.io](/docs/assets/trinogateway_query_history.png) + +## Gateway Admin UI - add and modify backend information + +The Gateway admin page is used to configure the gateway to multiple backends. +Existing backend information can also be modified using the same. + +![trino.gateway.io/entity](/docs/assets/trinogateway_ha_admin.png) + + +## Graceful shutdown + +trino gateway supports graceful shutdown of trino clusters. Even when a cluster +is deactivated, any submitted query states can still be retrieved based on the +Query ID. + +To gracefully shut down a trino cluster without query losses, the steps are: + +1. Set the backend to deactivate state, this prevents any new incoming queries + from getting assigned to the backend. +2. Poll the trino backend coordinator URL until the queued query count and the + running query count both hit 0. +3. Terminate the trino Coordinator & Worker Java process. + +To gracefully shut down a single worker process, see +[this](https://trino.io/docs/current/admin/graceful-shutdown.html) for the +operations. + diff --git a/docs/references.md b/docs/references.md new file mode 100644 index 000000000..7d1641006 --- /dev/null +++ b/docs/references.md @@ -0,0 +1,26 @@ +**trino-gateway documentation** + + + + + + + + + + + + +
DesignDevelopmentSecurityOperationGateway APIResource groups APIRouting rulesReferences
+ +# References :sparkles: + +[Lyft](https://eng.lyft.com/trino-infrastructure-at-lyft-b10adb9db01) + +[Pinterest](https://medium.com/pinterest-engineering/trino-at-pinterest-a8bda7515e52) + +[Zomato](https://www.zomato.com/blog/powering-data-analytics-with-trino) + +[Shopify](https://shopify.engineering/faster-trino-query-execution-infrastructure) + +{{Your org here}} \ No newline at end of file diff --git a/docs/resource-groups-api.md b/docs/resource-groups-api.md new file mode 100644 index 000000000..6e0378731 --- /dev/null +++ b/docs/resource-groups-api.md @@ -0,0 +1,199 @@ +**trino-gateway documentation** + + + + + + + + + + + + +
DesignDevelopmentSecurityOperationGateway APIResource groups APIRouting rulesReferences
+ +# Resource groups API + +For resource group and selector apis, we can now specify a query parameter with +the request supporting multiple trino databases for different trino backends. +This allows a user to configure a db for every trino backend with their own +resource groups and selector tables. To use this, just specify the query +parameter `?useSchema=<schemaname>` to the request. For example, to list all resource +groups: + +```$xslt +curl -X GET http://localhost:8080/trino/resourcegroup/read/{INSERT_ID_HERE}?useSchema=newdatabasename +``` + +## Add a resource group + +To add a single resource group, specify all relevant fields in the body. +Resource group id should not be specified since the database should +autoincrement it. + +```$xslt +curl -X POST http://localhost:8080/trino/resourcegroup/create \ + -d '{ + "name": "resourcegroup1", \ + "softMemoryLimit": "100%", \ + "maxQueued": 100, \ + "softConcurrencyLimit": 100, \ + "hardConcurrencyLimit": 100, \ + "schedulingPolicy": null, \ + "schedulingWeight": null, \ + "jmxExport": null, \ + "softCpuLimit": null, \ + "hardCpuLimit": null, \ + "parent": null, \ + "environment": "test" \ + }' +``` + +## Get existing resource group(s) + +If no resourceGroupId (type long) is specified, then all existing resource +groups are fetched. + +```$xslt +curl -X GET http://localhost:8080/trino/resourcegroup/read/{INSERT_ID_HERE} +``` + +## Update a resource group + +Specify all columns in the body, which will overwrite properties for the +resource group with that specific resourceGroupId.
+ +```$xslt +curl -X POST http://localhost:8080/trino/resourcegroup/update \ + -d '{ "resourceGroupId": 1, \ + "name": "resourcegroup_updated", \ + "softMemoryLimit": "80%", \ + "maxQueued": 50, \ + "softConcurrencyLimit": 40, \ + "hardConcurrencyLimit": 60, \ + "schedulingPolicy": null, \ + "schedulingWeight": null, \ + "jmxExport": null, \ + "softCpuLimit": null, \ + "hardCpuLimit": null, \ + "parent": null, \ + "environment": "test" \ + }' +``` + +## Delete a resource group + +To delete a resource group, specify the corresponding resourceGroupId (type +long). + +```$xslt +curl -X POST http://localhost:8080/trino/resourcegroup/delete/{INSERT_ID_HERE} +``` + +## Add a selector + +To add a single selector, specify all relevant fields in the body. Resource +group id should not be specified since the database should autoincrement it. + +```$xslt +curl -X POST http://localhost:8080/trino/selector/create \ + -d '{ + "priority": 1, \ + "userRegex": "selector1", \ + "sourceRegex": "resourcegroup1", \ + "queryType": "insert" \ + }' +``` + +## Get existing selectors(s) + +If no resourceGroupId (type long) is specified, then all existing selectors are +fetched. + +```$xslt +curl -X GET http://localhost:8080/trino/selector/read/{INSERT_ID_HERE} +``` + +## Update a selector + +To update a selector, the existing selector must be specified with all relevant +fields under "current". The updated version of that selector is specified under +"update", with all relevant fields included. If the selector under "current" +does not exist, a new selector will be created with the details under "update". +Both "current" and "update" must be included to update a selector. 
+ +```$xslt +curl -X POST http://localhost:8080/trino/selector/update \ + -d '{ "current": { + "resourceGroupId": 1, \ + "priority": 1, \ + "userRegex": "selector1", \ + "sourceRegex": "resourcegroup1", \ + "queryType": "insert" \ + }, + "update": { + "resourceGroupId": 1, \ + "priority": 2, \ + "userRegex": "selector1_updated", \ + "sourceRegex": "resourcegroup1", \ + "queryType": null \ + } +}' +``` + +## Delete a selector + +To delete a selector, specify all relevant fields in the body. + +```$xslt +curl -X POST http://localhost:8080/trino/selector/delete \ + -d '{ "resourceGroupId": 1, \ + "priority": 2, \ + "userRegex": "selector1_updated", \ + "sourceRegex": "resourcegroup1", \ + "queryType": null \ + }' +``` + +## Add a global property + +To add a single global property, specify all relevant fields in the body. + +```$xslt +curl -X POST http://localhost:8080/trino/globalproperty/create \ + -d '{ + "name": "cpu_quota_period", \ + "value": "1h" \ + }' +``` + +## Get existing global properties + +If no name (type String) is specified, then all existing global properties are +fetched. + +```$xslt +curl -X GET http://localhost:8080/trino/globalproperty/read/{INSERT_NAME_HERE} +``` + +## Update a global property + +Specify all columns in the body, which will overwrite properties for the global +property with that specific name. + +```$xslt +curl -X POST http://localhost:8080/trino/globalproperty/update \ + -d '{ + "name": "cpu_quota_period", \ + "value": "2h" \ + }' +``` + +## Delete a global property + +To delete a global property, specify the corresponding name (type String). + +```$xslt +curl -X POST http://localhost:8080/trino/globalproperty/delete/{INSERT_NAME_HERE} +``` diff --git a/docs/routing-rules.md b/docs/routing-rules.md new file mode 100644 index 000000000..ea8233862 --- /dev/null +++ b/docs/routing-rules.md @@ -0,0 +1,270 @@ +**trino-gateway documentation** + + + + + + + + + + + + +
DesignDevelopmentSecurityOperationGateway APIResource groups APIRouting rulesReferences
+ +# Routing rules + +Trino Gateway includes a routing rules engine. + +By default, trino-gateway reads the `X-Trino-Routing-Group` request header to +route requests. If this header is not specified, requests are sent to default +routing group (adhoc). + +The routing rules engine feature enables you to write custom logic to route +requests based on the request info such as any of the [request +headers](https://trino.io/docs/current/develop/client-protocol.html#client-request-headers). +Routing rules are separated from trino-gateway application code to a +configuration file, allowing for dynamic rule changes. + +### Defining your routing rules + +To express and fire routing rules, we use the +[easy-rules](https://github.com/j-easy/easy-rules) engine. These rules should be +stored in a YAML file. Rules consist of a name, description, condition, and list +of actions. If the condition of a particular rule evaluates to true, its actions +are fired. + +```yaml +--- +name: "airflow" +description: "if query from airflow, route to etl group" +condition: 'request.getHeader("X-Trino-Source") == "airflow"' +actions: + - 'result.put("routingGroup", "etl")' +--- +name: "airflow special" +description: "if query from airflow with special label, route to etl-special group" +condition: 'request.getHeader("X-Trino-Source") == "airflow" && request.getHeader("X-Trino-Client-Tags") contains "label=special"' +actions: + - 'result.put("routingGroup", "etl-special")' +``` + +In the condition, you can access the methods of a +[HttpServletRequest](https://docs.oracle.com/javaee/6/api/javax/servlet/http/HttpServletRequest.html) +object called `request`. There should be at least one action of the form +`result.put(\"routingGroup\", \"foo\")` which says that if a request satisfies +the condition, it should be routed to `foo`. + +The condition and actions are written in [MVEL](http://mvel.documentnode.com/), +an expression language with Java-like syntax. 
In most cases, users can write +their conditions/actions in Java syntax and expect it to work. There are some +MVEL-specific operators that could be useful though. For example, instead of +doing a null-check before accessing the `String.contains` method like this: + +```yaml +condition: 'request.getHeader("X-Trino-Client-Tags") != null && request.getHeader("X-Trino-Client-Tags").contains("label=foo")' +``` + +You can use the `contains` operator + +```yaml +condition: 'request.getHeader("X-Trino-Client-Tags") contains "label=foo"' +``` + +If no rules match, then the request is routed to adhoc. + +### Execution of Rules + +All rules whose conditions are satisfied will fire. For example, in the +"airflow" and "airflow special" example rules given above, a query with source +`airflow` and label `special` will satisfy both rules. The `routingGroup` is set +to `etl` and then to `etl-special` because of the order in which the rules are +defined. If we swap the order of the rules, then we would possibly get `etl` +instead, which is undesirable. + +One could solve this by writing the rules such that they're atomic (any query +will match exactly one rule). For example we can change the first rule to + +```yaml +--- +name: "airflow" +description: "if query from airflow, route to etl group" +condition: 'request.getHeader("X-Trino-Source") == "airflow" && request.getHeader("X-Trino-Client-Tags") == null' +actions: + - 'result.put("routingGroup", "etl")' +--- +``` + +This could be hard to maintain as we add more rules. To have better control over +the execution of rules, we could use rule priorities and composite rules. +Overall, with priorities, composite rules, and the constructs that MVEL supports, +you should likely be able to express your routing logic. + +#### Rule Priority + +We can assign an integer value `priority` to a rule. The lower this integer is, +the earlier it will fire. If the priority is not specified, the priority is +defaulted to INT_MAX.
We can add priorities to our airflow and airflow special +rule like so: + +```yaml +--- +name: "airflow" +description: "if query from airflow, route to etl group" +priority: 0 +condition: 'request.getHeader("X-Trino-Source") == "airflow"' +actions: + - 'result.put("routingGroup", "etl")' +--- +name: "airflow special" +description: "if query from airflow with special label, route to etl-special group" +priority: 1 +condition: 'request.getHeader("X-Trino-Source") == "airflow" && request.getHeader("X-Trino-Client-Tags") contains "label=special"' +actions: + - 'result.put("routingGroup", "etl-special")' +``` + +Note that both rules will still fire. The difference is that we've guaranteed +that the first rule (priority 0) is fired before the second rule (priority 1). +Thus `routingGroup` is set to `etl` and then to `etl-special`, so the +`routingGroup` will always be `etl-special` in the end. + +Above, the more specific rules have less priority since we want them to be the +last to set `routingGroup`. This is a little counterintuitive. To further +control the execution of rules, for example to have only one rule fire, we can +use composite rules. + +##### Composite Rules + +First, please refer to easy-rule composite rules docs: +https://github.com/j-easy/easy-rules/wiki/defining-rules#composite-rules + +Above, we saw how to control the order of rule execution using priorities. In +addition to this, we could have only the first rule matched to be fired (the +highest priority one) and the rest ignored. We can use `ActivationRuleGroup` to +achieve this. 
+ +```yaml +--- +name: "airflow rule group" +description: "routing rules for query from airflow" +compositeRuleType: "ActivationRuleGroup" +composingRules: + - name: "airflow special" + description: "if query from airflow with special label, route to etl-special group" + priority: 0 + condition: 'request.getHeader("X-Trino-Source") == "airflow" && request.getHeader("X-Trino-Client-Tags") contains "label=special"' + actions: + - 'result.put("routingGroup", "etl-special")' + - name: "airflow" + description: "if query from airflow, route to etl group" + priority: 1 + condition: 'request.getHeader("X-Trino-Source") == "airflow"' + actions: + - 'result.put("routingGroup", "etl")' +``` + +Note that the priorities have switched. The more specific rule has a higher +priority, since we want it to be fired first. A query coming from airflow with +special label is matched to the "airflow special" rule first, since it's higher +priority, and the second rule is ignored. A query coming from airflow with no +labels does not match the first rule, and is then tested and matched to the +second rule. + +We can also use `ConditionalRuleGroup` and `ActivationRuleGroup` to implement an +if/else workflow. 
The following logic in pseudocode: + +``` +if source == "airflow": + if clientTags["label"] == "foo": + return "etl-foo" + else if clientTags["label"] == "bar": + return "etl-bar" + else + return "etl" +``` + +Can be implemented with these rules: + +```yaml +name: "airflow rule group" +description: "routing rules for query from airflow" +compositeRuleType: "ConditionalRuleGroup" +composingRules: + - name: "main condition" + description: "source is airflow" + priority: 0 # rule with the highest priority acts as main condition + condition: 'request.getHeader("X-Trino-Source") == "airflow"' + actions: + - "" + - name: "airflow subrules" + compositeRuleType: "ActivationRuleGroup" # use ActivationRuleGroup to simulate if/else + composingRules: + - name: "label foo" + description: "label client tag is foo" + priority: 0 + condition: 'request.getHeader("X-Trino-Client-Tags") contains "label=foo"' + actions: + - 'result.put("routingGroup", "etl-foo")' + - name: "label bar" + description: "label client tag is bar" + priority: 0 + condition: 'request.getHeader("X-Trino-Client-Tags") contains "label=bar"' + actions: + - 'result.put("routingGroup", "etl-bar")' + - name: "airflow default" + description: "airflow queries default to etl" + condition: "true" + actions: + - 'result.put("routingGroup", "etl")' +``` + +##### If statements (MVEL Flow Control) + +Above, we saw how we can use `ConditionalRuleGroup` and `ActivationRuleGroup` to +implement an `if/else` workflow. We could also take advantage of the fact that +MVEL supports `if` statements and other flow control (loops, etc).
The following +logic in pseudocode: + +``` +if source == "airflow": + if clientTags["label"] == "foo": + return "etl-foo" + else if clientTags["label"] == "bar": + return "etl-bar" + else + return "etl" +``` + +Can be implemented with these rules: + +```yaml +--- +name: "airflow rules" +description: "if query from airflow" +condition: "request.getHeader(\"X-Trino-Source\") == \"airflow\"" +actions: + - "if (request.getHeader(\"X-Trino-Client-Tags\") contains \"label=foo\") { + result.put(\"routingGroup\", \"etl-foo\") + } + else if (request.getHeader(\"X-Trino-Client-Tags\") contains \"label=bar\") { + result.put(\"routingGroup\", \"etl-bar\") + } + else { + result.put(\"routingGroup\", \"etl\") + }" +``` + +### Enabling routing rules engine + +To enable the routing rules engine, find the following lines in +`gateway-ha-config.yml`. Set `rulesEngineEnabled` to `true` and `rulesConfigPath` +to the path to your rules config file. + +``` +routingRules: + rulesEngineEnabled: true + rulesConfigPath: "src/test/resources/rules/routing_rules.yml" # replace with path to your rules config file +``` + diff --git a/docs/security.md b/docs/security.md new file mode 100644 index 000000000..0a9e55c2f --- /dev/null +++ b/docs/security.md @@ -0,0 +1,99 @@ +**trino-gateway documentation** + + + + + + + + + + + + +
DesignDevelopmentSecurityOperationGateway APIResource groups APIRouting rulesReferences
+ +# Security + +## Authentication + +The authentication would happen on https protocol only. Add the +`authentication:` section in the config file. The default authentication type is +set using `defaultType: "form"` Following types of the authentications are +supported. + +### odic + +It can be configured as below + +``` + oauth: + issuer: + clientId: + clientSecret: + tokenEndpoint: + authorizationEndpoint: + jwkEndpoint: + redirectUrl: + userIdField: + scopes: + - s1 + - s2 + - s3 +``` + +### form + +The authentication happens with the pre-defined users from the configuration +file. To define the preset user use the following section. + +``` +presetUsers: + user1: + password: + privileges: "lb_admin, lb_user" + user2: + password: + privileges: "lb_api" +``` + +Provide a key pair ins RSA format + +``` + form: + selfSignKeyPair: + privateKeyRSA: | + + publicKeyRSA: | + +``` + +## Authorization + +The roles supported by trino load balancer + +- admin : Allows access to the Editor tab, which can be used to configure the + backends + +- user : Allows access to the rest of the website + +- api : Allows access to the REST APIs to configure the backends + +Users with the attribute listed next to a role are granted that role's +privileges. User attributes from LDAP are supported, or you can use the preset +users defined in the yaml file.
Authorization is supported via LDAP user attributes + +``` +authorization: + admin: 'lb_admin' + user: 'lb_user' + api: "lb_api" + ldapHost: + ldapPort: + ldapBindDn: + ldapSearch: + ldapPassword: +``` + + + From fa120b366489bac9d229fdd63baf18b980f6a455 Mon Sep 17 00:00:00 2001 From: Manfred Moser Date: Thu, 31 Aug 2023 11:45:48 -0700 Subject: [PATCH 04/12] Fix internal project dependency --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 0a9cc41ab..7287bc692 100644 --- a/pom.xml +++ b/pom.xml @@ -34,12 +34,12 @@ io.trino.gateway baseapp - 1.9.5 + ${project.version} io.trino.gateway proxyserver - 1.9.5 + ${project.version} org.eclipse.jetty From c744f6a4d01c16382e0833646922106ddde1b4d3 Mon Sep 17 00:00:00 2001 From: Manfred Moser Date: Thu, 31 Aug 2023 12:38:49 -0700 Subject: [PATCH 05/12] Add GHA build for pull requests to main --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 498a32e01..4f4d873f7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,9 @@ on: push: branches: - main + pull_request: + branches: + - main defaults: run: From 8e8b1c31b64677ff12bda83edd0d9ab33d7c1b4a Mon Sep 17 00:00:00 2001 From: Will Morrison Date: Thu, 24 Aug 2023 22:05:19 -0400 Subject: [PATCH 06/12] Move VM options to jvm.config --- .mvn/jvm.config | 1 + gateway-ha/pom.xml | 9 --------- mvnw | 4 +--- 3 files changed, 2 insertions(+), 12 deletions(-) create mode 100644 .mvn/jvm.config diff --git a/.mvn/jvm.config b/.mvn/jvm.config new file mode 100644 index 000000000..b7763506d --- /dev/null +++ b/.mvn/jvm.config @@ -0,0 +1 @@ +--add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED diff --git a/gateway-ha/pom.xml b/gateway-ha/pom.xml index 41308b3bb..b4ccb0ac6 100644 --- a/gateway-ha/pom.xml +++ b/gateway-ha/pom.xml @@ -254,15 +254,6 @@
- - org.apache.maven.plugins - maven-surefire-plugin - 3.1.2 - - --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED - - - diff --git a/mvnw b/mvnw index 4fff22f62..8d937f4c1 100755 --- a/mvnw +++ b/mvnw @@ -280,9 +280,7 @@ if [ -n "$wrapperSha256Sum" ]; then fi fi -#This option is required by the javalite db-migrator-maven-plugin, which does not support adding VM options in its config -REQUIRED_BUILD_OPTIONS='--add-opens=java.base/java.net=ALL-UNNAMED' -MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $REQUIRED_BUILD_OPTIONS $MAVEN_OPTS" +MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" # For Cygwin, switch paths to Windows format before running java if $cygwin; then From d31bf709989337862b33466386e9232b32e03ca0 Mon Sep 17 00:00:00 2001 From: Manfred Moser Date: Wed, 6 Sep 2023 09:09:02 -0700 Subject: [PATCH 07/12] Add JVM config for test runs - Open package access for integration tests - Add note about same content in jvm.config --- gateway-ha/pom.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gateway-ha/pom.xml b/gateway-ha/pom.xml index b4ccb0ac6..9048b9ce7 100644 --- a/gateway-ha/pom.xml +++ b/gateway-ha/pom.xml @@ -238,6 +238,14 @@ + + org.apache.maven.plugins + maven-surefire-plugin + + + --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED + + org.javalite db-migrator-maven-plugin From f3ca83c81fd13828dce673ee21746fa8e0ff07ec Mon Sep 17 00:00:00 2001 From: Jaeho Yoo Date: Wed, 6 Sep 2023 15:54:43 +0900 Subject: [PATCH 08/12] Allow /v1/node to be routed by gateway --- .../io/trino/gateway/ha/handler/QueryIdCachingProxyHandler.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gateway-ha/src/main/java/io/trino/gateway/ha/handler/QueryIdCachingProxyHandler.java b/gateway-ha/src/main/java/io/trino/gateway/ha/handler/QueryIdCachingProxyHandler.java index 0bcca287d..2159dc78e 100644 --- 
a/gateway-ha/src/main/java/io/trino/gateway/ha/handler/QueryIdCachingProxyHandler.java +++ b/gateway-ha/src/main/java/io/trino/gateway/ha/handler/QueryIdCachingProxyHandler.java @@ -33,6 +33,7 @@ public class QueryIdCachingProxyHandler extends ProxyHandler { public static final String V1_STATEMENT_PATH = "/v1/statement"; public static final String V1_QUERY_PATH = "/v1/query"; public static final String V1_INFO_PATH = "/v1/info"; + public static final String V1_NODE_PATH = "/v1/node"; public static final String UI_API_STATS_PATH = "/ui/api/stats"; public static final String UI_LOGIN_PATH = "/ui/login"; public static final String UI_API_QUEUED_LIST_PATH = "/ui/api/query?state=QUEUED"; @@ -215,6 +216,7 @@ private boolean isPathWhiteListed(String path) { || path.startsWith(V1_QUERY_PATH) || path.startsWith(TRINO_UI_PATH) || path.startsWith(V1_INFO_PATH) + || path.startsWith(V1_NODE_PATH) || path.startsWith(UI_API_STATS_PATH) || path.startsWith(OAUTH_PATH); } From fbfb546a526b9eb2757a2e3b7916a89c161b0ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9C=A0=EC=9E=AC=ED=98=B8=5BJaeho=20Yoo=5D=5BData=20Anal?= =?UTF-8?q?ytics=20Solution=5D?= Date: Mon, 4 Sep 2023 12:15:40 +0900 Subject: [PATCH 09/12] Improve security documentation --- docs/security.md | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/docs/security.md b/docs/security.md index 0a9e55c2f..612f7d828 100644 --- a/docs/security.md +++ b/docs/security.md @@ -22,11 +22,13 @@ The authentication would happen on https protocol only. Add the set using `defaultType: "form"` Following types of the authentications are supported. -### odic +### OAuth/OpenIDConnect It can be configured as below ``` +authentication: + defaultType: "oauth" oauth: issuer: clientId: @@ -42,7 +44,7 @@ It can be configured as below - s3 ``` -### form +### Form/Basic Auth The authentication happens with the pre-defined users from the configuration file. To define the preset user use the following section. 
@@ -57,17 +59,27 @@ presetUsers: privileges: "lb_api" ``` -Provide a key pair ins RSA format +Also provide a random key pair in RSA format. ``` +authentication: + defaultType: "form" form: selfSignKeyPair: - privateKeyRSA: | - - publicKeyRSA: | - + privateKeyRsa: + publicKeyRsa: ``` +### Form/LDAP + +``` +authentication: + defaultType: "form" + form: + ldapConfigPath: +``` + + ## Authorization The roles supported by trino load balancer From ca1ed8581ca4a76408ba56164639c23b2644de2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9C=A0=EC=9E=AC=ED=98=B8=5BJaeho=20Yoo=5D=5BData=20Anal?= =?UTF-8?q?ytics=20Solution=5D?= Date: Tue, 12 Sep 2023 11:30:51 +0900 Subject: [PATCH 10/12] Update server command in development doc --- docs/development.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/development.md b/docs/development.md index ff780107c..2ff2aa1cb 100644 --- a/docs/development.md +++ b/docs/development.md @@ -67,7 +67,7 @@ db information. ``` cd gateway-ha/target/ -java -jar gateway-ha-{{VERSION}}-jar-with-dependencies.jar server ../gateway-ha-config.yml +java --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED -jar gateway-ha-{{VERSION}}-jar-with-dependencies.jar server ../gateway-ha-config.yml ``` If you encounter a `Failed to connect to JDBC URL` error, this may be due to From 2a9e26f8130c5494a79c5830e501fa122b1585d9 Mon Sep 17 00:00:00 2001 From: Will Morrison Date: Fri, 1 Sep 2023 10:03:40 -0400 Subject: [PATCH 11/12] Update development instructions for Java 17 --- docs/development.md | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/docs/development.md b/docs/development.md index 2ff2aa1cb..c77c2dc37 100644 --- a/docs/development.md +++ b/docs/development.md @@ -57,10 +57,11 @@ testing ### Build and run -Please note these steps have been verified with JDK 8 and 11. Higher versions of -Java might run into unexpected issues. +This project requires Java 17. 
Note that higher versions of Java have not been +verified and may run into unexpected issues. -run `mvn clean install` to build `trino-gateway` +Run `./mvnw clean install` to build `trino-gateway`. VM options required for +compilation and testing are specified in `.mvn/jvm.config`. Edit the [config file](/gateway-ha/gateway-ha-config.yml) and update the mysql db information. @@ -70,17 +71,20 @@ cd gateway-ha/target/ java --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED -jar gateway-ha-{{VERSION}}-jar-with-dependencies.jar server ../gateway-ha-config.yml ``` -If you encounter a `Failed to connect to JDBC URL` error, this may be due to -newer versions of java disabling certain algorithms when using SSL/TLS, in -particular `TLSv1` and `TLSv1.1`. This will cause `Bad handshake` errors when -connecting to the MySQL server. To enable `TLSv1` and `TLSv1.1` open the -following file in any editor (`sudo` access needed): +If you encounter a `Failed to connect to JDBC URL` error with the MySQL backend, +this may be due to newer versions of Java disabling certain algorithms when +using SSL/TLS, in particular `TLSv1` and `TLSv1.1`. This causes `Bad handshake` +errors when connecting to the MySQL server. You can avoid this by enabling +`TLSv1` and `TLSv1.1` in your JDK, or by adding `sslMode=DISABLED` to your +connection string.
+ +To enable TLSv1 and TLSv1.1, in ``` -/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/security/java.security +${JAVA_HOME}/conf/security/java.security ``` -Search for `jdk.tls.disabledAlgorithms`, it should look something like this: +search for `jdk.tls.disabledAlgorithms`, it should look something like this: ``` jdk.tls.disabledAlgorithms=SSLv3, TLSv1, TLSv1.1, RC4, DES, MD5withRSA, \ @@ -88,11 +92,12 @@ jdk.tls.disabledAlgorithms=SSLv3, TLSv1, TLSv1.1, RC4, DES, MD5withRSA, \ include jdk.disabled.namedCurves ``` -Remove `TLSv1, TLSv1.1` and redo the above steps to build and run `trino-gateway`. +Remove `TLSv1, TLSv1.1` and redo the above steps to build and run +`trino-gateway`. If you see test failures while building `trino-gateway` or in an IDE, please run -`mvn process-classes` to instrument javalite models which are used by the tests -. Ref +`mvn process-classes` to instrument javalite models which are used by the tests. +Refer to the [javalite-examples](https://github.com/javalite/javalite-examples/tree/master/simple-example#instrumentation) for more details. From d2abb53272eef1e3aa741eb3da21957ee47dd646 Mon Sep 17 00:00:00 2001 From: Manfred Moser Date: Tue, 12 Sep 2023 11:46:58 -0700 Subject: [PATCH 12/12] Add doc for contributing --- .github/CONTRIBUTING.md | 14 ++++++++++++++ README.md | 1 + docs/development.md | 4 ++-- 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 .github/CONTRIBUTING.md diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 000000000..768b35b82 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,14 @@ +# Contributing + +## Contributor License Agreement ("CLA") + +In order to accept your pull request, we need you to [submit a CLA](https://github.com/trinodb/cla). + +## License + +By contributing, you agree that your contributions will be licensed under the +[Apache License Version 2.0 (APLv2)](../LICENSE).
+ +## Contribution process + +See the suggested [process for contributions](https://trino.io/development/process.html). diff --git a/README.md b/README.md index 0f9244478..14a86d576 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,4 @@ Find out more details from our documentation: * [Resource groups API](./docs/resource-groups-api.md) * [Routing rules](./docs/routing-rules.md) * [References](./docs/references.md) +* [Contributing](./.github/CONTRIBUTING.md) diff --git a/docs/development.md b/docs/development.md index c77c2dc37..fa0e9e69a 100644 --- a/docs/development.md +++ b/docs/development.md @@ -103,5 +103,5 @@ for more details. ## Contributing -Want to help build trino Gateway? Check out our [contributing -documentation](CONTRIBUTING.md) +Want to help build Trino Gateway? Check out our [contributing +documentation](../.github/CONTRIBUTING.md)