Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
findingrish committed May 8, 2024
2 parents 26eb44d + dded473 commit 81dc604
Show file tree
Hide file tree
Showing 43 changed files with 641 additions and 236 deletions.
1 change: 1 addition & 0 deletions distribution/bin/find-missing-backports.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def find_next_url(links):
# The script takes exactly four positional arguments. On any other count,
# print the usage text to stderr and exit with a non-zero status.
if len(sys.argv) != 5:
    sys.stderr.write('usage: program <github-username> <previous-release-branch> <current-release-branch> <milestone-number>\n')
    # Each usage line ends with a newline so that consecutive hints do not run
    # together on the terminal and the shell prompt starts on a fresh line.
    sys.stderr.write(" e.g., program myusername 0.17.0 0.18.0 30\n")
    sys.stderr.write(" e.g., The milestone number for Druid 30 is 56, since the milestone has the url https://github.com/apache/druid/milestone/56\n")
    sys.stderr.write(" It is also necessary to set a GIT_TOKEN environment variable containing a personal access token.\n")
    sys.exit(1)

Expand Down
3 changes: 2 additions & 1 deletion distribution/bin/tag-missing-milestones.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# The script takes exactly four positional arguments. On any other count,
# emit the usage text on stderr and exit with status 1.
if len(sys.argv) != 5:
    usage_lines = (
        'usage: program <github-username> <previous-release-commit> <new-release-commit> <milestone-number-to-tag>\n',
        " e.g., program myusername 75c70c2ccc 29f3a328da 30\n",
        " e.g., The milestone number for Druid 30 is 56, since the milestone has the url https://github.com/apache/druid/milestone/56\n",
        " It is also necessary to set a GIT_TOKEN environment variable containing a personal access token.\n",
    )
    # writelines adds no separators; every entry already carries its own newline.
    sys.stderr.writelines(usage_lines)
    sys.exit(1)

Expand Down Expand Up @@ -56,7 +57,7 @@
url = "https://api.github.com/repos/apache/druid/issues/{}".format(pr_number)
requests.patch(url, json=milestone_json, auth=(github_username, os.environ["GIT_TOKEN"]))
else:
print("Skipping Pull Request {} since it's already tagged with milestone {}".format(pr_number, milestone))
print("Skipping Pull Request {} since it's already tagged with milestone {}".format(pr_number, pr['milestone']['number']))

except Exception as e:
print("Got exception for commit: {} ex: {}".format(sha, e))
Expand Down
8 changes: 4 additions & 4 deletions distribution/docker/Dockerfile.mysql
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ FROM $DRUID_RELEASE

WORKDIR /opt/druid/extensions/mysql-metadata-storage

ARG MYSQL_URL=https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar
ARG MYSQL_JAR=mysql-connector-java-5.1.49.jar
# https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar.sha1
ARG MYSQL_SHA=cf76d2e4c9c3782a85c15c87bec5772b34ffd0e5
ARG MYSQL_URL=https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.2.0/mysql-connector-j-8.2.0.jar
ARG MYSQL_JAR=mysql-connector-j-8.2.0.jar
# https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.2.0/mysql-connector-j-8.2.0.jar.sha1
ARG MYSQL_SHA=56d34aea30915904b1c883f1cfae731dd2df6029

RUN wget -q ${MYSQL_URL} \
&& echo "${MYSQL_SHA} ${MYSQL_JAR}" | sha1sum -c \
Expand Down
3 changes: 3 additions & 0 deletions distribution/docker/druid.sh
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ if [ -n "$DRUID_MAXDIRECTMEMORYSIZE" ]; then setJavaKey ${SERVICE} -XX:MaxDirect
# However this behavior is not part of the spec and is thus implementation specific
JAVA_OPTS="$(cat $SERVICE_CONF_DIR/jvm.config | xargs) $JAVA_OPTS"

# Specify node type used for log4j2.xml
JAVA_OPTS="-Ddruid.node.type=$SERVICE $JAVA_OPTS"

if [ -n "$DRUID_LOG_LEVEL" ]
then
sed -ri 's/"info"/"'$DRUID_LOG_LEVEL'"/g' $COMMON_CONF_DIR/log4j2.xml
Expand Down
10 changes: 5 additions & 5 deletions docs/configuration/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -631,11 +631,11 @@ You can use the following properties to specify permissible JDBC options for:

These properties do not apply to metadata storage connections.

|Property|Possible values|Description|Default|
|--------|---------------|-----------|-------|
|`druid.access.jdbc.enforceAllowedProperties`|Boolean|When true, Druid applies `druid.access.jdbc.allowedProperties` to JDBC connections starting with `jdbc:postgresql:`, `jdbc:mysql:`, or `jdbc:mariadb:`. When false, Druid allows any kind of JDBC connections without JDBC property validation. This config is for backward compatibility especially during upgrades since enforcing allow list can break existing ingestion jobs or lookups based on JDBC. This config is deprecated and will be removed in a future release.|true|
|`druid.access.jdbc.allowedProperties`|List of JDBC properties|Defines a list of allowed JDBC properties. Druid always enforces the list for all JDBC connections starting with `jdbc:postgresql:`, `jdbc:mysql:`, and `jdbc:mariadb:` if `druid.access.jdbc.enforceAllowedProperties` is set to true.<br/><br/>This option is tested against MySQL connector 5.1.49, MariaDB connector 2.7.4, and PostgreSQL connector 42.2.14. Other connector versions might not work.|`["useSSL", "requireSSL", "ssl", "sslmode"]`|
|`druid.access.jdbc.allowUnknownJdbcUrlFormat`|Boolean|When false, Druid only accepts JDBC connections starting with `jdbc:postgresql:` or `jdbc:mysql:`. When true, Druid allows JDBC connections to any kind of database, but only enforces `druid.access.jdbc.allowedProperties` for PostgreSQL and MySQL/MariaDB.|true|
|Property|Possible values| Description |Default|
|--------|---------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------|
|`druid.access.jdbc.enforceAllowedProperties`|Boolean| When true, Druid applies `druid.access.jdbc.allowedProperties` to JDBC connections starting with `jdbc:postgresql:`, `jdbc:mysql:`, or `jdbc:mariadb:`. When false, Druid allows any kind of JDBC connections without JDBC property validation. This config is for backward compatibility especially during upgrades since enforcing allow list can break existing ingestion jobs or lookups based on JDBC. This config is deprecated and will be removed in a future release. |true|
|`druid.access.jdbc.allowedProperties`|List of JDBC properties| Defines a list of allowed JDBC properties. Druid always enforces the list for all JDBC connections starting with `jdbc:postgresql:`, `jdbc:mysql:`, and `jdbc:mariadb:` if `druid.access.jdbc.enforceAllowedProperties` is set to true.<br/><br/>This option is tested against MySQL connector 8.2.0, MariaDB connector 2.7.4, and PostgreSQL connector 42.2.14. Other connector versions might not work. |`["useSSL", "requireSSL", "ssl", "sslmode"]`|
|`druid.access.jdbc.allowUnknownJdbcUrlFormat`|Boolean| When false, Druid only accepts JDBC connections starting with `jdbc:postgresql:` or `jdbc:mysql:`. When true, Druid allows JDBC connections to any kind of database, but only enforces `druid.access.jdbc.allowedProperties` for PostgreSQL and MySQL/MariaDB. |true|

### Task logging

Expand Down
4 changes: 2 additions & 2 deletions docs/development/extensions-core/mysql.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ This extension can use Oracle's MySQL JDBC driver which is not included in the D
install it separately. There are a few ways to obtain this library:

- It can be downloaded from the MySQL site at: https://dev.mysql.com/downloads/connector/j/
- It can be fetched from Maven Central at: https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar
- It can be fetched from Maven Central at: https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.2.0/mysql-connector-j-8.2.0.jar
- It may be available through your package manager, e.g. as `libmysql-java` on APT for a Debian-based OS

This fetches the MySQL connector JAR file with a name like `mysql-connector-java-5.1.49.jar`.
This fetches the MySQL connector JAR file with a name like `mysql-connector-j-8.2.0.jar`.

Copy or symlink this file inside the folder `extensions/mysql-metadata-storage` under the distribution root directory.

Expand Down
2 changes: 1 addition & 1 deletion docs/multi-stage-query/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ The following table lists the context parameters for the MSQ task engine:
| `faultTolerance` | SELECT, INSERT, REPLACE<br /><br /> Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` |
| `selectDestination` | SELECT<br /><br /> Controls where the final result of the select query is written. <br />Use `taskReport` (the default) to write select results to the task report. <b> This is not scalable since the task report size explodes for large results. </b> <br/>Use `durableStorage` to write results to a durable storage location. <b>For large result sets, it's recommended to use `durableStorage`</b>. To configure durable storage, see [`this`](#durable-storage) section. | `taskReport` |
| `waitUntilSegmentsLoad` | INSERT, REPLACE<br /><br /> If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` |
| `includeSegmentSource` | SELECT, INSERT, REPLACE<br /><br /> Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` |
| `includeSegmentSource` | SELECT, INSERT, REPLACE<br /><br /> Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. `REALTIME` cannot be used while writing data into the same datasource it is read from.| `NONE` |
| `rowsPerPage` | SELECT<br /><br />The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.<br /> This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 |
| `skipTypeVerification` | INSERT or REPLACE<br /><br />During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.<br /><br />Provide the column list as comma-separated values or as a JSON array in string form.| empty list |
| `failOnEmptyInsert` | INSERT or REPLACE<br /><br /> When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` |
Expand Down
2 changes: 1 addition & 1 deletion docs/operations/use_sbt_to_build_fat_jar.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ libraryDependencies ++= Seq(
"com.fasterxml.jackson.jaxrs" % "jackson-jaxrs-smile-provider" % "2.3.0",
"com.fasterxml.jackson.module" % "jackson-module-jaxb-annotations" % "2.3.0",
"com.sun.jersey" % "jersey-servlet" % "1.17.1",
"mysql" % "mysql-connector-java" % "5.1.34",
"com.mysql" % "mysql-connector-j" % "8.2.0",
"org.scalatest" %% "scalatest" % "2.2.3" % "test",
"org.mockito" % "mockito-core" % "1.10.19" % "test"
)
Expand Down
1 change: 1 addition & 0 deletions docs/querying/query-context.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ See [SQL query context](sql-query-context.md) for other query context parameters
|`secondaryPartitionPruning`|`true`|Enable secondary partition pruning on the Broker. The Broker will always prune unnecessary segments from the input scan based on a filter on time intervals, but if the data is further partitioned with hash or range partitioning, this option will enable additional pruning based on a filter on secondary partition dimensions.|
|`debug`| `false` | Flag indicating whether to enable debugging outputs for the query. When set to false, no additional logs will be produced (logs produced will be entirely dependent on your logging level). When set to true, the following additional logs will be produced:<br />- Log the stack trace of the exception (if any) produced by the query |
|`setProcessingThreadNames`|`true`| Whether processing thread names will be set to `queryType_dataSource_intervals` while processing a query. This aids in interpreting thread dumps, and is on by default. Query overhead can be reduced slightly by setting this to `false`. This has a tiny effect in most scenarios, but can be meaningful in high-QPS, low-per-segment-processing-time scenarios. |
|`sqlPlannerBloat`|`1000`|Calcite parameter which controls whether to merge two Project operators when inlining expressions causes complexity to increase. Implemented as a workaround to exception `There are not enough rules to produce a node with desired properties: convention=DRUID, sort=[]` thrown after rejecting the merge of two projects.|

## Parameters by query type

Expand Down
4 changes: 2 additions & 2 deletions extensions-core/lookups-cached-global/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<version>${mysql.version}</version>
<scope>test</scope>
</dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,46 +156,6 @@ public boolean isEnforceAllowedProperties()
}
);
}

@Test
public void testWhenInvalidUrlFormat()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Invalid URL format for MySQL: [jdbc:mysql:/invalid-url::3006]");
new JdbcExtractionNamespace(
new MetadataStorageConnectorConfig()
{
@Override
public String getConnectURI()
{
return "jdbc:mysql:/invalid-url::3006";
}
},
TABLE_NAME,
KEY_NAME,
VAL_NAME,
TS_COLUMN,
"some filter",
new Period(10),
null,
0,
null,
new JdbcAccessSecurityConfig()
{
@Override
public Set<String> getAllowedProperties()
{
return ImmutableSet.of("valid_key1", "valid_key2");
}

@Override
public boolean isEnforceAllowedProperties()
{
return true;
}
}
);
}
}

public static class PostgreSqlTest
Expand Down
4 changes: 2 additions & 2 deletions extensions-core/lookups-cached-single/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<version>${mysql.version}</version>
<scope>test</scope>
</dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,41 +139,6 @@ public boolean isEnforceAllowedProperties()
}
);
}

@Test
public void testWhenInvalidUrlFormat()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Invalid URL format for MySQL: [jdbc:mysql:/invalid-url::3006]");
new JdbcDataFetcher(
new MetadataStorageConnectorConfig()
{
@Override
public String getConnectURI()
{
return "jdbc:mysql:/invalid-url::3006";
}
},
TABLE_NAME,
KEY_COLUMN,
VALUE_COLUMN,
100,
new JdbcAccessSecurityConfig()
{
@Override
public Set<String> getAllowedProperties()
{
return ImmutableSet.of("valid_key1", "valid_key2");
}

@Override
public boolean isEnforceAllowedProperties()
{
return true;
}
}
);
}
}

public static class PostgreSqlTest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@
import org.apache.druid.msq.util.DimensionSchemaUtils;
import org.apache.druid.msq.util.IntervalUtils;
import org.apache.druid.msq.util.MSQFutureUtils;
import org.apache.druid.msq.util.MSQTaskQueryMakerUtils;
import org.apache.druid.msq.util.MultiStageQueryContext;
import org.apache.druid.msq.util.PassthroughAggregatorFactory;
import org.apache.druid.query.Query;
Expand Down Expand Up @@ -1691,6 +1692,8 @@ private static QueryDefinition makeQueryDefinition(
throw new ISE("Column names are not unique: [%s]", columnMappings.getOutputColumnNames());
}

MSQTaskQueryMakerUtils.validateRealtimeReindex(querySpec);

if (columnMappings.hasOutputColumn(ColumnHolder.TIME_COLUMN_NAME)) {
// We know there's a single time column, because we've checked columnMappings.hasUniqueOutputColumnNames().
final int timeColumn = columnMappings.getOutputColumnsByName(ColumnHolder.TIME_COLUMN_NAME).getInt(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ public static boolean isExport(final MSQSpec querySpec)
}

/**
* Returns true if the task reads from the same table as the destionation. In this case, we would prefer to fail
* Returns true if the task reads from the same table as the destination. In this case, we would prefer to fail
* instead of reading any unused segments to ensure that old data is not read.
*/
public static boolean isReplaceInputDataSourceTask(MSQSpec querySpec)
Expand Down
Loading

0 comments on commit 81dc604

Please sign in to comment.