-
Notifications
You must be signed in to change notification settings - Fork 919
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[KYUUBI #6862] Spark 3.3: MaxScanStrategy supports DSv2
### Why are the changes needed? Backport #5852 to Spark 3.3, to enhance MaxScanStrategy to include support for the datasourcev2 in Spark 3.3 ### How was this patch tested? Add some UTs ### Was this patch authored or co-authored using generative AI tooling? No Closes #6862 from zhaohehuhu/dev-1225. Closes #6862 c745eda [zhaohehuhu] MaxScanStrategy supports DSv2 in Spark 3.3 Authored-by: zhaohehuhu <[email protected]> Signed-off-by: Cheng Pan <[email protected]>
- Loading branch information
1 parent
f844afa
commit 117e56c
Showing
4 changed files
with
184 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
64 changes: 64 additions & 0 deletions
64
...3-3/src/test/scala/org/apache/spark/sql/ReportStatisticsAndPartitionAwareDataSource.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql | ||
|
||
import java.util.OptionalLong | ||
|
||
import org.apache.spark.sql.connector.{RangeInputPartition, SimpleBatchTable, SimpleScanBuilder, SimpleWritableDataSource} | ||
import org.apache.spark.sql.connector.catalog.Table | ||
import org.apache.spark.sql.connector.expressions.{Expressions, FieldReference, Transform} | ||
import org.apache.spark.sql.connector.read._ | ||
import org.apache.spark.sql.connector.read.partitioning.{KeyGroupedPartitioning, Partitioning} | ||
import org.apache.spark.sql.util.CaseInsensitiveStringMap | ||
|
||
class ReportStatisticsAndPartitionAwareDataSource extends SimpleWritableDataSource { | ||
|
||
class MyScanBuilder( | ||
val partitionKeys: Seq[String]) extends SimpleScanBuilder | ||
with SupportsReportStatistics with SupportsReportPartitioning { | ||
|
||
override def estimateStatistics(): Statistics = { | ||
new Statistics { | ||
override def sizeInBytes(): OptionalLong = OptionalLong.of(80) | ||
|
||
override def numRows(): OptionalLong = OptionalLong.of(10) | ||
|
||
} | ||
} | ||
|
||
override def planInputPartitions(): Array[InputPartition] = { | ||
Array(RangeInputPartition(0, 5), RangeInputPartition(5, 10)) | ||
} | ||
|
||
override def outputPartitioning(): Partitioning = { | ||
new KeyGroupedPartitioning(partitionKeys.map(FieldReference(_)).toArray, 10) | ||
} | ||
} | ||
|
||
override def getTable(options: CaseInsensitiveStringMap): Table = { | ||
new SimpleBatchTable { | ||
override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { | ||
new MyScanBuilder(Seq("i")) | ||
} | ||
|
||
override def partitioning(): Array[Transform] = { | ||
Array(Expressions.identity("i")) | ||
} | ||
} | ||
} | ||
} |
53 changes: 53 additions & 0 deletions
53
...-extension-spark-3-3/src/test/scala/org/apache/spark/sql/ReportStatisticsDataSource.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql | ||
|
||
import java.util.OptionalLong | ||
|
||
import org.apache.spark.sql.connector._ | ||
import org.apache.spark.sql.connector.catalog.Table | ||
import org.apache.spark.sql.connector.read._ | ||
import org.apache.spark.sql.util.CaseInsensitiveStringMap | ||
|
||
class ReportStatisticsDataSource extends SimpleWritableDataSource { | ||
|
||
class MyScanBuilder extends SimpleScanBuilder | ||
with SupportsReportStatistics { | ||
|
||
override def estimateStatistics(): Statistics = { | ||
new Statistics { | ||
override def sizeInBytes(): OptionalLong = OptionalLong.of(80) | ||
|
||
override def numRows(): OptionalLong = OptionalLong.of(10) | ||
} | ||
} | ||
|
||
override def planInputPartitions(): Array[InputPartition] = { | ||
Array(RangeInputPartition(0, 5), RangeInputPartition(5, 10)) | ||
} | ||
|
||
} | ||
|
||
override def getTable(options: CaseInsensitiveStringMap): Table = { | ||
new SimpleBatchTable { | ||
override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { | ||
new MyScanBuilder | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters