Skip to content

Commit

Permalink
[test](show_data) test the correctness of data statistics in cloud mo…
Browse files Browse the repository at this point in the history
…de (#44947)
  • Loading branch information
pingchunzhang authored and Your Name committed Dec 12, 2024
1 parent 5890d03 commit 02e7f0d
Show file tree
Hide file tree
Showing 8 changed files with 473 additions and 0 deletions.
Binary file added aazcp.tar.gz
Binary file not shown.
7 changes: 7 additions & 0 deletions regression-test/conf/regression-conf.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,10 @@ kerberosHdfsPort=8820
metaServiceToken = "greedisgood9999"
instanceId = "default_instance_id"
multiClusterInstance = "default_instance_id"

// Object-storage settings used to reach the bucket that backs the cluster under test.
// NOTE(review): presumably read by the show_data suites that call the OSS helper plugin — confirm against the callers.
// Only the "oss" provider value is shown here; other accepted values are not visible in this file.
storageProvider = "oss"
// Presumably the access key (Ak) and secret key (Sk) for the bucket; the values here are masked
// placeholders — supply real credentials locally and never commit them.
cbsS3Ak = "*******"
cbsS3Sk = "*******"
// Service endpoint host (here an Aliyun OSS Beijing-region endpoint).
cbsS3Endpoint = "oss-cn-beijing.aliyuncs.com"
// Bucket name and key prefix — presumably the location where the cluster stores its data; verify.
cbsS3Bucket = "test-bucket"
cbsS3Prefix = "test-cluster-prefix"
5 changes: 5 additions & 0 deletions regression-test/framework/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,11 @@ under the License.
<!--Regression tests need to include this jar-->
<scope>compile</scope>
</dependency>
<!--Aliyun OSS SDK: provides the com.aliyun.oss classes imported by regression-test/plugins/aliyunOssSdk.groovy-->
<dependency>
<groupId>com.aliyun.oss</groupId>
<artifactId>aliyun-sdk-oss</artifactId>
<version>3.18.1</version>
</dependency>

</dependencies>
</project>
169 changes: 169 additions & 0 deletions regression-test/plugins/aliyunOssSdk.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@


// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import org.apache.doris.regression.suite.Suite;
import org.apache.doris.regression.Config;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import com.aliyun.oss.ClientException;
import com.aliyun.oss.OSS;
import com.aliyun.oss.OSSClientBuilder;
import com.aliyun.oss.OSSException;
import com.aliyun.oss.model.DeleteObjectsRequest;
import com.aliyun.oss.model.DeleteObjectsResult;
import com.aliyun.oss.model.ListObjectsRequest;
import com.aliyun.oss.model.OSSObjectSummary;
import com.aliyun.oss.model.ObjectListing;

import org.slf4j.Logger
import org.slf4j.LoggerFactory
import groovy.util.logging.Slf4j

// Builds an Aliyun OSS client for the given endpoint and credentials and returns it.
// Note the argument order: credentials come first here, while OSSClientBuilder.build()
// takes the endpoint first.
// The caller owns the returned client and is responsible for shutting it down.
Suite.metaClass.initOssClient = { String accessKeyId, String accessKeySecret, String endpoint ->
    OSSClientBuilder builder = new OSSClientBuilder()
    OSS ossClient = builder.build(endpoint, accessKeyId, accessKeySecret)
    return ossClient
}

// Logs the key of every object in `bucketName` whose key starts with `prefix`
// (default "": every object in the bucket).
//
// Objects are fetched page by page, up to `maxKeys` (500) keys per request, following
// the listing's next-marker until the service reports the listing is no longer truncated.
// If the bucket does not exist, a message is logged and nothing is listed.
//
// OSSException and ClientException are logged and swallowed, so this closure never
// reports failure to its caller through those exceptions. The client is NOT shut down
// here; the caller keeps ownership of it.
Suite.metaClass.listOssObjectWithPrefix = { OSS client, String bucketName, String prefix="" ->
    try {
        if (!client.doesBucketExist(bucketName)) {
            // This closure has no `endpoint` parameter or local, so the message must not
            // interpolate one: doing so would fail while building the log string.
            logger.info("no bucket named ${bucketName}")
            return
        }

        final int maxKeys = 500
        logger.info("List all objects with prefix:")

        String nextMarker = null
        ObjectListing objectListing = null
        do {
            // A null marker requests the first page; later pages resume after the last key seen.
            ListObjectsRequest request = new ListObjectsRequest(bucketName).withPrefix(prefix).withMarker(nextMarker).withMaxKeys(maxKeys)
            objectListing = client.listObjects(request)

            for (OSSObjectSummary s : objectListing.getObjectSummaries()) {
                logger.info("\t" + s.getKey())
            }

            nextMarker = objectListing.getNextMarker()
        } while (objectListing.isTruncated())
    } catch (OSSException oe) {
        logger.error("Caught an OSSException, which means your request made it to OSS, "
                + "but was rejected with an error response for some reason.")
        logger.error("Error Message: " + oe.getErrorMessage())
        logger.error("Error Code: " + oe.getErrorCode())
        logger.error("Request ID: " + oe.getRequestId())
        logger.error("Host ID: " + oe.getHostId())
    } catch (ClientException ce) {
        logger.error("Caught an ClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with OSS, "
                + "such as not being able to access the network.")
        logger.error("Error Message: " + ce.getMessage())
    } finally {
        // The client is intentionally left open: it is passed in by the caller, which
        // releases it separately (see shutDownOssClient).
        logger.info("Done!")
    }
}

// Returns the combined size of every object in `bucketName` whose key starts with `folder`.
// The value is the sum of OSSObjectSummary.getSize() over all listing pages; the listing
// is followed via its next-marker until it is no longer truncated.
// Exceptions raised by the OSS client are not caught here and propagate to the caller.
Suite.metaClass.calculateFolderLength = { OSS client, String bucketName, String folder ->
    long totalSize = 0L
    ObjectListing page = null
    // Always fetch at least one page, then continue while the service says more remain.
    while (page == null || page.isTruncated()) {
        // Ask for 1000 keys per request (the maximum; the service default is 100).
        ListObjectsRequest pageRequest = new ListObjectsRequest(bucketName).withPrefix(folder).withMaxKeys(1000)
        if (page != null) {
            // Resume right after the last key returned by the previous page.
            pageRequest.setMarker(page.getNextMarker())
        }
        page = client.listObjects(pageRequest)
        page.getObjectSummaries().each { OSSObjectSummary entry ->
            totalSize += entry.getSize()
        }
    }
    return totalSize
}

// Shuts down the given OSS client by calling its shutdown() method.
// Intended as the counterpart of initOssClient: the listing helpers in this file
// deliberately leave the client open.
Suite.metaClass.shutDownOssClient = { OSS client ->
    client.shutdown()
}



// Logs the size, in GB (bytes / 1024^3), of each entry directly under `prefix` in `bucketName`.
//
// The listing uses "/" as delimiter, so keys are grouped one level below `prefix`:
//   - each common prefix ("folder") is logged with the total size of everything under it,
//     computed by calculateFolderLength;
//   - each object directly at this level is logged with its own size.
// If the bucket does not exist, a message is logged and nothing is listed.
//
// OSSException and ClientException are logged and swallowed. The client is NOT shut
// down here; the caller keeps ownership of it.
Suite.metaClass.getOssAllDirSizeWithPrefix = { OSS client, String bucketName, String prefix="" ->
    try {
        if (!client.doesBucketExist(bucketName)) {
            // This closure has no `endpoint` parameter or local, so the message must not
            // interpolate one: doing so would fail while building the log string.
            logger.info("no bucket named ${bucketName}")
            return
        }

        final int bytesPerGb = 1024 * 1024 * 1024

        logger.info("List all objects with prefix:")
        ObjectListing objectListing = null
        do {
            // No max-keys is set, so each page uses the service default page size.
            ListObjectsRequest request = new ListObjectsRequest(bucketName).withDelimiter("/").withPrefix(prefix)
            if (objectListing != null) {
                // Resume right after the last entry returned by the previous page.
                request.setMarker(objectListing.getNextMarker())
            }
            objectListing = client.listObjects(request)

            // "Folders": key groups that share the next path segment below the prefix.
            List<String> folders = objectListing.getCommonPrefixes()
            for (String folder : folders) {
                logger.info(folder + " : " + (calculateFolderLength(client, bucketName, folder) / bytesPerGb) + "GB")
            }

            // Objects that sit directly at this level.
            List<OSSObjectSummary> sums = objectListing.getObjectSummaries()
            for (OSSObjectSummary s : sums) {
                logger.info(s.getKey() + " : " + (s.getSize() / bytesPerGb) + "GB")
            }
        } while (objectListing.isTruncated())
    } catch (OSSException oe) {
        logger.error("Caught an OSSException, which means your request made it to OSS, "
                + "but was rejected with an error response for some reason.")
        logger.error("Error Message: " + oe.getErrorMessage())
        logger.error("Error Code: " + oe.getErrorCode())
        logger.error("Request ID: " + oe.getRequestId())
        logger.error("Host ID: " + oe.getHostId())
    } catch (ClientException ce) {
        logger.error("Caught an ClientException, which means the client encountered "
                + "a serious internal problem while trying to communicate with OSS, "
                + "such as not being able to access the network.")
        logger.error("Error Message: " + ce.getMessage())
    } finally {
        // The client is intentionally left open: it is passed in by the caller, which
        // releases it separately (see shutDownOssClient).
        logger.info("Done!")
    }
}



2 changes: 2 additions & 0 deletions regression-test/suites/show_data/ddl/lineitem_delete.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Deletes every row whose L_ORDERKEY is non-negative from the table named by the
-- table placeholder, which must be substituted before this statement is executed.
DELETE FROM ${table} WHERE L_ORDERKEY >= 0;

25 changes: 25 additions & 0 deletions regression-test/suites/show_data/ddl/lineitem_dup.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- TPC-H style lineitem table using the DUPLICATE KEY model.
-- The table is named lineitem_dup to match this file (lineitem_dup.sql). It was
-- previously created as lineitem_mow, the same name used by lineitem_mow.sql, so with
-- IF NOT EXISTS whichever of the two files ran second in a database did nothing.
CREATE TABLE IF NOT EXISTS lineitem_dup (
    L_ORDERKEY      INTEGER NOT NULL,
    L_PARTKEY       INTEGER NOT NULL,
    L_SUPPKEY       INTEGER NOT NULL,
    L_LINENUMBER    INTEGER NOT NULL,
    L_QUANTITY      DECIMAL(15,2) NOT NULL,
    L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
    L_DISCOUNT      DECIMAL(15,2) NOT NULL,
    L_TAX           DECIMAL(15,2) NOT NULL,
    L_RETURNFLAG    CHAR(1) NOT NULL,
    L_LINESTATUS    CHAR(1) NOT NULL,
    L_SHIPDATE      DATE NOT NULL,
    L_COMMITDATE    DATE NOT NULL,
    L_RECEIPTDATE   DATE NOT NULL,
    L_SHIPINSTRUCT  CHAR(25) NOT NULL,
    L_SHIPMODE      CHAR(10) NOT NULL,
    L_COMMENT       VARCHAR(44) NOT NULL
)
DUPLICATE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER)
DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3
PROPERTIES (
    "replication_num" = "1"
)


25 changes: 25 additions & 0 deletions regression-test/suites/show_data/ddl/lineitem_mow.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- TPC-H style lineitem table using the UNIQUE KEY model, hash-distributed on
-- L_ORDERKEY into 3 buckets with a single replica.
-- NOTE(review): "mow" presumably stands for merge-on-write; no such property is set
-- explicitly here, so that behaviour depends on the server default — confirm.
CREATE TABLE IF NOT EXISTS lineitem_mow (
    L_ORDERKEY      INTEGER NOT NULL,
    L_PARTKEY       INTEGER NOT NULL,
    L_SUPPKEY       INTEGER NOT NULL,
    L_LINENUMBER    INTEGER NOT NULL,
    L_QUANTITY      DECIMAL(15,2) NOT NULL,
    L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
    L_DISCOUNT      DECIMAL(15,2) NOT NULL,
    L_TAX           DECIMAL(15,2) NOT NULL,
    L_RETURNFLAG    CHAR(1) NOT NULL,
    L_LINESTATUS    CHAR(1) NOT NULL,
    L_SHIPDATE      DATE NOT NULL,
    L_COMMITDATE    DATE NOT NULL,
    L_RECEIPTDATE   DATE NOT NULL,
    L_SHIPINSTRUCT  CHAR(25) NOT NULL,
    L_SHIPMODE      CHAR(10) NOT NULL,
    L_COMMENT       VARCHAR(44) NOT NULL
)
UNIQUE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER)
DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3
PROPERTIES (
    "replication_num" = "1"
)


Loading

0 comments on commit 02e7f0d

Please sign in to comment.