Skip to content

Commit

Permalink
fix some compiler check
Browse files Browse the repository at this point in the history
  • Loading branch information
suxiaogang223 committed Dec 24, 2024
1 parent 3b5310c commit 863aa3d
Show file tree
Hide file tree
Showing 12 changed files with 61 additions and 73 deletions.
3 changes: 2 additions & 1 deletion be/src/util/string_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
#include <fast_float/fast_float.h>
#include <fast_float/parse_number.h>
#include <glog/logging.h>
#include <stdlib.h>

#include <cstdlib>
// IWYU pragma: no_include <bits/std_abs.h>
#include <cmath> // IWYU pragma: keep
#include <cstdint>
Expand All @@ -40,6 +40,7 @@
#include "runtime/large_int_value.h"
#include "runtime/primitive_type.h"
#include "vec/common/int_exp.h"
#include "vec/common/string_utils/string_utils.h"
#include "vec/core/extended_types.h"
#include "vec/core/wide_integer.h"
#include "vec/data_types/data_type_decimal.h"
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/columns/column_nullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public N
_push_false_to_nullmap(num);
}

void insert_null_elements(int num) {
void insert_null_elements(size_t num) {
get_nested_column().insert_many_defaults(num);
get_null_map_column().insert_many_vals(1, num);
_has_null = true;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/serde/data_type_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ inline static NullMap revert_null_map(const NullMap* null_bytemap, size_t start,
}

res.resize(end - start);
auto* __restrict src_data = (*null_bytemap).data();
const auto* __restrict src_data = (*null_bytemap).data();
auto* __restrict res_data = res.data();
for (size_t i = 0; i < res.size(); ++i) {
res_data[i] = !src_data[i + start];
Expand Down
8 changes: 4 additions & 4 deletions be/src/vec/exec/format/arrow/arrow_stream_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ Status ArrowStreamReader::get_next_block(Block* block, size_t* read_rows, bool*

// convert arrow batch to block
auto columns = block->mutate_columns();
int batch_size = out_batches.size();
for (int i = 0; i < batch_size; i++) {
size_t batch_size = out_batches.size();
for (size_t i = 0; i < batch_size; i++) {
arrow::RecordBatch& batch = *out_batches[i];
int num_rows = batch.num_rows();
int num_columns = batch.num_columns();
auto num_rows = batch.num_rows();
auto num_columns = batch.num_columns();
for (int c = 0; c < num_columns; ++c) {
arrow::Array* column = batch.column(c).get();

Expand Down
20 changes: 9 additions & 11 deletions be/src/vec/exec/format/csv/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <cstddef>
#include <map>
#include <memory>
#include <new>
#include <ostream>
#include <utility>

Expand All @@ -45,7 +44,6 @@
#include "runtime/types.h"
#include "util/string_util.h"
#include "util/utf8_check.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/data_types/data_type_factory.hpp"
Expand Down Expand Up @@ -342,7 +340,7 @@ Status CsvReader::init_reader(bool is_load) {
(_state != nullptr && _state->trim_tailing_spaces_for_external_table_query());

_options.escape_char = _escape;
if (_params.file_attributes.text_params.collection_delimiter.size() == 0) {
if (_params.file_attributes.text_params.collection_delimiter.empty()) {
switch (_text_serde_type) {
case TTextSerdeType::JSON_TEXT_SERDE:
_options.collection_delim = ',';
Expand All @@ -356,7 +354,7 @@ Status CsvReader::init_reader(bool is_load) {
} else {
_options.collection_delim = _params.file_attributes.text_params.collection_delimiter[0];
}
if (_params.file_attributes.text_params.mapkv_delimiter.size() == 0) {
if (_params.file_attributes.text_params.mapkv_delimiter.empty()) {
switch (_text_serde_type) {
case TTextSerdeType::JSON_TEXT_SERDE:
_options.map_key_delim = ':';
Expand Down Expand Up @@ -476,7 +474,7 @@ Status CsvReader::init_reader(bool is_load) {
} else {
// For load task, the column order is same as file column order
int i = 0;
for (auto& desc [[maybe_unused]] : _file_slot_descs) {
for (const auto& desc [[maybe_unused]] : _file_slot_descs) {
_col_idxs.push_back(i++);
}
}
Expand Down Expand Up @@ -576,7 +574,7 @@ Status CsvReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {

Status CsvReader::get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type,
std::unordered_set<std::string>* missing_cols) {
for (auto& slot : _file_slot_descs) {
for (const auto& slot : _file_slot_descs) {
name_to_type->emplace(slot->col_name(), slot->type());
}
return Status::OK();
Expand Down Expand Up @@ -796,7 +794,7 @@ void CsvReader::_split_line(const Slice& line) {
Status CsvReader::_check_array_format(std::vector<Slice>& split_values, bool* is_success) {
// if not the array format, filter this line and return error url
for (int j = 0; j < _file_slot_descs.size(); ++j) {
auto slot_desc = _file_slot_descs[j];
auto* slot_desc = _file_slot_descs[j];
if (!slot_desc->is_materialized()) {
continue;
}
Expand Down Expand Up @@ -885,7 +883,7 @@ Status CsvReader::_prepare_parse(size_t* read_line, bool* is_parse_name) {
_not_trim_enclose = (!_trim_double_quotes && _enclose == '\"');
_options.converted_from_string = _trim_double_quotes;
_options.escape_char = _escape;
if (_params.file_attributes.text_params.collection_delimiter.size() == 0) {
if (_params.file_attributes.text_params.collection_delimiter.empty()) {
switch (_text_serde_type) {
case TTextSerdeType::JSON_TEXT_SERDE:
_options.collection_delim = ',';
Expand All @@ -899,7 +897,7 @@ Status CsvReader::_prepare_parse(size_t* read_line, bool* is_parse_name) {
} else {
_options.collection_delim = _params.file_attributes.text_params.collection_delimiter[0];
}
if (_params.file_attributes.text_params.mapkv_delimiter.size() == 0) {
if (_params.file_attributes.text_params.mapkv_delimiter.empty()) {
switch (_text_serde_type) {
case TTextSerdeType::JSON_TEXT_SERDE:
_options.collection_delim = ':';
Expand Down Expand Up @@ -977,8 +975,8 @@ Status CsvReader::_parse_col_names(std::vector<std::string>* col_names) {
}
ptr = _remove_bom(ptr, size);
_split_line(Slice(ptr, size));
for (size_t idx = 0; idx < _split_values.size(); ++idx) {
col_names->emplace_back(_split_values[idx].to_string());
for (auto _split_value : _split_values) {
col_names->emplace_back(_split_value.to_string());
}
return Status::OK();
}
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/exec/format/csv/csv_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ class CsvReader : public GenericReader {
vectorized::DataTypeSerDeSPtrs _serdes;
vectorized::DataTypeSerDe::FormatOptions _options;

int _value_separator_length;
int _line_delimiter_length;
size_t _value_separator_length;
size_t _line_delimiter_length;
bool _trim_double_quotes = false;
bool _trim_tailing_spaces = false;
// `should_not_trim` is to manage the case that: user do not expect to trim double quotes but enclose is double quotes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ Status NewPlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool
return Status::OK();
}
_line_reader_ctx->refresh();
int found_line_delimiter = 0;
size_t found_line_delimiter = 0;
size_t offset = 0;
bool stream_end = true;
while (!done()) {
Expand Down
16 changes: 7 additions & 9 deletions be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,9 @@
// under the License.

#pragma once
#include <stdint.h>

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <string>
#include <utility>
Expand All @@ -29,7 +27,6 @@
#include "exec/line_reader.h"
#include "io/fs/file_reader_writer_fwd.h"
#include "util/runtime_profile.h"
#include "util/slice.h"

namespace doris {
#include "common/compile_check_begin.h"
Expand Down Expand Up @@ -61,9 +58,9 @@ class BaseTextLineReaderContext : public TextLineReaderContextIf {
using FindDelimiterFunc = const uint8_t* (*)(const uint8_t*, size_t, const char*, size_t);

public:
explicit BaseTextLineReaderContext(const std::string& line_delimiter_,
explicit BaseTextLineReaderContext(std::string line_delimiter_,
const size_t line_delimiter_len_, const bool keep_cr_)
: line_delimiter(line_delimiter_),
: line_delimiter(std::move(line_delimiter_)),
line_delimiter_len(line_delimiter_len_),
keep_cr(keep_cr_) {
use_memmem = line_delimiter_len != 1 || line_delimiter != "\n" || keep_cr;
Expand Down Expand Up @@ -114,9 +111,10 @@ class BaseTextLineReaderContext : public TextLineReaderContextIf {
int mask_carriage_return = _mm256_movemask_epi8(cmp_carriage_return);

if (mask_newline != 0 || mask_carriage_return != 0) {
int pos_lf = (mask_newline != 0) ? i + __builtin_ctz(mask_newline) : INT32_MAX;
int pos_cr = (mask_carriage_return != 0) ? i + __builtin_ctz(mask_carriage_return)
: INT32_MAX;
size_t pos_lf = (mask_newline != 0) ? i + __builtin_ctz(mask_newline) : INT32_MAX;
size_t pos_cr = (mask_carriage_return != 0)
? i + __builtin_ctz(mask_carriage_return)
: INT32_MAX;
if (pos_lf < pos_cr) {
return start + pos_lf;
} else if (pos_cr < pos_lf) {
Expand Down
39 changes: 17 additions & 22 deletions be/src/vec/exec/format/json/new_json_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,18 @@
#include <gen_cpp/PlanNodes_types.h>
#include <gen_cpp/Types_types.h>
#include <glog/logging.h>
#include <inttypes.h>
#include <rapidjson/error/en.h>
#include <rapidjson/reader.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <simdjson/simdjson.h> // IWYU pragma: keep
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <cinttypes>
#include <cstdio>
#include <cstring>
#include <map>
#include <memory>
#include <ostream>
#include <string_view>
#include <utility>

Expand All @@ -50,22 +49,17 @@
#include "runtime/descriptors.h"
#include "runtime/runtime_state.h"
#include "runtime/types.h"
#include "util/defer_op.h"
#include "util/slice.h"
#include "util/uid_util.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_map.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_struct.h"
#include "vec/common/assert_cast.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/exec/format/file_reader/new_plain_text_line_reader.h"
#include "vec/exec/scan/vscanner.h"
#include "vec/json/simd_json_parser.h"

namespace doris::io {
#include "common/compile_check_begin.h"
Expand Down Expand Up @@ -808,7 +802,8 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
}
} else {
it = objectValue.FindMember(
rapidjson::Value(slot_desc->col_name().c_str(), slot_desc->col_name().size()));
rapidjson::Value(slot_desc->col_name().c_str(),
cast_set<rapidjson::SizeType>(slot_desc->col_name().size())));
}

if (it != objectValue.MemberEnd()) {
Expand Down Expand Up @@ -978,7 +973,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator
const auto& struct_value = value->GetObject();

auto sub_serdes = data_serde->get_nested_serdes();
auto struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr);
auto* struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr);

std::map<std::string, size_t> sub_col_name_to_idx;
for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) {
Expand All @@ -992,7 +987,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator
"Json file struct column `{}` subfield name isn't a String", column_name);
}

auto sub_key_char = sub.name.GetString();
const auto* sub_key_char = sub.name.GetString();
auto sub_key_length = sub.name.GetStringLength();

std::string sub_key(sub_key_char, sub_key_length);
Expand All @@ -1006,7 +1001,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator
}

for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) {
auto sub_value = sub_values[sub_col_idx];
const auto* sub_value = sub_values[sub_col_idx];

const auto& sub_col_type = type_desc.children[sub_col_idx];

Expand All @@ -1023,7 +1018,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator
}
const auto& object_value = value->GetObject();
auto sub_serdes = data_serde->get_nested_serdes();
auto map_column_ptr = assert_cast<ColumnMap*>(data_column_ptr);
auto* map_column_ptr = assert_cast<ColumnMap*>(data_column_ptr);

for (const auto& member_value : object_value) {
RETURN_IF_ERROR(_write_data_to_column(
Expand Down Expand Up @@ -1151,7 +1146,7 @@ std::string NewJsonReader::_print_json_value(const rapidjson::Value& value) {
buffer.Clear();
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
value.Accept(writer);
return std::string(buffer.GetString());
return {buffer.GetString()};
}

Status NewJsonReader::_read_one_message(std::unique_ptr<uint8_t[]>* file_buf, size_t* read_size) {
Expand Down Expand Up @@ -1614,8 +1609,8 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
"partial update, missing key column: {}",
slot_desc->col_name(), valid));
// remove this line in block
for (int i = 0; i < block.columns(); ++i) {
auto column = block.get_by_position(i).column->assume_mutable();
for (size_t index = 0; index < block.columns(); ++index) {
auto column = block.get_by_position(index).column->assume_mutable();
if (column->size() != cur_row_count) {
DCHECK(column->size() == cur_row_count + 1);
column->pop_back(1);
Expand Down Expand Up @@ -1702,7 +1697,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value&
auto sub_col_size = type_desc.children.size();
simdjson::ondemand::object struct_value = value.get_object();
auto sub_serdes = data_serde->get_nested_serdes();
auto struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr);
auto* struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr);

std::map<std::string, size_t> sub_col_name_to_idx;
for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) {
Expand Down Expand Up @@ -1735,7 +1730,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value&

//fill missing subcolumn
for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) {
if (has_value[sub_col_idx] == true) {
if (has_value[sub_col_idx]) {
continue;
}

Expand Down Expand Up @@ -1764,10 +1759,10 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value&
auto f = [](std::string_view key_view, const TypeDescriptor& type_desc,
vectorized::IColumn* column_ptr, DataTypeSerDeSPtr serde,
vectorized::DataTypeSerDe::FormatOptions serde_options, bool* valid) {
auto data_column_ptr = column_ptr;
auto* data_column_ptr = column_ptr;
auto data_serde = serde;
if (column_ptr->is_nullable()) {
auto nullable_column = static_cast<ColumnNullable*>(column_ptr);
auto* nullable_column = static_cast<ColumnNullable*>(column_ptr);

nullable_column->get_null_map_data().push_back(0);
data_column_ptr = nullable_column->get_nested_column().get_ptr().get();
Expand Down Expand Up @@ -1804,7 +1799,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value&
simdjson::ondemand::array array_value = value.get_array();

auto sub_serdes = data_serde->get_nested_serdes();
auto array_column_ptr = assert_cast<ColumnArray*>(data_column_ptr);
auto *array_column_ptr = assert_cast<ColumnArray*>(data_column_ptr);

int field_count = 0;
for (simdjson::ondemand::value sub_value : array_value) {
Expand Down
10 changes: 3 additions & 7 deletions be/src/vec/exec/format/json/new_json_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
#include <rapidjson/rapidjson.h>
#include <simdjson/common_defs.h>
#include <simdjson/simdjson.h> // IWYU pragma: keep
#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <string>
Expand All @@ -42,8 +40,6 @@
#include "vec/common/string_ref.h"
#include "vec/core/types.h"
#include "vec/exec/format/generic_reader.h"
#include "vec/json/json_parser.h"
#include "vec/json/simd_json_parser.h"

namespace simdjson {
namespace fallback {
Expand Down Expand Up @@ -230,10 +226,10 @@ class NewJsonReader : public GenericReader {
bool _skip_first_line;

std::string _line_delimiter;
int _line_delimiter_length;
size_t _line_delimiter_length;

int _next_row;
int _total_rows;
uint32_t _next_row;
size_t _total_rows;

std::string _jsonpaths;
std::string _json_root;
Expand Down
Loading

0 comments on commit 863aa3d

Please sign in to comment.