From 15bc01f622d0eaf720c5ae4311cf7595e19c6288 Mon Sep 17 00:00:00 2001 From: Tom Date: Wed, 31 Jul 2024 11:37:42 +0100 Subject: [PATCH] Initial Implementation --- src/odc/codec/CodecOptimizer.h | 2 ++ src/odc/codec/Integer.h | 41 +++++++++++++++++++++++++++++++++- src/odc/codec/String.cc | 1 + src/odc/codec/String.h | 8 +++++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/odc/codec/CodecOptimizer.h b/src/odc/codec/CodecOptimizer.h index 48ea6c9f..3e2d257e 100644 --- a/src/odc/codec/CodecOptimizer.h +++ b/src/odc/codec/CodecOptimizer.h @@ -96,6 +96,8 @@ int CodecOptimizer::setOptimalCodecs(core::MetaData& columns) n = col.coder().numStrings(); if (n == 1 && col.coder().dataSizeDoubles() == 1) codec = "constant_string"; + else if (n == 1) + codec = "long_constant_string"; else if(n < 256) codec = "int8_string"; else if(n < 65536) diff --git a/src/odc/codec/Integer.h b/src/odc/codec/Integer.h index 90bc4a99..ca48c82f 100644 --- a/src/odc/codec/Integer.h +++ b/src/odc/codec/Integer.h @@ -121,7 +121,6 @@ class CodecIntegerOffset : public BaseCodecInteger { } }; - //---------------------------------------------------------------------------------------------------------------------- template >::CodecIntegerDirect; }; +//---------------------------------------------------------------------------------------------------------------------- +// This codec can stand in for CodecInt8/16/32 as the index codec of a string codec, but it writes no data. +// It is designed so that elsewhere we can write +// template <...> +// struct CodecLongConstantString : public IntStringCodecBase<..., CodecZeroBitInteger<...>> +// CodecZeroBitInteger doesn't encode any data: on encoding it only asserts that every provided index is zero, +// and on decoding it always yields index zero, which selects the single string saved in the header by IntStringCodecBase + +template +class CodecZeroBitInteger: public BaseCodecInteger { + +public: // methods + + CodecZeroBitInteger(api::ColumnType type) : BaseCodecInteger(type, DerivedCodec::codec_name()) {} +
~CodecZeroBitInteger() override {} + +private: // methods + + unsigned char* encode(unsigned char* p, const double& d) override { + static_assert(sizeof(ValueType) == sizeof(d), "unsafe casting check"); + // Nothing is written to the stream: every index handed to this codec is expected + // to be zero. The value is reinterpreted as ValueType only so that the assertion + // below can reject any other index, which would mean invalid data got here. + const ValueType& val(reinterpret_cast<const ValueType&>(d)); + ASSERT(val == 0); + return p; // no data was encoded so this pointer is unchanged + } + + void decode(double* out) override { + static_assert(sizeof(ValueType) == sizeof(*out), "unsafe casting check"); // size of the pointee, not of the pointer + + ValueType* val_out = reinterpret_cast<ValueType*>(out); + (*val_out) = 0; + } + + void skip() override { + // no data was encoded so we don't need to skip anything + } +}; + //---------------------------------------------------------------------------------------------------------------------- } // namespace codec diff --git a/src/odc/codec/String.cc b/src/odc/codec/String.cc index 0082ba69..17112b56 100644 --- a/src/odc/codec/String.cc +++ b/src/odc/codec/String.cc @@ -21,6 +21,7 @@ namespace codec { namespace { core::CodecBuilder charsBuilder; + core::CodecBuilder LongConstantStringBuilder; core::CodecBuilder int8StringBuilder; core::CodecBuilder int16StringBuilder; } diff --git a/src/odc/codec/String.h b/src/odc/codec/String.h index c17584f8..6accb7d4 100644 --- a/src/odc/codec/String.h +++ b/src/odc/codec/String.h @@ -210,6 +210,14 @@ class IntStringCodecBase : public CodecChars { //---------------------------------------------------------------------------------------------------------------------- +// This class uses the same machinery as CodecInt8String/CodecInt16String etc but it only ever stores one string in the header +// CodecZeroBitInteger just doesn't encode any data and always returns index 0 when decoding. 
+template +struct CodecLongConstantString : public IntStringCodecBase>> { + constexpr static const char* codec_name() { return "long_constant_string"; } /* same name CodecOptimizer::setOptimalCodecs assigns when numStrings() is 1 and the "constant_string" case does not apply */ + CodecLongConstantString(api::ColumnType type) : IntStringCodecBase>>(type, codec_name()) {} /* forwards the column type and this codec's name to IntStringCodecBase */ + ~CodecLongConstantString() override {} +}; template struct CodecInt8String : public IntStringCodecBase> {