Skip to content

Commit

Permalink
Initial Implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
TomHodson committed Jul 31, 2024
1 parent baf3b3b commit 15bc01f
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/odc/codec/CodecOptimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ int CodecOptimizer::setOptimalCodecs(core::MetaData& columns)
n = col.coder().numStrings();
if (n == 1 && col.coder().dataSizeDoubles() == 1)
codec = "constant_string";
else if (n == 1)
codec = "long_constant_string";
else if(n < 256)
codec = "int8_string";
else if(n < 65536)
Expand Down
41 changes: 40 additions & 1 deletion src/odc/codec/Integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ class CodecIntegerOffset : public BaseCodecInteger<ByteOrder, ValueType> {
}
};


//----------------------------------------------------------------------------------------------------------------------

template <typename ByteOrder,
Expand Down Expand Up @@ -185,6 +184,46 @@ struct CodecInt32 : public CodecIntegerDirect<ByteOrder, ValueType, int32_t, Cod
using CodecIntegerDirect<ByteOrder, ValueType, int32_t, CodecInt32<ByteOrder, ValueType>>::CodecIntegerDirect;
};

//----------------------------------------------------------------------------------------------------------------------
// A zero-width integer codec that can stand in for CodecInt8/16/32 as the index codec of
// IntStringCodecBase. It is designed so that elsewhere we can write
//     template<typename ByteOrder>
//     struct CodecLongConstantString
//         : public IntStringCodecBase<ByteOrder,
//                                     CodecZeroBitInteger<ByteOrder, int64_t, CodecLongConstantString<ByteOrder>>>
// CodecZeroBitInteger writes no data at all: on encoding it only asserts that the supplied index is zero,
// and on decoding it always yields zero, which selects the single string saved in the header by
// IntStringCodecBase.
//
// Template parameters:
//   ByteOrder    - forwarded unchanged to BaseCodecInteger.
//   ValueType    - integer type the double-sized slot is reinterpreted as; must be the same size as a double.
//   DerivedCodec - type providing a static codec_name(), whose result is passed to BaseCodecInteger.

template <typename ByteOrder, typename ValueType, class DerivedCodec>
class CodecZeroBitInteger : public BaseCodecInteger<ByteOrder, ValueType> {

public: // methods

    CodecZeroBitInteger(api::ColumnType type) :
        BaseCodecInteger<ByteOrder, ValueType>(type, DerivedCodec::codec_name()) {}
    ~CodecZeroBitInteger() override {}

private: // methods

    // Checks that the value to encode is zero and writes nothing.
    // Returns p unchanged, because no bytes were appended to the output buffer.
    unsigned char* encode(unsigned char* p, const double& d) override {
        static_assert(sizeof(ValueType) == sizeof(d), "unsafe casting check");
        // The slot holds an integer stored in the bytes of a double; view it as ValueType.
        const ValueType& val(reinterpret_cast<const ValueType&>(d));
        // Only index zero is representable by this codec; anything else is invalid input.
        ASSERT(val == 0);
        return p; // no data was encoded so this pointer is unchanged
    }

    // Stores the integer value zero (as ValueType) in the slot pointed to by out.
    // Nothing is read from the encoded stream.
    void decode(double* out) override {
        // Compare against the pointee: sizeof(out) would be the size of the pointer, not of the double.
        static_assert(sizeof(ValueType) == sizeof(*out), "unsafe casting check");

        ValueType* val_out = reinterpret_cast<ValueType*>(out);
        (*val_out) = 0;
    }

    void skip() override {
        // no data was encoded so we don't need to skip anything
    }
};

//----------------------------------------------------------------------------------------------------------------------

} // namespace codec
Expand Down
1 change: 1 addition & 0 deletions src/odc/codec/String.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace codec {

namespace {
    // File-local builder objects, one per string codec declared in String.h.
    // NOTE(review): presumably constructing a core::CodecBuilder registers the codec so it can be
    // looked up by name - confirm against core::CodecBuilder.
    core::CodecBuilder<CodecChars> charsBuilder;
    // Named in lowerCamel like the other builders in this namespace.
    core::CodecBuilder<CodecLongConstantString> longConstantStringBuilder;
    core::CodecBuilder<CodecInt8String> int8StringBuilder;
    core::CodecBuilder<CodecInt16String> int16StringBuilder;
}
Expand Down
8 changes: 8 additions & 0 deletions src/odc/codec/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,14 @@ class IntStringCodecBase : public CodecChars<ByteOrder> {

//----------------------------------------------------------------------------------------------------------------------

// Codec for a column whose strings are all the same value. It reuses the machinery behind
// CodecInt8String/CodecInt16String etc, but only ever keeps one string in the header.
// The index codec, CodecZeroBitInteger, encodes no data and always decodes to index 0.
template<typename ByteOrder>
struct CodecLongConstantString
    : public IntStringCodecBase<ByteOrder, CodecZeroBitInteger<ByteOrder, int64_t, CodecLongConstantString<ByteOrder>>> {

private: // types

    // Short names for the index codec and for the base class it is plugged into.
    using ZeroIndexCodec = CodecZeroBitInteger<ByteOrder, int64_t, CodecLongConstantString<ByteOrder>>;
    using StringBase     = IntStringCodecBase<ByteOrder, ZeroIndexCodec>;

public: // methods

    // Name passed to the base class for this codec.
    static constexpr const char* codec_name() { return "long_constant_string"; }

    CodecLongConstantString(api::ColumnType type) : StringBase(type, codec_name()) {}

    ~CodecLongConstantString() override = default;
};

template<typename ByteOrder>
struct CodecInt8String : public IntStringCodecBase<ByteOrder, CodecInt8<ByteOrder, int64_t>> {
Expand Down

0 comments on commit 15bc01f

Please sign in to comment.