Skip to content

Commit

Permalink
[feat][sdk] Support langchain expr encode
Browse files Browse the repository at this point in the history
  • Loading branch information
wchuande authored and ketor committed Apr 3, 2024
1 parent e7b1200 commit f56df68
Show file tree
Hide file tree
Showing 23 changed files with 1,591 additions and 15 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,6 @@
[submodule "contrib/hdf5"]
path = contrib/hdf5
url = https://github.com/HDFGroup/hdf5.git
[submodule "contrib/nlohmann-json"]
path = contrib/nlohmann-json
url = https://github.com/nlohmann/json.git
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ endif()
if(THIRD_PARTY_BUILD_TYPE MATCHES "Debug")
set(CMAKE_STATIC_LIBRARY_SUFFIX "d.a")
endif()
# Per-step logging switches shared by the third-party ExternalProject_Add()
# calls that expand ${EXTERNAL_PROJECT_LOG_ARGS}.
# A value of 1 sends that step's output to a log file; 0 leaves it on the
# console.
set(EXTERNAL_PROJECT_LOG_ARGS
    LOG_DOWNLOAD 0
    LOG_UPDATE 1
    LOG_CONFIGURE 0
    LOG_BUILD 0
    LOG_TEST 1
    LOG_INSTALL 0
)

if (CMAKE_CXX_STANDARD EQUAL 23)
set(CMAKE_CXX_STANDARD 23)
Expand Down Expand Up @@ -202,6 +209,7 @@ include(rocksdb)
include(bdb)
include(brpc)
include(braft)
include(nlohmann)

if(BUILD_BENCHMARK STREQUAL "ON")
include(hdf5)
Expand Down Expand Up @@ -271,6 +279,7 @@ include_directories(${GFLAGS_INCLUDE_DIR})
include_directories(${ROCKSDB_INCLUDE_DIR})
include_directories(${BDB_INCLUDE_DIR})
include_directories(${RAPIDJSON_INCLUDE_DIR})
include_directories(${NLOHMANN_JSON_INCLUDE_DIR})
if (ENABLE_XDPROCKS)
include_directories(${XDPROCKS_PATH}/include)
endif()
Expand Down Expand Up @@ -353,6 +362,7 @@ set(DEPEND_LIBS
libbacktrace
bdb
rapidjson
nlohmann-json
)

if(WITH_MKL)
Expand Down
44 changes: 44 additions & 0 deletions cmake/nlohmann.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright (c) 2023 dingodb.com, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(ExternalProject)
message(STATUS "Include nlohmann json...")

# Locations used for the nlohmann/json dependency:
#   - sources come from the checkout under contrib/nlohmann-json,
#   - build and install trees live under ${THIRD_PARTY_PATH}.
set(NLOHMANN_JSON_SOURCES_DIR ${CMAKE_SOURCE_DIR}/contrib/nlohmann-json)
set(NLOHMANN_JSON_BINARY_DIR ${THIRD_PARTY_PATH}/build/nlohmann-json)
set(NLOHMANN_JSON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nlohmann-json)

# Forced into the cache so it always tracks the install directory above.
set(NLOHMANN_JSON_INCLUDE_DIR
    "${NLOHMANN_JSON_INSTALL_DIR}/include"
    CACHE PATH "nlohmann_json include directory." FORCE
)

# Configure, build and install nlohmann/json as the external project target
# `nlohmann-json`. No imported library target is created for it in this file.
ExternalProject_Add(
  nlohmann-json
  ${EXTERNAL_PROJECT_LOG_ARGS}

  SOURCE_DIR ${NLOHMANN_JSON_SOURCES_DIR}
  BINARY_DIR ${NLOHMANN_JSON_BINARY_DIR}
  PREFIX ${NLOHMANN_JSON_BINARY_DIR}

  # The sources are already on disk, so the update step is disabled.
  UPDATE_COMMAND ""

  # NOTE(review): CMAKE_CXX_FLAGS is taken from GLOG_CMAKE_CXX_FLAGS, which is
  # not set in this file -- presumably defined by the glog module; confirm it
  # is included before this one.
  CMAKE_ARGS
    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
    -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}
    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
    -DCMAKE_INSTALL_PREFIX=${NLOHMANN_JSON_INSTALL_DIR}
    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
    -DJSON_BuildTests=OFF
    -DJSON_MultipleHeaders=ON
    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
)
14 changes: 1 addition & 13 deletions cmake/rapidjson.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,11 @@ SET(RAPIDJSON_SOURCES_DIR ${CMAKE_SOURCE_DIR}/contrib/rapidjson)
SET(RAPIDJSON_BINARY_DIR ${THIRD_PARTY_PATH}/build/rapidjson)
SET(RAPIDJSON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/rapidjson)
SET(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_INSTALL_DIR}/include" CACHE PATH "rapidjson include directory." FORCE)
SET(RAPIDJSON_LIBRARIES "${RAPIDJSON_INSTALL_DIR}/lib/librapidjson.a" CACHE FILEPATH "rapidjson library." FORCE)


ExternalProject_Add(
extern_rapidjson
rapidjson
${EXTERNAL_PROJECT_LOG_ARGS}

DEPENDS gtest

SOURCE_DIR ${RAPIDJSON_SOURCES_DIR}
BINARY_DIR ${RAPIDJSON_BINARY_DIR}
PREFIX ${RAPIDJSON_BINARY_DIR}
Expand All @@ -38,19 +34,11 @@ ExternalProject_Add(
-DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
# -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
# -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
# -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_INSTALL_PREFIX=${RAPIDJSON_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
# -DWITH_CUSTOM_PREFIX=ON
-DBUILD_SHARED_LIBS=OFF
-DRAPIDJSON_BUILD_DOC=OFF
-DRAPIDJSON_BUILD_EXAMPLES=OFF
-DRAPIDJSON_BUILD_TESTS=
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
)

ADD_LIBRARY(rapidjson STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET rapidjson PROPERTY IMPORTED_LOCATION ${RAPIDJSON_LIBRARIES})
ADD_DEPENDENCIES(rapidjson extern_rapidjson)
1 change: 1 addition & 0 deletions contrib/nlohmann-json
Submodule nlohmann-json added at 9cca28
2 changes: 1 addition & 1 deletion src/example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ add_executable(sdk_vector_example
sdk_vector_example.cc)
target_link_libraries(sdk_vector_example
sdk
)
)
8 changes: 7 additions & 1 deletion src/sdk/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

add_library(sdk
admin_tool.cc
client_stub.cc
Expand Down Expand Up @@ -44,6 +45,11 @@ add_library(sdk
vector/vector_search_task.cc
utils/thread_pool_actuator.cc
common/param_config.cc
expression/coding.cc
expression/langchain_expr_encoder.cc
expression/langchain_expr_factory.cc
expression/langchain_expr.cc
expression/types.cc
# TODO: use library
${PROJECT_SOURCE_DIR}/src/coordinator/coordinator_interaction.cc
${PROJECT_SOURCE_DIR}/src/common/role.cc
Expand Down Expand Up @@ -100,6 +106,6 @@ target_link_libraries(sdk
PRIVATE
$<TARGET_OBJECTS:PROTO_OBJS>
${SDK_LIBS}
)
)

add_dependencies(sdk ${DEPEND_LIBS})
80 changes: 80 additions & 0 deletions src/sdk/expression/coding.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (c) 2023 dingodb.com, Inc. All Rights Reserved
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sdk/expression/coding.h"

#include <cstddef>
#include <cstring>
#include <stdexcept>
#include <vector>

#include "sdk/expression/encodes.h"

namespace dingodb {
namespace sdk {
namespace expression {

// Appends the object representation of `value` to `dst` as 4 bytes, most
// significant byte first, so the output does not depend on host byte order.
//
// value: number to encode.
// dst:   output buffer; must not be null. Existing contents are preserved.
void EncodeFloat(float value, std::string* dst) {
  // The bit copy below is only meaningful when both types have the same size.
  static_assert(sizeof(float) == sizeof(std::uint32_t), "EncodeFloat requires a 32-bit float");

  std::uint32_t bits = 0;
  std::memcpy(&bits, &value, sizeof(bits));

  // Walk from the top byte down to the bottom byte.
  for (int shift = 24; shift >= 0; shift -= 8) {
    dst->push_back(static_cast<char>((bits >> shift) & 0xFF));
  }
}

// Appends the object representation of `value` to `dst` as 8 bytes, most
// significant byte first, so the output does not depend on host byte order.
//
// value: number to encode.
// dst:   output buffer; must not be null. Existing contents are preserved.
void EncodeDouble(double value, std::string* dst) {
  // The bit copy below is only meaningful when both types have the same size.
  static_assert(sizeof(double) == sizeof(std::uint64_t), "EncodeDouble requires a 64-bit double");

  std::uint64_t bits = 0;
  std::memcpy(&bits, &value, sizeof(bits));

  // Walk from the top byte down to the bottom byte.
  for (int shift = 56; shift >= 0; shift -= 8) {
    dst->push_back(static_cast<char>((bits >> shift) & 0xFF));
  }
}

// Appends `value` to `dst` as a length-prefixed string: first the byte count
// as a varint, then the raw bytes.
//
// The count is narrowed to 32 bits before it is encoded, so only strings
// shorter than 2^32 bytes are written in full.
void EncodeString(const std::string& value, std::string* dst) {
  const auto length = static_cast<uint32_t>(value.size());
  EncodeVarint(length, dst);
  dst->append(value.data(), length);
}

std::string BytesToHexString(const std::string& bytes) {
const char* hex_code = "0123456789ABCDEF";
std::string r;
r.reserve(bytes.length() * 2);
for (Byte b : bytes) {
r.push_back(hex_code[(b >> 4) & 0xF]);
r.push_back(hex_code[b & 0xF]);
}
return r;
}

// Returns the numeric value (0-15) of one hexadecimal digit, or -1 when `c`
// is not a hexadecimal digit.
static int HexDigitValue(char c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  return -1;
}

// Decodes a hexadecimal string into raw bytes, two digits per byte with the
// high nibble first. Upper- and lower-case digits are both accepted.
//
// hex: text to decode. If its length is odd, the final lone digit is decoded
//      as a byte holding that digit's value (e.g. "ABC" -> 0xAB 0x0C).
//
// Returns the decoded bytes; an empty input yields an empty string.
// Throws std::invalid_argument if any character is not a hexadecimal digit.
// Signs, whitespace and "0x" prefixes are rejected rather than silently
// mis-decoded.
std::string HexStringToBytes(const std::string& hex) {
  std::string bytes;
  bytes.reserve((hex.size() + 1) / 2);

  // std::size_t index: cannot wrap before reaching hex.size().
  for (std::size_t i = 0; i < hex.size(); i += 2) {
    int value = HexDigitValue(hex[i]);
    if (value < 0) {
      throw std::invalid_argument("HexStringToBytes: invalid hex digit");
    }
    if (i + 1 < hex.size()) {
      const int low = HexDigitValue(hex[i + 1]);
      if (low < 0) {
        throw std::invalid_argument("HexStringToBytes: invalid hex digit");
      }
      value = (value << 4) | low;
    }
    bytes.push_back(static_cast<char>(value));
  }
  return bytes;
}

} // namespace expression
} // namespace sdk
} // namespace dingodb
52 changes: 52 additions & 0 deletions src/sdk/expression/coding.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@

// Copyright (c) 2023 dingodb.com, Inc. All Rights Reserved
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DINGODB_SDK_EXPRESSION_CODING_H_
#define DINGODB_SDK_EXPRESSION_CODING_H_

#include <cstdint>
#include <string>
#include <vector>

#include "sdk/expression/encodes.h"

namespace dingodb {
namespace sdk {
namespace expression {

template <typename T>
void EncodeVarint(T value, std::string* dst) {
while (value >= 0x80) {
dst->append(sizeof(Byte), static_cast<Byte>((value & 0x7F) | 0x80));
value >>= 7;
}
dst->append(sizeof(Byte), static_cast<Byte>(value));
}

void EncodeFloat(float value, std::string* dst);

void EncodeDouble(double value, std::string* dst);

void EncodeString(const std::string& value, std::string* dst);

std::string BytesToHexString(const std::string& bytes);

std::string HexStringToBytes(const std::string& hex);

} // namespace expression
} // namespace sdk
} // namespace dingodb

#endif // DINGODB_SDK_EXPRESSION_CODING_H_
63 changes: 63 additions & 0 deletions src/sdk/expression/encodes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) 2023 dingodb.com, Inc. All Rights Reserved
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef DINGODB_SDK_EXPRESSION_ENCODES_H_
#define DINGODB_SDK_EXPRESSION_ENCODES_H_

namespace dingodb {
namespace sdk {
namespace expression {

// Unsigned byte type used for every tag value declared below.
using Byte = unsigned char;

// All tags are compile-time constants with internal linkage, so including
// this header from several translation units is safe.

constexpr Byte FILTER = 0x71;

// operator
constexpr Byte NOT = 0x51;
constexpr Byte AND = 0x52;
constexpr Byte OR = 0x53;

// comparator
constexpr Byte EQ = 0x91;
constexpr Byte GE = 0x92;
constexpr Byte GT = 0x93;
constexpr Byte LE = 0x94;
constexpr Byte LT = 0x95;
constexpr Byte NE = 0x96;

// var
constexpr Byte VAR = 0x30;

// const
// NOTE(review): CONST_N presumably marks a negative constant -- confirm
// against the encoder.
constexpr Byte NULL_PREFIX = 0x00;
constexpr Byte CONST = 0x10;
constexpr Byte CONST_N = 0x20;

// type
constexpr Byte TYPE_NULL = 0x00;
constexpr Byte TYPE_INT32 = 0x01;
constexpr Byte TYPE_INT64 = 0x02;
constexpr Byte TYPE_BOOL = 0x03;
constexpr Byte TYPE_FLOAT = 0x04;
constexpr Byte TYPE_DOUBLE = 0x05;
constexpr Byte TYPE_DECIMAL = 0x06;
constexpr Byte TYPE_STRING = 0x07;

// Shares the value 0x00 with NULL_PREFIX and TYPE_NULL.
// NOTE(review): presumably the end-of-expression marker -- confirm.
constexpr Byte EOE = 0x00;

} // namespace expression
} // namespace sdk
} // namespace dingodb

#endif // DINGODB_SDK_EXPRESSION_ENCODES_H_
Loading

0 comments on commit f56df68

Please sign in to comment.