diff --git a/README.md b/README.md index d48f31e..bd271ef 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ Conversely, Arrow is an in-memory format meant for direct and efficient use for ### Requirements - kdb+ ≥ 3.5 64-bit (Linux/MacOS/Windows) -- Apache Arrow = 9.0.0 (or ≥ 2.0.0 if building `arrowkdb` from source) +- Apache Arrow = 9.0.0 (or ≥ 6.0.0 if building `arrowkdb` from source) - C++14 or later - CMake ≥ 3.1.3 diff --git a/docs/reference.md b/docs/reference.md index e150c04..d33c10f 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -156,6 +156,11 @@ These functions are exposed within the `.arrowkdb` namespace, allowing users to kdb+ list [pq.readParquetToTable](#pqreadparquettotable) Read an Arrow table from a Parquet file and convert to a kdb+ table + [pq.readParquetNumRowGroups](#pqreadparquetnumrowgroups) Read the number of row groups used by a Parquet file + [pq.readParquetRowGroups](#pqreadparquetrowgroups) Read a set of row groups from a Parquet file into an Arrow + table then convert to a kdb+ mixed list of array data + [pq.readParquetRowGroupsToTable](#pqreadparquetrowgroupstotable) Read a set of row groups from a Parquet file into an Arrow + table then convert to a kdb+ table [Arrow IPC files](#arrow-ipc-files) [ipc.writeArrow](#ipcwritearrow) Convert a kdb+ mixed list of array data to an Arrow table diff --git a/src/GenericStore.h b/src/GenericStore.h index 0bbd6e2..4e15d21 100644 --- a/src/GenericStore.h +++ b/src/GenericStore.h @@ -55,6 +55,29 @@ class GenericStore return 0; } + /** + * @brief Adds an arrow object to the lookup maps. If an existing equal + * object is already present it will return the identifier for that instead. + * This avoids polluting the store with multiple equal objects. 
+ * + * @param value Arrow object to add + * @return Identifier for that object + */ + long AddInternal(T value) + { + if (auto equal = FindEqual(value)) + return equal; + + // Add forward lookup: long > value + long value_id = ++counter; + forward_lookup[value_id] = value; + + // Add reverse lookup: value > long + reverse_lookup[value] = value_id; + + return value_id; + } + public: /** * @brief Returns the singlton instance, constructing it not already existing @@ -81,17 +104,7 @@ class GenericStore // Get write lock std::unique_lock lock(mutex); - if (auto equal = FindEqual(value)) - return equal; - - // Add forward lookup: long > value - long value_id = ++counter; - forward_lookup[value_id] = value; - - // Add reverse lookup: value > long - reverse_lookup[value] = value_id; - - return value_id; + return AddInternal(value); } /** @@ -147,15 +160,16 @@ class GenericStore */ long ReverseFind(T value) { - // Get read lock - std::shared_lock lock(mutex); + // Get write lock + std::unique_lock lock(mutex); // Reverse lookup is only used internally by the interface so insert the // object if it's not already present. This avoids having to add this logic // into all the calling functions. auto lookup = reverse_lookup.find(value); - if (lookup == reverse_lookup.end()) - return Add(value); + if (lookup == reverse_lookup.end()) { + return AddInternal(value); + } else return lookup->second; }