diff --git a/include/z5/io/io_n5.hxx b/include/z5/io/io_n5.hxx index c807e1ae..ba920cf3 100644 --- a/include/z5/io/io_n5.hxx +++ b/include/z5/io/io_n5.hxx @@ -1,6 +1,7 @@ #pragma once #include +#include #ifndef BOOST_FILESYSTEM_NO_DEPERECATED #define BOOST_FILESYSTEM_NO_DEPERECATED @@ -27,8 +28,9 @@ namespace io { public: + // TODO syncWrite_ should be set from options and not be hard-coded ChunkIoN5(const types::ShapeType & shape, const types::ShapeType & chunkShape) : - shape_(shape), chunkShape_(chunkShape){ + syncWrite_(true), shape_(shape), chunkShape_(chunkShape){ } inline bool read(const handle::Chunk & chunk, std::vector & data) const { @@ -62,13 +64,40 @@ namespace io { inline void write(const handle::Chunk & chunk, const char * data, const std::size_t fileSize) const { // create the parent folder + // TODO can mkdirs create race conditions ? chunk.createTopDir(); + const auto & path = chunk.path(); + // this might speed up the I/O by decoupling C++ buffers from C buffers std::ios_base::sync_with_stdio(false); - fs::ofstream file(chunk.path(), std::ios::binary); + fs::ofstream file(path, std::ios::binary); + + // check if we synchronize chunk access + if(syncWrite_) { + // F_LOCK acquires a lock on the file + // if the file is already locked, it waits until + // the lock is released + const int fd = util::fileno(file); + if(fd < 0) { + throw std::runtime_error("Invalid file descriptor"); + } + if(lockf(fd, F_LOCK, 0) != 0) { + throw std::runtime_error("Acquiring file lock failed"); + } + } + // write the header writeHeader(chunk, file); file.write(data, fileSize); + + // the lock should be automatically closed when we clsoe the file + // check if we synchronize chunk access + // if(syncWrite_) { + // // release the file lock + // if(lockf(fd, F_ULOCK, 0) != 0) { + // throw std::runtime_error("Releasing file lock failed"); + // } + // } file.close(); } @@ -404,6 +433,7 @@ namespace io { } // members + bool syncWrite_; const types::ShapeType & shape_; const types::ShapeType & chunkShape_; diff --git a/include/z5/util/util.hxx b/include/z5/util/util.hxx index 1192ea22..58b54eba 100644 --- a/include/z5/util/util.hxx +++ b/include/z5/util/util.hxx @@ -4,6 +4,18 @@ #include "z5/types/types.hxx" +#include // declaration of ::fileno +#include // for basic_filebuf template +#include + +#if defined(__GLIBCXX__) || (defined(__GLIBCPP__) && __GLIBCPP__>=20020514) // GCC >= 3.1.0 +# include +#endif +#if defined(__GLIBCXX__) // GCC >= 3.4.0 +# include +#endif + + namespace z5 { namespace util { @@ -81,5 +93,148 @@ namespace util { } val = ret; } + + // copied from: + // https://ginac.de/~kreckel/fileno/ + + //! Similar to fileno(3), but taking a C++ stream as argument instead of a + //! FILE*. Note that there is no way for the library to track what you do with + //! the descriptor, so be careful. + //! \return The integer file descriptor associated with the stream, or -1 if + //! that stream is invalid. In the latter case, for the sake of keeping the + //! code as similar to fileno(3), errno is set to EBADF. + //! \see The upstream page at + //! https://www.ginac.de/~kreckel/fileno/ of this code provides more + //! detailed information. + template + inline int + fileno_hack(const std::basic_ios& stream) + { + // Some C++ runtime libraries shipped with ancient GCC, Sun Pro, + // Sun WS/Forte 5/6, Compaq C++ supported non-standard file descriptor + // access basic_filebuf<>::fd(). Alas, starting from GCC 3.1, the GNU C++ + // runtime removes all non-standard std::filebuf methods and provides an + // extension template class __gnu_cxx::stdio_filebuf on all systems where + // that appears to make sense (i.e. at least all Unix systems). Starting + // from GCC 3.4, there is an __gnu_cxx::stdio_sync_filebuf, in addition. + // Sorry, darling, I must get brutal to fetch the darn file descriptor! + // Please complain to your compiler/libstdc++ vendor... + #if defined(__GLIBCXX__) || defined(__GLIBCPP__) + // OK, stop reading here, because it's getting obscene. Cross fingers! + # if defined(__GLIBCXX__) // >= GCC 3.4.0 + // This applies to cin, cout and cerr when not synced with stdio: + typedef __gnu_cxx::stdio_filebuf unix_filebuf_t; + unix_filebuf_t* fbuf = dynamic_cast(stream.rdbuf()); + if (fbuf != NULL) { + return fbuf->fd(); + } + + // This applies to filestreams: + typedef std::basic_filebuf filebuf_t; + filebuf_t* bbuf = dynamic_cast(stream.rdbuf()); + if (bbuf != NULL) { + // This subclass is only there for accessing the FILE*. Ouuwww, sucks! + struct my_filebuf : public std::basic_filebuf { + int fd() { return this->_M_file.fd(); } + }; + return static_cast(bbuf)->fd(); + } + + // This applies to cin, cout and cerr when synced with stdio: + typedef __gnu_cxx::stdio_sync_filebuf sync_filebuf_t; + sync_filebuf_t* sbuf = dynamic_cast(stream.rdbuf()); + if (sbuf != NULL) { + # if (__GLIBCXX__<20040906) // GCC < 3.4.2 + // This subclass is only there for accessing the FILE*. + // See GCC PR#14600 and PR#16411. + struct my_filebuf : public sync_filebuf_t { + my_filebuf(); // Dummy ctor keeps the compiler happy. + // Note: stdio_sync_filebuf has a FILE* as its first (but private) + // member variable. However, it is derived from basic_streambuf<> + // and the FILE* is the first non-inherited member variable. + FILE* c_file() { + return *(FILE**)((char*)this + sizeof(std::basic_streambuf)); + } + }; + return ::fileno(static_cast(sbuf)->c_file()); + # else + return ::fileno(sbuf->file()); + # endif + } + # else // GCC < 3.4.0 used __GLIBCPP__ + # if (__GLIBCPP__>=20020514) // GCC >= 3.1.0 + // This applies to cin, cout and cerr: + typedef __gnu_cxx::stdio_filebuf unix_filebuf_t; + unix_filebuf_t* buf = dynamic_cast(stream.rdbuf()); + if (buf != NULL) { + return buf->fd(); + } + + // This applies to filestreams: + typedef std::basic_filebuf filebuf_t; + filebuf_t* bbuf = dynamic_cast(stream.rdbuf()); + if (bbuf != NULL) { + // This subclass is only there for accessing the FILE*. Ouuwww, sucks! + struct my_filebuf : public std::basic_filebuf { + // Note: _M_file is of type __basic_file which has a + // FILE* as its first (but private) member variable. + FILE* c_file() { return *(FILE**)(&this->_M_file); } + }; + FILE* c_file = static_cast(bbuf)->c_file(); + if (c_file != NULL) { // Could be NULL for failed ifstreams. + return ::fileno(c_file); + } + } + # else // GCC 3.0.x + typedef std::basic_filebuf filebuf_t; + filebuf_t* fbuf = dynamic_cast(stream.rdbuf()); + if (fbuf != NULL) { + struct my_filebuf : public filebuf_t { + // Note: basic_filebuf has a __basic_file* as + // its first (but private) member variable. Since it is derived + // from basic_streambuf we can guess its offset. + // __basic_file in turn has a FILE* as its first (but + // private) member variable. Get it by brute force. Oh, geez! + FILE* c_file() { + std::__basic_file* ptr_M_file = *(std::__basic_file**)((char*)this + sizeof(std::basic_streambuf)); + # if _GLIBCPP_BASIC_FILE_INHERITANCE + // __basic_file inherits from __basic_file_base + return *(FILE**)((char*)ptr_M_file + sizeof(std::__basic_file_base)); + # else + // __basic_file is base class, but with vptr. + return *(FILE**)((char*)ptr_M_file + sizeof(void*)); + # endif + } + }; + return ::fileno(static_cast(fbuf)->c_file()); + } + # endif + # endif + #else + # error "Does anybody know how to fetch the bloody file descriptor?" + return stream.rdbuf()->fd(); // Maybe a good start? + #endif + errno = EBADF; + return -1; + } + + //! 8-Bit character instantiation: fileno(ios). + // template <> + inline int fileno(const std::ios& stream) + { + return fileno_hack(stream); + } + + /* + #if !(defined(__GLIBCXX__) || defined(__GLIBCPP__)) || (defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCXX_USE_WCHAR_T)) + //! Wide character instantiation: fileno(wios). + template <> + int fileno(const std::wios& stream) + { + return fileno_hack(stream); + } + #endif + */ + } } diff --git a/src/python/module/z5py/dataset.py b/src/python/module/z5py/dataset.py index 23a43fcb..70ba6d6a 100644 --- a/src/python/module/z5py/dataset.py +++ b/src/python/module/z5py/dataset.py @@ -248,8 +248,7 @@ def _create_dataset(cls, path, shape, dtype, compression=None, fillvalue=0, n_threads=1, compression_options={}, - is_zarr=True, - mode=None): + is_zarr=True, mode=None): # check if this dataset already exists if os.path.exists(path): raise RuntimeError("Cannot create dataset (name already exists)")