Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add file locks to n5 chunk writing #65

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions include/z5/io/io_n5.hxx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <ios>
#include <unistd.h>

#ifndef BOOST_FILESYSTEM_NO_DEPERECATED
#define BOOST_FILESYSTEM_NO_DEPERECATED
Expand All @@ -27,8 +28,9 @@ namespace io {

public:

// TODO syncWrite_ should be set from options and not be hard-coded
ChunkIoN5(const types::ShapeType & shape, const types::ShapeType & chunkShape) :
shape_(shape), chunkShape_(chunkShape){
syncWrite_(true), shape_(shape), chunkShape_(chunkShape){
}

inline bool read(const handle::Chunk & chunk, std::vector<char> & data) const {
Expand Down Expand Up @@ -62,13 +64,40 @@ namespace io {

inline void write(const handle::Chunk & chunk, const char * data, const std::size_t fileSize) const {
// create the parent folder
// TODO can mkdirs create race conditions ?
chunk.createTopDir();
const auto & path = chunk.path();

// this might speed up the I/O by decoupling C++ buffers from C buffers
std::ios_base::sync_with_stdio(false);
fs::ofstream file(chunk.path(), std::ios::binary);
fs::ofstream file(path, std::ios::binary);

// check if we synchronize chunk access
if(syncWrite_) {
// F_LOCK acquires a lock on the file
// if the file is already locked, it waits until
// the lock is released
const int fd = util::fileno(file);
if(fd < 0) {
throw std::runtime_error("Invalid file descriptor");
}
if(lockf(fd, F_LOCK, 0) != 0) {
throw std::runtime_error("Acquiring file lock failed");
}
}

// write the header
writeHeader(chunk, file);
file.write(data, fileSize);

// the lock should be automatically closed when we clsoe the file
// check if we synchronize chunk access
// if(syncWrite_) {
// // release the file lock
// if(lockf(fd, F_ULOCK, 0) != 0) {
// throw std::runtime_error("Releasing file lock failed");
// }
// }
file.close();
}

Expand Down Expand Up @@ -404,6 +433,7 @@ namespace io {
}

// members
bool syncWrite_;
const types::ShapeType & shape_;
const types::ShapeType & chunkShape_;

Expand Down
155 changes: 155 additions & 0 deletions include/z5/util/util.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@

#include "z5/types/types.hxx"

#include <cstdio> // declaration of ::fileno
#include <fstream> // for basic_filebuf template
#include <cerrno>

#if defined(__GLIBCXX__) || (defined(__GLIBCPP__) && __GLIBCPP__>=20020514) // GCC >= 3.1.0
# include <ext/stdio_filebuf.h>
#endif
#if defined(__GLIBCXX__) // GCC >= 3.4.0
# include <ext/stdio_sync_filebuf.h>
#endif


namespace z5 {
namespace util {

Expand Down Expand Up @@ -81,5 +93,148 @@ namespace util {
}
val = ret;
}

// copied from:
// https://ginac.de/~kreckel/fileno/

//! Similar to fileno(3), but taking a C++ stream as argument instead of a
//! FILE*. Note that there is no way for the library to track what you do with
//! the descriptor, so be careful.
//! \return The integer file descriptor associated with the stream, or -1 if
//! that stream is invalid. In the latter case, for the sake of keeping the
//! code as similar to fileno(3), errno is set to EBADF.
//! \see The <A HREF="https://www.ginac.de/~kreckel/fileno/">upstream page at
//! https://www.ginac.de/~kreckel/fileno/</A> of this code provides more
//! detailed information.
template <typename charT, typename traits>
inline int
fileno_hack(const std::basic_ios<charT, traits>& stream)
{
// Some C++ runtime libraries shipped with ancient GCC, Sun Pro,
// Sun WS/Forte 5/6, Compaq C++ supported non-standard file descriptor
// access basic_filebuf<>::fd(). Alas, starting from GCC 3.1, the GNU C++
// runtime removes all non-standard std::filebuf methods and provides an
// extension template class __gnu_cxx::stdio_filebuf on all systems where
// that appears to make sense (i.e. at least all Unix systems). Starting
// from GCC 3.4, there is an __gnu_cxx::stdio_sync_filebuf, in addition.
// Sorry, darling, I must get brutal to fetch the darn file descriptor!
// Please complain to your compiler/libstdc++ vendor...
#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
// OK, stop reading here, because it's getting obscene. Cross fingers!
# if defined(__GLIBCXX__) // >= GCC 3.4.0
// This applies to cin, cout and cerr when not synced with stdio:
typedef __gnu_cxx::stdio_filebuf<charT, traits> unix_filebuf_t;
unix_filebuf_t* fbuf = dynamic_cast<unix_filebuf_t*>(stream.rdbuf());
if (fbuf != NULL) {
return fbuf->fd();
}

// This applies to filestreams:
typedef std::basic_filebuf<charT, traits> filebuf_t;
filebuf_t* bbuf = dynamic_cast<filebuf_t*>(stream.rdbuf());
if (bbuf != NULL) {
// This subclass is only there for accessing the FILE*. Ouuwww, sucks!
struct my_filebuf : public std::basic_filebuf<charT, traits> {
int fd() { return this->_M_file.fd(); }
};
return static_cast<my_filebuf*>(bbuf)->fd();
}

// This applies to cin, cout and cerr when synced with stdio:
typedef __gnu_cxx::stdio_sync_filebuf<charT, traits> sync_filebuf_t;
sync_filebuf_t* sbuf = dynamic_cast<sync_filebuf_t*>(stream.rdbuf());
if (sbuf != NULL) {
# if (__GLIBCXX__<20040906) // GCC < 3.4.2
// This subclass is only there for accessing the FILE*.
// See GCC PR#14600 and PR#16411.
struct my_filebuf : public sync_filebuf_t {
my_filebuf(); // Dummy ctor keeps the compiler happy.
// Note: stdio_sync_filebuf has a FILE* as its first (but private)
// member variable. However, it is derived from basic_streambuf<>
// and the FILE* is the first non-inherited member variable.
FILE* c_file() {
return *(FILE**)((char*)this + sizeof(std::basic_streambuf<charT, traits>));
}
};
return ::fileno(static_cast<my_filebuf*>(sbuf)->c_file());
# else
return ::fileno(sbuf->file());
# endif
}
# else // GCC < 3.4.0 used __GLIBCPP__
# if (__GLIBCPP__>=20020514) // GCC >= 3.1.0
// This applies to cin, cout and cerr:
typedef __gnu_cxx::stdio_filebuf<charT, traits> unix_filebuf_t;
unix_filebuf_t* buf = dynamic_cast<unix_filebuf_t*>(stream.rdbuf());
if (buf != NULL) {
return buf->fd();
}

// This applies to filestreams:
typedef std::basic_filebuf<charT, traits> filebuf_t;
filebuf_t* bbuf = dynamic_cast<filebuf_t*>(stream.rdbuf());
if (bbuf != NULL) {
// This subclass is only there for accessing the FILE*. Ouuwww, sucks!
struct my_filebuf : public std::basic_filebuf<charT, traits> {
// Note: _M_file is of type __basic_file<char> which has a
// FILE* as its first (but private) member variable.
FILE* c_file() { return *(FILE**)(&this->_M_file); }
};
FILE* c_file = static_cast<my_filebuf*>(bbuf)->c_file();
if (c_file != NULL) { // Could be NULL for failed ifstreams.
return ::fileno(c_file);
}
}
# else // GCC 3.0.x
typedef std::basic_filebuf<charT, traits> filebuf_t;
filebuf_t* fbuf = dynamic_cast<filebuf_t*>(stream.rdbuf());
if (fbuf != NULL) {
struct my_filebuf : public filebuf_t {
// Note: basic_filebuf<charT, traits> has a __basic_file<charT>* as
// its first (but private) member variable. Since it is derived
// from basic_streambuf<charT, traits> we can guess its offset.
// __basic_file<charT> in turn has a FILE* as its first (but
// private) member variable. Get it by brute force. Oh, geez!
FILE* c_file() {
std::__basic_file<charT>* ptr_M_file = *(std::__basic_file<charT>**)((char*)this + sizeof(std::basic_streambuf<charT, traits>));
# if _GLIBCPP_BASIC_FILE_INHERITANCE
// __basic_file<charT> inherits from __basic_file_base<charT>
return *(FILE**)((char*)ptr_M_file + sizeof(std::__basic_file_base<charT>));
# else
// __basic_file<charT> is base class, but with vptr.
return *(FILE**)((char*)ptr_M_file + sizeof(void*));
# endif
}
};
return ::fileno(static_cast<my_filebuf*>(fbuf)->c_file());
}
# endif
# endif
#else
# error "Does anybody know how to fetch the bloody file descriptor?"
return stream.rdbuf()->fd(); // Maybe a good start?
#endif
errno = EBADF;
return -1;
}

//! 8-Bit character instantiation: fileno(ios).
// template <>
inline int fileno(const std::ios& stream)
{
return fileno_hack(stream);
}

/*
#if !(defined(__GLIBCXX__) || defined(__GLIBCPP__)) || (defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCXX_USE_WCHAR_T))
//! Wide character instantiation: fileno(wios).
template <>
int fileno<wchar_t>(const std::wios& stream)
{
return fileno_hack(stream);
}
#endif
*/

}
}
3 changes: 1 addition & 2 deletions src/python/module/z5py/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ def _create_dataset(cls, path, shape, dtype,
compression=None,
fillvalue=0, n_threads=1,
compression_options={},
is_zarr=True,
mode=None):
is_zarr=True, mode=None):
# check if this dataset already exists
if os.path.exists(path):
raise RuntimeError("Cannot create dataset (name already exists)")
Expand Down