Skip to content

Commit

Permalink
Add FLock to lock files another way
Browse files Browse the repository at this point in the history
The build system uses a class called Singleton to handle file locking. This class has a few potential issues:

1) An instance of Single could delete a file owned by another instance
2) Calling `print` in `__enter__` doesn't write to STDOUT
3) If the lock file already exists but the process that "owned" it died without deleting it every process will be locked

This change is an attempt to resolve these issues and provide some lock debugging code.

The code is structured to default to using the current Singleton class unless the USE_FLOCK_CLS environmental variable is set. In the event the environment variable is set the Singleton class is replaced with FLock.

The FLock class is designed to work while running on a machine that's also using the real Singleton class. It does this at the expense of not solving issue number #1. If FLock is proven successful and completely deployed to the cluster it should be made the default and the code that deletes the lock file in `__exit__` should be removed.
  • Loading branch information
Matt Hardcastle committed May 8, 2019
1 parent 3877a35 commit 0ddfd6f
Showing 1 changed file with 116 additions and 2 deletions.
118 changes: 116 additions & 2 deletions hifi_singleton.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json
import logging
import os
import platform
import time
Expand All @@ -7,6 +9,15 @@
except ImportError:
fcntl = None

try:
import msvcrt
except ImportError:
msvcrt = None


logger = logging.getLogger(__name__)


# Used to ensure only one instance of the script runs at a time
class Singleton:
def __init__(self, path):
Expand All @@ -33,7 +44,10 @@ def __enter__(self):
else:
self.fh.close()
self.fh = None
print("Couldn't aquire lock, retrying in 10 seconds")
# print is horked here so write directly to stdout.
with open(1, mode="w", closefd=False) as _stdout:
_stdout.write("Couldn't aquire lock, retrying in 10 seconds\n")
_stdout.flush()
time.sleep(10)
return self

Expand All @@ -43,4 +57,104 @@ def __exit__(self, type, value, traceback):
else:
fcntl.lockf(self.fh, fcntl.LOCK_UN)
self.fh.close()
os.unlink(self.path)
os.unlink(self.path)


class FLock:
"""
File locking context manager
>> with FLock("/tmp/foo.lock"):
>> do_something_that_must_be_synced()
The lock file must stick around forever. The author is not aware of a no cross platform way to clean it up w/o introducting race conditions.
"""
def __init__(self, path):
self.fh = os.open(path, os.O_CREAT | os.O_RDWR)
self.path = path

def _lock_posix(self):
try:
fcntl.lockf(self.fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
except BlockingIOError:
# Windows sleeps for 10 seconds before giving up on a lock.
# Lets mimic that behavior.
time.sleep(10)
return False
else:
return True

def _lock_windows(self):
try:
msvcrt.locking(self.fh, msvcrt.LK_LOCK, 1)
except OSError:
return False
else:
return True

if fcntl is not None:
_lock = _lock_posix
elif msvcrt is not None:
_lock = _lock_windows
else:
raise RuntimeError("No locking library found")

def read_stats(self):
data = {}
with open(self.fh, mode="r", closefd=False) as stats_file:
stats_file.seek(0)
try:
data = json.loads(stats_file.read())
except json.decoder.JSONDecodeError:
logger.warning("couldn't decode json in lock file")
except PermissionError:
# Can't read a locked file on Windows :(
pass

lock_age = time.time() - os.fstat(self.fh).st_mtime
if lock_age > 0:
data["Age"] = "%0.2f" % lock_age

with open(1, mode="w", closefd=False) as _stdout:
_stdout.write("Lock stats:\n")
for key, value in sorted(data.items()):
_stdout.write("* %s: %s\n" % (key, value))
_stdout.flush()

def write_stats(self):
stats = {
"Owner PID": os.getpid(),
}
flock_env_vars = os.getenv("FLOCK_ENV_VARS")
if flock_env_vars:
for env_var_name in flock_env_vars.split(":"):
stats[env_var_name] = os.getenv(env_var_name)

with open(self.fh, mode="w", closefd=False) as stats_file:
stats_file.truncate()
return stats_file.write(json.dumps(stats, indent=2))

def __enter__(self):
while not self._lock():
try:
self.read_stats()
except (IOError, ValueError) as exc:
logger.exception("couldn't read stats")
time.sleep(3.33) # don't hammer the file

self.write_stats()

return self

def __exit__(self, type, value, traceback):
os.close(self.fh)
# WARNING: `os.close` gives up the lock on `fh` then we attempt the `os.unlink`. On posix platforms this can lead to us deleting a lock file that another process owns. This step is required to maintain compatablity with Singleton. When and if FLock is completely rolled out to the build fleet this unlink should be removed.
try:
os.unlink(self.path)
except (FileNotFoundError, PermissionError):
logger.exception("couldn't unlink lock file")


if os.getenv("USE_FLOCK_CLS") is not None:
logger.warning("Using FLock locker")
Singleton = FLock

0 comments on commit 0ddfd6f

Please sign in to comment.