Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

overlord/state, tests: measure state lock #14874

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ jobs:
- { go-build-tags: withbootassetstesting, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: nosecboot, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: faultinject, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: statelocktrace, skip-coverage: true, snapd-debug: false, go-test-race: false}
- { go-build-tags: snapdusergo, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: "", skip-coverage: true, snapd-debug: false, go-test-race: true }

Expand Down
4 changes: 2 additions & 2 deletions build-aux/snap/snapcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -355,14 +355,14 @@ parts:
TAGS+=(nomanagers)
case "${VERSION}" in
1337.*)
TAGS+=(withtestkeys faultinject)
TAGS+=(withtestkeys faultinject statelocktrace)
;;
esac
;;
*)
case "${VERSION}" in
1337.*)
TAGS+=(withtestkeys withbootassetstesting faultinject)
TAGS+=(withtestkeys withbootassetstesting faultinject statelocktrace)
;;
esac
;;
Expand Down
6 changes: 6 additions & 0 deletions osutil/flock.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ func NewFileLockWithMode(path string, mode os.FileMode) (*FileLock, error) {
return l, nil
}

// NewFileLockWithFile wraps an open file with a file lock which can be used for
// locking.
func NewFileLockWithFile(f *os.File) *FileLock {
sergiocazzolato marked this conversation as resolved.
Show resolved Hide resolved
return &FileLock{file: f}
}
sergiocazzolato marked this conversation as resolved.
Show resolved Hide resolved

// NewFileLock creates and opens the lock file given by "path" with mode 0600.
func NewFileLock(path string) (*FileLock, error) {
return NewFileLockWithMode(path, 0600)
Expand Down
13 changes: 13 additions & 0 deletions osutil/flock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ func (s *flockSuite) TestNewFileLock(c *C) {
c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600))
}

// Test that opening and closing a lock works as expected, and that the mode is right.
func (s *flockSuite) TestNewFileLockWithFile(c *C) {
myfile, err := os.OpenFile(filepath.Join(c.MkDir(), "name"), os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
c.Assert(err, IsNil)

lock := osutil.NewFileLockWithFile(myfile)
defer lock.Close()

fi, err := os.Stat(lock.Path())
c.Assert(err, IsNil)
c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600))
}

// Test that we can access the underlying open file.
func (s *flockSuite) TestFile(c *C) {
fname := filepath.Join(c.MkDir(), "name")
Expand Down
121 changes: 121 additions & 0 deletions overlord/state/locktrace.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// -*- Mode: Go; indent-tabs-mode: t -*-
//go:build statelocktrace

/*
* Copyright (C) 2021 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

package state

import (
"fmt"
"os"
"runtime"
"time"

"github.com/snapcore/snapd/osutil"
)

var (
traceStateLock = false

traceThreshold = int64(0)
traceFilePath = ""
)

func init() {
if !osutil.GetenvBool("SNAPPY_TESTING") {
return
}

threshold := osutil.GetenvInt64("SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS")
logFilePath := os.Getenv("SNAPD_STATE_LOCK_TRACE_FILE")

if threshold <= 0 || logFilePath == "" {
return
}

traceThreshold = threshold
traceFilePath = logFilePath
traceStateLock = true
}

func traceCallers(ts, heldMs, waitMs int64) error {
if traceFilePath == "" {
return fmt.Errorf("internal error: trace file path unset")
}

logFile, err := os.OpenFile(traceFilePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
if err != nil {
return fmt.Errorf("cannot not open/create log trace file: %v", err)
}
lockFile := osutil.NewFileLockWithFile(logFile)
defer lockFile.Close()

if err := lockFile.Lock(); err != nil {
return fmt.Errorf("cannot take file lock: %v", err)
}

pc := make([]uintptr, 10)
// avoid 3 first callers on the stack: runtime.Callers(), this function and the parent
n := runtime.Callers(3, pc)
frames := runtime.CallersFrames(pc[:n])

_, err = fmt.Fprintf(logFile, "### %s lock: held: %d ms wait %d ms\n",
time.UnixMilli(ts),
heldMs, waitMs)
if err != nil {
return err
}

for {
frame, more := frames.Next()
_, err := fmt.Fprintf(logFile, "%s:%d %s\n", frame.File, frame.Line, frame.Function)
if err != nil {
return err
}

if !more {
break
}
}

return nil
}

func lockTimestamp() int64 {
if !traceStateLock {
return 0
}

return time.Now().UnixMilli()
}

// maybeSaveLockTime allows to save lock times when this overpass the threshold
// defined by through the SNAPD_STATE_LOCK_THRESHOLD_MS environment settings.
func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
if !traceStateLock {
return
}

heldMs := now - lockHoldStart
waitMs := lockHoldStart - lockWaitStart
if heldMs > traceThreshold || waitMs > traceThreshold {
if err := traceCallers(now, heldMs, waitMs); err != nil {
fmt.Fprintf(os.Stderr, "could write state lock trace: %v\n", err)
}
}
}
28 changes: 28 additions & 0 deletions overlord/state/locktrace_fake.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// -*- Mode: Go; indent-tabs-mode: t -*-
//go:build !statelocktrace

/*
* Copyright (C) 2021 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

package state

func lockTimestamp() int64 {
return int64(0)
}

func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
}
10 changes: 10 additions & 0 deletions overlord/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ type State struct {
// task/changes observing
taskHandlers map[int]func(t *Task, old, new Status) (remove bool)
changeHandlers map[int]func(chg *Change, old, new Status)

lockWaitStart int64
lockHoldStart int64
}

// New returns a new empty state.
Expand Down Expand Up @@ -139,8 +142,11 @@ func (s *State) Modified() bool {

// Lock acquires the state lock.
func (s *State) Lock() {
lockWait := lockTimestamp()
s.mu.Lock()
atomic.AddInt32(&s.muC, 1)
s.lockWaitStart = lockWait
s.lockHoldStart = lockTimestamp()
}

func (s *State) reading() {
Expand All @@ -158,7 +164,11 @@ func (s *State) writing() {

func (s *State) unlock() {
atomic.AddInt32(&s.muC, -1)
lockWaitStart, lockHoldStart := s.lockWaitStart, s.lockHoldStart
s.lockWaitStart, s.lockHoldStart = 0, 0
lockHoldEnd := lockTimestamp()
s.mu.Unlock()
maybeSaveLockTime(lockWaitStart, lockHoldStart, lockHoldEnd)
}

type marshalledState struct {
Expand Down
4 changes: 2 additions & 2 deletions packaging/ubuntu-16.04/rules
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ _SNAP_TAGS=
# check if we need to include the testkeys in the binary
ifneq (,$(filter testkeys,$(DEB_BUILD_OPTIONS)))
# if enabled also enable bootloader assets testing and fault injection
_TAGS := withtestkeys,withbootassetstesting,faultinject
_SNAP_TAGS := nomanagers,withtestkeys,faultinject
_TAGS := withtestkeys,withbootassetstesting,faultinject,statelocktrace
_SNAP_TAGS := nomanagers,withtestkeys,faultinject,statelocktrace
else
_SNAP_TAGS=nomanagers
endif
Expand Down
2 changes: 2 additions & 0 deletions spread.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ environment:
SNAPD_DEB_FROM_REPO: '$(HOST: echo "${SPREAD_SNAPD_DEB_FROM_REPO:-true}")'
# Directory where the built snapd snaps and other assets are stored
SNAPD_WORK_DIR: '$(HOST: echo "${SPREAD_SNAPD_WORK_DIR:-/var/tmp/work-dir}")'
# Threshold used to determine which stake locks have to be collected during runtime
SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS: '$(HOST: echo "${SPREAD_SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS:-0}")'

# Directory where the nested images and test assets are stored
NESTED_WORK_DIR: '$(HOST: echo "${NESTED_WORK_DIR:-/var/tmp/work-dir}")'
Expand Down
4 changes: 3 additions & 1 deletion tests/lib/prepare-restore.sh
Original file line number Diff line number Diff line change
Expand Up @@ -697,14 +697,16 @@ prepare_suite_each() {
fi

if [[ "$variant" = full ]]; then
# shellcheck source=tests/lib/prepare.sh
. "$TESTSLIB"/prepare.sh
# shellcheck source=tests/lib/prepare.sh
. "$TESTSLIB"/prepare.sh
if os.query is-classic; then
prepare_each_classic
else
prepare_each_core
fi

prepare_state_lock "$SPREAD_JOB"
fi

case "$SPREAD_SYSTEM" in
Expand Down
35 changes: 35 additions & 0 deletions tests/lib/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ prepare_classic() {
fi

prepare_reexec_override
prepare_state_lock "SNAPD PROJECT"
prepare_memory_limit_override
disable_refreshes

Expand Down Expand Up @@ -1520,6 +1521,39 @@ EOF
rm -rf "$UNPACK_DIR"
}

prepare_state_lock(){
TAG=$1
CONF_FILE="/etc/systemd/system/snapd.service.d/state-lock.conf"
LOCKS_FILE="$TESTSTMP"/snapd_lock_traces
RESTART=false

if [ "$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" -gt 0 ]; then
echo "###START: $TAG" >> "$LOCKS_FILE"

# Generate the config file when it does not exist and when the threshold has changed different
if ! [ -f "$CONF_FILE" ] || ! grep -q "SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS=$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" < "$CONF_FILE"; then
echo "Prepare snapd for getting state lock time"
cat <<EOF > "$CONF_FILE"
[Service]
Environment=SNAPPY_TESTING=1
Environment=SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS="$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS"
Environment=SNAPD_STATE_LOCK_TRACE_FILE="$LOCKS_FILE"
EOF
RESTART=true
fi
elif [ -f "$CONF_FILE" ]; then
rm -f "$CONF_FILE"
RESTART=true
fi

if [ "$RESTART" = "true" ]; then
# the service setting may have changed in the service so we need
# to ensure snapd is reloaded
systemctl daemon-reload
systemctl restart snapd
fi
}

# prepare_ubuntu_core will prepare ubuntu-core 16+
prepare_ubuntu_core() {
# we are still a "classic" image, prepare the surgery
Expand Down Expand Up @@ -1633,6 +1667,7 @@ prepare_ubuntu_core() {
# or restore will break
remove_disabled_snaps
prepare_memory_limit_override
prepare_state_lock "SNAPD PROJECT"
setup_experimental_features
systemctl stop snapd.service snapd.socket
save_snapd_state
Expand Down
16 changes: 16 additions & 0 deletions tests/main/snapd-state-lock/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
summary: smoke test used to retrieve the lock state times

details: |
Test used to collect artifacts
priority: -1

artifacts:
- snapd_lock_traces

execute: |
if [ -f "$TESTSTMP"/snapd_lock_traces ]; then
cp -f "$TESTSTMP"/snapd_lock_traces .
else
touch snapd_lock_traces
fi
Loading