Merge pull request #553 from jkloetzke/robust-predictions
Improve source prediction robustness
jkloetzke authored Feb 18, 2024
2 parents 47a0c29 + b987c12 commit f4e85e2
Showing 8 changed files with 94 additions and 52 deletions.
20 changes: 16 additions & 4 deletions doc/manpages/bob-jenkins.rst
@@ -26,7 +26,8 @@ Available sub-commands:
bob jenkins add [-h] [-n NODES] [-o OPTIONS]
[--host-platform {linux,msys,win32}] [-w] [-p PREFIX]
[-r ROOT] [-D DEFINES] [--keep] [--download] [--upload]
- [--no-sandbox] [--credentials CREDENTIALS] [--clean]
+ [--no-sandbox] [--credentials CREDENTIALS]
+ [--clean | --incremental]
[--shortdescription | --longdescription]
name url
bob jenkins export [-h] name dir
@@ -94,14 +95,14 @@ Options
configuration will be able to trigger a build.

``--clean``
- Do clean builds.
+ Do clean builds. This is the default.

Whenever a Jenkins job is triggered, the workspace is erased first. This
will cause a fresh checkout of the sources and a clean (re)build of all
packages in the job.

Use this option to ensure reproducibility at the expense of speed.
- Disabled by default. See ``--incremental`` for the opposite switch.
+ Enabled by default. See ``--incremental`` for the opposite switch.

``--credentials CREDENTIALS``
Credentials ID for git/svn SCM checkouts.
@@ -158,7 +159,7 @@ Options
operating system.

``--incremental``
- Reuse workspace for incremental builds. This is the default.
+ Reuse workspace for incremental builds.

Bob will still apply the internal heuristics to make clean builds where
recipes or any of the dependencies were changed. Use ``--clean`` to always
@@ -532,6 +533,17 @@ jobs.update
otherwise. This will provide considerable speed improvements at the
expense of an outdated description of the unchanged jobs.

+ scm.always-checkout
+     Boolean option (possible values: '0'/'false' or '1'/'true') that
+     forces the execution of checkout steps. This option is enabled by default.
+     If disabled, the checkout might be skipped if a matching binary artifact
+     can be found.
+
+     Disabling this option can increase the build speed. On the other hand, it
+     might hide problems in recipes where the checkout step is not
+     deterministic. Note that git and svn SCMs are checked out regardless of
+     this option. For release builds it is best to keep the option enabled.
+
scm.git.shallow
Instruct the Jenkins git plugin to create shallow clones with a history
truncated to the specified number of commits. If the parameter is unset
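
Both additions above can be exercised together from the command line. A sketch, assuming extended options are passed as KEY=VALUE via ``-o`` as the synopsis shows; the alias name, URL and option value are placeholders:

    bob jenkins add -o scm.always-checkout=0 --incremental \
        myjenkins http://jenkins.example.com/

This reuses workspaces between builds and additionally lets jobs skip the checkout when a matching binary artifact exists.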
4 changes: 2 additions & 2 deletions doc/tutorial/jenkins.rst
@@ -348,8 +348,8 @@ often than others, though:

* ``--download``, ``--upload``: These are usually enabled so that the binary
  archive is populated centrally by the Jenkins server.
- * ``--clean``: increases build times but will make sure that indeterminism in
-   recipes is suppressed.
+ * ``--incremental``: optimizes build times, but flaky recipes might cause
+   build problems.
* ``-p PREFIX, --prefix PREFIX``: you should almost always give the jobs of
each project a unique prefix.
* ``--shortdescription``: shortens job descriptions and may dramatically
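
A full invocation combining the usual switches might look like this (alias, URL and prefix are placeholders):

    bob jenkins add --download --upload --incremental \
        -p myproject- myserver http://jenkins.example.com/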
89 changes: 50 additions & 39 deletions pym/bob/archive.py
@@ -385,7 +385,7 @@ async def downloadPackage(self, step, buildId, audit, content, caches=[],

def cachePackage(self, buildId, workspace):
try:
- return self._openUploadFile(buildId, ARTIFACT_SUFFIX)
+ return self._openUploadFile(buildId, ARTIFACT_SUFFIX, False)
except ArtifactExistsError:
return None
except (ArtifactUploadError, OSError) as e:
@@ -455,7 +455,7 @@ def _downloadLocalFile(self, key, suffix):
# to prevent ugly backtraces when user presses ctrl+c.
signal.signal(signal.SIGINT, signal.SIG_DFL)

- def _openUploadFile(self, buildId, suffix):
+ def _openUploadFile(self, buildId, suffix, overwrite):
raise ArtifactUploadError("not implemented")

async def uploadPackage(self, step, buildId, audit, content, executor=None):
@@ -483,7 +483,7 @@ def _uploadPackage(self, buildId, suffix, audit, content):
signal.signal(signal.SIGINT, signal.default_int_handler)

try:
- with self._openUploadFile(buildId, suffix) as (name, fileobj):
+ with self._openUploadFile(buildId, suffix, False) as (name, fileobj):
self._pack(name, fileobj, audit, content)
except ArtifactExistsError:
return ("skipped ({} exists in archive)".format(content), SKIPPED)
@@ -516,10 +516,10 @@ def _uploadLocalFile(self, key, suffix, content):
signal.signal(signal.SIGINT, signal.default_int_handler)

try:
- with self._openUploadFile(key, suffix) as (name, fileobj):
+ # Meta data file uploads overwrite their targets. Thus we should
+ # never see an ArtifactExistsError.
+ with self._openUploadFile(key, suffix, True) as (name, fileobj):
writeFileOrHandle(name, fileobj, content)
- except ArtifactExistsError:
-     return ("skipped (exists in archive)", SKIPPED)
except (ArtifactUploadError, OSError) as e:
if self.__ignoreErrors:
return ("error ("+str(e)+")", ERROR)
@@ -668,9 +668,9 @@ def _openDownloadFile(self, buildId, suffix):
else:
raise ArtifactNotFoundError()

- def _openUploadFile(self, buildId, suffix):
+ def _openUploadFile(self, buildId, suffix, overwrite):
(packageResultPath, packageResultFile) = self._getPath(buildId, suffix)
- if os.path.isfile(packageResultFile):
+ if not overwrite and os.path.isfile(packageResultFile):
raise ArtifactExistsError()

# open temporary file in destination directory
@@ -684,8 +684,7 @@ def _openUploadFile(self, buildId, suffix):
os.umask(oldMask)
return LocalArchiveUploader(
NamedTemporaryFile(dir=packageResultPath, delete=False),
- self.__fileMode,
- packageResultFile)
+ self.__fileMode, packageResultFile, overwrite)

class LocalArchiveDownloader:
def __init__(self, name):
@@ -700,19 +699,23 @@ def __exit__(self, exc_type, exc_value, traceback):
return False

class LocalArchiveUploader:
- def __init__(self, tmp, fileMode, destination):
+ def __init__(self, tmp, fileMode, destination, overwrite):
self.tmp = tmp
self.fileMode = fileMode
self.destination = destination
+ self.overwrite = overwrite
def __enter__(self):
return (None, self.tmp)
def __exit__(self, exc_type, exc_value, traceback):
self.tmp.close()
# atomically move file to destination at end of upload
if exc_type is None:
- if not isWindows():
-     if self.fileMode is not None:
-         os.chmod(self.tmp.name, self.fileMode)
+ if not isWindows() and self.fileMode is not None:
+     os.chmod(self.tmp.name, self.fileMode)
+
+ if self.overwrite:
+     os.replace(self.tmp.name, self.destination)
+ elif not isWindows():
# Cannot use os.rename() because it will unconditionally
# replace an existing file. Instead we link the file at the
# destination and unlink the temporary file.
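
The comment above captures the crux of the new ``overwrite`` flag: ``os.replace()`` clobbers the destination unconditionally, while ``os.link()`` fails if it already exists. A minimal sketch of that finalization step on POSIX (function name and cleanup policy are illustrative, not taken from the diff):

    import os

    def finalize_upload(tmp_name, destination, overwrite):
        if overwrite:
            # Meta data uploads: last writer wins, replace unconditionally.
            os.replace(tmp_name, destination)
        else:
            try:
                # os.link() raises FileExistsError if another upload won the
                # race; os.rename()/os.replace() would silently clobber it.
                os.link(tmp_name, destination)
            finally:
                # The temporary file is removed whether or not the link worked.
                os.unlink(tmp_name)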
@@ -812,44 +815,47 @@ def __openDownloadFile(self, buildId, suffix):
raise ArtifactDownloadError("{} {}".format(response.status,
response.reason))

- def _openUploadFile(self, buildId, suffix):
-     (ok, result) = self.__retry(lambda: self.__openUploadFile(buildId, suffix))
+ def _openUploadFile(self, buildId, suffix, overwrite):
+     (ok, result) = self.__retry(lambda: self.__openUploadFile(buildId, suffix, overwrite))
if ok:
return result
else:
raise ArtifactUploadError(str(result))

- def __openUploadFile(self, buildId, suffix):
+ def __openUploadFile(self, buildId, suffix, overwrite):
connection = self._getConnection()
url = self._makeUrl(buildId, suffix)

# check if already there
connection.request("HEAD", url, headers=self._getHeaders())
response = connection.getresponse()
response.read()
if response.status == 200:
raise ArtifactExistsError()
elif response.status != 404:
raise ArtifactUploadError("HEAD {} {}".format(response.status, response.reason))
if not overwrite:
connection.request("HEAD", url, headers=self._getHeaders())
response = connection.getresponse()
response.read()
if response.status == 200:
raise ArtifactExistsError()
elif response.status != 404:
raise ArtifactUploadError("HEAD {} {}".format(response.status, response.reason))

# create temporary file
- return SimpleHttpUploader(self, url)
+ return SimpleHttpUploader(self, url, overwrite)

- def _putUploadFile(self, url, tmp):
-     (ok, result) = self.__retry(lambda: self.__putUploadFile(url, tmp))
+ def _putUploadFile(self, url, tmp, overwrite):
+     (ok, result) = self.__retry(lambda: self.__putUploadFile(url, tmp, overwrite))
if ok:
return result
else:
raise ArtifactUploadError(str(result))

- def __putUploadFile(self, url, tmp):
+ def __putUploadFile(self, url, tmp, overwrite):
# Determine the file length ourselves and add a "Content-Length" header. This
# used to work in Python 3.5 automatically but was removed later.
tmp.seek(0, os.SEEK_END)
length = str(tmp.tell())
tmp.seek(0)
headers = self._getHeaders()
- headers.update({ 'Content-Length' : length, 'If-None-Match' : '*' })
+ headers.update({ 'Content-Length' : length })
+ if not overwrite:
+     headers.update({ 'If-None-Match' : '*' })
connection = self._getConnection()
connection.request("PUT", url, tmp, headers=headers)
response = connection.getresponse()
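
``If-None-Match: *`` turns the PUT into a create-only request: a conforming server answers 412 Precondition Failed instead of replacing an existing resource (RFC 9110, §13.1.2). A self-contained sketch of the same exchange; host, path and status handling are assumptions:

    import http.client

    def put_once(host, path, payload, overwrite):
        conn = http.client.HTTPConnection(host)
        headers = {"Content-Length": str(len(payload))}
        if not overwrite:
            # Create-only: an existing resource must yield 412, not be replaced.
            headers["If-None-Match"] = "*"
        conn.request("PUT", path, payload, headers=headers)
        response = conn.getresponse()
        response.read()
        if response.status == 412:
            raise FileExistsError(path)   # corresponds to ArtifactExistsError
        if response.status not in (200, 201, 204):
            raise RuntimeError("PUT {} {}".format(response.status, response.reason))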
@@ -873,17 +879,18 @@ def __exit__(self, exc_type, exc_value, traceback):
return False

class SimpleHttpUploader:
- def __init__(self, archiver, url):
+ def __init__(self, archiver, url, overwrite):
self.archiver = archiver
self.tmp = TemporaryFile()
self.url = url
+ self.overwrite = overwrite
def __enter__(self):
return (None, self.tmp)
def __exit__(self, exc_type, exc_value, traceback):
try:
# do actual upload on regular handle close
if exc_type is None:
- self.archiver._putUploadFile(self.url, self.tmp)
+ self.archiver._putUploadFile(self.url, self.tmp, self.overwrite)
finally:
self.tmp.close()
return False
@@ -932,11 +939,11 @@ def _openDownloadFile(self, buildId, suffix):
finally:
if tmpName is not None: os.unlink(tmpName)

- def _openUploadFile(self, buildId, suffix):
+ def _openUploadFile(self, buildId, suffix, overwrite):
(tmpFd, tmpName) = mkstemp()
os.close(tmpFd)
return CustomUploader(tmpName, self._makeUrl(buildId, suffix), self.__whiteList,
- self.__uploadCmd)
+ self.__uploadCmd, overwrite)

class CustomDownloader:
def __init__(self, name):
@@ -948,11 +955,12 @@ def __exit__(self, exc_type, exc_value, traceback):
return False

class CustomUploader:
- def __init__(self, name, remoteName, whiteList, uploadCmd):
+ def __init__(self, name, remoteName, whiteList, uploadCmd, overwrite):
self.name = name
self.remoteName = remoteName
self.whiteList = whiteList
self.uploadCmd = uploadCmd
+ self.overwrite = overwrite

def __enter__(self):
return (self.name, None)
@@ -963,6 +971,8 @@ def __exit__(self, exc_type, exc_value, traceback):
env = { k:v for (k,v) in os.environ.items() if k in self.whiteList }
env["BOB_LOCAL_ARTIFACT"] = self.name
env["BOB_REMOTE_ARTIFACT"] = self.remoteName
+ if self.overwrite:
+     env["BOB_REMOTE_OVERWRITE"] = "1"
ret = subprocess.call([getBashPath(), "-ec", self.uploadCmd],
stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
cwd=gettempdir(), env=env)
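
A custom upload command can now distinguish both modes: ``BOB_REMOTE_OVERWRITE`` is set to ``1`` only when replacing the remote file is acceptable and is left unset otherwise. A script could, for example, guard a forced copy behind ``[ "${BOB_REMOTE_OVERWRITE:-0}" = 1 ]``; the guard is illustrative, only the variable itself comes from this change.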
@@ -1034,16 +1044,16 @@ def _openDownloadFile(self, buildId, suffix):
finally:
if client is not None: client.close()

- def _openUploadFile(self, buildId, suffix):
+ def _openUploadFile(self, buildId, suffix, overwrite):
containerClient = self.__getClient()
from azure.core.exceptions import AzureError
blobName = self.__makeBlobName(buildId, suffix)
blobClient = None
try:
blobClient = containerClient.get_blob_client(blobName)
- if blobClient.exists():
+ if not overwrite and blobClient.exists():
raise ArtifactExistsError()
- ret = AzureUploader(containerClient, blobClient)
+ ret = AzureUploader(containerClient, blobClient, overwrite)
containerClient = blobClient = None
return ret
except AzureError as e:
@@ -1063,9 +1073,10 @@ def __exit__(self, exc_type, exc_value, traceback):
return False

class AzureUploader:
- def __init__(self, containerClient, blobClient):
+ def __init__(self, containerClient, blobClient, overwrite):
self.__containerClient = containerClient
self.__blobClient = blobClient
+ self.__overwrite = overwrite

def __enter__(self):
self.__tmp = TemporaryFile()
@@ -1087,7 +1098,7 @@ def __upload(self):
self.__tmp.seek(0, os.SEEK_END)
length = self.__tmp.tell()
self.__tmp.seek(0)
- self.__blobClient.upload_blob(self.__tmp, length=length, overwrite=False)
+ self.__blobClient.upload_blob(self.__tmp, length=length, overwrite=self.__overwrite)
except ResourceExistsError:
raise ArtifactExistsError()
except AzureError as e:
4 changes: 4 additions & 0 deletions pym/bob/cmds/jenkins/exec.py
@@ -63,6 +63,8 @@ def __handleCfg(self, line):
self.shareQuota = v
elif k == "platform":
self.platform = v
elif k == "always-checkout":
self.scmAlwaysCheckout = isTrue(v)
else:
raise AssertionError(line)

@@ -233,6 +235,8 @@ def doJenkinsExecuteRun(argv, bobRoot):
builder.setShareHandler(getShare({ 'path' : path,
'quota' : spec.shareQuota }))
builder.setShareMode(True, True)
+ if spec.scmAlwaysCheckout:
+     builder.setAlwaysCheckout([".*"])
builder.cook(ir.getRoots(), False, loop)

return 0
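
``setAlwaysCheckout()`` takes a list of package-name patterns, as known from ``bob build --always-checkout``; the catch-all ``.*`` used here forces the checkout step of every package whenever the option is enabled.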
8 changes: 6 additions & 2 deletions pym/bob/cmds/jenkins/jenkins.py
@@ -442,6 +442,7 @@ def dumpXML(self, orig, config, date, recipesAudit=None):
"upload=" + ("1" if self.__upload else "0"),
"copy=" + config.artifactsCopy,
"share=" + config.sharedDir,
"always-checkout=" + ("1" if config.scmAlwaysCheckout else "0"),
])
if config.sharedQuota:
execCmds.append("quota=" + config.sharedQuota)
@@ -809,8 +810,11 @@ def doJenkinsAdd(recipes, argv):
parser.add_argument('--no-sandbox', action='store_false', dest='sandbox', default=True,
help="Disable sandboxing")
parser.add_argument("--credentials", help="Credentials UUID for SCM checkouts")
- parser.add_argument('--clean', action='store_true', default=False,
-     help="Do clean builds (clear workspace)")
+ group = parser.add_mutually_exclusive_group()
+ group.add_argument('--clean', action='store_true', default=True,
+     help="Do clean builds (clear workspace, default)")
+ group.add_argument('--incremental', action='store_false', dest='clean',
+     help="Reuse workspace for incremental builds")
parser.add_argument("name", help="Symbolic name for server")
parser.add_argument("url", help="Server URL")
group = parser.add_mutually_exclusive_group()
11 changes: 9 additions & 2 deletions pym/bob/state.py
@@ -54,7 +54,7 @@ def load(cls, config):
self.upload = config.get("upload", False)
self.sandbox = config.get("sandbox", True)
self.credentials = config.get("credentials", None)
self.clean = config.get("clean", False)
self.clean = config.get("clean", True)
self.keep = config.get("keep", False)
self.authtoken = config.get("authtoken", None)
self.shortdescription = config.get("shortdescription", False)
@@ -99,7 +99,7 @@ def reset(self):
self.upload = False
self.sandbox = True
self.credentials = None
- self.clean = False
+ self.clean = True
self.keep = False
self.authtoken = None
self.shortdescription = False
@@ -161,6 +161,9 @@ def setOption(self, key, value, errorHandler):
elif key == "jobs.update":
if value not in ("always", "description", "lazy"):
errorHandler("'jobs.update' extended option has unsupported value!");
elif key == "scm.always-checkout":
if value.lower() not in ("0", "false", "1", "true"):
errorHandler("scm.always-checkout must be any of: 0/false/1/true")
elif key == "scm.git.shallow":
try:
num = int(value)
@@ -234,6 +237,10 @@ def jobsPolicy(self):
def jobsUpdate(self):
return self.__options.get('jobs.update', "always")

+ @property
+ def scmAlwaysCheckout(self):
+     return isTrue(self.__options.get("scm.always-checkout", "1"))
+
@property
def scmGitShallow(self):
return self.__options.get("scm.git.shallow")
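
Putting the two state.py hunks together: the validator only accepts '0', 'false', '1' or 'true', and the property falls back to enabled. A stand-in sketch of the assumed ``isTrue()`` semantics (not copied from Bob's sources):

    def isTrue(value):
        # Assumed helper semantics: case-insensitive truthy strings.
        return value.lower() in ("1", "true")

    options = {}                                            # nothing configured
    assert isTrue(options.get("scm.always-checkout", "1"))  # default: enabled
    options["scm.always-checkout"] = "false"
    assert not isTrue(options["scm.always-checkout"])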