This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Refactor pendingpatches #111

Status: Open. Wants to merge 3 commits into base branch `master`.
5 changes: 5 additions & 0 deletions db_migrations/03-add-pendingjobs.sql
@@ -0,0 +1,5 @@
CREATE TABLE pendingjobs(
id INTEGER PRIMARY KEY,
job_name TEXT,
build_id INTEGER
);
8 changes: 8 additions & 0 deletions db_migrations/04-alter-pendingpatches.sql
@@ -0,0 +1,8 @@
-- NOTE: Apply this change only when no patches are pending.
Contributor:
I get that this is due to not knowing how to link the pending jobs with patches in the table, but there's a chance people applying the migration wouldn't open and read the file itself since they don't need to. Can you make a subsection in the README (under Database upgrading) for migration limitations, and mention this condition there?
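
For example, a subsection along these lines could cover it (illustrative wording only; it assumes the README already has a "Database upgrading" section to nest under):

```markdown
### Migration limitations

* `04-alter-pendingpatches.sql` drops and recreates the `pendingpatches`
  table, so apply it only when no patches are pending, i.e. when no
  sktm-triggered Jenkins builds are still in flight.
```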

DROP TABLE pendingpatches;
CREATE TABLE pendingpatches(
id INTEGER PRIMARY KEY,
pendingjob_id INTEGER,
timestamp INTEGER,
FOREIGN KEY(pendingjob_id) REFERENCES pendingjobs(id)
);
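
Both migrations are plain SQL scripts and are meant to be applied in numeric order. A minimal sketch of applying them with Python's `sqlite3` module, assuming the database file is `sktm.db` (an assumption; use the path your sktm configuration actually points at):

```python
import sqlite3

# Apply the two new migrations in order. The database path is an
# assumption; substitute the path from your sktm configuration.
conn = sqlite3.connect("sktm.db")
for migration in ("db_migrations/03-add-pendingjobs.sql",
                  "db_migrations/04-alter-pendingpatches.sql"):
    with open(migration) as script:
        conn.executescript(script.read())
conn.commit()
conn.close()
```

Per the review comment above, `04-alter-pendingpatches.sql` must only be run while no patches are pending.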
104 changes: 57 additions & 47 deletions sktm/__init__.py
@@ -284,46 +284,68 @@ def check_patchwork(self):
new_series = cpw.get_new_patchsets()
for series in new_series:
logging.info("new series: %s", series.get_obj_url_list())

# Get a list of patch URLs from the series
patch_urls = series.get_patch_url_list()

# Make a list of patch info tuples
patches = [self.get_patch_info_from_url(cpw, patch_url)
for patch_url in patch_urls]

# Add the patches to the database
self.db.commit_series(patches)
Contributor:
The patches should be included in the patch table only after they have completed testing. I understand that you want to save every grabbed patch into the table, but the table was meant to say "this is what was fully processed", and now we won't have that. Granted, the question is whether we actually need that functionality, but that belongs more to the discussion around #80.


# Filter the list of series into the ones we will test (ready)
# and the ones that are filtered out (dropped)
series_ready, series_dropped = self.filter_patchsets(new_series)

# Log the series that are ready to test
for series in series_ready:
logging.info("ready series: %s", series.get_obj_url_list())

# Log the series that have been dropped (due to filtering)
for series in series_dropped:
logging.info("dropped series: %s", series.get_obj_url_list())

# Retrieve all data and save dropped patches in the DB
Contributor:
Please see issue #100 and commit 0051c4c for an explanation of why this was added. We don't want to keep rechecking patches that were supposed to be dropped, over and over, until a new patch is added to the project. If you want to do it another way, feel free to do so, but please don't reintroduce the bug :)

patches = []
for patch_url in series.get_patch_url_list():
patches.append(self.get_patch_info_from_url(cpw,
patch_url))

self.db.commit_series(patches)

# Add the list of ready series to our list to test
series_list += series_ready

# Add series summaries for all patches staying pending for
# longer than 12 hours
series_list += cpw.get_patchsets(
self.db.get_expired_pending_patches(cpw.baseurl,
cpw.project_id,
43200)
)
# For each series summary

for series in series_list:
# (Re-)add the series' patches to the "pending" list
self.db.set_patchset_pending(cpw.baseurl, cpw.project_id,
series.get_patch_info_list())
# Submit and remember a Jenkins build for the series
self.pj.append((sktm.jtype.PATCHWORK,
self.jk.build(
self.jobname,
baserepo=self.baserepo,
ref=stablecommit,
baseconfig=self.cfgurl,
message_id=series.message_id,
subject=series.subject,
emails=series.email_addr_set,
patchwork=series.get_patch_url_list(),
makeopts=self.makeopts),
cpw))
build_id = self.jk.build(
self.jobname,
baserepo=self.baserepo,
ref=stablecommit,
baseconfig=self.cfgurl,
message_id=series.message_id,
subject=series.subject,
emails=series.email_addr_set,
patchwork=series.get_patch_url_list(),
makeopts=self.makeopts
)
self.pj.append((sktm.jtype.PATCHWORK, build_id, cpw))

# Store the pending job in the database
pendingjob_id = self.db.create_pending_job(
self.jobname,
build_id
)

# Add the series to the list of pending patches or update the
# series if the series is being re-added after expiring.
self.db.set_patchset_pending(
pendingjob_id,
series.get_patch_info_list()
)

logging.info("submitted message ID: %s", series.message_id)
logging.info("submitted subject: %s", series.subject)
logging.info("submitted emails: %s", series.email_addr_set)
@@ -344,29 +366,17 @@ def check_pending(self):
bid
)
elif pjt == sktm.jtype.PATCHWORK:
patches = list()
bres = self.jk.get_result(self.jobname, bid)
rurl = self.jk.get_result_url(self.jobname, bid)
logging.info("result=%s", bres)
logging.info("url=%s", rurl)
basehash = self.jk.get_base_hash(self.jobname, bid)
logging.info("basehash=%s", basehash)
if bres == sktm.tresult.BASELINE_FAILURE:
self.db.update_baseline(
self.baserepo,
basehash,
self.jk.get_base_commitdate(self.jobname, bid),
sktm.tresult.TEST_FAILURE,
bid
)

patch_url_list = self.jk.get_patchwork(self.jobname, bid)
for patch_url in patch_url_list:
patches.append(self.get_patch_info_from_url(cpw,
patch_url))

if bres != sktm.tresult.BASELINE_FAILURE:
self.db.commit_tested(patches)
# Get the build result
result = self.jk.get_result(self.jobname, bid)
logging.info("result=%s", result)

# Get the build result URL
result_url = self.jk.get_result_url(self.jobname, bid)
logging.info("url=%s", result_url)

# Remove the patches from the pendingpatches table and
# remove the pendingjob entry.
self.db.unset_patchset_pending(self.jobname, bid)
else:
raise Exception("Unknown job type: %d" % pjt)

125 changes: 70 additions & 55 deletions sktm/db.py
@@ -62,10 +62,15 @@ def __createdb(self, db):

CREATE TABLE pendingpatches(
id INTEGER PRIMARY KEY,
pdate TEXT,
patchsource_id INTEGER,
pendingjob_id INTEGER,
timestamp INTEGER,
FOREIGN KEY(patchsource_id) REFERENCES patchsource(id)
FOREIGN KEY(pendingjob_id) REFERENCES pendingjobs(id)
);

CREATE TABLE pendingjobs(
id INTEGER PRIMARY KEY,
job_name TEXT,
build_id INTEGER
);

CREATE TABLE testrun(
@@ -154,6 +159,37 @@ def __get_sourceid(self, baseurl, project_id):

return result[0]

def create_pending_job(self, job_name, build_id):
"""Create a pending job entry.

Args:
job_name: Job name in Jenkins.
build_id: Build ID for the Jenkins job.

Returns: The pendingjob ID that was just added.
"""
self.cur.execute(
"INSERT INTO pendingjobs (job_name, build_id) VALUES (?, ?)",
(job_name, build_id)
)
self.conn.commit()

return self.cur.lastrowid

def get_pending_job_id(self, job_name, build_id):
"""Get a pending job ID.

Args:
job_name: Job name in Jenkins.
build_id: Build ID for the Jenkins job.

Returns: The matching pendingjob ID, as a string.
"""
self.cur.execute(
'SELECT id FROM pendingjobs WHERE job_name = ? AND build_id = ?',
(job_name, build_id)
)
result = self.cur.fetchone()
return str(result[0])
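
Together with `create_pending_job` above, the intended lifecycle of the two tables can be sketched against an in-memory database (an illustration using plain `sqlite3`, not sktm's own API; the job name, build ID, and patch IDs are made up):

```python
import sqlite3
import time

# Minimal sketch of the pendingjobs/pendingpatches lifecycle.
conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.executescript("""
    CREATE TABLE pendingjobs(
        id INTEGER PRIMARY KEY,
        job_name TEXT,
        build_id INTEGER
    );
    CREATE TABLE pendingpatches(
        id INTEGER PRIMARY KEY,
        pendingjob_id INTEGER,
        timestamp INTEGER,
        FOREIGN KEY(pendingjob_id) REFERENCES pendingjobs(id)
    );
""")

# A Jenkins build was submitted: record it as a pending job ...
cur.execute("INSERT INTO pendingjobs (job_name, build_id) VALUES (?, ?)",
            ("sktm-job", 42))
pendingjob_id = cur.lastrowid

# ... and mark every patch of the series as pending, tied to that job.
now = int(time.time())
cur.executemany("INSERT OR REPLACE INTO pendingpatches "
                "(id, pendingjob_id, timestamp) VALUES (?, ?, ?)",
                [(patch_id, pendingjob_id, now)
                 for patch_id in (1001, 1002)])

# Once the build finishes, both sides are cleaned up again.
cur.execute("DELETE FROM pendingpatches WHERE pendingjob_id = ?",
            (pendingjob_id,))
cur.execute("DELETE FROM pendingjobs WHERE id = ?", (pendingjob_id,))
conn.commit()
```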

def get_last_checked_patch(self, baseurl, project_id):
"""Get the patch id of the last patch that was checked.

@@ -259,9 +295,10 @@ def get_expired_pending_patches(self, baseurl, project_id, exptime=86400):
sourceid = self.__get_sourceid(baseurl, project_id)
tstamp = int(time.time()) - exptime

self.cur.execute('SELECT id FROM pendingpatches WHERE '
'patchsource_id = ? AND '
'timestamp < ?',
self.cur.execute('SELECT pendingpatches.id FROM pendingpatches '
'LEFT JOIN patch ON patch.id=pendingpatches.id'
' WHERE patch.patchsource_id = ? AND '
'pendingpatches.timestamp < ?',
(sourceid, tstamp))
for res in self.cur.fetchall():
patchlist.append(res[0])
@@ -373,57 +410,50 @@ def __get_latest(self, baserepo):

return result[0]

def set_patchset_pending(self, baseurl, project_id, series_data):
"""Add a patch to pendingpatches or update an existing entry.
def set_patchset_pending(self, pendingjob_id, series_data):
"""Add a patch to `pendingpatches` or update an existing entry.

Add each specified patch to the list of "pending" patches, with
specifed patch date, for specified Patchwork base URL and project ID,
and marked with current timestamp. Replace any previously added
patches with the same ID (bug: should be "same ID, project ID and
base URL").
Each entry in the `pendingpatches` table refers back to a patch in the
`patch` table.

Args:
baseurl: Base URL of the Patchwork instance the project ID and
patch IDs belong to.
project_id: ID of the Patchwork project the patch IDs belong to.
pendingjob_id: ID from the pendingjobs table for the Jenkins job
that is testing the patches.
series_data: List of info tuples for patches to add to the list,
where each tuple contains the patch ID and a free-form
patch date string.

"""
sourceid = self.__get_sourceid(baseurl, project_id)
tstamp = int(time.time())

logging.debug("setting patches as pending: %s", series_data)
self.cur.executemany('INSERT OR REPLACE INTO '
'pendingpatches(id, pdate, patchsource_id, '
'timestamp) '
'VALUES(?, ?, ?, ?)',
[(patch_id, patch_date, sourceid, tstamp) for
(patch_id, patch_date) in series_data])
self.cur.executemany(
'INSERT OR REPLACE INTO pendingpatches '
Contributor:
Given that we can have the same patch IDs across different Patchwork instances, we might end up overwriting entries we shouldn't. We are lucky we haven't run into this issue yet, but since you are touching the same logic here and carrying the bug over, can you fix the bug as well?

'(id, pendingjob_id, timestamp) VALUES(?, ?, ?)',
[(patch_id, pendingjob_id, tstamp)
for (patch_id, patch_date) in series_data])
self.conn.commit()
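
One possible direction for the fix the reviewer asks for above (a sketch only, not something this PR implements; the `patchsource` columns are assumed from how `__get_sourceid` queries that table): carry the patch source in `pendingpatches` and widen the primary key, so equal patch IDs from different Patchwork instances no longer collide:

```python
import sqlite3

# Hypothetical schema tweak, not part of this PR: key pending patches on
# (patch ID, patch source) so INSERT OR REPLACE only overwrites the entry
# for the same patch from the same Patchwork instance.
conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE patchsource(
        id INTEGER PRIMARY KEY,
        baseurl TEXT,
        project_id INTEGER
    );
    CREATE TABLE pendingjobs(
        id INTEGER PRIMARY KEY,
        job_name TEXT,
        build_id INTEGER
    );
    CREATE TABLE pendingpatches(
        patch_id INTEGER,
        patchsource_id INTEGER,
        pendingjob_id INTEGER,
        timestamp INTEGER,
        PRIMARY KEY (patch_id, patchsource_id),
        FOREIGN KEY (pendingjob_id) REFERENCES pendingjobs(id),
        FOREIGN KEY (patchsource_id) REFERENCES patchsource(id)
    );
""")
```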

def __unset_patchset_pending(self, baseurl, patch_id_list):
"""Remove a patch from the list of pending patches.

Remove each specified patch from the list of "pending" patches, for
the specified Patchwork base URL.
def unset_patchset_pending(self, job_name, build_id):
"""Clean up pendingpatches and pendingjobs after a test is complete.

Args:
baseurl: Base URL of the Patchwork instance the patch IDs
belong to.
patch_id_list: List of IDs of patches to be removed from the list.
job_name: Jenkins job that tested the patches.
build_id: Build ID from the Jenkins job.

"""
logging.debug("removing patches from pending list: %s", patch_id_list)

self.cur.executemany('DELETE FROM pendingpatches WHERE '
'patchsource_id IN '
'(SELECT DISTINCT id FROM patchsource WHERE '
'baseurl = ?) '
'AND id = ? ',
[(baseurl, patch_id) for
patch_id in patch_id_list])
# Get the pendingjob ID
pendingjob_id = self.get_pending_job_id(job_name, build_id)

# Delete the matching pendingpatches and pendingjobs entries
self.cur.execute(
'DELETE FROM pendingpatches WHERE pendingjob_id = ?',
(pendingjob_id,)
)
self.conn.commit()
self.cur.execute(
'DELETE FROM pendingjobs WHERE id = ?', (pendingjob_id,)
)
self.conn.commit()

def update_baseline(self, baserepo, commithash, commitdate,
@@ -462,21 +492,6 @@ def update_baseline(self, baserepo, commithash, commitdate,
(testrun_id, commithash, baserepo_id))
self.conn.commit()

def commit_tested(self, patches):
"""Saved tested patches.

Args:
patches: List of patches that were tested
"""
logging.debug("commit_tested: patches=%d", len(patches))
self.commit_series(patches)

for (patch_id, patch_name, patch_url, baseurl, project_id,
patch_date) in patches:
# TODO: Can accumulate per-project list instead of doing it one by
# one
self.__unset_patchset_pending(baseurl, [patch_id])

def __commit_testrun(self, result, buildid):
"""Add a test run to the database.
