Skip to content

Commit

Permalink
fix sweepLogs abort functionality (#9195)
Browse files Browse the repository at this point in the history
* fix sweepLogs abort functionality

* add test

* split up sweep into transaction batches

* fix timeout
  • Loading branch information
fungairino authored Sep 24, 2024
1 parent c91395f commit 73c5eb5
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 25 deletions.
28 changes: 23 additions & 5 deletions src/telemetry/logging.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,23 +147,41 @@ describe("logging", () => {
await expect(count()).resolves.toBe(1);
});

test("sweep", async () => {
test("sweepLogs", async () => {
// Set up test log database that will trigger sweepLogs due to count > MAX_LOG_RECORDS
// sweepLog assertions are all written in one test due to this expensive setup
flagOnMock.mockResolvedValue(false);
await Promise.all(
array(logEntryFactory, 1500)().map(async (x) => {
array(logEntryFactory, 1300)().map(async (x) => {
await appendEntry(x);
}),
);

// Verify that when the DISABLE_IDB_LOGGING flag is on, the logs are not swept
mockFlag(FeatureFlags.DISABLE_IDB_LOGGING);
await sweepLogs();
await expect(count()).resolves.toBe(1500);
await expect(count()).resolves.toBe(1300);

flagOnMock.mockResolvedValue(false);

// Verify that sweeper will abort if timeout is hit
const consoleWarnSpy = jest.spyOn(console, "warn");
const originalTimeout = global.setTimeout;
// Simulate timeout by mocking setTimeout to immediately call the abort signal
const setTimeoutSpy = jest
.spyOn(global, "setTimeout")
.mockImplementation((fn) => originalTimeout(fn, 0));
await sweepLogs();
await expect(count()).resolves.toBeGreaterThan(1250);
expect(consoleWarnSpy).toHaveBeenCalledWith(
"Log sweep aborted due to timeout",
);
setTimeoutSpy.mockRestore();

// Verify sweepLogs functionality
await sweepLogs();
await expect(count()).resolves.toBe(937);
// Increase timeout so test isn't flakey on CI due to slow append operation
await expect(count()).resolves.toBeLessThanOrEqual(930);
// Increase timeout so test isn't flaky on CI due to slow append operation
}, 25_000);

test("getLogEntries by modId", async () => {
Expand Down
49 changes: 29 additions & 20 deletions src/telemetry/logging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ const DB_VERSION_NUMBER = 4;
* impacted due to the number of entries.
*/
const MAX_LOG_RECORDS = 1250;
// How many log entries to delete at a time with each transaction.
const SWEEP_LOGS_BATCH_SIZE = 10;

/**
* Amount to clear old logs, as a ratio of the maximum number of logs.
Expand Down Expand Up @@ -584,11 +586,6 @@ async function _sweepLogs(): Promise<void> {
return;
}

const abortController = new AbortController();
// Ensure in cases where the sweep is taking too long, we abort the operation to reduce the likelihood
// of blocking other db transactions.
setTimeout(abortController.abort, 30_000);

await withLoggingDB(async (db) => {
const numRecords = await db.count(ENTRY_OBJECT_STORE);

Expand All @@ -600,25 +597,37 @@ async function _sweepLogs(): Promise<void> {
numToDelete,
});

const tx = db.transaction(ENTRY_OBJECT_STORE, "readwrite", {
durability: "relaxed",
});
// Ensure in cases where the sweep is taking too long, we abort the operation to reduce the likelihood
// of blocking other db transactions.
const abortController = new AbortController();
setTimeout(() => {
abortController.abort();
}, 10_000);

let deletedCount = 0;

// Ideally this would be ordered by timestamp to delete the oldest records, but timestamp is not an index.
// This might mostly "just work" if the cursor happens to iterate in insertion order
for await (const cursor of tx.store) {
if (abortController.signal.aborted) {
console.warn("Log sweep aborted due to timeout");
break;
}

await cursor.delete();
deletedCount++;
while (deletedCount < numToDelete) {
const tx = db.transaction(ENTRY_OBJECT_STORE, "readwrite", {
durability: "relaxed",
});

if (deletedCount > numToDelete) {
return;
let processedBatchCount = 0;
// Ideally this would be ordered by timestamp to delete the oldest records, but timestamp is not an index.
// This might mostly "just work" if the cursor happens to iterate in insertion order
// eslint-disable-next-line no-await-in-loop -- Process one entry at a time
for await (const cursor of tx.store) {
if (abortController.signal.aborted) {
console.warn("Log sweep aborted due to timeout");
return;
}

await cursor.delete();
deletedCount++;
processedBatchCount++;

if (processedBatchCount >= SWEEP_LOGS_BATCH_SIZE) {
break;
}
}
}
}
Expand Down

0 comments on commit 73c5eb5

Please sign in to comment.