diff --git a/.vscode/launch.json b/.vscode/launch.json index 13d18df76..e2850d0fb 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -19,12 +19,26 @@ "cwd": "${workspaceFolder}/services/cron-jobs/crawler", "console": "integratedTerminal", "internalConsoleOptions": "neverOpen" + }, + { + "name": "Run import-conference-week-details dev", + "type": "node", + "request": "launch", + "runtimeExecutable": "pnpm", + "runtimeArgs": ["dev"], + "cwd": "${workspaceFolder}/services/cron-jobs/import-conference-week-details", + "console": "integratedTerminal", + "internalConsoleOptions": "neverOpen" } ], "compounds": [ { "name": "Debug crawler dev", "configurations": ["Run crawler dev", "Attach to Process"] + }, + { + "name": "Debug import-conference-week-details dev", + "configurations": ["Run import-conference-week-details dev", "Attach to Process"] } ] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6f2c78616..30799b34c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -612,6 +612,9 @@ importers: tsup-config: specifier: workspace:* version: link:../../../packages/tsup-config + tsx: + specifier: ^4.11.0 + version: 4.11.0 typescript: specifier: ^5.4.5 version: 5.4.5 @@ -8024,6 +8027,7 @@ packages: jsondiffpatch@0.1.43: resolution: {integrity: sha512-lvOkGuk7gl9Rr4M/SfN530TslMb9QZG9PM5uznjb6oVwkkHNt7rgPNOBO59mwegJ/0Msx/yjwYzdiuoET6a87Q==} hasBin: true + bundledDependencies: [] jsonfile@4.0.0: resolution: {integrity: sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==} diff --git a/services/cron-jobs/import-conference-week-details/.env.local b/services/cron-jobs/import-conference-week-details/.env.local index 55aa66c7b..e0fe39eb5 100644 --- a/services/cron-jobs/import-conference-week-details/.env.local +++ b/services/cron-jobs/import-conference-week-details/.env.local @@ -1,4 +1,4 @@ DB_URL=mongodb://localhost/bundestagio -CONFERENCE_WEEK_DETAIL_YEAR=2021 -CONFERENCE_WEEK_DETAIL_WEEK=45 -DEBUG=* \ No newline at end of file +CONFERENCE_WEEK_DETAIL_YEAR=2023 +CONFERENCE_WEEK_DETAIL_WEEK=25 +DEBUG= \ No newline at end of file diff --git a/services/cron-jobs/import-conference-week-details/package.json b/services/cron-jobs/import-conference-week-details/package.json index 6503bd09b..601776f05 100644 --- a/services/cron-jobs/import-conference-week-details/package.json +++ b/services/cron-jobs/import-conference-week-details/package.json @@ -4,7 +4,7 @@ "main": "build/index.js", "license": "Apache-2.0", "scripts": { - "dev": "dotenv -e .env.local -- tsup src/index.ts --watch --onSuccess 'node build/index.js'", + "dev": "tsx --env-file .env --env-file .env.local --watch src/index.ts", "build": "tsup-node", "lint": "pnpm lint:ts && pnpm lint:exports", "lint:ts": "tsc --noEmit", @@ -26,6 +26,7 @@ "tsconfig": "workspace:*", "tsup": "catalog:", "tsup-config": "workspace:*", + "tsx": "^4.11.0", "typescript": "^5.4.5" } } diff --git a/services/cron-jobs/import-conference-week-details/src/index.ts b/services/cron-jobs/import-conference-week-details/src/index.ts index a9d062527..dfbc99c81 100644 --- a/services/cron-jobs/import-conference-week-details/src/index.ts +++ b/services/cron-jobs/import-conference-week-details/src/index.ts @@ -102,6 +102,111 @@ const getProcedureIds = async (documents: any) => { return procedures.map((p) => p.procedureId); }; +const updateConferenceWeekDetail = async (dataPackage: any, voteDates: any[], lastProcedureIds: any[]) => { + console.debug(dataPackage); + const ConferenceWeekDetail = { + URL: dataPackage.meta.url, + id: dataPackage.data.id, + previousYear: dataPackage.data.previous.year, + previousWeek: dataPackage.data.previous.week, + thisYear: dataPackage.data.this.year ?? dataPackage.meta.currentYear, + thisWeek: dataPackage.data.this.week ?? dataPackage.meta.currentWeek, + nextYear: dataPackage.data.next.year, + nextWeek: dataPackage.data.next.week, + sessions: await dataPackage.data.sessions.reduce(async (pSession: any, session: any) => { + const resultSession = await pSession; + resultSession.push({ + ...session, + tops: await session.tops.reduce(async (pTop: any, top: any) => { + // Await for last result + const resultTop = await pTop; + // Write VoteEnd Date + lastProcedureIds.forEach((procedureId) => { + if (voteDates[procedureId].voteDate && voteDates[procedureId].voteDate <= top.time) { + voteDates[procedureId].voteEnd = top.time; + } + }); + lastProcedureIds = []; + // Append current result + resultTop.push({ + ...top, + topic: await Promise.all( + top.topic.map(async (topic: any) => { + // eslint-disable-next-line no-param-reassign + topic.isVote = isVote(topic.lines.join(' '), top.heading, topic.documents, top.status); + topic.procedureIds = await getProcedureIds(topic.documents); // eslint-disable-line no-param-reassign + // Save VoteDates to update them at the end when the correct values are present + topic.procedureIds.forEach((procedureId: any) => { + // Override voteDate only if there is none set or we would override it by a new date + if (!voteDates[procedureId] || !voteDates[procedureId].voteDate || topic.isVote === true) { + voteDates[procedureId] = { + procedureId, + voteDate: topic.isVote ? top.time : null, + voteEnd: null, + documents: topic.documents, + }; + } + }); + // Remember last procedureIds to save voteEnd Date + lastProcedureIds = lastProcedureIds.concat(topic.procedureIds); + return topic; + }), + ), + }); + return resultTop; + }, []), + }); + return resultSession; + }, []), + }; + // Update/Insert with unique index handling + await ConferenceWeekDetailModel.updateOne( + { id: ConferenceWeekDetail.id }, + { $set: ConferenceWeekDetail }, + { upsert: true }, + ).catch((error) => { + if (error.code === 11000) { + console.warn('Duplicate key error, updating existing document'); + ConferenceWeekDetailModel.updateOne( + { nextYear: ConferenceWeekDetail.nextYear, nextWeek: ConferenceWeekDetail.nextWeek }, + { $set: ConferenceWeekDetail }, + ).catch(console.error); + } else { + console.error('Error while updating ConferenceWeekDetail'); + console.debug('Error details: ', error); + } + }); +}; + +const updateProcedureVoteDates = async (voteDates: any[]) => { + await Promise.all( + voteDates.map(async (procedureUpdate) => { + await ProcedureModel.updateOne( + { + procedureId: procedureUpdate.procedureId, + // Update only when needed + $or: [ + { + $and: [ + { voteDate: { $ne: procedureUpdate.voteDate } }, + // Make sure we do not override date from procedureScraper + { voteDate: { $lt: procedureUpdate.voteDate } }, + ], + }, + { voteEnd: { $ne: procedureUpdate.voteEnd } }, + ], + }, + { + $set: { + voteDate: procedureUpdate.voteDate, + voteEnd: procedureUpdate.voteEnd, + }, + }, + ); + }), + ); +}; + const start = async () => { const startDate = new Date(); const cron = await getCron({ name: CRON_NAME }); @@ -109,126 +214,45 @@ const start = async () => { await setCronStart({ name: CRON_NAME, startDate }); try { - const startData = - cron.data?.lastYear && cron.lastSuccessStartDate?.getDay() === new Date().getDay() - ? { - year: cron.data.lastYear, - week: cron.data.lastWeek, - } - : { - year: process.env.CONFERENCE_WEEK_DETAIL_YEAR ? Number(process.env.CONFERENCE_WEEK_DETAIL_YEAR) : 2022, - week: process.env.CONFERENCE_WEEK_DETAIL_WEEK ? Number(process.env.CONFERENCE_WEEK_DETAIL_WEEK) : 2, - }; + const startData = getStartData(cron); let voteDates: any[] = []; - let lastProcedureIds: any[] = []; + const lastProcedureIds: any[] = []; await Scraper.scrape(new ConferenceWeekDetailScraper(startData), async (dataPackage: any) => { - // Construct Database object - lastData = { lastYear: dataPackage.data.previous.year, lastWeek: dataPackage.data.previous.week, }; - - const ConferenceWeekDetail = { - URL: dataPackage.meta.url, - id: dataPackage.data.id, - previousYear: dataPackage.data.previous.year, - previousWeek: dataPackage.data.previous.week, - thisYear: dataPackage.data.this.year ?? dataPackage.meta.currentYear, - thisWeek: dataPackage.data.this.week ?? dataPackage.meta.currentWeek, - nextYear: dataPackage.data.next.year, - nextWeek: dataPackage.data.next.week, - sessions: await dataPackage.data.sessions.reduce(async (pSession: any, session: any) => { - const resultSession = await pSession; - resultSession.push({ - ...session, - tops: await session.tops.reduce(async (pTop: any, top: any) => { - // Await for last result - const resultTop = await pTop; - // Write VoteEnd Date - lastProcedureIds.forEach((procedureId) => { - if (voteDates[procedureId].voteDate && voteDates[procedureId].voteDate <= top.time) { - voteDates[procedureId].voteEnd = top.time; - } - }); - lastProcedureIds = []; - // Append current result - resultTop.push({ - ...top, - topic: await Promise.all( - top.topic.map(async (topic: any) => { - // eslint-disable-next-line no-param-reassign - topic.isVote = isVote(topic.lines.join(' '), top.heading, topic.documents, top.status); - topic.procedureIds = await getProcedureIds(topic.documents); // eslint-disable-line no-param-reassign - // Save VoteDates to update them at the end when the correct values are present - topic.procedureIds.forEach((procedureId: any) => { - // Override voteDate only if there is none set or we would override it by a new date - if (!voteDates[procedureId] || !voteDates[procedureId].voteDate || topic.isVote === true) { - voteDates[procedureId] = { - procedureId, - voteDate: topic.isVote ? top.time : null, - voteEnd: null, - documents: topic.documents, - }; - } - }); - // Remember last procedureIds to save voteEnd Date - lastProcedureIds = lastProcedureIds.concat(topic.procedureIds); - return topic; - }), - ), - }); - return resultTop; - }, []), - }); - return resultSession; - }, []), - }; - // Update/Insert - await ConferenceWeekDetailModel.updateOne( - { id: ConferenceWeekDetail.id }, - { $set: ConferenceWeekDetail }, - { upsert: true }, - ).catch(console.error); + await updateConferenceWeekDetail(dataPackage, voteDates, lastProcedureIds); }); + voteDates = voteDates.filter((voteDate) => !!voteDate); - // Update Procedure VoteDates - await Promise.all( - voteDates.map(async (procedureUpdate) => { - await ProcedureModel.updateOne( - { - procedureId: procedureUpdate.procedureId, - // Update only when needed - $or: [ - { - $and: [ - { voteDate: { $ne: procedureUpdate.voteDate } }, - // Make sure we do not override date from procedureScraper - { voteDate: { $lt: procedureUpdate.voteDate } }, - ], - }, - { voteEnd: { $ne: procedureUpdate.voteEnd } }, - ], - }, - { - $set: { - voteDate: procedureUpdate.voteDate, - voteEnd: procedureUpdate.voteEnd, - }, - }, - ); - }), - ); + await updateProcedureVoteDates(voteDates); + + await setCronSuccess({ + name: CRON_NAME, + successStartDate: startDate, + data: lastData, + }); } catch (error) { await setCronError({ name: CRON_NAME, error: JSON.stringify(error) }); - throw error; + + console.error('ERROR'); + console.debug('Error details: ', error); + // throw error; } - await setCronSuccess({ - name: CRON_NAME, - successStartDate: startDate, - data: lastData, - }); +}; + +const getStartData = (cron: any) => { + return cron.data?.lastYear && cron.lastSuccessStartDate?.getDay() === new Date().getDay() + ? { + year: cron.data.lastYear, + week: cron.data.lastWeek, + } + : { + year: process.env.CONFERENCE_WEEK_DETAIL_YEAR ? Number(process.env.CONFERENCE_WEEK_DETAIL_YEAR) : 2023, + week: process.env.CONFERENCE_WEEK_DETAIL_WEEK ? Number(process.env.CONFERENCE_WEEK_DETAIL_WEEK) : 25, + }; }; (async () => {