From 852a889f82a67d641520772b066351ee2a50555e Mon Sep 17 00:00:00 2001 From: Amin Latifi Date: Mon, 10 Jun 2024 11:48:08 +0330 Subject: [PATCH] Added description summary --- db/migrations/1718005314786-Data.js | 11 + package-lock.json | 208 ++++++++++++++---- package.json | 2 + schema.graphql | 2 + src/constants.ts | 4 + .../import-projects/giveth/constants.ts | 2 +- .../import-projects/giveth/service.ts | 2 +- src/features/import-projects/giveth/type.ts | 2 +- src/features/import-projects/helpers.ts | 38 +++- src/model/generated/project.model.ts | 6 + 10 files changed, 227 insertions(+), 50 deletions(-) create mode 100644 db/migrations/1718005314786-Data.js diff --git a/db/migrations/1718005314786-Data.js b/db/migrations/1718005314786-Data.js new file mode 100644 index 0000000..dcd0928 --- /dev/null +++ b/db/migrations/1718005314786-Data.js @@ -0,0 +1,11 @@ +module.exports = class Data1718005314786 { + name = 'Data1718005314786' + + async up(db) { + await db.query(`ALTER TABLE "project" ADD "description_summary" text`) + } + + async down(db) { + await db.query(`ALTER TABLE "project" DROP COLUMN "description_summary"`) + } +} diff --git a/package-lock.json b/package-lock.json index 4ddebc4..b2c7a53 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,6 +14,7 @@ "@subsquid/typeorm-store": "^1.4.0", "dotenv": "^16.4.4", "ethers": "^6.12.1", + "html-to-text": "^9.0.5", "node-cron": "^3.0.3", "pg": "^8.11.5", "showdown": "^2.1.0", @@ -25,6 +26,7 @@ "@dotenvx/dotenvx": "^0.35.1", "@subsquid/evm-typegen": "^3.3.0", "@subsquid/typeorm-codegen": "^1.3.3", + "@types/html-to-text": "^9.0.4", "@types/jest": "^29.5.12", "@types/node": "^20.11.17", "@types/node-cron": "^3.0.11", @@ -3391,6 +3393,18 @@ } ] }, + "node_modules/@selderee/plugin-htmlparser2": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@selderee/plugin-htmlparser2/-/plugin-htmlparser2-0.11.0.tgz", + "integrity": "sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==", + "dependencies": { + "domhandler": "^5.0.3", + "selderee": "^0.11.0" + }, + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/@sentry/core": { "version": "5.30.0", "resolved": "https://registry.npmjs.org/@sentry/core/-/core-5.30.0.tgz", @@ -4234,6 +4248,12 @@ "@types/node": "*" } }, + "node_modules/@types/html-to-text": { + "version": "9.0.4", + "resolved": "https://registry.npmjs.org/@types/html-to-text/-/html-to-text-9.0.4.tgz", + "integrity": "sha512-pUY3cKH/Nm2yYrEmDlPR1mR7yszjGx4DrwPjQ702C4/D5CwHuZTgZdIdwPkRbcuhs7BAh2L5rg3CL5cbRiGTCQ==", + "dev": true + }, "node_modules/@types/http-errors": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/@types/http-errors/-/http-errors-2.0.4.tgz", @@ -5934,9 +5954,9 @@ "dev": true }, "node_modules/cookie": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz", - "integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==", + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz", + "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==", "engines": { "node": ">= 0.6" } @@ -6178,7 +6198,6 @@ "version": "4.3.1", "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", - "dev": true, "engines": { "node": ">=0.10.0" } @@ -6287,6 +6306,57 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dot-prop": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/dot-prop/-/dot-prop-6.0.1.tgz", @@ -6436,6 +6506,17 @@ "node": ">=8" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/env-paths": { "version": "2.2.1", "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", @@ -6815,16 +6896,16 @@ } }, "node_modules/express": { - "version": "4.18.2", - "resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz", - "integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==", + "version": "4.19.2", + "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz", + "integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==", "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", - "body-parser": "1.20.1", + "body-parser": "1.20.2", "content-disposition": "0.5.4", "content-type": "~1.0.4", - "cookie": "0.5.0", + "cookie": "0.6.0", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", @@ -6855,43 +6936,6 @@ "node": ">= 0.10.0" } }, - "node_modules/express/node_modules/body-parser": { - "version": "1.20.1", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz", - "integrity": "sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw==", - "dependencies": { - "bytes": "3.1.2", - "content-type": "~1.0.4", - "debug": "2.6.9", - "depd": "2.0.0", - "destroy": "1.2.0", - "http-errors": "2.0.0", - "iconv-lite": "0.4.24", - "on-finished": "2.4.1", - "qs": "6.11.0", - "raw-body": "2.5.1", - "type-is": "~1.6.18", - "unpipe": "1.0.0" - }, - "engines": { - "node": ">= 0.8", - "npm": "1.2.8000 || >= 1.4.16" - } - }, - "node_modules/express/node_modules/raw-body": { - "version": "2.5.1", - "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz", - "integrity": "sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==", - "dependencies": { - "bytes": "3.1.2", - "http-errors": "2.0.0", - "iconv-lite": "0.4.24", - "unpipe": "1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, "node_modules/ext": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/ext/-/ext-1.7.0.tgz", @@ -7713,6 +7757,39 @@ "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", "dev": true }, + "node_modules/html-to-text": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/html-to-text/-/html-to-text-9.0.5.tgz", + "integrity": "sha512-qY60FjREgVZL03vJU6IfMV4GDjGBIoOyvuFdpBDIX9yTlDw0TjxVBQp+P8NvpdIXNJvfWBTNul7fsAQJq2FNpg==", + "dependencies": { + "@selderee/plugin-htmlparser2": "^0.11.0", + "deepmerge": "^4.3.1", + "dom-serializer": "^2.0.0", + "htmlparser2": "^8.0.2", + "selderee": "^0.11.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, "node_modules/http-errors": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", @@ -9295,6 +9372,14 @@ "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==", "dev": true }, + "node_modules/leac": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/leac/-/leac-0.6.0.tgz", + "integrity": "sha512-y+SqErxb8h7nE/fiEX07jsbuhrpO9lL8eca7/Y1nuWV2moNlXhyd59iDGcRf6moVyDMbmTNzL40SUyrFU/yDpg==", + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/level": { "version": "8.0.1", "resolved": "https://registry.npmjs.org/level/-/level-8.0.1.tgz", @@ -10359,6 +10444,18 @@ "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" }, + "node_modules/parseley": { + "version": "0.12.1", + "resolved": "https://registry.npmjs.org/parseley/-/parseley-0.12.1.tgz", + "integrity": "sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==", + "dependencies": { + "leac": "^0.6.0", + "peberminta": "^0.9.0" + }, + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -10439,6 +10536,14 @@ "node": ">=0.12" } }, + "node_modules/peberminta": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/peberminta/-/peberminta-0.9.0.tgz", + "integrity": "sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ==", + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/pg": { "version": "8.11.5", "resolved": "https://registry.npmjs.org/pg/-/pg-8.11.5.tgz", @@ -11214,6 +11319,17 @@ "node": ">=10.0.0" } }, + "node_modules/selderee": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/selderee/-/selderee-0.11.0.tgz", + "integrity": "sha512-5TF+l7p4+OsnP8BCCvSyZiSPc4x4//p5uPwK8TCnVPJYRmU2aYKMpOXvw8zM5a5JvuuCGN1jmsMwuU2W02ukfA==", + "dependencies": { + "parseley": "^0.12.0" + }, + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/semver": { "version": "7.6.0", "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz", diff --git a/package.json b/package.json index d1ef37c..fc87226 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "@subsquid/typeorm-store": "^1.4.0", "dotenv": "^16.4.4", "ethers": "^6.12.1", + "html-to-text": "^9.0.5", "node-cron": "^3.0.3", "pg": "^8.11.5", "showdown": "^2.1.0", @@ -31,6 +32,7 @@ "@dotenvx/dotenvx": "^0.35.1", "@subsquid/evm-typegen": "^3.3.0", "@subsquid/typeorm-codegen": "^1.3.3", + "@types/html-to-text": "^9.0.4", "@types/jest": "^29.5.12", "@types/node": "^20.11.17", "@types/node-cron": "^3.0.11", diff --git a/schema.graphql b/schema.graphql index 8412066..54308af 100644 --- a/schema.graphql +++ b/schema.graphql @@ -53,6 +53,8 @@ type Project @entity { description: String "Html format of description" descriptionHtml: String + "Description summary in text" + descriptionSummary: String "Total attests with value True" totalVouches: Int! "Total attests with value False" diff --git a/src/constants.ts b/src/constants.ts index 453453f..c85f290 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -29,3 +29,7 @@ export const START_BLOCK = Number.parseInt( export const IMPORT_PROJECT_CRON_SCHEDULE = process.env.IMPORT_PROJECT_CRON_SCHEDULE || "0 0 * * *"; // UTC + +export const DESCRIPTION_SUMMARY_LENGTH = Number( + process.env.DESCRIPTION_SUMMARY_LENGTH || 300 +); diff --git a/src/features/import-projects/giveth/constants.ts b/src/features/import-projects/giveth/constants.ts index b13c48d..12a2169 100644 --- a/src/features/import-projects/giveth/constants.ts +++ b/src/features/import-projects/giveth/constants.ts @@ -7,7 +7,7 @@ export const givethSourceConfig: SourceConfig = { source: "giveth", idField: "id", titleField: "title", - descriptionField: "descriptionSummary", + descriptionField: "description", urlField: "url", imageField: "image", }; diff --git a/src/features/import-projects/giveth/service.ts b/src/features/import-projects/giveth/service.ts index 1b3a7ca..96be48e 100644 --- a/src/features/import-projects/giveth/service.ts +++ b/src/features/import-projects/giveth/service.ts @@ -16,7 +16,7 @@ export const fetchGivethProjectsBatch = async (limit: number, skip: number) => { title image slug - descriptionSummary + description } } }`, diff --git a/src/features/import-projects/giveth/type.ts b/src/features/import-projects/giveth/type.ts index a7e9588..bea67f5 100644 --- a/src/features/import-projects/giveth/type.ts +++ b/src/features/import-projects/giveth/type.ts @@ -1,7 +1,7 @@ export type GivethProjectInfo = { id: string; title: string; - descriptionSummary: string; + description: string; slug: string; image: string; }; diff --git a/src/features/import-projects/helpers.ts b/src/features/import-projects/helpers.ts index 543f6a6..f07ae6f 100644 --- a/src/features/import-projects/helpers.ts +++ b/src/features/import-projects/helpers.ts @@ -1,6 +1,8 @@ import { type DataSource } from "typeorm"; import { Project } from "../../model"; import { getDataSource } from "../../helpers/db"; +import { DESCRIPTION_SUMMARY_LENGTH } from "../../constants"; +import { convert } from "html-to-text"; export const updateOrCreateProject = async ( project: any, @@ -45,7 +47,12 @@ export const updateOrCreateProject = async ( existingProject.description !== description || existingProject.url !== url || existingProject.image !== image || - existingProject.descriptionHtml !== descriptionHtml; + existingProject.descriptionHtml !== descriptionHtml || + (!existingProject.descriptionSummary && description); + + const descriptionSummary = getHtmlTextSummary( + descriptionHtml || description + ); if (isUpdated) { const updatedProject = new Project({ @@ -55,6 +62,7 @@ export const updateOrCreateProject = async ( image, url, descriptionHtml, + descriptionSummary, lastUpdatedTimestamp: new Date(), imported: true, }); @@ -71,6 +79,9 @@ export const updateOrCreateProject = async ( ); } } else { + const descriptionSummary = getHtmlTextSummary( + descriptionHtml || description + ); const newProject = new Project({ id, title, @@ -78,6 +89,7 @@ export const updateOrCreateProject = async ( image, url, descriptionHtml, + descriptionSummary, projectId, source, totalVouches: 0, @@ -99,3 +111,27 @@ export const updateOrCreateProject = async ( ); } }; + +const getHtmlTextSummary = ( + html: string = "", + lengthLimit: number = DESCRIPTION_SUMMARY_LENGTH +): string => { + const text = convert(html, { + selectors: [ + { selector: "a", options: { ignoreHref: true } }, + { selector: "img", format: "skip" }, + ], + }) + .replace(/^\n+/, "") // Remove new lines from the beginning + .replace(/\n{2,}/g, "\n") // Replace multiple \n with single one + .replace(/\n$/, ""); // Remove new line from the end + + switch (true) { + case text.length <= lengthLimit: + return text; + case lengthLimit < 3: + return ".".repeat(Math.max(0, lengthLimit)); + default: + return text.slice(0, lengthLimit - 3) + "..."; + } +}; diff --git a/src/model/generated/project.model.ts b/src/model/generated/project.model.ts index 8f092c3..db209f1 100644 --- a/src/model/generated/project.model.ts +++ b/src/model/generated/project.model.ts @@ -46,6 +46,12 @@ export class Project { @Column_("text", {nullable: true}) descriptionHtml!: string | undefined | null + /** + * Description summary in text + */ + @Column_("text", {nullable: true}) + descriptionSummary!: string | undefined | null + /** * Total attests with value True */