Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
sidalidev committed Dec 18, 2024
1 parent 3b848d6 commit f6ec8d9
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 49 deletions.
64 changes: 64 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Node.js
node_modules/
npm-debug.log
yarn-error.log

# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
jspm_packages/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# dotenv environment variables file
.env

# MacOS
.DS_Store

# IDEs and editors
.idea/
.vscode/
*.sublime-workspace
39 changes: 39 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Price Scraper Project

Ce projet utilise Playwright pour scraper le site Dealabs et récupérer le produit du jour. Les informations sont ensuite envoyées à un backend Node.js. Le scraper peut être exécuté manuellement ou automatiquement toutes les 5 minutes grâce à une tâche CRON.

## Prérequis

- **Node.js** : Version 20.x ou supérieure est recommandée.
- **npm** : Assurez-vous que npm est installé avec Node.js.

## Installation

1. Clonez le dépôt :

```bash
git clone <URL_DU_DEPOT>
cd scrap-and-back
```
2. Installez les dépendances :
```bash
npm install
```

3. Installez les dépendances de développement :
```bash
npm install --save-dev typescript @types/node
```
## Configuration
1. Assurez-vous que le fichier `tsconfig.json` est correctement configuré pour votre environnement.
2. Modifiez les sélecteurs CSS dans `src/scraper.ts` si nécessaire pour cibler les éléments corrects sur Dealabs.
## Compilation
Compilez le projet TypeScript en JavaScript :
18 changes: 0 additions & 18 deletions runScraper.js

This file was deleted.

23 changes: 14 additions & 9 deletions scraper.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,32 @@
const { chromium } = require('playwright')

/**
 * Scrapes Dealabs for the current top deal ("product of the day").
 *
 * Opens the site, accepts the cookie banner, switches to the "Les + hot"
 * tab, waits briefly for the list to render, then reads the first deal
 * thread on the page.
 *
 * @returns {Promise<{title: string, price: string, link: string} | null>}
 *   The deal's title, price and link, or null when no deal thread (or one
 *   of its title/price/link sub-elements) is found.
 */
async function scrapePrices() {
  console.log('Running scraper...')

  // NOTE(review): headless:false opens a visible browser window —
  // presumably a debugging leftover; confirm before running unattended.
  const browser = await chromium.launch({ headless: false })
  try {
    const page = await browser.newPage()
    await page.goto('https://www.dealabs.com')

    await page.getByRole('button', { name: 'Tout accepter' }).click()
    await page.getByRole('button', { name: 'Les + hot' }).click()
    await page.waitForTimeout(1000)

    const productOfTheDay = await page.evaluate(() => {
      // Deal threads are rendered as <article id="thread_...">.
      const productElement = document.querySelector('article[id^="thread_"]')
      if (!productElement) return null

      // Guard each sub-query with optional chaining: a missing
      // .thread-title / .thread-price node previously threw
      // "Cannot read properties of null" instead of returning null.
      const title = productElement.querySelector('.thread-title')?.textContent?.trim()
      const price = productElement.querySelector('.thread-price')?.textContent?.trim()
      const link = productElement.querySelector('a')?.href
      if (!title || !price || !link) return null
      return { title, price, link }
    })

    console.log('productOfTheDay', productOfTheDay)
    return productOfTheDay
  } finally {
    // Always release the browser, even when navigation or a click fails —
    // previously a thrown step leaked the browser process.
    await browser.close()
  }
}

module.exports = scrapePrices
42 changes: 20 additions & 22 deletions server.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
const express = require('express')
const bodyParser = require('body-parser')
const cron = require('node-cron')
const scrapePrices = require('./scraper')
const axios = require('axios')

const app = express()
app.use(bodyParser.json())

// Every minute: run the scraper and log what it found.
// The async callback is wrapped in try/catch — previously a rejection from
// scrapePrices() became an unhandled promise rejection inside node-cron.
cron.schedule('* * * * *', async () => {
  try {
    const prices = await scrapePrices()
    console.log('prices', prices)
  } catch (error) {
    console.error('Scheduled scrape failed:', error)
  }
})

const PORT = 3000
app.listen(PORT, () => {
  // Kick off one scrape immediately on startup; the promise was previously
  // left floating, so a failure here was silently lost.
  scrapePrices().catch((error) => console.error('Initial scrape failed:', error))
  console.log(`Server is running on port ${PORT}`)
})
18 changes: 18 additions & 0 deletions src/runScraper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import scrapePrices from './scraper';
import axios from 'axios';

/**
 * Runs the scraper once and forwards the result to the local backend.
 * All failures are caught and logged; this function never rejects.
 */
async function runScraper() {
  try {
    const deal = await scrapePrices();
    if (!deal) {
      console.log('No product of the day found.');
      return;
    }
    await axios.post('http://localhost:3000/prices', deal);
    console.log('Product of the day sent:', deal);
  } catch (error) {
    console.error('Error running scraper:', error);
  }
}

runScraper();
28 changes: 28 additions & 0 deletions src/scraper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { chromium } from 'playwright';

/**
 * Scrapes Dealabs for the current top deal ("product of the day").
 *
 * Opens the site, accepts the cookie banner, switches to the "Les + hot"
 * tab, waits briefly for the list to render, then reads the first deal
 * thread on the page.
 *
 * @returns The deal's title, price and link, or null when no deal thread
 *          (or one of its title/price/link sub-elements) is found.
 */
async function scrapePrices(): Promise<{ title: string; price: string; link: string } | null> {
  // NOTE(review): headless:false opens a visible browser window —
  // presumably a debugging leftover; confirm before running via cron/CI.
  const browser = await chromium.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.dealabs.com');

    await page.getByRole('button', { name: 'Tout accepter' }).click();
    await page.getByRole('button', { name: 'Les + hot' }).click();
    // page.pause() (a Playwright debugger breakpoint) blocked unattended
    // runs forever; replaced with the same short wait scraper.js uses.
    await page.waitForTimeout(1000);

    const productOfTheDay = await page.evaluate(() => {
      // Same selector as scraper.js: deal threads are <article id="thread_...">.
      // The old '.top-deal' selector matched nothing, so this always returned null.
      const productElement = document.querySelector('article[id^="thread_"]');
      if (!productElement) return null;

      // Optional chaining instead of blind `as` casts: a missing
      // sub-element previously threw instead of yielding null.
      const title = productElement.querySelector('.thread-title')?.textContent?.trim();
      const price = productElement.querySelector('.thread-price')?.textContent?.trim();
      const link = productElement.querySelector<HTMLAnchorElement>('a')?.href;
      if (!title || !price || !link) return null;
      return { title, price, link };
    });

    return productOfTheDay;
  } finally {
    // Always release the browser, even when navigation or a click fails —
    // previously a thrown step leaked the browser process.
    await browser.close();
  }
}

// The module-level scrapePrices() call was removed: merely importing this
// module (e.g. from src/server.ts) launched a browser and then hung on
// page.pause() as a side effect.

export default scrapePrices;
29 changes: 29 additions & 0 deletions src/server.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import express from 'express';
import bodyParser from 'body-parser';
import cron from 'node-cron';
import scrapePrices from './scraper';
import axios from 'axios';

const app = express();
app.use(bodyParser.json());

// Receives scraped prices (posted by the cron job below or by runScraper).
app.post('/prices', (req, res) => {
  const prices = req.body;
  console.log('Received prices:', prices);
  res.status(200).send('Prices received');
});

// Every 5 minutes: scrape Dealabs and forward the result to our own
// /prices route. The whole body is guarded — previously only the axios
// call had a .catch, so a scrapePrices() rejection became an unhandled
// promise rejection inside the cron callback.
cron.schedule('*/5 * * * *', async () => {
  console.log('Running scraper...');
  try {
    const prices = await scrapePrices();
    if (prices) {
      const response = await axios.post('http://localhost:3000/prices', prices);
      console.log(response.data);
    }
  } catch (error) {
    console.error('Error sending prices:', error);
  }
});

const PORT = 3000;
app.listen(PORT, () => {
  console.log(`Server is running on port ${PORT}`);
});
14 changes: 14 additions & 0 deletions tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "ES6",
"module": "commonjs",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"outDir": "./dist",
"rootDir": "./src"
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
}

0 comments on commit f6ec8d9

Please sign in to comment.