-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy path08-duplicate-job-listings.sql
48 lines (32 loc) · 1.32 KB
/
08-duplicate-job-listings.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
-- Assume you're given a table containing job postings from various companies on the LinkedIn platform. Write a query to retrieve the count of companies that have posted duplicate job listings.
-- Definition:
-- Duplicate job listings are defined as two job listings within the same company that share identical titles and descriptions.
-- Solution 1 : using subquery
SELECT Count(DISTINCT company_id) AS duplicate_companies
FROM (SELECT company_id,
title,
description,
Count(job_id) AS job_count
FROM job_listings
GROUP BY 1,2,3) cte
WHERE job_count > 1
-- Solution 2 : using CTE
WITH CTE AS (
SELECT company_id,
title,
description,
Count(job_id) AS job_count
FROM job_listings
GROUP BY 1, 2, 3
)
SELECT COUNT(DISTINCT company_id) AS duplicate_companies
FROM CTE
WHERE job_count > 1;
-- Solution 3 : more similar to my approach
SELECT COUNT(DISTINCT company_id) as duplicate_companies
FROM job_listings a JOIN job_listings b USING(company_id)
WHERE a.job_id <> b.job_id AND a.title=b.title AND a.description=b.description;
-- first approach :
SELECT COUNT(DISTINCT a.job_id) as duplicate_companies
FROM job_listings a, job_listings b
WHERE a.description = b.description