From b25c1e182a5d3116b7d851e55048735bb3585dbe Mon Sep 17 00:00:00 2001
From: TofyLion <tofy2001@hotmail.com>
Date: Sun, 8 Sep 2024 19:43:09 +0300
Subject: [PATCH] Updated crawler for new immowelt website

---
 flathunter/crawler/immowelt.py | 56 +++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/flathunter/crawler/immowelt.py b/flathunter/crawler/immowelt.py
index 8f6b113f..070c58fa 100644
--- a/flathunter/crawler/immowelt.py
+++ b/flathunter/crawler/immowelt.py
@@ -49,61 +49,68 @@ def get_expose_details(self, expose):
     def extract_data(self, soup: BeautifulSoup):
         """Extracts all exposes from a provided Soup object"""
         entries = []
-        soup_res = soup.find("main")
+        soup_res = soup
         if not isinstance(soup_res, Tag):
             return []
 
-        title_elements = soup_res.find_all("h2")
-        expose_ids = soup_res.find_all("a", id=True)
+        advertisements = soup_res.find_all("div", attrs={"class": "css-79elbk"})
+        for adv in advertisements:
+            try:
+                title = adv.find("div", {"class": "css-1cbj9xw"}).text
+            except AttributeError:
+                title = ""
 
-        for idx, title_el in enumerate(title_elements):
             try:
-                price = expose_ids[idx].find(
-                    "div", attrs={"data-test": "price"}).text
-            except IndexError:
+                price = adv.find(
+                    "div", attrs={"data-testid": "cardmfe-price-testid"}).text
+            except AttributeError:
                 price = ""
 
             try:
-                size = expose_ids[idx].find(
-                    "div", attrs={"data-test": "area"}).text
+                descriptions = adv.find("div", attrs={"data-testid": "cardmfe-keyfacts-testid"}).children
+                descriptions = [result.text for result in descriptions]
+            except AttributeError:
+                descriptions = []
+
+            size = list(filter(lambda x: "m²" in x, descriptions))
+            try:
+                size = size[0]
             except IndexError:
                 size = ""
 
+            rooms = list(filter(lambda x: "Zimmer" in x, descriptions))
             try:
-                rooms = expose_ids[idx].find(
-                    "div", attrs={"data-test": "rooms"}).text.replace(" Zi.", "")
+                rooms = rooms[0]
             except IndexError:
                 rooms = ""
 
+            id_element = adv.find("a")
             try:
-                url = expose_ids[idx].get("href")
+                url = "https://immowelt.de" + id_element.get("href")
-            except IndexError:
+            except (AttributeError, TypeError):
                 continue
 
-            picture = expose_ids[idx].find("picture")
+            picture = adv.find("img")
             image = None
             if picture:
-                src = picture.find("source")
-                if src:
-                    image = src.get("data-srcset")
+                image = picture.get('src')
 
             try:
-                address = expose_ids[idx].find(
-                    "div", attrs={"class": re.compile("IconFact.*")}
-                  )
-                address = address.find("span").text
+                address = adv.find(
+                    "div", attrs={"data-testid": "cardmfe-description-box-address"}
+                  ).text
             except (IndexError, AttributeError):
                 address = ""
-
+            ad_id = url.split('/')[-1]
             processed_id = int(
-              hashlib.sha256(expose_ids[idx].get("id").encode('utf-8')).hexdigest(), 16
+              hashlib.sha256(ad_id.encode('utf-8')).hexdigest(), 16
             ) % 10**16
 
             details = {
                 'id': processed_id,
                 'image': image,
                 'url': url,
-                'title': title_el.text.strip(),
+                'title': title.strip(),
                 'rooms': rooms,
                 'price': price,
                 'size': size,
@@ -113,5 +120,4 @@ def extract_data(self, soup: BeautifulSoup):
             entries.append(details)
 
         logger.debug('Number of entries found: %d', len(entries))
-
-        return entries
+        return entries
\ No newline at end of file