Skip to content

Commit

Permalink
Merge pull request #639 from Pandoril/fix/problems-with-kleinanzeigen
Browse files Browse the repository at this point in the history
Fix: Several problems with Kleinanzeigen
  • Loading branch information
codders authored Dec 8, 2024
2 parents c00602a + b8afb3c commit b20c6df
Showing 1 changed file with 19 additions and 17 deletions.
36 changes: 19 additions & 17 deletions flathunter/crawler/kleinanzeigen.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,21 +43,22 @@ def extract_data(self, soup):
entries = []
soup = soup.find(id="srchrslt-adtable")

try:
title_elements = soup.find_all(lambda e: e.has_attr('class')
and 'ellipsis' in e['class'])
except AttributeError:
return entries
exposes = soup.find_all("article", class_="aditem")
for expose in exposes:

expose_ids = soup.find_all("article", class_="aditem")
title_elem = expose.find(class_="ellipsis")
if title_elem.get("href"):
url = title_elem.get("href")
else:
# If the title element has no href, skip this expose since we can't provide a URL
continue

for idx, title_el in enumerate(title_elements):
try:
price = expose_ids[idx].find(
price = expose.find(
class_="aditem-main--middle--price-shipping--price").text.strip()
tags = expose_ids[idx].find_all(class_="simpletag")
address = expose_ids[idx].find("div", {"class": "aditem-main--top--left"})
image_element = expose_ids[idx].find("div", {"class": "galleryimage-element"})
tags = expose.find_all(class_="simpletag")
address = expose.find("div", {"class": "aditem-main--top--left"})
image_element = expose.find("div", {"class": "galleryimage-element"})
except AttributeError as error:
logger.warning("Unable to process eBay expose: %s", str(error))
continue
Expand All @@ -73,19 +74,20 @@ def extract_data(self, soup):

rooms = ""
if len(tags) > 1:
rooms_match = re.match(r'(\d+)', tags[1].text)
rooms_match = re.search(r'\d+[.|,]*\d*', tags[1].text, flags=re.MULTILINE)
if rooms_match is not None:
rooms = rooms_match[1]
rooms = rooms_match.group()

try:
size = tags[0].text
size = tags[0].text.strip()
except (IndexError, TypeError):
size = ""

details = {
'id': int(expose_ids[idx].get("data-adid")),
'id': int(expose.get("data-adid")),
'image': image,
'url': ("https://www.kleinanzeigen.de" + title_el.get("href")),
'title': title_el.text.strip(),
'url': ("https://www.kleinanzeigen.de" + url),
'title': title_elem.text.strip(),
'price': price,
'size': size,
'rooms': rooms,
Expand Down

0 comments on commit b20c6df

Please sign in to comment.