Skip to content

Commit

Permalink
Merge branch 'flathunters:main' into Alexandero89-live_update_config_…
Browse files Browse the repository at this point in the history
…file
  • Loading branch information
Alexandero89 authored Feb 6, 2024
2 parents 4c07278 + 73a6229 commit 90d0a8a
Show file tree
Hide file tree
Showing 9 changed files with 630 additions and 588 deletions.
6 changes: 3 additions & 3 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ coverage = "*"
requests-mock = "*"
Flask = ">=3"
werkzeug = ">=3"
firebase-admin = "*"
firebase-admin = "6.2.0"
mock-firestore = "*"
pytest-mock = "*"
pytest = "*"
Expand All @@ -28,11 +28,11 @@ python-dotenv = "*"
undetected-chromedriver = "*"
prompt-toolkit = "*"
"ruamel.yaml" = "*"
pyright = "*"
pyright = "1.1.350"
types-beautifulsoup4 = "*"
selenium = "*"
gunicorn = "21.2.0"
flask-api = {editable = true, ref = "bugfix/159/remove-werkzeug-deprecated-calls", git = "git+https://github.com/codders/flask-api.git"}
flask-api = {editable = true, ref = "develop", git = "git+https://github.com/flask-api/flask-api.git"}

[dev-packages]

Expand Down
1,123 changes: 572 additions & 551 deletions Pipfile.lock

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions flathunter/chrome_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
CHROME_VERSION_REGEXP = re.compile(r'.* (\d+\.\d+\.\d+\.\d+)( .*)?')
WINDOWS_CHROME_REG_PATH = r'HKEY_CURRENT_USER\Software\Google\Chrome\BLBeacon'
WINDOWS_CHROME_REG_REGEXP = re.compile(r'\s*version\s*REG_SZ\s*(\d+)\..*')
CHROME_BINARY_NAMES = ['google-chrome', 'chromium', 'chrome', 'chromium-browser',
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome']

def get_command_output(args) -> List[str]:
"""Run a command and return stdout"""
Expand All @@ -28,8 +30,7 @@ def get_command_output(args) -> List[str]:

def get_chrome_version() -> int:
"""Determine the correct name for the chrome binary"""
for binary_name in ['google-chrome', 'chromium', 'chrome',
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome']:
for binary_name in CHROME_BINARY_NAMES:
try:
version_output = get_command_output([binary_name, '--version'])
if not version_output:
Expand Down Expand Up @@ -64,9 +65,7 @@ def get_chrome_driver(driver_arguments):
for driver_argument in driver_arguments:
chrome_options.add_argument(driver_argument)
chrome_version = get_chrome_version()
# something is weird with the patched driver version (maybe only in python3.11), I had to patch
# the chrome options to make it work
setattr(chrome_options, "headless", True)
chrome_options.add_argument("--headless=new")
driver = uc.Chrome(version_main=chrome_version, options=chrome_options) # pylint: disable=no-member

driver.execute_cdp_cmd(
Expand Down
18 changes: 9 additions & 9 deletions flathunter/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Wrap configuration options as an object"""
import os
from typing import Optional, Dict, Any
from typing import Optional, Dict, Any, List

import json
import yaml
Expand Down Expand Up @@ -161,7 +161,7 @@ def get(self, key, value=None):
"""Emulate dictionary"""
return self.config.get(key, value)

def _read_yaml_path(self, path, default_value=None):
def _read_yaml_path(self, path, default_value):
"""Resolve a dotted variable path in nested dictionaries"""
config = self.config
parts = path.split('.')
Expand Down Expand Up @@ -203,18 +203,18 @@ def get_captcha_afterlogin_string(self):

def database_location(self):
"""Return the location of the database folder"""
config_database_location = self._read_yaml_path('database_location')
config_database_location = self._read_yaml_path('database_location', None)
if config_database_location is not None:
return config_database_location
return os.path.abspath(os.path.dirname(os.path.abspath(__file__)) + "/..")

def target_urls(self):
def target_urls(self) -> List[str]:
"""List of target URLs for crawling"""
return self._read_yaml_path('urls', [])

def verbose_logging(self):
"""Return true if logging should be verbose"""
return self._read_yaml_path('verbose') is not None
return self._read_yaml_path('verbose', None) is not None

def loop_is_active(self):
"""Return true if flathunter should be crawling in a loop"""
Expand Down Expand Up @@ -263,7 +263,7 @@ def message_format(self):
return config_format
return self.DEFAULT_MESSAGE_FORMAT

def notifiers(self):
def notifiers(self) -> List[str]:
"""List of currently-active notifiers"""
return self._read_yaml_path('notifiers', [])

Expand All @@ -279,7 +279,7 @@ def telegram_notify_with_images(self) -> bool:

def telegram_receiver_ids(self):
"""Static list of receiver IDs for notification messages"""
return self._read_yaml_path('telegram.receiver_ids') or []
return self._read_yaml_path('telegram.receiver_ids', [])

def mattermost_webhook_url(self):
"""Webhook for sending Mattermost messages"""
Expand All @@ -289,15 +289,15 @@ def slack_webhook_url(self):
"""Webhook for sending Slack messages"""
return self._read_yaml_path('slack.webhook_url', "")

def apprise_urls(self):
def apprise_urls(self) -> List[str]:
"""Notification URLs for Apprise"""
return self._read_yaml_path('apprise', [])

def _get_imagetyperz_token(self):
    """Look up the Imagetyperz API token.

    Resolves the dotted path 'captcha.imagetyperz.token' through
    _read_yaml_path, passing an empty string as the default value.
    """
    token_path = "captcha.imagetyperz.token"
    return self._read_yaml_path(token_path, "")

def get_twocaptcha_key(self):
def get_twocaptcha_key(self) -> str:
"""API Token for 2captcha"""
return self._read_yaml_path("captcha.2captcha.api_key", "")

Expand Down
4 changes: 3 additions & 1 deletion flathunter/crawler/immobilienscout.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,12 @@ def get_entries_from_javascript(self):

def get_entries_from_json(self, json):
"""Get entries from JSON"""
return [
entries = [
self.extract_entry_from_javascript(entry.value)
for entry in self.JSON_PATH_PARSER_ENTRIES.find(json)
]
logger.debug('Number of found entries: %d', len(entries))
return entries

def extract_entry_from_javascript(self, entry):
"""Get single entry from JavaScript"""
Expand Down
2 changes: 1 addition & 1 deletion flathunter/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
class AbstractFilter(ABC):
"""Abstract base class for filters"""

def is_interesting(self, _expose):
def is_interesting(self, _expose) -> bool:
"""Return True if an expose should be included in the output, False otherwise"""
return True

Expand Down
2 changes: 1 addition & 1 deletion flathunter/hunter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def try_crawl(searcher, url, max_pages):
for searcher in self.config.searchers()
for url in self.config.target_urls()])

def hunt_flats(self, max_pages=None):
def hunt_flats(self, max_pages: None|int = None):
"""Crawl, process and filter exposes"""
filter_set = Filter.builder() \
.read_config(self.config) \
Expand Down
2 changes: 1 addition & 1 deletion flathunter/web/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from urllib import parse

from flask import render_template, jsonify, request, session, redirect
from flask_api import status
from flask_api import status # type: ignore

from flathunter.web import app, log
from flathunter.web.util import sanitize_float
Expand Down
52 changes: 36 additions & 16 deletions test/test_chrome_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,45 @@
import unittest
from unittest.mock import patch

from flathunter.chrome_wrapper import get_chrome_version
from flathunter.chrome_wrapper import get_chrome_version, CHROME_BINARY_NAMES
from flathunter.exceptions import ChromeNotFound

CHROME_VERSION_RESULTS = [
[], [], [],
[], ['Chromium 107.0.5304.87 built on Debian bookworm/sid, running on Debian bookworm/sid'],
['Google Chrome 107.0.5304.110'],
['Chromium 107.0.5304.87 built on Debian 11.5, running on Debian 11.5'],
[], [], [],
]

def calc_linux_binary_names():
    """Return one empty list per relevant entry of CHROME_BINARY_NAMES.

    Entries that start with a forward slash are skipped; every other entry
    contributes a fresh empty list to the result.
    """
    relevant_names = (
        binary for binary in CHROME_BINARY_NAMES if not binary.startswith('/')
    )
    return [[] for _ in relevant_names]


"""
The list of mock outputs that get_command_output should return, in call order.
The first results must all be empty ([]) so that the get_chrome_version function at
flathunter/chrome_wrapper.py:31 concludes that no Linux Chrome is installed and then checks the
Windows registry entry at flathunter/chrome_wrapper.py:46. That is why calc_linux_binary_names()
is prepended.
The same number of empty results is appended to the end so that flathunter/chrome_wrapper.py:31 is
forced to check the Windows registry again and self.assertEqual(get_chrome_version(), 116) succeeds.
"""
# Order of entries: the empty lists from calc_linux_binary_names(), then three
# version-string results, then the same number of empty lists again.
CHROME_VERSION_RESULTS = calc_linux_binary_names() + [
['Chromium 107.0.5304.87 built on Debian bookworm/sid, running on Debian bookworm/sid'],
['Google Chrome 107.0.5304.110'],
['Chromium 107.0.5304.87 built on Debian 11.5, running on Debian 11.5'],
] + calc_linux_binary_names()

"""
The first result should be empty ([]) so that the system concludes no Chrome is installed at all and
self.assertEqual(get_chrome_version(), None) succeeds.
"""
REG_VERSION_RESULTS = [
[],
[
'',
r'HKEY_CURRENT_USER\Software\Google\Chrome\BLBeacon',
' version REG_SZ 116.0.5845.141',
'',
]
]
[],
[
'',
r'HKEY_CURRENT_USER\Software\Google\Chrome\BLBeacon',
' version REG_SZ 116.0.5845.141',
'',
]
]

def my_subprocess_mock(args, static={ 'chrome_calls': 0, 'reg_calls': 0 }):
if 'chrom' in args[0]:
Expand Down

0 comments on commit 90d0a8a

Please sign in to comment.