Skip to content

Commit

Permalink
use beautifulsoup to parse html
Browse files Browse the repository at this point in the history
  • Loading branch information
Buxdehuda committed Feb 4, 2024
1 parent 32ff8e7 commit de5c51f
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
strato-auth.json
venv
certbot.env
__pycache__
58 changes: 33 additions & 25 deletions certbotstratoapi.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""Certbot-Strato-API Class"""
import os
import re
import sys
import urllib

import pyotp
import requests
from bs4 import BeautifulSoup


class CertbotStratoApi:
Expand Down Expand Up @@ -53,7 +54,8 @@ def login_2fa(
"""
# Is 2FA used
if not re.search(r'<h1>\s*Zwei-Faktor-Authentifizierung\s*<\/h1>', response.text):
soup = BeautifulSoup(response.text, 'html.parser')
if soup.find('h1', string=re.compile('Zwei\\-Faktor\\-Authentifizierung')) is not None:
print('INFO: 2FA is not used.')
return response
if (not totp_secret) or (not totp_devicename):
Expand All @@ -63,19 +65,18 @@ def login_2fa(
param = {'identifier': username}

# Set parameter 'totp_token'
result = re.search(
r'<input type="hidden" name="totp_token" '
r'value="(?P<totp_token>\w+)">',
response.text)
if result:
param['totp_token'] = result.group('totp_token')
totp_input = soup.find('input', attrs={'type': 'hidden', 'name': 'totp_token'})
if totp_input is not None:
param['totp_token'] = totp_input['value']
else:
print('ERROR: Parsing error on 2FA site by totp_token.')
return response

# Set parameter 'action_customer_login.x'
param['action_customer_login.x'] = 1

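# The regex below scans the page for <option> tags whose value has the form
# "S.<username>.<suffix>"; each match is handled as one device entry
# (presumably the registered 2FA devices; confirm against the Strato page).
# TODO: rewrite with beautifulsoup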
# Set parameter pw_id
for device in re.finditer(
rf'<option value="(?P<value>(S\.{username}\.\w*))"'
Expand Down Expand Up @@ -128,10 +129,10 @@ def login(
totp_secret, totp_devicename)

# Check successful login
result = re.search(r'sessionID=(\w+)', request.url)
if not result:
query_parameters = urllib.parse.parse_qs(request.url)
if 'sessionID' not in query_parameters:
return False
self.session_id = result.group(1)
self.session_id = query_parameters['sessionID'][0]
print(f'DEBUG: session_id: {self.session_id}')
return True

Expand All @@ -144,20 +145,25 @@ def get_package_id(self) -> None:
'cID': 0,
'node': 'kds_CustomerEntryPage',
})
result = re.search(
r'<div class="package-information">.+?<span\s+class="domains_\d+_long[^>]*>.+?'
+ self.second_level_domain_name.replace('.', r'\.')
+ r'.+?cID=(?P<cID>\d+)',
request.text.replace('\n', ' ')
)

if result is None:
print(f'ERROR: Domain {self.second_level_domain_name} not '
'found in strato packages. Using fallback cID=1')
self.package_id = 1
return
self.package_id = result.group('cID')
print(f'INFO: strato package id (cID): {self.package_id}')
soup = BeautifulSoup(request.text, 'html.parser')
package_element = soup.select_one(f'div.package-information:-soup-contains("{self.second_level_domain_name}")')
if package_element is not None:
if package_element.has_attr('id') and package_element['id'].startswith('package_information_'):
self.package_id = package_element['id'][20:] # remove prefix 'package_information_'
print(f'INFO: strato package id (cID): {self.package_id}')
return
else:
# Old page layout: still relevant?
customer_link = package_element.find_next('a', class_='customer-link')['href']
query_parameters = urllib.parse.parse_qs(customer_link)
if 'cID' in query_parameters:
self.package_id = query_parameters['cID'][0]
print(f'INFO: strato package id (cID): {self.package_id}')
return

print(f'ERROR: Domain {self.second_level_domain_name} not '
'found in strato packages. Using fallback cID=1')
self.package_id = 1


def get_txt_records(self) -> None:
Expand All @@ -169,6 +175,8 @@ def get_txt_records(self) -> None:
'action_show_txt_records': '',
'vhost': self.domain_name
})
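# The regex below starts by matching a <select name="type"> element and
# capturing the value of its selected <option> as the named group "type";
# every match is handled as one record.
# TODO: rewrite with beautifulsoup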
for record in re.finditer(
r'<select [^>]*name="type"[^>]*>.*?'
r'<option[^>]*value="(?P<type>[^"]*)"[^>]*selected[^>]*>'
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
beautifulsoup4
pyotp
requests
4 changes: 3 additions & 1 deletion test/get_package_id_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def test_parse_second_level_domain(test_input, expected):
('test/multiple_domains_in_package.html', 'test.domain-b.com', '1'),
('test/multiple_domains_in_package.html', 'test.domain-c.eu', '1'),
('test/multiple_domains_in_package.html', 'test.domain-d.com', '1'),
('test/multiple_domains_in_package.html', 'test.domain-e.com', '1')
('test/multiple_domains_in_package.html', 'test.domain-e.com', '1'),
('test/package_ids_in_div_id.html', 'test.domain_a.de', '1'),
('test/package_ids_in_div_id.html', 'test.domain_b.de', '1')
])
def test_get_package_id(test_file, test_input, expected, requests_mock):
project_page = open(test_file, 'r').read()
Expand Down
5 changes: 5 additions & 0 deletions test/package_ids_in_div_id.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!-- from user @chrstn-hntschl https://github.com/Buxdehuda/strato-certbot/issues/19#issuecomment-1922205131 -->
<div id="package_information_1" class="package-information">
<p> domain_a.de </p>
<p> domain_b.de </p>
</div>

0 comments on commit de5c51f

Please sign in to comment.