Skip to content

Commit

Permalink
Fix javascript routing problem, add check for project to match URL, f…
Browse files Browse the repository at this point in the history
…ix line endings, add validation tests
  • Loading branch information
audiodude committed Jul 16, 2023
1 parent 3446aca commit e13463c
Show file tree
Hide file tree
Showing 3 changed files with 247 additions and 221 deletions.
2 changes: 1 addition & 1 deletion wp1-frontend/src/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ const routes = [
},
{
path: '/selections/book/:builder_id',
component: PetscanBuilder,
component: BookBuilder,
meta: {
title: () => BASE_TITLE + ' - Edit Book Selection',
},
Expand Down
156 changes: 78 additions & 78 deletions wp1/selection/models/book.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,78 @@
import logging
import urllib

import mwparserfromhell
import requests
import validators

from wp1.constants import WP1_USER_AGENT
from wp1.exceptions import Wp1FatalSelectionError
from wp1.selection.abstract_builder import AbstractBuilder

# Module-level logger; build() uses it to record Wikipedia API error statuses.
logger = logging.getLogger(__name__)


class Builder(AbstractBuilder):
    """Selection builder for a wiki "book" page.

    The book page is fetched through the MediaWiki API of the given project
    and every wikilink found in its wikitext becomes one line of the
    resulting selection.
    """

    def build(self, content_type, **params):
        """Fetch the book page and return the article titles it links to.

        Args:
            content_type: Must be 'text/tab-separated-values'.
            **params: Must contain the str values `url` (address of the book
                page; everything after the first 'wiki/' is used as the
                page title) and `project` (host name the API request is
                sent to).

        Returns:
            UTF-8 encoded bytes holding one title per line, with spaces
            replaced by underscores and duplicates removed, in order of
            first appearance.

        Raises:
            Wp1FatalSelectionError: If the content type is not recognized,
                a param is missing or not a str, the URL contains no page
                title, the API answers with an error status, or the API
                response does not contain the page's wikitext.
        """
        if content_type != 'text/tab-separated-values':
            raise Wp1FatalSelectionError('Unrecognized content type')
        for name in ('url', 'project'):
            if name not in params:
                raise Wp1FatalSelectionError(
                    'Missing required param: %s' % name)
        for name in ('url', 'project'):
            if not isinstance(params[name], str):
                raise Wp1FatalSelectionError('Param `%s` was not str' % name)

        # Split on the first 'wiki/' only, so that titles which themselves
        # contain 'wiki/' are kept whole, and fail with a domain error
        # (rather than an IndexError) when there is no title at all.
        _, marker, book_name = params['url'].partition('wiki/')
        if not marker or not book_name:
            raise Wp1FatalSelectionError(
                'Param `url` does not contain a book page title')

        # NOTE(review): book_name is inserted exactly as it appears in the
        # URL; this presumably relies on it already being percent-encoded.
        final_url = (
            'https://%s/w/api.php?'
            'action=query&prop=revisions&rvprop=content&format=json'
            '&rvslots=main&titles=%s' % (params['project'], book_name))

        resp = requests.get(final_url, headers={'User-Agent': WP1_USER_AGENT})
        try:
            resp.raise_for_status()
        except requests.exceptions.HTTPError as e:
            logger.exception('Error status received from Wikipedia API')
            raise Wp1FatalSelectionError(
                'Error status received from Wikipedia API') from e

        data = resp.json()
        try:
            pages = data['query']['pages']
            page = list(pages.values())[0]
            wikitext = page['revisions'][0]['slots']['main']['*']
        except (KeyError, IndexError) as e:
            # A page that does not exist is reported without a 'revisions'
            # entry; surface that as a selection error instead of a bare
            # KeyError.
            raise Wp1FatalSelectionError(
                'Wikipedia API response did not contain the book page '
                'content') from e

        seen = set()
        titles = []
        for link in mwparserfromhell.parse(wikitext).filter_wikilinks():
            # Use only the link target: `link.title` excludes the
            # `|display text` part, and '#' cannot occur in a page title so
            # anything after it is a section anchor.
            target = str(link.title).partition('#')[0].strip()
            if not target:
                continue
            title = target.replace(' ', '_')
            if title not in seen:
                seen.add(title)
                titles.append(title)

        return '\n'.join(titles).encode('utf-8')

    def validate(self, **params):
        """Check that `url` is a valid URL hosted on `project`.

        Args:
            **params: Expected to contain `url` and `project`.

        Returns:
            A 3-tuple whose first element is always '', whose second element
            is the rejected URL ('' when nothing was rejected or no URL was
            given) and whose third element is a list of error messages
            (empty on success). Presumably this is the
            (valid, invalid, errors) shape expected by AbstractBuilder --
            confirm against the base class.
        """
        if 'url' not in params:
            # There is no URL to echo back, so report an empty value.
            return ('', '', ['Missing URL parameter'])
        url = params['url']

        if 'project' not in params:
            return ('', url, ['Missing project parameter'])

        # Check well-formedness first so that the host comparison below only
        # ever sees a URL that actually has a host part.
        if not validators.url(url):
            return ('', url, ['That doesn\'t look like a valid URL.'])

        # Compare against the parsed host rather than doing a substring
        # test, which would also accept the project name appearing in the
        # path or query string of an unrelated site.
        parsed_url = urllib.parse.urlparse(url)
        if parsed_url.hostname != params['project']:
            return ('', url, [
                'The domain of your URL does not match your '
                'selected project (project is: %s, URL has: %s)' %
                (params['project'], parsed_url.netloc)
            ])

        return ('', '', [])
import logging
import urllib

import mwparserfromhell
import requests
import validators

from wp1.constants import WP1_USER_AGENT
from wp1.exceptions import Wp1FatalSelectionError
from wp1.selection.abstract_builder import AbstractBuilder

# Module-level logger; build() uses it to record Wikipedia API error statuses.
logger = logging.getLogger(__name__)


class Builder(AbstractBuilder):
    """Selection builder for a wiki "book" page.

    The book page is fetched through the MediaWiki API of the given project
    and every wikilink found in its wikitext becomes one line of the
    resulting selection.
    """

    def build(self, content_type, **params):
        """Fetch the book page and return the article titles it links to.

        Args:
            content_type: Must be 'text/tab-separated-values'.
            **params: Must contain the str values `url` (address of the book
                page; everything after the first 'wiki/' is used as the
                page title) and `project` (host name the API request is
                sent to).

        Returns:
            UTF-8 encoded bytes holding one title per line, with spaces
            replaced by underscores and duplicates removed, in order of
            first appearance.

        Raises:
            Wp1FatalSelectionError: If the content type is not recognized,
                a param is missing or not a str, the URL contains no page
                title, the API answers with an error status, or the API
                response does not contain the page's wikitext.
        """
        if content_type != 'text/tab-separated-values':
            raise Wp1FatalSelectionError('Unrecognized content type')
        for name in ('url', 'project'):
            if name not in params:
                raise Wp1FatalSelectionError(
                    'Missing required param: %s' % name)
        for name in ('url', 'project'):
            if not isinstance(params[name], str):
                raise Wp1FatalSelectionError('Param `%s` was not str' % name)

        # Split on the first 'wiki/' only, so that titles which themselves
        # contain 'wiki/' are kept whole, and fail with a domain error
        # (rather than an IndexError) when there is no title at all.
        _, marker, book_name = params['url'].partition('wiki/')
        if not marker or not book_name:
            raise Wp1FatalSelectionError(
                'Param `url` does not contain a book page title')

        # NOTE(review): book_name is inserted exactly as it appears in the
        # URL; this presumably relies on it already being percent-encoded.
        final_url = (
            'https://%s/w/api.php?'
            'action=query&prop=revisions&rvprop=content&format=json'
            '&rvslots=main&titles=%s' % (params['project'], book_name))

        resp = requests.get(final_url, headers={'User-Agent': WP1_USER_AGENT})
        try:
            resp.raise_for_status()
        except requests.exceptions.HTTPError as e:
            logger.exception('Error status received from Wikipedia API')
            raise Wp1FatalSelectionError(
                'Error status received from Wikipedia API') from e

        data = resp.json()
        try:
            pages = data['query']['pages']
            page = list(pages.values())[0]
            wikitext = page['revisions'][0]['slots']['main']['*']
        except (KeyError, IndexError) as e:
            # A page that does not exist is reported without a 'revisions'
            # entry; surface that as a selection error instead of a bare
            # KeyError.
            raise Wp1FatalSelectionError(
                'Wikipedia API response did not contain the book page '
                'content') from e

        seen = set()
        titles = []
        for link in mwparserfromhell.parse(wikitext).filter_wikilinks():
            # Use only the link target: `link.title` excludes the
            # `|display text` part, and '#' cannot occur in a page title so
            # anything after it is a section anchor.
            target = str(link.title).partition('#')[0].strip()
            if not target:
                continue
            title = target.replace(' ', '_')
            if title not in seen:
                seen.add(title)
                titles.append(title)

        return '\n'.join(titles).encode('utf-8')

    def validate(self, **params):
        """Check that `url` is a valid URL hosted on `project`.

        Args:
            **params: Expected to contain `url` and `project`.

        Returns:
            A 3-tuple whose first element is always '', whose second element
            is the rejected URL ('' when nothing was rejected or no URL was
            given) and whose third element is a list of error messages
            (empty on success). Presumably this is the
            (valid, invalid, errors) shape expected by AbstractBuilder --
            confirm against the base class.
        """
        if 'url' not in params:
            # There is no URL to echo back, so report an empty value.
            return ('', '', ['Missing URL parameter'])
        url = params['url']

        if 'project' not in params:
            return ('', url, ['Missing project parameter'])

        # Check well-formedness first so that the host comparison below only
        # ever sees a URL that actually has a host part.
        if not validators.url(url):
            return ('', url, ['That doesn\'t look like a valid URL.'])

        # Compare against the parsed host rather than doing a substring
        # test, which would also accept the project name appearing in the
        # path or query string of an unrelated site.
        parsed_url = urllib.parse.urlparse(url)
        if parsed_url.hostname != params['project']:
            return ('', url, [
                'The domain of your URL does not match your '
                'selected project (project is: %s, URL has: %s)' %
                (params['project'], parsed_url.netloc)
            ])

        return ('', '', [])
Loading

0 comments on commit e13463c

Please sign in to comment.