Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow users to create selections from Wikipedia books #645

Merged
merged 6 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions wp1-frontend/src/components/BookBuilder.vue
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
<template>
  <BaseBuilder
    :key="$route.path"
    :listName="'Book Selection'"
    :model="'wp1.selection.models.book'"
    :params="params"
    :builderId="$route.params.builder_id"
    :invalidItems="invalidItems"
    @onBuilderLoaded="onBuilderLoaded"
    @onBeforeSubmit="onBeforeSubmit"
    @onValidationError="onValidationError"
  >
    <template #create-desc>
      <!-- Introductory help text rendered in the builder's description slot. -->
      <p>
        Use this tool to create an article selection list for the Wikipedia
        project of your choice, based off a Wikipedia Book that you already
        created. You must first "save" your book, then enter the URL of the
        saved book. Your selection will be saved in public cloud storage and can
        be accessed through URLs that will be provided once it has been saved.
      </p>
      <p class="mb-0">
        For more information on creating a Book selection, see the
        <a href="https://wp1.readthedocs.io/en/latest/user/selections/"
          >end user documentation</a
        >
      </p>
    </template>
    <template #extra-params>
      <!-- The single model-specific parameter: the URL of the saved book. -->
      <div id="items" class="form-group m-4">
        <!-- `for` targets the input's id so the label is tied to the field. -->
        <label for="bookUrl">URL</label>
        <input
          id="bookUrl"
          ref="bookUrl"
          class="form-control my-2"
          v-model="params.url"
          @blur="validationOnBlur"
        />
        <div class="invalid-feedback">Please provide a valid URL</div>
      </div>
    </template>
  </BaseBuilder>
</template>

<script>
import BaseBuilder from './BaseBuilder.vue';

/**
 * Form for creating or editing a "Book" selection.
 *
 * Wraps BaseBuilder, supplying the book model name and one extra input
 * (the book URL) bound to `params.url`.
 */
export default {
  components: { BaseBuilder },
  name: 'BookBuilder',
  data: function () {
    return {
      // NOTE(review): not referenced in this component (the input binds to
      // params.url); kept in case other code reads it.
      url: '',
      invalidItems: '',
      // Model parameters passed to BaseBuilder; replaced in onBuilderLoaded.
      params: {},
    };
  },
  methods: {
    /**
     * Toggle the `is-invalid` class on the blurred input depending on
     * whether it currently holds a non-empty value.
     */
    validationOnBlur: function (event) {
      if (event.target.value) {
        event.target.classList.remove('is-invalid');
      } else {
        event.target.classList.add('is-invalid');
      }
    },
    /** Adopt the params of the builder delivered by BaseBuilder. */
    onBuilderLoaded: function (builder) {
      this.params = builder.params;
    },
    /** Clear any previous custom validity message before submitting. */
    onBeforeSubmit: function () {
      this.$refs.bookUrl.setCustomValidity('');
    },
    /** Mark the URL input invalid when a validation error is reported. */
    onValidationError: function () {
      this.$refs.bookUrl.setCustomValidity('URL not valid');
    },
  },
};
</script>

<style scoped></style>
10 changes: 10 additions & 0 deletions wp1-frontend/src/components/SecondaryNav.vue
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@
>Petscan Selection</router-link
>
</li>
<li
:class="
'nav-item ' +
(this.$route.path.startsWith('/selections/book') ? 'active' : '')
"
>
<router-link class="nav-link" to="/selections/book"
>Book Selection</router-link
>
</li>
</ul>
</div>
</nav>
Expand Down
15 changes: 15 additions & 0 deletions wp1-frontend/src/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import VueRouter from 'vue-router';

import App from './App.vue';
import ArticlePage from './components/ArticlePage.vue';
import BookBuilder from './components/BookBuilder.vue';
import PetscanBuilder from './components/PetscanBuilder.vue';
import SimpleBuilder from './components/SimpleBuilder.vue';
import SparqlBuilder from './components/SparqlBuilder.vue';
Expand Down Expand Up @@ -118,6 +119,13 @@ const routes = [
title: () => BASE_TITLE + ' - Create Petscan Selection',
},
},
{
path: '/selections/book',
component: BookBuilder,
meta: {
title: () => BASE_TITLE + ' - Create Book Selection',
},
},
{
path: '/selections/simple/:builder_id',
component: SimpleBuilder,
Expand All @@ -139,6 +147,13 @@ const routes = [
title: () => BASE_TITLE + ' - Edit Petscan Selection',
},
},
{
path: '/selections/book/:builder_id',
component: BookBuilder,
meta: {
title: () => BASE_TITLE + ' - Edit Book Selection',
},
},
{
path: '/selections/:builder_id/zim',
component: ZimFile,
Expand Down
83 changes: 83 additions & 0 deletions wp1/selection/models/book.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import logging
import urllib
import urllib.parse

import mwparserfromhell
import requests
import validators

from wp1.constants import WP1_USER_AGENT
from wp1.exceptions import Wp1FatalSelectionError
from wp1.selection.abstract_builder import AbstractBuilder

logger = logging.getLogger(__name__)


class Builder(AbstractBuilder):
  """Selection builder that lists the articles linked from a saved book page.

  The book is identified by the URL of its wiki page. Its wikitext is fetched
  through the project's MediaWiki API and every wikilink in it becomes one
  line of the resulting selection.
  """

  def build(self, content_type, **params):
    """Fetch the book page and return its linked article titles.

    Args:
      content_type: Must be 'text/tab-separated-values'.
      **params: Must contain `url` (str, the book page URL containing
        'wiki/') and `project` (str, used as the API host name).

    Returns:
      UTF-8 encoded bytes with one title per line, spaces replaced by
      underscores, duplicates removed, in order of first appearance.

    Raises:
      Wp1FatalSelectionError: If a parameter is missing or malformed, the API
        responds with an error status, or the response does not contain the
        page's wikitext.
    """
    if content_type != 'text/tab-separated-values':
      raise Wp1FatalSelectionError('Unrecognized content type')
    if 'url' not in params:
      raise Wp1FatalSelectionError('Missing required param: url')
    if 'project' not in params:
      raise Wp1FatalSelectionError('Missing required param: project')

    if not isinstance(params['url'], str):
      raise Wp1FatalSelectionError('Param `url` was not str')
    if not isinstance(params['project'], str):
      raise Wp1FatalSelectionError('Param `project` was not str')

    # Everything after the first 'wiki/' is the page title. partition() keeps
    # titles that themselves contain 'wiki/' intact and lets a missing marker
    # be reported as a selection error instead of an IndexError.
    _, marker, book_name = params['url'].partition('wiki/')
    if not marker or not book_name:
      raise Wp1FatalSelectionError(
          'Param `url` did not contain a book title after wiki/')

    # The title comes from a URL path, so it may already be percent-encoded.
    # Decode it, then re-encode for use as a query-string value so characters
    # such as '&', '+' or '#' cannot alter the API request.
    encoded_name = urllib.parse.quote(urllib.parse.unquote(book_name),
                                      safe='/:')
    final_url = (
        'https://%s/w/api.php?'
        'action=query&prop=revisions&rvprop=content&format=json&rvslots=main'
        '&titles=%s' % (params['project'], encoded_name))

    resp = requests.get(final_url, headers={'User-Agent': WP1_USER_AGENT})
    try:
      resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
      logger.exception('Error status received from Wikipedia API')
      raise Wp1FatalSelectionError(
          'Error status received from Wikipedia API') from e

    # A missing page (or any unexpected payload) lacks these keys; surface
    # that as a selection error rather than a raw KeyError/IndexError.
    try:
      pages = resp.json()['query']['pages']
      page = list(pages.values())[0]
      wikitext = page['revisions'][0]['slots']['main']['*']
    except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
      raise Wp1FatalSelectionError(
          'Could not read book contents from Wikipedia API response') from e

    parsed = mwparserfromhell.parse(wikitext)
    seen = set()
    titles = []
    for link in parsed.filter_wikilinks():
      # Use the link target only, so '[[Target|label]]' yields 'Target'.
      title = str(link.title).strip().replace(' ', '_')
      if title and title not in seen:
        seen.add(title)
        titles.append(title)

    return '\n'.join(titles).encode('utf-8')

  def validate(self, **params):
    """Check the builder parameters without contacting the network.

    Args:
      **params: Expected to contain `url` and `project`.

    Returns:
      A 3-tuple whose last element is a list of error messages; it is
      ('', '', []) when no problem was found. On failure the second element
      carries the offending URL (or '' if none was given).
      NOTE(review): the first two elements presumably mirror the
      valid/invalid values convention of AbstractBuilder - confirm.
    """
    if 'url' not in params:
      return ('', '', ['Missing URL parameter'])

    if 'project' not in params:
      return ('', params['url'], ['Missing project parameter'])

    url = params['url']

    if params['project'] not in url:
      parsed_url = urllib.parse.urlparse(url)
      return ('', url, [
          'The domain of your URL does not match your '
          'selected project (project is: %s, URL has: %s)' %
          (params['project'], parsed_url.netloc)
      ])

    if not validators.url(url):
      return ('', url, ['That doesn\'t look like a valid URL.'])

    if 'wiki/' not in url:
      return ('', url, ['Valid book urls include /wiki/.'])

    return ('', '', [])
168 changes: 168 additions & 0 deletions wp1/selection/models/book_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
from unittest.mock import MagicMock, patch

from wp1.base_db_test import BaseWpOneDbTest, get_first_selection
from wp1.exceptions import Wp1FatalSelectionError
from wp1.models.wp10.builder import Builder
from wp1.selection.models.book import Builder as BookBuilder

import requests


class BookBuilderTest(BaseWpOneDbTest):
  """Tests for the book selection builder's build() and validate()."""

  # Canned MediaWiki API response for a saved book page. The wikitext lists
  # every article twice (plus one extra) to exercise de-duplication.
  mock_book_response = {
      'batchcomplete': "",
      'query': {
          'pages': {
              74370036: {
                  'pageid':
                      74370036,
                  'ns':
                      2,
                  'title':
                      "User:Audiodude/Books/test",
                  'revisions': [{
                      'slots': {
                          'main': {
                              'contentmodel':
                                  "wikitext",
                              'contentformat':
                                  "text/x-wiki",
                              '*':
                                  """{{saved book
|title=
|subtitle=
|cover-image=
|cover-color=}}

:[[Katrina Kaif]]
:[[Hindi]]
:[[Kaizad Gustad]]
:[[List of awards and nominations received by Katrina Kaif]]
:[[Screen Awards]]
:[[Zee Cine Awards]]
:[[Filmfare Awards]]
:[[Katrina Kaif]]
:[[Hindi]]
:[[Kaizad Gustad]]
:[[List of awards and nominations received by Katrina Kaif]]
:[[Screen Awards]]
:[[Zee Cine Awards]]
:[[Filmfare Awards]]
:[[John Smith (explorer)]]"""
                          }
                      }
                  }]
              }
          }
      }
  }

  def setUp(self):
    super().setUp()
    self.s3 = MagicMock()
    self.builder_model = Builder(
        b_id=b'1a-2b-3c-4d',
        b_name=b'Book Builder',
        b_user_id=1234,
        b_project=b'en.wikipedia.fake',
        b_model=b'wp1.selection.models.book',
        b_params=
        '{"url":"https://en.wikipedia.fake/wiki/User:Audiodude/Books/test"}')
    self.builder = BookBuilder()

  @patch('wp1.selection.models.book.requests')
  def test_materialize(self, mock_requests):
    mock_response = MagicMock()
    mock_response.json.return_value = self.mock_book_response
    mock_requests.get.return_value = mock_response

    self.builder.materialize(self.s3, self.wp10db, self.builder_model,
                             'text/tab-separated-values', 1)
    actual = get_first_selection(self.wp10db)
    self.assertEqual(actual.s_content_type, b'text/tab-separated-values')
    self.assertEqual(actual.s_builder_id, b'1a-2b-3c-4d')

  @patch('wp1.selection.models.book.requests')
  def test_build(self, mock_requests):
    mock_response = MagicMock()
    mock_response.json.return_value = self.mock_book_response
    mock_requests.get.return_value = mock_response

    actual = self.builder.build(
        'text/tab-separated-values',
        url='https://en.wikipedia.fake/wiki/User:Audiodude/Books/test',
        project='en.wikipedia.fake')
    self.assertEqual(
        b'Katrina_Kaif\nHindi\nKaizad_Gustad\n'
        b'List_of_awards_and_nominations_received_by_Katrina_Kaif\nScreen_Awards\n'
        b'Zee_Cine_Awards\nFilmfare_Awards\nJohn_Smith_(explorer)', actual)

  def test_build_wrong_content_type(self):
    with self.assertRaises(Wp1FatalSelectionError):
      self.builder.build(
          None,
          url='https://en.wikipedia.fake/wiki/User:Audiodude/Books/test',
          project='en.wikipedia.fake')

  def test_build_missing_url(self):
    with self.assertRaises(Wp1FatalSelectionError):
      self.builder.build('text/tab-separated-values')

  def test_build_url_not_str(self):
    with self.assertRaises(Wp1FatalSelectionError):
      self.builder.build(
          'text/tab-separated-values',
          url=['https://en.wikipedia.fake/wiki/User:Audiodude/Books/test'],
          project='en.wikipedia.fake')

  @patch('wp1.selection.models.book.requests')
  def test_build_proper_api_call(self, mock_requests):
    mock_response = MagicMock()
    mock_response.json.return_value = self.mock_book_response
    mock_requests.get.return_value = mock_response

    self.builder.build(
        'text/tab-separated-values',
        url='https://en.wikipedia.fake/wiki/User:Audiodude/Books/test',
        project='en.wikipedia.fake')
    # NOTE(review): the address in the expected User-Agent below looks like it
    # was redacted when this page was captured; it must equal WP1_USER_AGENT.
    mock_requests.get.assert_called_with(
        'https://en.wikipedia.fake/w/api.php?'
        'action=query&prop=revisions&rvprop=content&format=json&rvslots=main'
        '&titles=User:Audiodude/Books/test',
        headers={
            'User-Agent': 'WP 1.0 bot 1.0.0/Audiodude <[email protected]>'
        })

  @patch('wp1.selection.models.book.requests.get')
  def test_build_non_200(self, mock_requests_get):
    mock_response = MagicMock()
    mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError
    mock_requests_get.return_value = mock_response

    with self.assertRaises(Wp1FatalSelectionError):
      self.builder.build(
          'text/tab-separated-values',
          url='https://en.wikipedia.fake/wiki/User:Audiodude/Books/test',
          project='en.wikipedia.fake')

  def test_validate_missing_url(self):
    # validate() accepts keyword params only, so pass just the project.
    actual = self.builder.validate(project='en.wikipedia.fake')
    self.assertEqual(('', '', ['Missing URL parameter']), actual)

  # Previously also named test_validate_missing_url, which shadowed the test
  # above so that it never ran.
  def test_validate_missing_project(self):
    actual = self.builder.validate(
        url='https://en.wikipedia.fake/wiki/User:Audiodude/Books/test',)
    self.assertEqual(
        ('', 'https://en.wikipedia.fake/wiki/User:Audiodude/Books/test',
         ['Missing project parameter']), actual)

  def test_validate_project_mismatch(self):
    actual = self.builder.validate(
        url='https://fr.wikipedia.fake/wiki/User:Audiodude/Books/test',
        project='en.wikipedia.fake')
    self.assertEqual(
        ('', 'https://fr.wikipedia.fake/wiki/User:Audiodude/Books/test', [
            'The domain of your URL does not match your '
            'selected project (project is: en.wikipedia.fake, URL has: fr.wikipedia.fake)'
        ]), actual)