Skip to content

Commit

Permalink
SDC support (#48)
Browse files Browse the repository at this point in the history
Add basic support for updating Structured data as part of the file upload.

Support in this patch is for uploading such data (as part of the
make_info json output or as per-mediafile .sdc files). There is no support
for the creation of the sdc data itself.

Logic of SDC upload, and expected input format, is described at [pywikibot-sdc](https://github.com/lokal-profil/pywikibot-sdc).

Also includes:
* Adding .svg and .png as allowed file types
* Bumping pywikibot (and bumping it significantly for python >= 3.6).
* Version bump
  • Loading branch information
lokal-profil authored Mar 16, 2021
1 parent 9f85d9a commit 5b08905
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 18 deletions.
3 changes: 0 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ matrix:
- python: 3.9
env:
- TOX_ENV=travis
- python: 3.10-dev
env:
- TOX_ENV=travis
install:
- pip install tox-travis
script:
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,13 @@ Below follows a list of common errors and what to do about them (when known).
Smaller files can often be uploaded unchunked (slow).
3. `stashfailed: Cannot upload this file because Internet Explorer would detect it as "$1", which is a disallowed and potentially dangerous file type`
No clue yet. See [T147720](https://phabricator.wikimedia.org/T147720)

## Structured data on Commons

Basic support for [Structured data on Commons](https://commons.wikimedia.org/wiki/Commons:Structured_data)
is offered by passing `expect_sdc` to the uploader (`-expect_sdc` on the
command line). The data must then be provided either as a `<basename>.sdc`
file (where `<basename>` is shared with the media file and with the `.info`
text file holding the associated file description page) or, if the data is
provided as a make_info json file, under the `sdc` key of each entry.

The expected format of the data is described at [pywikibot-sdc](https://github.com/lokal-profil/pywikibot-sdc).
57 changes: 50 additions & 7 deletions batchupload/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
# -*- coding: utf-8 -*-
"""Tool for uploading a single or multiple files from disc or url."""
from __future__ import unicode_literals

import os
import pywikibot
import pywikibotsdc.sdc_upload as sdc_upload
from pywikibotsdc.sdc_exception import SdcException

import batchupload.common as common
import batchupload.prepUpload as prepUpload
from batchupload.make_info import make_info_page
import os
import pywikibot

FILE_EXTS = ('.tif', '.jpg', '.tiff', '.jpeg', '.wav', '.svg', '.png')
URL_PROTOCOLS = ('http', 'https') # @todo: extend with supported protocols
Expand Down Expand Up @@ -38,7 +42,7 @@ def allow_warnings(warning_list):
return False
return True

result = {'warning': None, 'error': None, 'log': ''}
result = {'warning': None, 'error': None, 'log': '', 'file_page': None}

# handle warnings to ignore
ignore_warnings = False
Expand Down Expand Up @@ -95,6 +99,7 @@ def allow_warnings(warning_list):
result['warning'])
elif success:
result['log'] = '%s: success' % file_page.title()
result['file_page'] = file_page
else:
result['error'] = "No warning/error but '%s' didn't upload?" % \
file_page.title()
Expand All @@ -105,7 +110,8 @@ def allow_warnings(warning_list):


def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None,
verbose=False, test=False, target_site=None, chunked=True):
verbose=False, test=False, target_site=None, chunked=True,
expect_sdc=False):
"""
Upload all matched media files in the supplied directory.
Expand All @@ -124,6 +130,7 @@ def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None,
@param target_site: pywikibot.Site to which file should be uploaded,
defaults to Commons.
@param chunked: Whether to do chunked uploading or not.
@param expect_sdc: set to True to also look for corresponding SDC-data.
"""
# set defaults unless overridden
file_exts = file_exts or FILE_EXTS
Expand Down Expand Up @@ -156,12 +163,19 @@ def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None,
break
# verify that there is a matching info file
info_file = '%s.info' % os.path.splitext(f)[0]
sdc_file = '%s.sdc' % os.path.splitext(f)[0]
base_name = os.path.basename(f)
base_info_name = os.path.basename(info_file)
base_sdc_name = os.path.basename(sdc_file)
if not os.path.exists(info_file):
flog.write_w_timestamp(
'{0}: Found multimedia file without info'.format(base_name))
continue
if expect_sdc and not os.path.exists(sdc_file):
flog.write_w_timestamp(
'{0}: Found multimedia file missing the expected '
'sdc data'.format(base_name))
continue

# prepare upload
txt = common.open_and_read_file(info_file)
Expand All @@ -175,6 +189,14 @@ def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None,

result = upload_single_file(base_name, f, txt, target_site,
upload_if_badprefix=True, chunked=chunked)
if expect_sdc and result['file_page']:
sdc_data = common.open_and_read_file(sdc_file, as_json=True)
try:
sdc_upload.upload_single_sdc_data(
result['file_page'], sdc_data)
except SdcException as e:
result[e.level] = e.data
result['log'] += '\n\t{}'.format(e.log)

target_dir = None
if result.get('error'):
Expand All @@ -189,14 +211,17 @@ def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None,
flog.write_w_timestamp(result.get('log'))
os.rename(f, os.path.join(target_dir, base_name))
os.rename(info_file, os.path.join(target_dir, base_info_name))
if expect_sdc:
os.rename(sdc_file, os.path.join(target_dir, base_sdc_name))
counter += 1

pywikibot.output(flog.close_and_confirm())


def up_all_from_url(info_path, cutoff=None, target='upload_logs',
file_exts=None, verbose=False, test=False,
target_site=None, only=None, skip=None):
target_site=None, only=None, skip=None,
expect_sdc=False):
"""
Upload all media files provided as urls in a make_info json file.
Expand All @@ -213,6 +238,7 @@ def up_all_from_url(info_path, cutoff=None, target='upload_logs',
defaults to Commons.
@param only: list of urls to upload, if provided all others will be skipped
@param skip: list of urls to skip, all others will be uploaded
@param expect_sdc: set to True to also look for corresponding SDC-data.
"""
# set defaults unless overridden
file_exts = file_exts or FILE_EXTS
Expand Down Expand Up @@ -271,6 +297,11 @@ def up_all_from_url(info_path, cutoff=None, target='upload_logs',
'{url}: Found url missing the output filename'.format(
url=url))
continue
elif expect_sdc and not data['sdc']:
flog.write_w_timestamp(
'{url}: Found url missing the expected sdc data'.format(
url=url))
continue

# prepare upload
txt = make_info_page(data)
Expand All @@ -287,6 +318,13 @@ def up_all_from_url(info_path, cutoff=None, target='upload_logs',

result = upload_single_file(
filename, url, txt, target_site, upload_if_badprefix=True)
if expect_sdc and result['file_page']:
try:
sdc_upload.upload_single_sdc_data(
result['file_page'], data['sdc'])
except SdcException as e:
result[e.level] = e.data
result['log'] += '\n\t{}'.format(e.log)
if result.get('error'):
logs['error'].write(url)
elif result.get('warning'):
Expand Down Expand Up @@ -359,6 +397,8 @@ def main(*args):
'(optional)\n'
'\t-nochunk Whether to turn off chunked uploading, this is slow '
'and does not support files > 100Mb (optional, type:FILES only)\n'
'\t-expect_sdc Whether to expect corresponding Structured Data files '
'or entries in the make_info output file.'
'\t-only:PATH to file containing list of urls to upload, skipping all '
'others. One entry per line. (optional, type:URL only)\n'
'\t-skip:PATH to file containing list of urls to skip, uploading all '
Expand All @@ -371,6 +411,7 @@ def main(*args):
cutoff = None
in_path = None
test = False
expect_sdc = False
confirm = False
chunked = True
typ = 'files'
Expand All @@ -387,6 +428,8 @@ def main(*args):
in_path = value
elif option == '-test':
test = True
elif option == '-expect_sdc':
expect_sdc = True
elif option == '-confirm':
confirm = True
elif option == '-nochunk':
Expand All @@ -410,10 +453,10 @@ def main(*args):
if in_path:
if typ == 'files':
up_all(in_path, cutoff=cutoff, test=test, verbose=confirm,
chunked=chunked)
chunked=chunked, expect_sdc=expect_sdc)
elif typ == 'url':
up_all_from_url(in_path, cutoff=cutoff, only=only, skip=skip,
test=test, verbose=confirm)
test=test, verbose=confirm, expect_sdc=expect_sdc)
else:
pywikibot.output(usage)

Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
future
mwparserfromhell
setuptools>50.0.0; python_version >= '3.6'
pywikibot==5.4.0; python_version >= '3.6'
pywikibot==3.0.20200703; python_version< '3.6'
pywikibot==5.5.0; python_version >= '3.6'
pywikibot==3.0.20200703; python_version < '3.6'
git+https://github.com/lokal-profil/[email protected]
15 changes: 9 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from setuptools import setup
version = '0.2.5'
version = '0.3.1'
repo = 'BatchUploadTools'

setup(
Expand All @@ -11,8 +11,9 @@
'future',
'mwparserfromhell',
'setuptools>50.0.0; python_version >= "3.6"',
'pywikibot==5.4.0; python_version >= "3.6"',
'pywikibot==3.0.20200703; python_version < "3.6"'
'pywikibot==5.5.0; python_version >= "3.6"',
'pywikibot==3.0.20200703; python_version < "3.6"',
'pywikibot-sdc @ https://api.github.com/repos/lokal-profil/pywikibot-sdc/tarball/0.1.0'
],
version=version,
description='Framework for mass-importing images to Wikimedia Commons.',
Expand All @@ -22,8 +23,10 @@
download_url='https://github.com/lokal-profil/' + repo + '/tarball/' + version,
keywords=['Wikimedia Commons', 'Wikimedia', 'Commons', 'pywikibot', 'API'],
classifiers=[
'Programming Language :: Python :: 2.7'
'Programming Language :: Python :: 3.6'
'Programming Language :: Python :: 3.7'
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9'
],
)

0 comments on commit 5b08905

Please sign in to comment.