From 5b089057a0baa982c11f31b521fc2b2890fcc01f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Costa?= Date: Tue, 16 Mar 2021 09:19:16 +0100 Subject: [PATCH] SDC support (#48) Add basic support for updating Structured data as part of the file upload. Support in this patch is for uploading such data (as part of the make_info json output or as per-mediafile .sdc files). There is no support for the creation of the sdc data itself. Logic of SDC upload, and expected input format, is described at [pywikibot-sdc](https://github.com/lokal-profil/pywikibot-sdc). Also includes: * Adding .svg and .png as allowed file types * Bumping pywikibot (and bumping it significantly for python >= 3.6). * Version bump --- .travis.yml | 3 --- README.md | 10 ++++++++ batchupload/uploader.py | 57 ++++++++++++++++++++++++++++++++++++----- requirements.txt | 5 ++-- setup.py | 15 ++++++----- 5 files changed, 72 insertions(+), 18 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3f23447..6db63b8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,9 +15,6 @@ matrix: - python: 3.9 env: - TOX_ENV=travis - - python: 3.10-dev - env: - - TOX_ENV=travis install: - pip install tox-travis script: diff --git a/README.md b/README.md index 8575844..ea2873d 100644 --- a/README.md +++ b/README.md @@ -98,3 +98,13 @@ Below follows a list of of common errors and what to do about them (when known). Smaller files can often be uploaded unchunked (slow). 3. `stashfailed: Cannot upload this file because Internet Explorer would detect it as "$1", which is a disallowed and potentially dangerous file type` No clue yet. 
See [T147720](https://phabricator.wikimedia.org/T147720) + +## Structured data on Commons + +Basic support for [Structured data on Commons](https://commons.wikimedia.org/wiki/Commons:Structured_data) +is offered by passing `expect_sdc` to the uploader and providing the data as +either a `<basename>.sdc` file (where `<basename>` is shared with the `<basename>.info` +text file holding the associated file description page) or under the `sdc`-key +if the data is provided as a make_info json file. + +The expected format of the data is described at [pywikibot-sdc](https://github.com/lokal-profil/pywikibot-sdc). diff --git a/batchupload/uploader.py b/batchupload/uploader.py index a58ef72..798ce8a 100644 --- a/batchupload/uploader.py +++ b/batchupload/uploader.py @@ -2,11 +2,15 @@ # -*- coding: utf-8 -*- """Tool for uploading a single or multiple files from disc or url.""" from __future__ import unicode_literals + +import os +import pywikibot +import pywikibotsdc.sdc_upload as sdc_upload +from pywikibotsdc.sdc_exception import SdcException + import batchupload.common as common import batchupload.prepUpload as prepUpload from batchupload.make_info import make_info_page -import os -import pywikibot FILE_EXTS = ('.tif', '.jpg', '.tiff', '.jpeg', '.wav', '.svg', '.png') URL_PROTOCOLS = ('http', 'https') # @todo: extend with supported protocols @@ -38,7 +42,7 @@ def allow_warnings(warning_list): return False return True - result = {'warning': None, 'error': None, 'log': ''} + result = {'warning': None, 'error': None, 'log': '', 'file_page': None} # handle warnings to ignore ignore_warnings = False @@ -95,6 +99,7 @@ def allow_warnings(warning_list): result['warning']) elif success: result['log'] = '%s: success' % file_page.title() + result['file_page'] = file_page else: result['error'] = "No warning/error but '%s' didn't upload?" 
% \ file_page.title() @@ -105,7 +110,8 @@ def allow_warnings(warning_list): def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None, - verbose=False, test=False, target_site=None, chunked=True): + verbose=False, test=False, target_site=None, chunked=True, + expect_sdc=False): """ Upload all matched media files in the supplied directory. @@ -124,6 +130,7 @@ def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None, @param target_site: pywikibot.Site to which file should be uploaded, defaults to Commons. @param chunked: Whether to do chunked uploading or not. + @param expect_sdc: set to True to also look for corresponding SDC-data. """ # set defaults unless overridden file_exts = file_exts or FILE_EXTS @@ -156,12 +163,19 @@ def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None, break # verify that there is a matching info file info_file = '%s.info' % os.path.splitext(f)[0] + sdc_file = '%s.sdc' % os.path.splitext(f)[0] base_name = os.path.basename(f) base_info_name = os.path.basename(info_file) + base_sdc_name = os.path.basename(sdc_file) if not os.path.exists(info_file): flog.write_w_timestamp( '{0}: Found multimedia file without info'.format(base_name)) continue + if expect_sdc and not os.path.exists(sdc_file): + flog.write_w_timestamp( + '{0}: Found multimedia file missing the expected ' + 'sdc data'.format(base_name)) + continue # prepare upload txt = common.open_and_read_file(info_file) @@ -175,6 +189,14 @@ def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None, result = upload_single_file(base_name, f, txt, target_site, upload_if_badprefix=True, chunked=chunked) + if expect_sdc and result['file_page']: + sdc_data = common.open_and_read_file(sdc_file, as_json=True) + try: + sdc_upload.upload_single_sdc_data( + result['file_page'], sdc_data) + except SdcException as e: + result[e.level] = e.data + result['log'] += '\n\t{}'.format(e.log) target_dir = None if result.get('error'): @@ -189,6 +211,8 @@ def up_all(in_path, 
cutoff=None, target='Uploaded', file_exts=None, flog.write_w_timestamp(result.get('log')) os.rename(f, os.path.join(target_dir, base_name)) os.rename(info_file, os.path.join(target_dir, base_info_name)) + if expect_sdc: + os.rename(sdc_file, os.path.join(target_dir, base_sdc_name)) counter += 1 pywikibot.output(flog.close_and_confirm()) @@ -196,7 +220,8 @@ def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None, def up_all_from_url(info_path, cutoff=None, target='upload_logs', file_exts=None, verbose=False, test=False, - target_site=None, only=None, skip=None): + target_site=None, only=None, skip=None, + expect_sdc=False): """ Upload all media files provided as urls in a make_info json file. @@ -213,6 +238,7 @@ def up_all_from_url(info_path, cutoff=None, target='upload_logs', defaults to Commons. @param only: list of urls to upload, if provided all others will be skipped @param skip: list of urls to skip, all others will be uploaded + @param expect_sdc: set to True to also look for corresponding SDC-data. 
""" # set defaults unless overridden file_exts = file_exts or FILE_EXTS @@ -271,6 +297,11 @@ def up_all_from_url(info_path, cutoff=None, target='upload_logs', '{url}: Found url missing the output filename'.format( url=url)) continue + elif expect_sdc and not data['sdc']: + flog.write_w_timestamp( + '{url}: Found url missing the expected sdc data'.format( + url=url)) + continue # prepare upload txt = make_info_page(data) @@ -287,6 +318,13 @@ def up_all_from_url(info_path, cutoff=None, target='upload_logs', result = upload_single_file( filename, url, txt, target_site, upload_if_badprefix=True) + if expect_sdc and result['file_page']: + try: + sdc_upload.upload_single_sdc_data( + result['file_page'], data['sdc']) + except SdcException as e: + result[e.level] = e.data + result['log'] += '\n\t{}'.format(e.log) if result.get('error'): logs['error'].write(url) elif result.get('warning'): @@ -359,6 +397,8 @@ def main(*args): '(optional)\n' '\t-nochunk Whether to turn off chunked uploading, this is slow ' 'and does not support files > 100Mb (optional, type:FILES only)\n' + '\t-expect_sdc Whether to expect corresponding Structured Data files ' + 'or entries in the make_info output file.' '\t-only:PATH to file containing list of urls to upload, skipping all ' 'others. One entry per line. 
(optional, type:URL only)\n' '\t-skip:PATH to file containing list of urls to skip, uploading all ' @@ -371,6 +411,7 @@ def main(*args): cutoff = None in_path = None test = False + expect_sdc = False confirm = False chunked = True typ = 'files' @@ -387,6 +428,8 @@ def main(*args): in_path = value elif option == '-test': test = True + elif option == '-expect_sdc': + expect_sdc = True elif option == '-confirm': confirm = True elif option == '-nochunk': @@ -410,10 +453,10 @@ def main(*args): if in_path: if typ == 'files': up_all(in_path, cutoff=cutoff, test=test, verbose=confirm, - chunked=chunked) + chunked=chunked, expect_sdc=expect_sdc) elif typ == 'url': up_all_from_url(in_path, cutoff=cutoff, only=only, skip=skip, - test=test, verbose=confirm) + test=test, verbose=confirm, expect_sdc=expect_sdc) else: pywikibot.output(usage) diff --git a/requirements.txt b/requirements.txt index 3daa90a..8722fc1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ future mwparserfromhell setuptools>50.0.0; python_version >= '3.6' -pywikibot==5.4.0; python_version >= '3.6' -pywikibot==3.0.20200703; python_version< '3.6' +pywikibot==5.5.0; python_version >= '3.6' +pywikibot==3.0.20200703; python_version < '3.6' +git+https://github.com/lokal-profil/pywikibot-sdc.git@0.1.0 diff --git a/setup.py b/setup.py index 5ea0f11..c0eb93f 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals from setuptools import setup -version = '0.2.5' +version = '0.3.1' repo = 'BatchUploadTools' setup( @@ -11,8 +11,9 @@ 'future', 'mwparserfromhell', 'setuptools>50.0.0; python_version >= "3.6"', - 'pywikibot==5.4.0; python_version >= "3.6"', - 'pywikibot==3.0.20200703; python_version < "3.6"' + 'pywikibot==5.5.0; python_version >= "3.6"', + 'pywikibot==3.0.20200703; python_version < "3.6"', + 'pywikibot-sdc @ https://api.github.com/repos/lokal-profil/pywikibot-sdc/tarball/0.1.0' ], version=version, description='Framework for 
mass-importing images to Wikimedia Commons.', @@ -22,8 +23,10 @@ download_url='https://github.com/lokal-profil/' + repo + '/tarball/' + version, keywords=['Wikimedia Commons', 'Wikimedia', 'Commons', 'pywikibot', 'API'], classifiers=[ - 'Programming Language :: Python :: 2.7' - 'Programming Language :: Python :: 3.6' - 'Programming Language :: Python :: 3.7' + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9' ], )