Skip to content

Commit

Permalink
Merge pull request #446 from Wikidata/dev
Browse files Browse the repository at this point in the history
Low-level claims deletion & maintenance
  • Loading branch information
marfox authored Oct 15, 2021
2 parents 461be60 + c547ed6 commit 7d04642
Show file tree
Hide file tree
Showing 13 changed files with 67 additions and 82 deletions.
2 changes: 1 addition & 1 deletion .conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,5 +185,5 @@ dependencies:
- pip:
- mlens==0.2.3
- mwparserfromhell==0.6.3
- pywikibot==6.5.0
- pywikibot==6.6.1
prefix: /srv/miniconda3/envs/soweego
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ repos:
rev: 5.9.3
hooks:
- id: isort
args: [--skip=soweego/cli.py]
# See https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html
args: [--skip=soweego/cli.py, --multi-line=5, --line-length=88]
- repo: https://github.com/myint/autoflake
rev: v1.4
hooks:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# soweego: link Wikidata to large catalogs
[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/Wikidata/soweego/master.svg)](https://results.pre-commit.ci/latest/github/Wikidata/soweego/master)
[![Documentation Status](https://readthedocs.org/projects/soweego/badge/?version=latest)](https://soweego.readthedocs.io/en/latest/?badge=latest)
[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
[![License](https://img.shields.io/github/license/Wikidata/soweego.svg)](https://www.gnu.org/licenses/gpl-3.0.html)
Expand Down
6 changes: 3 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
soweego: link Wikidata to large catalogs
========================================

.. image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white
:target: https://github.com/pre-commit/pre-commit
:alt: pre-commit enabled
.. image:: https://results.pre-commit.ci/badge/github/Wikidata/soweego/master.svg
:target: https://results.pre-commit.ci/latest/github/Wikidata/soweego/master
:alt: pre-commit CI status

.. image:: https://readthedocs.org/projects/soweego/badge/?version=latest
:target: https://soweego.readthedocs.io/en/latest/?badge=latest
Expand Down
55 changes: 40 additions & 15 deletions scripts/delete_claims.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@
The required input comes from a SPARQL query like:
SELECT DISTINCT ?stmt WHERE {
?item p:P6262 ?stmt .
?stmt prov:wasDerivedFrom ?ref .
?item p:P3192 ?stmt .
?stmt ps:P3192 ?val ;
prov:wasDerivedFrom ?ref .
?ref pr:P887 wd:Q1266546 ;
pr:P248 wd:Q14005 .
FILTER (CONTAINS(?val, " ")) .
}
Just replace the PID in `p:P6262` (Fandom article ID) with the relevant one,
and the QID in `wd:Q14005` (MusicBrainz) with the target catalog.
Just replace the ``P3192`` (Last.fm ID) PID with the relevant one,
and the QID in ``wd:Q14005`` (MusicBrainz) with the target catalog.
The ``FILTER`` clause is optional: you can use it to look for specific values,
see https://www.wikidata.org/wiki/Wikidata:SPARQL_tutorial#FILTER
N.B.: we only look for references that hold both a (based on heuristic, record linkage)
and a (stated in, catalog) property-value pair
Expand All @@ -24,7 +28,9 @@
__license__ = 'GPL-3.0'
__copyright__ = 'Copyleft 2021, Hjfocs'

import json
import sys
from time import sleep

import requests

Expand All @@ -33,11 +39,16 @@


def main(args):
if len(args) != 3:
print(f'Usage: python {__file__} GUIDS_CSV EDIT_SUMMARY')
if len(args) != 4:
print(f'Usage: python {__file__} GUIDS_CSV LOGIN_CREDENTIALS_JSON EDIT_SUMMARY')
return 1

file_in, summary = args[1], args[2]
file_in, creds_in, summary = args[1:]

with open(creds_in) as fin:
creds = json.load(fin)
user = creds['WIKIDATA_API_USER']
pw = creds['WIKIDATA_API_PASSWORD']
guids = set()

with open(file_in) as fin:
Expand All @@ -50,27 +61,41 @@ def main(args):

session = requests.Session()

# Get login token
params = {'action': 'query', 'meta': 'tokens', 'type': 'login', 'format': 'json'}
resp = session.get(WIKIDATA_API_URL, params=params)
login_token = resp.json()['query']['tokens']['logintoken']

# Log in
data = {'action': 'login', 'lgname': user, 'lgpassword': pw, 'lgtoken': login_token, 'format': 'json'}
resp = session.post(WIKIDATA_API_URL, data=data)

# Get edit token
params = {'action': 'query', 'meta': 'tokens', 'format': 'json'}
r = session.get(WIKIDATA_API_URL, params=params)
token = r.json()['query']['tokens']['csrftoken']
resp = session.get(WIKIDATA_API_URL, params=params)
edit_token = resp.json()['query']['tokens']['csrftoken']

# Fire a POST for each GUID
for guid in guids:
data = {
'action': 'wbremoveclaims',
'format': 'json',
'token': token,
'token': edit_token,
'bot': True,
'claim': guid,
'summary': summary,
'format': 'json'
}
r = session.post(WIKIDATA_API_URL, data=data)
resp = session.post(WIKIDATA_API_URL, data=data)
if resp.ok:
print(resp.json())

sleep(9)

if r.ok:
print(r.json())
# Log out
data = {'action': 'logout', 'token': edit_token, 'format': 'json'}
resp = session.post(WIKIDATA_API_URL, data=data)

return 0
return 0


if __name__ == '__main__':
Expand Down
30 changes: 8 additions & 22 deletions soweego/commons/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,18 @@
from soweego.importer.models.base_link_entity import BaseLinkEntity
from soweego.importer.models.base_nlp_entity import BaseNlpEntity
from soweego.importer.models.discogs_entity import (
DiscogsGroupEntity,
DiscogsGroupLinkEntity,
DiscogsGroupNlpEntity,
DiscogsMasterArtistRelationship,
DiscogsMasterEntity,
DiscogsMusicianEntity,
DiscogsMusicianLinkEntity,
DiscogsMusicianNlpEntity,
DiscogsGroupEntity, DiscogsGroupLinkEntity, DiscogsGroupNlpEntity,
DiscogsMasterArtistRelationship, DiscogsMasterEntity, DiscogsMusicianEntity,
DiscogsMusicianLinkEntity, DiscogsMusicianNlpEntity
)
from soweego.importer.models.imdb_entity import (
IMDbActorEntity,
IMDbDirectorEntity,
IMDbMusicianEntity,
IMDbProducerEntity,
IMDbTitleEntity,
IMDbTitleNameRelationship,
IMDbWriterEntity,
IMDbActorEntity, IMDbDirectorEntity, IMDbMusicianEntity, IMDbProducerEntity,
IMDbTitleEntity, IMDbTitleNameRelationship, IMDbWriterEntity
)
from soweego.importer.models.musicbrainz_entity import (
MusicBrainzArtistEntity,
MusicBrainzArtistLinkEntity,
MusicBrainzBandEntity,
MusicBrainzBandLinkEntity,
MusicBrainzReleaseGroupArtistRelationship,
MusicBrainzReleaseGroupEntity,
MusicBrainzReleaseGroupLinkEntity,
MusicBrainzArtistEntity, MusicBrainzArtistLinkEntity, MusicBrainzBandEntity,
MusicBrainzBandLinkEntity, MusicBrainzReleaseGroupArtistRelationship,
MusicBrainzReleaseGroupEntity, MusicBrainzReleaseGroupLinkEntity
)
from soweego.wikidata import vocabulary

Expand Down
12 changes: 3 additions & 9 deletions soweego/importer/discogs_dump_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,9 @@
from soweego.importer.base_dump_extractor import BaseDumpExtractor
from soweego.importer.models.base_link_entity import BaseLinkEntity
from soweego.importer.models.discogs_entity import (
DiscogsArtistEntity,
DiscogsGroupEntity,
DiscogsGroupLinkEntity,
DiscogsGroupNlpEntity,
DiscogsMasterArtistRelationship,
DiscogsMasterEntity,
DiscogsMusicianEntity,
DiscogsMusicianLinkEntity,
DiscogsMusicianNlpEntity,
DiscogsArtistEntity, DiscogsGroupEntity, DiscogsGroupLinkEntity,
DiscogsGroupNlpEntity, DiscogsMasterArtistRelationship, DiscogsMasterEntity,
DiscogsMusicianEntity, DiscogsMusicianLinkEntity, DiscogsMusicianNlpEntity
)

LOGGER = logging.getLogger(__name__)
Expand Down
4 changes: 1 addition & 3 deletions soweego/importer/models/base_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@

from sqlalchemy import Column, Date, Index, Integer, String, Text
from sqlalchemy.ext.declarative import (
AbstractConcreteBase,
declarative_base,
declared_attr,
AbstractConcreteBase, declarative_base, declared_attr
)

BASE = declarative_base()
Expand Down
4 changes: 1 addition & 3 deletions soweego/importer/models/base_link_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@

from sqlalchemy import Boolean, Column, Index, Integer, String, Text
from sqlalchemy.ext.declarative import (
AbstractConcreteBase,
declarative_base,
declared_attr,
AbstractConcreteBase, declarative_base, declared_attr
)

BASE = declarative_base()
Expand Down
4 changes: 1 addition & 3 deletions soweego/importer/models/base_nlp_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@

from sqlalchemy import Column, Index, Integer, String, Text
from sqlalchemy.ext.declarative import (
AbstractConcreteBase,
declarative_base,
declared_attr,
AbstractConcreteBase, declarative_base, declared_attr
)

BASE = declarative_base()
Expand Down
12 changes: 4 additions & 8 deletions soweego/importer/musicbrainz_dump_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,10 @@
from soweego.importer.base_dump_extractor import BaseDumpExtractor
from soweego.importer.models.base_entity import BaseEntity
from soweego.importer.models.musicbrainz_entity import (
MusicBrainzArtistBandRelationship,
MusicBrainzArtistEntity,
MusicBrainzArtistLinkEntity,
MusicBrainzBandEntity,
MusicBrainzBandLinkEntity,
MusicBrainzReleaseGroupArtistRelationship,
MusicBrainzReleaseGroupEntity,
MusicBrainzReleaseGroupLinkEntity,
MusicBrainzArtistBandRelationship, MusicBrainzArtistEntity,
MusicBrainzArtistLinkEntity, MusicBrainzBandEntity, MusicBrainzBandLinkEntity,
MusicBrainzReleaseGroupArtistRelationship, MusicBrainzReleaseGroupEntity,
MusicBrainzReleaseGroupLinkEntity
)
from soweego.wikidata.sparql_queries import external_id_pids_and_urls

Expand Down
7 changes: 1 addition & 6 deletions soweego/linker/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,7 @@
from tqdm import tqdm

from soweego.commons import (
constants,
data_gathering,
keys,
target_database,
text_utils,
url_utils,
constants, data_gathering, keys, target_database, text_utils, url_utils
)
from soweego.commons.utils import count_num_lines_in_file
from soweego.importer.models.base_entity import BaseEntity
Expand Down
8 changes: 1 addition & 7 deletions soweego/linker/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,7 @@
from sqlalchemy.orm import Query

from soweego.commons import (
constants,
data_gathering,
keys,
target_database,
text_utils,
url_utils,
utils,
constants, data_gathering, keys, target_database, text_utils, url_utils, utils
)
from soweego.commons.db_manager import DBManager
from soweego.commons.logging import log_dataframe_info
Expand Down

0 comments on commit 7d04642

Please sign in to comment.