Skip to content

Commit

Permalink
Merge branch 'hotfix/24.09.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
mfraezz committed Nov 14, 2024
2 parents b5e2794 + a57467f commit 723d4bd
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 58 deletions.
48 changes: 18 additions & 30 deletions osf/management/commands/recatalog_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,6 @@ def _recatalog_all(queryset, chunk_size):
recatalog(queryset, start_id=0, chunk_count=int(9e9), chunk_size=chunk_size)


def _recatalog_datacite_custom_types(chunk_size):
    """Recatalog items with a datacite custom type.

    Recatalogs all preprints, then, for each of ``Registration``, ``Node``
    and ``OsfStorageFile``, the objects that have a guid metadata record
    with a non-empty ``resource_type_general``.

    :param chunk_size: passed through unchanged to ``_recatalog_all``
        for every queryset
    """
    logger.info('recataloguing items with datacite custom type...')
    # all preprints
    _recatalog_all(Preprint.objects, chunk_size)
    # objects with custom resource_type_general
    # (a tuple rather than a set literal, so the models are processed -- and
    # logged -- in the same order on every run)
    for _model in (Registration, Node, OsfStorageFile):
        # the two `.exclude` calls stay chained: combining them into a single
        # `.exclude(a, b)` would exclude only rows matching both conditions
        _queryset = (
            _model.objects
            .exclude(guids__metadata_record__isnull=True)
            .exclude(guids__metadata_record__resource_type_general='')
        )
        _recatalog_all(_queryset, chunk_size)
    logger.info('done recataloguing items with datacite custom type!')


class Command(BaseCommand):
def add_arguments(self, parser):
type_group = parser.add_mutually_exclusive_group(required=True)
Expand Down Expand Up @@ -103,14 +88,6 @@ def add_arguments(self, parser):
action='store_true',
help='recatalog metadata for users',
)
type_group.add_argument(
'--datacite-custom-types',
action='store_true',
help='''recatalog metadata for items with a specific datacite type,
including all preprints and items with custom resource_type_general
(may be slow for lack of database indexes)
''',
)

provider_group = parser.add_mutually_exclusive_group()
provider_group.add_argument(
Expand Down Expand Up @@ -144,6 +121,11 @@ def add_arguments(self, parser):
default=int(9e9),
help='maximum number of chunks (default all/enough/lots)',
)
parser.add_argument(
'--also-decatalog',
action='store_true',
help='also remove private and deleted items from the catalog',
)

def handle(self, *args, **options):
pls_all_types = options['all_types']
Expand All @@ -156,13 +138,7 @@ def handle(self, *args, **options):
start_id = options['start_id']
chunk_size = options['chunk_size']
chunk_count = options['chunk_count']
datacite_custom_types = options['datacite_custom_types']

if datacite_custom_types: # temporary arg for datacite 4.5 migration
assert not start_id, 'oh no, cannot resume with `--datacite-custom-types`'
assert not provider_ids, 'oh no, cannot filter providers with `--datacite-custom-types`'
_recatalog_datacite_custom_types(chunk_size)
return # end
also_decatalog = options['also_decatalog']

if pls_all_types:
assert not start_id, 'choose a specific type to resume with --start-id'
Expand All @@ -185,4 +161,16 @@ def handle(self, *args, **options):
_queryset = _queryset.filter(
provider__in=AbstractProvider.objects.filter(_id__in=provider_ids),
)
if not also_decatalog:
if provided_model is OsfStorageFile:
_queryset = _queryset.filter(deleted__isnull=True)
elif provided_model is OSFUser:
_queryset = _queryset.filter(
deleted__isnull=True,
is_active=True,
).exclude(allow_indexing=False)
elif provided_model is Preprint:
_queryset = _queryset.filter(is_public=True, is_published=True, deleted__isnull=True)
else:
_queryset = _queryset.filter(is_public=True, deleted__isnull=True)
recatalog(_queryset, start_id, chunk_count, chunk_size)
68 changes: 40 additions & 28 deletions osf_tests/management_commands/test_recatalog_metadata.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import datetime
import pytest
from unittest import mock
from operator import attrgetter
import random

from django.core.management import call_command

from osf.models.metadata import GuidMetadataRecord
from osf_tests.factories import (
PreprintProviderFactory,
PreprintFactory,
ProjectFactory,
RegistrationProviderFactory,
RegistrationFactory,
UserFactory,
)


Expand Down Expand Up @@ -41,18 +41,15 @@ def registration_provider(self):
@pytest.fixture
def registrations(self, registration_provider):
return sorted_by_id([
RegistrationFactory(provider=registration_provider)
RegistrationFactory(provider=registration_provider, is_public=True)
for _ in range(7)
])

@pytest.fixture
def projects(self, registrations):
return sorted_by_id([
ProjectFactory()
ProjectFactory(is_public=True)
for _ in range(7)
] + [
registration.registered_from
for registration in registrations
])

@pytest.fixture
Expand All @@ -79,19 +76,21 @@ def users(self, preprints, registrations, projects):
])))

@pytest.fixture
def items_with_custom_datacite_type(self, preprints, registrations, projects, files):
_nonpreprint_sample = [
random.choice(_items)
for _items in (registrations, projects, files)
def decatalog_items(self, registrations):
_user = UserFactory(allow_indexing=False)
_registration = RegistrationFactory(is_public=False, creator=_user)
_implicit_projects = [
_registration.registered_from,
*(_reg.registered_from for _reg in registrations),
]
return [
_user,
_registration,
*_implicit_projects,
PreprintFactory(is_published=False, creator=_user),
ProjectFactory(is_public=False, creator=_user),
ProjectFactory(deleted=datetime.datetime.now(), creator=_user),
]
for _item in _nonpreprint_sample:
_guid_record = GuidMetadataRecord.objects.for_guid(_item)
_guid_record.resource_type_general = 'BookChapter' # datacite resourceTypeGeneral value
_guid_record.save()
return {
*preprints, # every preprint has datacite type "Preprint"
*_nonpreprint_sample,
}

def test_recatalog_metadata(
self,
Expand All @@ -103,8 +102,14 @@ def test_recatalog_metadata(
projects,
files,
users,
items_with_custom_datacite_type,
decatalog_items,
):
def _actual_osfids() -> set[str]:
return {
_call[-1]['kwargs']['guid']
for _call in mock_update_share_task.apply_async.mock_calls
}

# test preprints
call_command(
'recatalog_metadata',
Expand Down Expand Up @@ -183,17 +188,24 @@ def test_recatalog_metadata(

mock_update_share_task.reset_mock()

# datacite custom types
# all types
_all_public_items = [*preprints, *registrations, *projects, *files, *users]
call_command(
'recatalog_metadata',
'--all-types',
)
_expected_osfids = set(_iter_osfids(_all_public_items))
assert _expected_osfids == _actual_osfids()

# also decatalog private/deleted items
_all_items = [*_all_public_items, *decatalog_items]
call_command(
'recatalog_metadata',
'--datacite-custom-types',
'--all-types',
'--also-decatalog',
)
_expected_osfids = set(_iter_osfids(items_with_custom_datacite_type))
_actual_osfids = {
_call[-1]['kwargs']['guid']
for _call in mock_update_share_task.apply_async.mock_calls
}
assert _expected_osfids == _actual_osfids
_expected_osfids = set(_iter_osfids(_all_items))
assert _expected_osfids == _actual_osfids()


###
Expand Down

0 comments on commit 723d4bd

Please sign in to comment.