Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Default tag ordering to the primary key #892

Merged
merged 22 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ Changelog
(Unreleased)
~~~~~~~~~~~~

* By default, order tag items on instances by the primary key. This generally means that they will be ordered by "creation date" for the tag item.
The previous behavior for this was that by default tag items were not ordered. In practice tag items often end up ordered by creation date anyways, just due to how databases work, but this was not a guarantee.
If you wish to have the old behavior, set ``ordering=[]`` to your ``TaggableManager`` instance.
We believe that this should not cause a noticable performance change, and the number of queries involved should not change.

5.0.1 (2023-10-26)
~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ exclude = tests*
[flake8]
# E501: line too long
ignore = E501
exclude = .venv,.git,.tox
exclude = .venv,.git,.tox,.direnv
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

been meaning to add this for a while....


[isort]
profile = black
102 changes: 67 additions & 35 deletions taggit/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,17 @@ def __init__(self, through, model, instance, prefetch_cache_name, ordering=None)
self.model = model
self.instance = instance
self.prefetch_cache_name = prefetch_cache_name
if ordering:

if ordering is not None:
self.ordering = ordering
elif instance:
# When working off an instance (i.e. instance.tags.all())
# we default to ordering by the PK of the through table
#
# (This ordering does not apply for queries at the model
# level, i.e. Model.tags.all())
related_name = self.through.tag.field.related_query_name()
self.ordering = [f"{related_name}__pk"]
else:
self.ordering = []

Expand Down Expand Up @@ -206,59 +215,82 @@ def add(self, *tags, through_defaults=None, tag_kwargs=None, **kwargs):
def _to_tag_model_instances(self, tags, tag_kwargs):
"""
Takes an iterable containing either strings, tag objects, or a mixture
of both and returns set of tag objects.
of both and returns a list of tag objects while preserving order.
"""
db = router.db_for_write(self.through, instance=self.instance)

str_tags = set()
tag_objs = set()

for t in tags:
if isinstance(t, self.through.tag_model()):
tag_objs.add(t)
elif isinstance(t, str):
str_tags.add(t)
else:
raise ValueError(
"Cannot add {} ({}). Expected {} or str.".format(
t, type(t), type(self.through.tag_model())
)
)

case_insensitive = getattr(settings, "TAGGIT_CASE_INSENSITIVE", False)
manager = self.through.tag_model()._default_manager.using(db)

# tags can be instances of our through models, or strings

tag_strs = [tag for tag in tags if isinstance(tag, str)]
# This map from tag names to tags lets us handle deduplication
# without doing extra queries along the way, all while relying on
# data we were going to pull out of the database anyways
# existing_tags_for_str[tag_name] = tag
existing_tags_for_str = {}
# we are going to first try and lookup existing tags (in a single query)
if case_insensitive:
# Some databases can do case-insensitive comparison with IN, which
# would be faster, but we can't rely on it or easily detect it.
# would be faster, but we can't rely on it or easily detect it
existing = []
tags_to_create = []

for name in str_tags:
for name in tag_strs:
try:
tag = manager.get(name__iexact=name, **tag_kwargs)
existing.append(tag)
existing_tags_for_str[name] = tag
except self.through.tag_model().DoesNotExist:
tags_to_create.append(name)
else:
# If str_tags has 0 elements Django actually optimizes that to not
# do a query. Malcolm is very smart.
existing = manager.filter(name__in=str_tags, **tag_kwargs)

tags_to_create = str_tags - {t.name for t in existing}
# Django is smart enough to not actually query if tag_strs is empty
# but importantly, this is a single query for all potential tags
existing = manager.filter(name__in=tag_strs, **tag_kwargs)
# we're going to end up doing this query anyways, so here is fine
for t in existing:
existing_tags_for_str[t.name] = t

result = []
# this set is used for deduplicating tags
seen_tags = set()
for t in tags:
if isinstance(t, self.through.tag_model()):
if t in seen_tags:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpicky but could be simpler here to do:

if not t in seen_tags:
    seen_tags.add(t)
    result.append(t)

continue
seen_tags.add(t)
result.append(t)
elif isinstance(t, str):
# we are using a string, so either the tag exists (and we have the lookup)
# or we need to create the value

existing_tag = existing_tags_for_str.get(t, None)
if existing_tag is None:
# we need to create a tag
# (we use get_or_create to handle potential races)
if case_insensitive:
lookup = {"name__iexact": t, **tag_kwargs}
else:
lookup = {"name": t, **tag_kwargs}
existing_tag, _ = manager.get_or_create(
**lookup, defaults={"name": t}
)
# we now have an existing tag for this string

tag_objs.update(existing)
# confirm if we've seen it or not (this is where case insensitivity comes
# into play)
if existing_tag in seen_tags:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

likewise we can eliminate the continue here

continue

for new_tag in tags_to_create:
if case_insensitive:
lookup = {"name__iexact": new_tag, **tag_kwargs}
seen_tags.add(existing_tag)
result.append(existing_tag)
else:
lookup = {"name": new_tag, **tag_kwargs}

tag, create = manager.get_or_create(**lookup, defaults={"name": new_tag})
tag_objs.add(tag)
raise ValueError(
"Cannot add {} ({}). Expected {} or str.".format(
t, type(t), type(self.through.tag_model())
)
)

return tag_objs
return result

@require_instance_manager
def names(self):
Expand Down
8 changes: 8 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
from tests.models import TestModel


class TestTaggableManager(TestCase):
def test_duplicates(self):
sample_obj = TestModel.objects.create()
sample_obj.tags.set(["green", "green"])
desired_result = ["green"]
self.assertEqual(desired_result, [tag.name for tag in sample_obj.tags.all()])


class TestSlugification(TestCase):
def test_unicode_slugs(self):
"""
Expand Down
25 changes: 25 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
TaggedTrackedFood,
TenantModel,
TenantTag,
TestModel,
TrackedTag,
UUIDFood,
UUIDHousePet,
Expand Down Expand Up @@ -1362,6 +1363,30 @@ def test_added_tags_are_returned_ordered(self):
list(obj.tags.values_list("name", flat=True)),
)

def test_ordered_by_creation_by_default(self):
"""
Test that the tags are set and returned exactly as they are provided in .set
"""
str_tags = ["red", "green", "delicious"]
sample_obj = TestModel.objects.create()

sample_obj.tags.set(str_tags)
self.assertEqual(str_tags, [tag.name for tag in sample_obj.tags.all()])

def test_default_ordering_on_mixed_operation(self):
"""
Test that our default ordering is preserved even when mixing strings
and objects
"""
tag_obj = Tag.objects.create(name="mcintosh")
tag_obj_2 = Tag.objects.create(name="fuji")
str_tags = ["red", "green", "delicious"]
sample_obj = TestModel.objects.create()

sample_obj.tags.set([tag_obj_2] + str_tags + [tag_obj])
expected_results = [tag_obj_2.name] + str_tags + [tag_obj.name]
self.assertEqual(expected_results, [tag.name for tag in sample_obj.tags.all()])


class PendingMigrationsTests(TestCase):
def test_taggit_has_no_pending_migrations(self):
Expand Down
Loading