jazzband · rtpg · Jun 19, 2024 · Oct 10, 2023 · Oct 11, 2023 · Oct 11, 2023
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -4,6 +4,11 @@ Changelog
 (Unreleased)
 ~~~~~~~~~~~~
 
+* By default, order tag items on instances by the primary key. This generally means that they will be ordered by "creation date" for the tag item.
+  The previous behavior for this was that by default tag items were not ordered. In practice tag items often end up ordered by creation date anyways, just due to how databases work, but this was not a guarantee.
+  If you wish to have the old behavior, set ``ordering=[]`` to your ``TaggableManager`` instance.
+  We believe that this should not cause a noticable performance change, and the number of queries involved should not change.
+
 5.0.1 (2023-10-26)
 ~~~~~~~~~~~~~~~~~~
 

diff --git a/setup.cfg b/setup.cfg
@@ -41,7 +41,7 @@ exclude = tests*
 [flake8]
 # E501: line too long
 ignore = E501
-exclude = .venv,.git,.tox
+exclude = .venv,.git,.tox,.direnv
 
 [isort]
 profile = black
diff --git a/taggit/managers.py b/taggit/managers.py
@@ -65,8 +65,17 @@ def __init__(self, through, model, instance, prefetch_cache_name, ordering=None)
         self.model = model
         self.instance = instance
         self.prefetch_cache_name = prefetch_cache_name
-        if ordering:
+
+        if ordering is not None:
             self.ordering = ordering
+        elif instance:
+            # When working off an instance (i.e. instance.tags.all())
+            # we default to ordering by the PK of the through table
+            #
+            # (This ordering does not apply for queries at the model
+            #  level, i.e. Model.tags.all())
+            related_name = self.through.tag.field.related_query_name()
+            self.ordering = [f"{related_name}__pk"]
         else:
             self.ordering = []
 
@@ -206,59 +215,82 @@ def add(self, *tags, through_defaults=None, tag_kwargs=None, **kwargs):
     def _to_tag_model_instances(self, tags, tag_kwargs):
         """
         Takes an iterable containing either strings, tag objects, or a mixture
-        of both and returns set of tag objects.
+        of both and returns a list of tag objects while preserving order.
         """
         db = router.db_for_write(self.through, instance=self.instance)
 
-        str_tags = set()
-        tag_objs = set()
-
-        for t in tags:
-            if isinstance(t, self.through.tag_model()):
-                tag_objs.add(t)
-            elif isinstance(t, str):
-                str_tags.add(t)
-            else:
-                raise ValueError(
-                    "Cannot add {} ({}). Expected {} or str.".format(
-                        t, type(t), type(self.through.tag_model())
-                    )
-                )
-
         case_insensitive = getattr(settings, "TAGGIT_CASE_INSENSITIVE", False)
         manager = self.through.tag_model()._default_manager.using(db)
 
+        # tags can be instances of our through models, or strings
+
+        tag_strs = [tag for tag in tags if isinstance(tag, str)]
+        # This map from tag names to tags lets us handle deduplication
+        # without doing extra queries along the way, all while relying on
+        # data we were going to pull out of the database anyways
+        # existing_tags_for_str[tag_name] = tag
+        existing_tags_for_str = {}
+        # we are going to first try and lookup existing tags (in a single query)
         if case_insensitive:
             # Some databases can do case-insensitive comparison with IN, which
-            # would be faster, but we can't rely on it or easily detect it.
+            # would be faster, but we can't rely on it or easily detect it
             existing = []
             tags_to_create = []
-
-            for name in str_tags:
+            for name in tag_strs:
                 try:
                     tag = manager.get(name__iexact=name, **tag_kwargs)
-                    existing.append(tag)
+                    existing_tags_for_str[name] = tag
                 except self.through.tag_model().DoesNotExist:
                     tags_to_create.append(name)
         else:
-            # If str_tags has 0 elements Django actually optimizes that to not
-            # do a query.  Malcolm is very smart.
-            existing = manager.filter(name__in=str_tags, **tag_kwargs)
-
-            tags_to_create = str_tags - {t.name for t in existing}
+            # Django is smart enough to not actually query if tag_strs is empty
+            # but importantly, this is a single query for all potential tags
+            existing = manager.filter(name__in=tag_strs, **tag_kwargs)
+            # we're going to end up doing this query anyways, so here is fine
+            for t in existing:
+                existing_tags_for_str[t.name] = t
+
+        result = []
+        # this set is used for deduplicating tags
+        seen_tags = set()
+        for t in tags:
+            if isinstance(t, self.through.tag_model()):
+                if t in seen_tags:
+                    continue
+                seen_tags.add(t)
+                result.append(t)
+            elif isinstance(t, str):
+                # we are using a string, so either the tag exists (and we have the lookup)
+                # or we need to create the value
+
+                existing_tag = existing_tags_for_str.get(t, None)
+                if existing_tag is None:
+                    # we need to create a tag
+                    # (we use get_or_create to handle potential races)
+                    if case_insensitive:
+                        lookup = {"name__iexact": t, **tag_kwargs}
+                    else:
+                        lookup = {"name": t, **tag_kwargs}
+                    existing_tag, _ = manager.get_or_create(
+                        **lookup, defaults={"name": t}
+                    )
+                # we now have an existing tag for this string
 
-        tag_objs.update(existing)
+                # confirm if we've seen it or not (this is where case insensitivity comes
+                # into play)
+                if existing_tag in seen_tags:
+                    continue
 
-        for new_tag in tags_to_create:
-            if case_insensitive:
-                lookup = {"name__iexact": new_tag, **tag_kwargs}
+                seen_tags.add(existing_tag)
+                result.append(existing_tag)
             else:
-                lookup = {"name": new_tag, **tag_kwargs}
-
-            tag, create = manager.get_or_create(**lookup, defaults={"name": new_tag})
-            tag_objs.add(tag)
+                raise ValueError(
+                    "Cannot add {} ({}). Expected {} or str.".format(
+                        t, type(t), type(self.through.tag_model())
+                    )
+                )
 
-        return tag_objs
+        return result
 
     @require_instance_manager
     def names(self):

diff --git a/tests/test_models.py b/tests/test_models.py
@@ -3,6 +3,14 @@
 from tests.models import TestModel
 
 
+class TestTaggableManager(TestCase):
+    def test_duplicates(self):
+        sample_obj = TestModel.objects.create()
+        sample_obj.tags.set(["green", "green"])
+        desired_result = ["green"]
+        self.assertEqual(desired_result, [tag.name for tag in sample_obj.tags.all()])
+
+
 class TestSlugification(TestCase):
     def test_unicode_slugs(self):
         """

diff --git a/tests/tests.py b/tests/tests.py
@@ -59,6 +59,7 @@
     TaggedTrackedFood,
     TenantModel,
     TenantTag,
+    TestModel,
     TrackedTag,
     UUIDFood,
     UUIDHousePet,
@@ -1362,6 +1363,30 @@ def test_added_tags_are_returned_ordered(self):
             list(obj.tags.values_list("name", flat=True)),
         )
 
+    def test_ordered_by_creation_by_default(self):
+        """
+        Test that the tags are set and returned exactly as they are provided in .set
+        """
+        str_tags = ["red", "green", "delicious"]
+        sample_obj = TestModel.objects.create()
+
+        sample_obj.tags.set(str_tags)
+        self.assertEqual(str_tags, [tag.name for tag in sample_obj.tags.all()])
+
+    def test_default_ordering_on_mixed_operation(self):
+        """
+        Test that our default ordering is preserved even when mixing strings
+        and objects
+        """
+        tag_obj = Tag.objects.create(name="mcintosh")
+        tag_obj_2 = Tag.objects.create(name="fuji")
+        str_tags = ["red", "green", "delicious"]
+        sample_obj = TestModel.objects.create()
+
+        sample_obj.tags.set([tag_obj_2] + str_tags + [tag_obj])
+        expected_results = [tag_obj_2.name] + str_tags + [tag_obj.name]
+        self.assertEqual(expected_results, [tag.name for tag in sample_obj.tags.all()])
+
 
 class PendingMigrationsTests(TestCase):
     def test_taggit_has_no_pending_migrations(self):