From 996d3a65e38af0fdf6d59bc67df74d91baecd9b3 Mon Sep 17 00:00:00 2001 From: ndu Date: Thu, 12 Dec 2024 07:24:03 +0100 Subject: [PATCH] chore(feat): add fixes to relatedArticles logic --- ...ticle_article_related_articles_and_more.py | 24 +++- server/apps/research/models/article.py | 42 +++---- .../serializers/article_serializer.py | 39 +++--- server/apps/research/tasks.py | 29 ++++- server/apps/research/tests.py | 115 ++++++++++++++++++ 5 files changed, 201 insertions(+), 48 deletions(-) diff --git a/server/apps/research/migrations/0015_relatedarticle_article_related_articles_and_more.py b/server/apps/research/migrations/0015_relatedarticle_article_related_articles_and_more.py index 6d6e248..e8dc4b9 100644 --- a/server/apps/research/migrations/0015_relatedarticle_article_related_articles_and_more.py +++ b/server/apps/research/migrations/0015_relatedarticle_article_related_articles_and_more.py @@ -16,18 +16,34 @@ class Migration(migrations.Migration): fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('created_at', models.DateTimeField(auto_now_add=True)), - ('from_article', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related_from', to='research.article')), - ('to_article', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related_to', to='research.article')), + ('from_article', models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name='related_from', + to='research.article', + )), + ('to_article', models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name='related_to', + to='research.article', + )), ], ), migrations.AddField( model_name='article', name='related_articles', - field=models.ManyToManyField(blank=True, related_name='referenced_by', through='research.RelatedArticle', to='research.article'), + field=models.ManyToManyField( + blank=True, + related_name='referenced_by', + through='research.RelatedArticle', + to='research.article', + ), ), migrations.AddConstraint( model_name='relatedarticle', - constraint=models.CheckConstraint(check=models.Q(('from_article', models.F('to_article')), _negated=True), name='prevent_self_reference'), + constraint=models.CheckConstraint( + check=~models.Q(from_article=models.F('to_article')), + name='prevent_self_reference', + ), ), migrations.AlterUniqueTogether( name='relatedarticle', diff --git a/server/apps/research/models/article.py b/server/apps/research/models/article.py index dabef43..88c3108 100644 --- a/server/apps/research/models/article.py +++ b/server/apps/research/models/article.py @@ -151,12 +151,6 @@ def save(self, *args, **kwargs): if self.content: self.build_table_of_contents() - if ( - self.scheduled_publish_time - and self.status == "draft" - and timezone.now() >= self.scheduled_publish_time - ): - self.status = "ready" with transaction.atomic(): super().save(*args, **kwargs) @@ -196,7 +190,6 @@ def title_update(self): return original.title != self.title return False - class RelatedArticle(models.Model): """Through model for related articles to prevent circular references.""" @@ -214,38 +207,39 @@ class Meta: models.CheckConstraint( check=~models.Q(from_article=models.F("to_article")), name="prevent_self_reference", - ) + ), ] def clean(self): + if not self.from_article.pk: + return + # Prevent direct circular references - if ( - self.from_article.pk - and RelatedArticle.objects.filter( - from_article=self.to_article, to_article=self.from_article - ).exists() - ): + if RelatedArticle.objects.filter( + from_article=self.to_article, + to_article=self.from_article + ).exists(): raise ValidationError("Circular references detected.") - # Maximum of 3 related articles - if ( - self.from_article.pk - and RelatedArticle.objects.filter(from_article=self.from_article).count() - >= 3 - ): + # Maximum of 3 related articles (only check for new records) + if not self.pk and RelatedArticle.objects.filter( + from_article=self.from_article + ).count() >= 3: raise ValidationError("Maximum of 3 related articles allowed.") def save(self, *args, **kwargs): with transaction.atomic(): - # Only acquire lock if from_article exists in database if self.from_article.pk: - RelatedArticle.objects.select_for_update().filter( + # Lock all related records for this from_article + locked_relations = RelatedArticle.objects.select_for_update().filter( from_article=self.from_article - ).exists() + ) + # Force evaluation of the queryset to acquire the lock + list(locked_relations) + self.clean() super().save(*args, **kwargs) - class ArticleSlugHistory(models.Model): """Model to track historical slugs for articles.""" diff --git a/server/apps/research/serializers/article_serializer.py b/server/apps/research/serializers/article_serializer.py index 451d60c..c72c1c8 100644 --- a/server/apps/research/serializers/article_serializer.py +++ b/server/apps/research/serializers/article_serializer.py @@ -57,44 +57,53 @@ def validate_related_article_ids(self, value): """Validate related articles.""" if len(value) > 3: raise serializers.ValidationError("You can only have a maximum of 3 related articles.") - + # Check for self-reference - instance = self.instance - if instance and instance.id in [article.id for article in value]: + article_id = self.instance.id if self.instance else None + if article_id is None: # During creation, we check the request data for article ID + request = self.context.get('request') + if request and hasattr(request, 'data'): + article_id = request.data.get('id') + + if article_id and article_id in [article.id for article in value]: raise serializers.ValidationError("An article cannot be related to itself.") - + return value + - def create(self, validated_data: dict) -> Article: - """Create a new article instance.""" + def create(self, validated_data): + """Create a new Article instance.""" request = self.context.get('request') authors = validated_data.pop('authors', []) categories = validated_data.pop('categories', []) related_article_ids = validated_data.pop('related_article_ids', []) try: + # Assign default author if none provided if not authors and request and hasattr(request, 'user'): user_author = Author.objects.filter(user=request.user).first() if user_author: authors = [user_author] - # Create and save the article first + # Create the article instance article = Article.objects.create(**validated_data) + # Associate authors and categories if authors: article.authors.set(authors) if categories: article.categories.set(categories) - - # Handle related articles after the article is created - for related_article in related_article_ids: - RelatedArticle.objects.create( - from_article=article, - to_article=related_article - ) + + # Bulk create related articles + related_articles = [ + RelatedArticle(from_article=article, to_article=related_article) + for related_article in related_article_ids + ] + RelatedArticle.objects.bulk_create(related_articles) return article - except Exception as e: + + except Exception as e: raise serializers.ValidationError(f"Error creating article: {str(e)}") diff --git a/server/apps/research/tasks.py b/server/apps/research/tasks.py index 34d25b0..9318d8e 100644 --- a/server/apps/research/tasks.py +++ b/server/apps/research/tasks.py @@ -1,11 +1,30 @@ from celery import shared_task from django.utils import timezone +from django.db import transaction from .models import Article +import logging + +logger = logging.getLogger(__name__) # TODO: Implement Querying the Articles in chunks in case of very large dataset -@shared_task +@shared_task(name='apps.research.tasks.publish_scheduled_articles') def publish_scheduled_articles(): - """Publish articles that are scheduled to be published.""" - now = timezone.now() - articles_to_publish = Article.objects.filter(status='draft', scheduled_publish_time__lte=now) - articles_to_publish.update(status='ready') \ No newline at end of file + """ + Update status of draft articles that have reached their scheduled publish time + """ + try: + with transaction.atomic(): + updated = Article.objects.filter( + status='draft', + scheduled_publish_time__isnull=False, + scheduled_publish_time__lte=timezone.now() + ).update(status='ready') + + if updated: + logger.info(f"Updated {updated} articles to ready status") + + return updated + + except Exception as e: + logger.error(f"Error publishing scheduled articles: {e}") + raise \ No newline at end of file diff --git a/server/apps/research/tests.py b/server/apps/research/tests.py index fb27fc1..165edf6 100644 --- a/server/apps/research/tests.py +++ b/server/apps/research/tests.py @@ -1,6 +1,10 @@ from django.test import TestCase from django.contrib.auth.models import User from django.utils.text import slugify +from django.core.exceptions import ValidationError +from django.db.utils import IntegrityError +from django.db import transaction +from .models import Article, RelatedArticle from .models import Article from datetime import datetime, timedelta from django.utils import timezone @@ -98,3 +102,114 @@ def test_increment_views(self): article.save() article.refresh_from_db() self.assertEqual(article.views, 1) + +class RelatedArticleModelTest(TestCase): + def setUp(self): + # Create test articles without author field + self.article1 = Article.objects.create( + title='Test Article 1', + content='Content 1', + status='ready' + ) + self.article2 = Article.objects.create( + title='Test Article 2', + content='Content 2', + status='ready' + ) + self.article3 = Article.objects.create( + title='Test Article 3', + content='Content 3', + status='ready' + ) + self.article4 = Article.objects.create( + title='Test Article 4', + content='Content 4', + status='ready' + ) + + def test_prevent_self_reference(self): + """Test that an article cannot be related to itself""" + with self.assertRaises(IntegrityError): + with transaction.atomic(): + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article1 + ) + + def test_prevent_circular_reference(self): + """Test that circular references are prevented""" + # Create first relationship + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article2 + ) + + # Attempt to create circular reference + with self.assertRaises(ValidationError): + RelatedArticle.objects.create( + from_article=self.article2, + to_article=self.article1 + ) + + def test_maximum_related_articles(self): + """Test that maximum of 3 related articles is enforced""" + # Create three related articles + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article2 + ) + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article3 + ) + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article4 + ) + + # Create fifth article and attempt to add it as fourth relation + article5 = Article.objects.create( + title='Test Article 5', + content='Content 5', + status='ready' + ) + + # Attempt to add fourth related article + with self.assertRaises(ValidationError): + RelatedArticle.objects.create( + from_article=self.article1, + to_article=article5 + ) + + def test_unique_relationships(self): + """Test that duplicate relationships are prevented""" + # Create first relationship + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article2 + ) + + # Attempt to create duplicate relationship + with self.assertRaises(IntegrityError): + with transaction.atomic(): + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article2 + ) + + def test_get_related_articles(self): + """Test the get_related_articles method""" + # Create related articles + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article2 + ) + RelatedArticle.objects.create( + from_article=self.article1, + to_article=self.article3 + ) + + related = self.article1.get_related_articles() + self.assertEqual(related.count(), 2) + self.assertIn(self.article2, related) + self.assertIn(self.article3, related) \ No newline at end of file