diff --git a/article/models.py b/article/models.py index 69adbceb..a9ca8785 100755 --- a/article/models.py +++ b/article/models.py @@ -267,21 +267,6 @@ def get_or_create( except cls.DoesNotExist: return cls.create(pid_v3=pid_v3, user=user) - # @classmethod - # def get_or_create(cls, doi, pid_v2, fundings, user): - # try: - # return cls.objects.get(doi__in=doi, pid_v2=pid_v2) - # except cls.DoesNotExist: - # article = cls() - # article.pid_v2 = pid_v2 - # article.creator = user - # article.save() - # article.doi.set(doi) - # if fundings: - # for funding in fundings: - # article.fundings.add(funding) - # return article - def set_date_pub(self, dates): if dates: self.pub_date_day = dates.get("day") diff --git a/article/tasks.py b/article/tasks.py index 1fc6eaa0..4b7ea6c1 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -5,6 +5,7 @@ from django.db.models import Q, Count from django.contrib.auth import get_user_model from django.utils.translation import gettext as _ +from django.db.models import Subquery from article.models import Article, ArticleFormat from article.sources import xmlsps @@ -42,54 +43,46 @@ def load_article(self, user_id=None, username=None, file_path=None, v3=None): xmlsps.load_article(user, file_path=file_path, v3=v3) -def _items_to_load_article(from_date, force_update): +def _items_to_load_article(from_date): if from_date: try: from_date = datetime.strptime(from_date, "%Y-%m-%d") except Exception: from_date = None if not from_date: - # obtém a última atualização de Article - try: - article = Article.objects.filter( - ~Q(valid=True) - ).order_by("-updated").first() - if not article: - article = Article.objects.filter(valid=True).order_by("-updated").first() - if article: - from_date = article.updated - except Article.DoesNotExist: + # Obtém a data do último artigo válido + last_valid_article = Article.objects.all().order_by("-updated").first() + if last_valid_article: + from_date = last_valid_article.updated + else: from_date = datetime(1900, 1, 1) - if not from_date: - from_date = datetime(1900, 1, 1) - items = PidProviderXML.public_items(from_date) - if force_update: - yield from items for item in items: - try: - article = Article.objects.get( - ~Q(valid=True), - pid_v3=item.v3, - updated__lt=item.updated, - created__lt=item.created, - ) - if article: - yield item - except Article.DoesNotExist: - yield item + yield item + + +def items_to_load_article_with_valid_false(): + # Obtém os objetos PidProviderXMl onde o campo pid_v3 de article e v3 possuem o mesmo valor + articles = Article.objects.filter(valid=False).values("pid_v3") + items = PidProviderXML.objects.filter(v3__in=Subquery(articles)) + for item in items: + yield item @celery_app.task(bind=True, name=_("load_articles")) def load_articles( - self, user_id=None, username=None, from_date=None, force_update=False + self, user_id=None, username=None, from_date=None, load_invalid_articles=False, force_update=False ): try: user = _get_user(self.request, username, user_id) - - for item in _items_to_load_article(from_date, force_update): + if load_invalid_articles: + generator_articles = items_to_load_article_with_valid_false() + else: + generator_articles = _items_to_load_article(from_date) + + for item in generator_articles: try: load_article.apply_async( kwargs={ @@ -270,9 +263,9 @@ def remove_duplicate_articles(pid_v3=None): ids_to_exclude = [] try: if pid_v3: - duplicates = Article.objects.filter(pid_v3=pid_v3).values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1) + duplicates = Article.objects.filter(pid_v3=pid_v3).values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1, valid=False) else: - duplicates = Article.objects.values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1) + duplicates = Article.objects.values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1, valid=False) for duplicate in duplicates: article_ids = Article.objects.filter( pid_v3=duplicate["pid_v3"]