Skip to content

Commit

Permalink
Merge pull request #1617 from ResearchHub/revisions
Browse files Browse the repository at this point in the history
Paper revisions
  • Loading branch information
yattias authored Jun 5, 2024
2 parents 133c4a6 + 46a6a30 commit c3b6330
Show file tree
Hide file tree
Showing 8 changed files with 385 additions and 2 deletions.
359 changes: 359 additions & 0 deletions src/paper/migrations/0127_historicalpaper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,359 @@
# Generated by Django 4.2.13 on 2024-06-04 17:36

from django.conf import settings
import django.contrib.postgres.search
import django.core.validators
from django.db import migrations, models
import django.db.models.deletion
import simple_history.models


class Migration(migrations.Migration):
# Creates the "HistoricalPaper" model: a history table whose rows carry a copy
# of the paper columns plus history_* bookkeeping columns. The model derives
# from simple_history.models.HistoricalChanges (see `bases` at the bottom).
# NOTE(review): this file is marked "Generated by Django"; presumably any
# change should be made on the model and the migration regenerated rather than
# edited by hand -- confirm with the team's migration policy.

# Migrations that must be applied first: the apps targeted by the foreign keys
# below (summary, researchhub_document, the swappable user model) and the
# previous migration of the "paper" app.
dependencies = [
("summary", "0014_alter_summary_summary"),
("researchhub_document", "0058_researchhubunifieddocument_topics"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("paper", "0126_authorship_delete_workauthorship"),
]

operations = [
migrations.CreateModel(
name="HistoricalPaper",
fields=[
# "id" is a plain indexed integer here, NOT the primary key; the primary
# key of this table is "history_id" (declared further down).
(
"id",
models.IntegerField(
auto_created=True, blank=True, db_index=True, verbose_name="ID"
),
),
("created_date", models.DateTimeField(blank=True, editable=False)),
("updated_date", models.DateTimeField(blank=True, editable=False)),
("score", models.IntegerField(default=0)),
# --- Visibility / moderation flags ---
(
"is_public",
models.BooleanField(
default=True, help_text="Hides the paper from the public."
),
),
(
"is_removed",
models.BooleanField(
default=False,
help_text="Hides the paper because it is not allowed.",
),
),
# NOTE(review): the help_text below is identical to the one on "is_removed";
# it looks copy-pasted. If it should differ, fix it on the model and
# regenerate -- do not edit the string here.
(
"is_removed_by_user",
models.BooleanField(
default=False,
help_text="Hides the paper because it is not allowed.",
),
),
(
"is_pdf_removed_by_moderator",
models.BooleanField(
default=False,
help_text="Hides the PDF because it infringes Copyright.",
),
),
("bullet_low_quality", models.BooleanField(default=False)),
("summary_low_quality", models.BooleanField(default=False)),
# --- Counters ---
("discussion_count", models.IntegerField(db_index=True, default=0)),
("views", models.IntegerField(default=0)),
("downloads", models.IntegerField(default=0)),
("citations", models.IntegerField(default=0)),
("open_alex_raw_json", models.JSONField(blank=True, null=True)),
("automated_bounty_created", models.BooleanField(default=False)),
# --- File references ---
# These three are declared as TextField (with a max_length), not FileField;
# "file" additionally keeps a validator restricting the extension to "pdf".
# NOTE(review): per the Django docs, max_length on a TextField is not
# enforced at the model or database level -- confirm that is acceptable.
(
"file",
models.TextField(
blank=True,
default=None,
max_length=512,
null=True,
validators=[
django.core.validators.FileExtensionValidator(["pdf"])
],
),
),
(
"pdf_file_extract",
models.TextField(
blank=True, default=None, max_length=512, null=True
),
),
(
"edited_file_extract",
models.TextField(
blank=True, default=None, max_length=512, null=True
),
),
(
"file_created_location",
models.CharField(
blank=True,
choices=[("PROGRESS", "Progress")],
default=None,
max_length=255,
null=True,
),
),
# --- Source / open-access metadata ---
("retrieved_from_external_source", models.BooleanField(default=False)),
(
"is_open_access",
models.BooleanField(blank=True, default=None, null=True),
),
(
"oa_status",
models.CharField(blank=True, default=None, max_length=8, null=True),
),
(
"external_source",
models.CharField(
blank=True, default=None, max_length=255, null=True
),
),
(
"paper_type",
models.CharField(
choices=[
("REGULAR", "REGULAR"),
("PRE_REGISTRATION", "PRE_REGISTRATION"),
],
default="REGULAR",
max_length=32,
),
),
(
"completeness",
models.CharField(
choices=[
("COMPLETE", "COMPLETE"),
("PARTIAL", "PARTIAL"),
("INCOMPLETE", "INCOMPLETE"),
],
default="INCOMPLETE",
max_length=16,
),
),
# --- Bibliographic fields ---
("title", models.CharField(max_length=1024)),
(
"tagline",
models.CharField(
blank=True, default=None, max_length=255, null=True
),
),
(
"doi",
models.CharField(
blank=True,
db_index=True,
default=None,
max_length=255,
null=True,
),
),
("alternate_ids", models.JSONField(blank=True, default=dict)),
(
"paper_title",
models.CharField(
blank=True, default=None, max_length=1024, null=True
),
),
("paper_publish_date", models.DateField(blank=True, null=True)),
("raw_authors", models.JSONField(blank=True, null=True)),
("abstract", models.TextField(blank=True, default=None, null=True)),
(
"abstract_src",
models.TextField(
blank=True,
default=None,
help_text="\n Abstract_src is different field than abstract field.\n Abstract is legacy text field where as abstract_src field is a src field that is\n intended to be used along with different types of text editors from the frontend.\n ",
max_length=512,
null=True,
),
),
(
"abstract_src_type",
models.CharField(
choices=[
("CK_EDITOR", "CK_EDITOR"),
("DRAFT_JS", "DRAFT_JS"),
("TEXT_FIELD", "TEXT_FIELD"),
],
default="TEXT_FIELD",
help_text="Indicates which text editor was used for abstract section.",
max_length=32,
null=True,
),
),
(
"publication_type",
models.CharField(
blank=True, default=None, max_length=255, null=True
),
),
# --- URLs and licensing ---
(
"url",
models.URLField(
blank=True,
db_index=True,
default=None,
max_length=1024,
null=True,
),
),
(
"pdf_url",
models.URLField(
blank=True, default=None, max_length=1024, null=True
),
),
(
"pdf_license",
models.CharField(
blank=True, default=None, max_length=255, null=True
),
),
(
"pdf_license_url",
models.URLField(
blank=True, default=None, max_length=1024, null=True
),
),
(
"csl_item",
models.JSONField(
blank=True,
default=None,
help_text="bibliographic metadata as a single Citation Styles Language JSON item.",
null=True,
),
),
(
"oa_pdf_location",
models.JSONField(
blank=True,
default=None,
help_text="PDF availability in Unpaywall OA Location format.",
null=True,
),
),
("external_metadata", models.JSONField(blank=True, null=True)),
(
"slug",
models.SlugField(
blank=True,
help_text="Slug is automatically generated on a signal, so it is not needed in a form",
max_length=1024,
),
),
# --- PostgreSQL search-vector columns (django.contrib.postgres) ---
(
"url_svf",
django.contrib.postgres.search.SearchVectorField(
blank=True, null=True
),
),
(
"pdf_url_svf",
django.contrib.postgres.search.SearchVectorField(
blank=True, null=True
),
),
(
"doi_svf",
django.contrib.postgres.search.SearchVectorField(
blank=True, null=True
),
),
# --- External identifiers (all indexed, all nullable) ---
("work_type", models.CharField(blank=True, max_length=100, null=True)),
(
"openalex_id",
models.CharField(
blank=True, db_index=True, max_length=255, null=True
),
),
(
"pubmed_id",
models.CharField(
blank=True, db_index=True, max_length=255, null=True
),
),
(
"pubmed_central_id",
models.CharField(
blank=True, db_index=True, max_length=255, null=True
),
),
(
"mag_id",
models.CharField(
blank=True, db_index=True, max_length=255, null=True
),
),
("is_retracted", models.BooleanField(blank=True, null=True)),
("language", models.CharField(blank=True, max_length=10, null=True)),
# --- History bookkeeping columns ---
# history_id is the real primary key of this table; history_type holds one
# of "+" (Created), "~" (Changed) or "-" (Deleted).
("history_id", models.AutoField(primary_key=True, serialize=False)),
("history_date", models.DateTimeField(db_index=True)),
("history_change_reason", models.CharField(max_length=100, null=True)),
(
"history_type",
models.CharField(
choices=[("+", "Created"), ("~", "Changed"), ("-", "Deleted")],
max_length=1,
),
),
# history_user is set to NULL when the referenced user row is deleted
# (on_delete=SET_NULL), so the history row itself is kept.
(
"history_user",
models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="+",
to=settings.AUTH_USER_MODEL,
),
),
# --- Copied relations ---
# The three foreign keys below are declared with db_constraint=False and
# on_delete=DO_NOTHING: no database-level FK constraint is created and Django
# takes no action on this table when the referenced row is deleted.
# related_name="+" disables the reverse accessor on the target model.
(
"summary",
models.ForeignKey(
blank=True,
db_constraint=False,
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to="summary.summary",
),
),
(
"unified_document",
models.ForeignKey(
blank=True,
db_constraint=False,
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to="researchhub_document.researchhubunifieddocument",
),
),
(
"uploaded_by",
models.ForeignKey(
blank=True,
db_constraint=False,
help_text="RH User account that submitted this paper. NOTE: user didnt necessarily had to be the author",
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to=settings.AUTH_USER_MODEL,
),
),
],
# Default ordering is newest history row first (descending history_date,
# ties broken by descending history_id); get_latest_by uses the same pair.
options={
"verbose_name": "historical paper",
"verbose_name_plural": "historical papers",
"ordering": ("-history_date", "-history_id"),
"get_latest_by": ("history_date", "history_id"),
},
bases=(simple_history.models.HistoricalChanges, models.Model),
),
]
4 changes: 3 additions & 1 deletion src/paper/openalex_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from django.core.exceptions import ValidationError
from django.db import IntegrityError, transaction
from django.db.models import Q
from django.utils.timezone import now
from simple_history.utils import bulk_update_with_history

import utils.sentry as sentry
from user.related_models.author_contribution_summary_model import (
Expand Down Expand Up @@ -162,7 +164,7 @@ def process_openalex_works(works):
fields_to_update = [*PAPER_FIELDS_ALLOWED_TO_UPDATE]
papers_to_update = [paper for paper, _ in update_papers]
try:
Paper.objects.bulk_update(papers_to_update, fields_to_update)
bulk_update_with_history(papers_to_update, Paper, fields_to_update)
except Exception as e:
sentry.log_error(e, message="Failed to bulk update papers")

Expand Down
2 changes: 2 additions & 0 deletions src/paper/related_models/paper_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from django_elasticsearch_dsl_drf.wrappers import dict_to_obj
from manubot.cite.doi import get_doi_csl_item
from manubot.cite.unpaywall import Unpaywall
from simple_history.models import HistoricalRecords

import utils.sentry as sentry
from discussion.reaction_models import AbstractGenericReactionModel, Vote
Expand Down Expand Up @@ -59,6 +60,7 @@


class Paper(AbstractGenericReactionModel):
history = HistoricalRecords()
FIELDS_TO_EXCLUDE = {"url_svf", "pdf_url_svf", "doi_svf"}

REGULAR = "REGULAR"
Expand Down
Loading

0 comments on commit c3b6330

Please sign in to comment.