Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: generate article summaries with chat gpt #203

Merged
merged 8 commits into from
Dec 17, 2024
74 changes: 74 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
amqp==5.2.0
annotated-types==0.7.0
anyio==4.4.0
asgiref==3.8.1
attrs==24.1.0
beautifulsoup4==4.12.3
billiard==4.2.0
bs4==0.0.2
celery==5.4.0
iankressin marked this conversation as resolved.
Show resolved Hide resolved
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
click-didyoumean==0.3.1
click-plugins==1.1.1
click-repl==0.3.0
cron-descriptor==1.4.3
cryptography==43.0.0
distro==1.9.0
Django==5.0.8
django-admin-interface==0.28.8
django-celery-beat==2.6.0
django-ckeditor==6.7.1
django-ckeditor-5==0.2.13
django-colorfield==0.11.0
django-cors-headers==4.4.0
django-filter==24.2
django-jazzmin==3.0.0
django-js-asset==2.2.0
django-shortcuts==1.6
django-sortedm2m==4.0.0
django-timezone-field==7.0
django-tinymce==4.1.0
djangorestframework==3.15.2
drf-spectacular==0.27.2
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
inflection==0.5.1
jiter==0.8.2
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
kombu==5.4.0
openai==1.57.2
iankressin marked this conversation as resolved.
Show resolved Hide resolved
pillow==10.4.0
prompt_toolkit==3.0.47
pycparser==2.22
pydantic==2.10.3
pydantic_core==2.27.1
pyOpenSSL==24.2.1
python-crontab==3.2.0
python-dateutil==2.9.0.post0
python-decouple==3.8
python-dotenv==1.0.1
python-slugify==8.0.4
PyYAML==6.0.1
redis==5.0.8
referencing==0.35.1
requests==2.32.3
rpds-py==0.19.1
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
sqlparse==0.5.1
text-unidecode==1.3
tqdm==4.67.1
typing_extensions==4.12.2
tzdata==2024.1
uritemplate==4.1.1
urllib3==2.2.2
vine==5.1.0
wcwidth==0.2.13
whitenoise==6.7.0
iankressin marked this conversation as resolved.
Show resolved Hide resolved
68 changes: 65 additions & 3 deletions server/apps/research/admin/article_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
from apps.research.models import Article, ArticleSlugHistory
from tinymce.widgets import TinyMCE
from .slug_history import current_slug_history
from django.conf import settings
from django.http import JsonResponse
from django.urls import path
from ..services.gpt_service import GPTService
import asyncio

class ArticleForm(forms.ModelForm):
class Meta:
Expand All @@ -21,13 +26,64 @@ def __init__(self, *args, **kwargs):
Q(pk=self.instance.pk) | Q(status='draft')
).order_by('-scheduled_publish_time')

self.fields['acknowledgement'].widget = TinyMCE(attrs={'cols': 80, 'rows': 30, 'id': "acknowledgement_richtext_field", 'placeholder': "Enter Acknowledgement here"})
self.fields['content'].widget = TinyMCE(attrs={'cols': 80, 'rows': 30, 'id': "content_richtext_field", 'placeholder': "Enter Article Content here"})
# Configure TinyMCE widgets
self.fields['acknowledgement'].widget = TinyMCE(attrs={
'cols': 80,
'rows': 30,
'id': "acknowledgement_richtext_field",
'placeholder': "Enter Acknowledgement here"
})
self.fields['content'].widget = TinyMCE(attrs={
'cols': 80,
'rows': 30,
'id': "content_richtext_field",
'placeholder': "Enter Article Content here"
})
self.fields['gpt_summary'].widget = TinyMCE(attrs={
'cols': 80,
'rows': 15,
'id': "gpt_summary_richtext_field",
'placeholder': "GPT-generated summary will appear here"
})

class ArticleAdmin(admin.ModelAdmin):
"""Admin interface for the Article model."""
form = ArticleForm

def __init__(self, model, admin_site):
    """Set up the model admin and attach the GPT helper to the instance."""
    super().__init__(model, admin_site)
    # Kept on the instance so the summary view can reuse one service object.
    summary_service = GPTService()
    self.gpt_service = summary_service

def get_urls(self):
    """Return the admin URL patterns with the summary endpoint prepended.

    The custom route is placed ahead of the patterns returned by the parent
    class so it is matched first.

    The view is wrapped in ``self.admin_site.admin_view`` so it gets the
    same login and staff-permission checks as every other admin page.
    Without the wrapper the endpoint would be callable by anonymous users,
    and each call triggers a paid OpenAI request.
    """
    urls = super().get_urls()
    custom_urls = [
        path(
            'generate-summary/',
            self.admin_site.admin_view(self.generate_summary_view),
            name='generate-summary',
        ),
    ]
    return custom_urls + urls

async def _generate_summary(self, content: str) -> str:
system_prompt = (
"You are a professional summarizer at 2077 Research. Below is an article on Ethereum technical aspects. "
"Your goal is to produce a summary that is shorter than the original content, yet detailed enough for readers "
"to fully understand the piece without needing to read the original. Your summary should:\n"
"- Provide enough depth and detail so the user gets a complete understanding of the core ideas.\n"
"- Be in HTML format, use <h3> tags for headings if needed. Avoid other heading levels.\n"
"- Minimize the use of bullet points. If you need to list items, you can, but prefer concise paragraph formatting.\n\n"
)
return await self.gpt_service.prompt(system_prompt, content)
Comment on lines +64 to +73
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Move system prompt to settings or constants

The system prompt should be moved to a settings file or constants module for better maintainability and reusability.

+# In apps/research/constants.py
+GPT_SYSTEM_PROMPT = """
+You are a professional summarizer at 2077 Research. Below is an article on Ethereum technical aspects. 
+Your goal is to produce a summary that is shorter than the original content, yet detailed enough for readers 
+to fully understand the piece without needing to read the original. Your summary should:
+- Provide enough depth and detail so the user gets a complete understanding of the core ideas.
+- Be in HTML format, use <h3> tags for headings if needed. Avoid other heading levels.
+- Minimize the use of bullet points. If you need to list items, you can, but prefer concise paragraph formatting.
+"""

# In article_admin.py
+from ..constants import GPT_SYSTEM_PROMPT

     async def _generate_summary(self, content: str) -> str:
-        system_prompt = (
-            "You are a professional summarizer at 2077 Research. Below is an article on Ethereum technical aspects. "
-            "Your goal is to produce a summary that is shorter than the original content, yet detailed enough for readers "
-            "to fully understand the piece without needing to read the original. Your summary should:\n"
-            "- Provide enough depth and detail so the user gets a complete understanding of the core ideas.\n"
-            "- Be in HTML format, use <h3> tags for headings if needed. Avoid other heading levels.\n"
-            "- Minimize the use of bullet points. If you need to list items, you can, but prefer concise paragraph formatting.\n\n"
-        )
-        return await self.gpt_service.prompt(system_prompt, content)
+        return await self.gpt_service.prompt(GPT_SYSTEM_PROMPT, content)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
async def _generate_summary(self, content: str) -> str:
system_prompt = (
"You are a professional summarizer at 2077 Research. Below is an article on Ethereum technical aspects. "
"Your goal is to produce a summary that is shorter than the original content, yet detailed enough for readers "
"to fully understand the piece without needing to read the original. Your summary should:\n"
"- Provide enough depth and detail so the user gets a complete understanding of the core ideas.\n"
"- Be in HTML format, use <h3> tags for headings if needed. Avoid other heading levels.\n"
"- Minimize the use of bullet points. If you need to list items, you can, but prefer concise paragraph formatting.\n\n"
)
return await self.gpt_service.prompt(system_prompt, content)
async def _generate_summary(self, content: str) -> str:
return await self.gpt_service.prompt(GPT_SYSTEM_PROMPT, content)


def generate_summary_view(self, request):
    """Generate a GPT summary for the article content posted by the admin UI.

    Args:
        request: The incoming HTTP request. Only ``POST`` is accepted and
            the article body is read from the ``content`` form field.

    Returns:
        JsonResponse: ``{'summary': ...}`` on success; ``{'error': ...}``
        with status 400 for a non-POST request or missing/blank content,
        and status 500 when summary generation fails.
    """
    import logging

    if request.method != 'POST':
        return JsonResponse({'error': 'Invalid request method'}, status=400)

    content = request.POST.get('content')
    # Reject missing or blank content up front; forwarding it would only
    # produce an opaque API failure reported to the user as a 500.
    if not content or not content.strip():
        return JsonResponse({'error': 'No content provided'}, status=400)

    try:
        # The service is async while this admin view is sync, so run the
        # coroutine to completion on a temporary event loop.
        gpt_summary = asyncio.run(self._generate_summary(content))
    except Exception:
        # Boundary handler: log the traceback server-side and return a
        # generic message so internal details are not leaked to the client.
        logging.getLogger(__name__).exception(
            "An error occurred while generating the summary"
        )
        return JsonResponse({'error': 'An internal error has occurred!'}, status=500)
    return JsonResponse({'summary': gpt_summary})

def current_slug_history(self, obj):
    """Delegate to the imported ``current_slug_history`` helper for ``obj``."""
    # Inside the method body the bare name resolves to the module-level
    # helper imported from ``.slug_history``, not to this method.
    history = current_slug_history(obj)
    return history
# Column/field label shown by the admin for this read-only value.
current_slug_history.short_description = 'Slug Change History'
Expand All @@ -36,7 +92,7 @@ def current_slug_history(self, obj):
('Article Details', {
'fields': [
'title', 'slug', 'authors', 'acknowledgement', 'categories',
'thumb', 'content', 'summary', 'status', 'scheduled_publish_time'
'thumb', 'content', 'summary', 'gpt_summary', 'status', 'scheduled_publish_time'
]
}),
('Related Content', {
Expand All @@ -60,6 +116,12 @@ def current_slug_history(self, obj):
readonly_fields = ('views', 'current_slug_history',)
list_editable = ('status',)

class Media:
    # Stylesheet applied to every media type on the article admin pages.
    css = {'all': ('css/article_admin.css',)}
    # Script loaded alongside the admin form.
    js = ('js/article_admin.js',)

def display_authors(self, obj):
    """Build the comma-separated string of author usernames for an article."""
    usernames = []
    for author in obj.authors.all():
        usernames.append(author.user.username)
    return ", ".join(usernames)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.0.8 on 2024-12-10 18:18

import tinymce.models
from django.db import migrations

class Migration(migrations.Migration):
    # Adds the optional ``gpt_summary`` rich-text field to ``Article``.

    # Must run after the migration that introduced ``related_articles``.
    dependencies = [('research', '0016_article_related_articles')]

    operations = [
        migrations.AddField(
            model_name='article',
            name='gpt_summary',
            field=tinymce.models.HTMLField(blank=True, null=True),
        ),
    ]
1 change: 1 addition & 0 deletions server/apps/research/models/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class Article(BaseModel):
title = models.TextField()
content = HTMLField(blank=True, null=True)
summary = models.TextField(blank=True)
# Rich-text (HTML) summary. Declared as HTMLField so the model state matches
# the migration that created the column with tinymce.models.HTMLField.
gpt_summary = HTMLField(blank=True, null=True)
acknowledgement = HTMLField(blank=True, null=True)
authors = models.ManyToManyField(Author, blank=True, related_name='articles')
slug = models.SlugField(max_length=255, blank=True, db_index=True)
Expand Down
7 changes: 4 additions & 3 deletions server/apps/research/serializers/article_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class Meta:
model = Article
fields = [
'id', 'slug', 'title', 'authors', 'thumb', 'categories', 'summary',
'acknowledgement', 'content', 'min_read', 'status', 'views',
'acknowledgement', 'content', 'min_read', 'status', 'views', 'gpt_summary',
Copy link

@coderabbitai coderabbitai bot Dec 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

The gpt_summary field exists in the Article model but with a different field type

The gpt_summary field exists in both the Article model and the migration file, but there's a discrepancy in the field type:

  • In the model: models.TextField(blank=True, null=True)
  • In the migration: tinymce.models.HTMLField(blank=True, null=True)

This inconsistency needs to be addressed to match the field types.

🔗 Analysis chain

Verify the Article model schema.

Ensure that the gpt_summary field exists in the Article model schema as referenced in both serializers.

Also applies to: 41-41


Let me gather more information about the gpt_summary field from the migration file and the complete Article model.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Description: Check if gpt_summary field exists in Article model and its migration

# Check Article model
rg -A 5 "class Article" server/apps/research/models/

# Check migrations for gpt_summary field
rg "gpt_summary" server/apps/research/migrations/

Length of output: 1180


Script:

#!/bin/bash
# Check the complete Article model definition
cat server/apps/research/models/article.py

# Check the migration file that added gpt_summary
cat server/apps/research/migrations/0017_article_gpt_summary_alter_article_summary.py

Length of output: 6230

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There still remains an issue here to be resolved

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@losndu, to resolve the remaining issue, please ensure that the gpt_summary field in the Article model and the migration have the same field type. Currently, there is a mismatch:

  • In server/apps/research/models/article.py, gpt_summary is defined as:

    gpt_summary = models.TextField(blank=True, null=True)
  • In server/apps/research/migrations/0017_article_gpt_summary_alter_article_summary.py, it is defined as:

    field=tinymce.models.HTMLField(blank=True, null=True),

To fix the inconsistency, you can update the model to use HTMLField. Here's how:

  1. Import HTMLField from tinymce.models in your model file:

    from tinymce.models import HTMLField
  2. Update the gpt_summary field in the Article model:

    gpt_summary = HTMLField(blank=True, null=True)

Alternatively, if you prefer to keep gpt_summary as a TextField, you should update the migration file to reflect TextField instead of HTMLField. However, it's generally recommended to have rich text fields like summaries use HTMLField for better content management.

After making the changes, don't forget to run:

python manage.py makemigrations
python manage.py migrate

This will create a new migration to reflect the changes and apply it to your database.

Let me know if you have any questions or need further assistance!

'created_at', 'updated_at', 'scheduled_publish_time', 'table_of_contents',
'is_sponsored', 'sponsor_color', 'sponsor_text_color', 'related_articles'
]
Expand All @@ -66,8 +66,9 @@ class Meta:
model = Article
fields = [
'title', 'slug', 'categories', 'thumb', 'content', 'summary',
'acknowledgement', 'status', 'authors', 'scheduled_publish_time',
'is_sponsored', 'sponsor_color', 'sponsor_text_color', 'related_articles'
'gpt_summary', 'acknowledgement', 'status', 'authors',
'scheduled_publish_time', 'is_sponsored', 'sponsor_color',
'sponsor_text_color', 'related_articles'
]

def validate_related_articles(self, value):
Expand Down
4 changes: 4 additions & 0 deletions server/apps/research/services/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""
Services package for the research app.
This package contains service classes that handle business logic and external API interactions.
"""
42 changes: 42 additions & 0 deletions server/apps/research/services/gpt_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from django.conf import settings
from openai import AsyncOpenAI

class GPTService:
    """Service for handling OpenAI GPT API interactions."""

    def __init__(self):
        # The client is created lazily on first use. Building it here would
        # make the OpenAI library raise when no API key is available, which
        # would break every importer of this service even though the
        # setting is optional (it defaults to None).
        self.client = None
        self.model = "gpt-3.5-turbo"
        self.max_tokens = 500

    async def prompt(self, system: str, user: str) -> str:
        """
        Send a prompt to GPT and get the response.

        Args:
            system (str): The system message that sets the behavior of the assistant
            user (str): The user's input/question

        Returns:
            str: The generated response from GPT

        Raises:
            Exception: If there's an error in the API call or if the API key is not set
        """
        import logging

        if not settings.OPENAI_API_KEY:
            raise Exception("OpenAI API key is not configured")

        # First real use: the key is known to be present, so build the client.
        if self.client is None:
            self.client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)

        try:
            completion = await self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": user},
                ],
                max_tokens=self.max_tokens,
            )
            # Access the response content directly from the completion object
            return completion.choices[0].message.content
        except Exception as e:
            # Log with traceback instead of printing to stdout, and keep the
            # original error attached as the cause of the re-raised one.
            logging.getLogger(__name__).exception("Error calling OpenAI API")
            raise Exception(f"Error calling OpenAI API: {str(e)}") from e
5 changes: 4 additions & 1 deletion server/core/config/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,4 +196,7 @@
SILENCED_SYSTEM_CHECKS = ["security.W019"]

# Tinymce API Config
TINYMCE_API_KEY = config('TINYMCE_API_KEY')
TINYMCE_API_KEY = config('TINYMCE_API_KEY')

# OpenAI API Config
OPENAI_API_KEY = config('OPENAI_API_KEY', default=None)
8 changes: 8 additions & 0 deletions server/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
amqp==5.2.0
annotated-types==0.7.0
anyio==4.4.0
asgiref==3.8.1
attrs==24.1.0
Expand All @@ -15,6 +16,7 @@ click-plugins==1.1.1
click-repl==0.3.0
cron-descriptor==1.4.3
cryptography==43.0.0
distro==1.9.0
Django==5.0.8
django-admin-interface==0.28.8
django-celery-beat==2.6.0
Expand All @@ -33,12 +35,16 @@ httpcore==1.0.5
httpx==0.27.0
idna==3.7
inflection==0.5.1
jiter==0.8.2
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
kombu==5.4.0
openai==1.57.4
pillow==10.4.0
prompt_toolkit==3.0.47
pycparser==2.22
pydantic==2.10.3
pydantic_core==2.27.1
pyOpenSSL==24.2.1
python-crontab==3.2.0
python-dateutil==2.9.0.post0
Expand All @@ -55,6 +61,8 @@ sniffio==1.3.1
soupsieve==2.6
sqlparse==0.5.1
text-unidecode==1.3
tqdm==4.67.1
typing_extensions==4.12.2
tzdata==2024.1
uritemplate==4.1.1
urllib3==2.2.2
Expand Down
28 changes: 28 additions & 0 deletions server/static/css/article_admin.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/* Summary-generation button: green background, white bold label. */
.generate-summary-btn {
background-color: #0C4B33;
color: white;
padding: 10px 15px;
border: none;
border-radius: 4px;
cursor: pointer;
font-weight: bold;
margin-left: 10px;
align-self: baseline;
}

/* Greyed-out look while the button is disabled. */
.generate-summary-btn:disabled {
background-color: #cccccc;
cursor: not-allowed;
}

/* Muted italic status text. */
.summary-status {
margin-left: 10px;
font-style: italic;
color: #666;
}

/* Column flex wrapper; presumably holds the button and status text (verify against js/article_admin.js). */
.generate-summary-container-btn {
display: flex;
flex-direction: column;
gap: 10px;
}
Loading
Loading