Merge branch 'main' into production

bookwyrm-social · Jun 6, 2024 · 2170610 · 2170610
2 parents 09e5333 + 3545a1c
commit 2170610
Show file tree

Hide file tree

Showing 127 changed files with 2,804 additions and 895 deletions.
diff --git a/.env.example b/.env.example
@@ -16,6 +16,11 @@ DEFAULT_LANGUAGE="English"
 ## Leave unset to allow all hosts
 # ALLOWED_HOSTS="localhost,127.0.0.1,[::1]"
 
+# Specify when the site is served from a port that is not the default
+# for the protocol (80 for HTTP or 443 for HTTPS).
+# Probably only necessary in development.
+# PORT=1333
+
 MEDIA_ROOT=images/
 
 # Database configuration
@@ -71,14 +76,20 @@ ENABLE_THUMBNAIL_GENERATION=true
 USE_S3=false
 AWS_ACCESS_KEY_ID=
 AWS_SECRET_ACCESS_KEY=
+# Number of seconds before signed S3 URLs expire.
+# This is currently only used for user export files.
+S3_SIGNED_URL_EXPIRY=900
 
 # Commented are example values if you use a non-AWS, S3-compatible service
 # AWS S3 should work with only AWS_STORAGE_BUCKET_NAME and AWS_S3_REGION_NAME
 # non-AWS S3-compatible services will need AWS_STORAGE_BUCKET_NAME,
-# along with both AWS_S3_CUSTOM_DOMAIN and AWS_S3_ENDPOINT_URL
+# along with both AWS_S3_CUSTOM_DOMAIN and AWS_S3_ENDPOINT_URL.
+# AWS_S3_URL_PROTOCOL must end in ":" and defaults to the same protocol as
+# the BookWyrm instance ("http:" or "https:", based on USE_SSL).
 
 # AWS_STORAGE_BUCKET_NAME=        # "example-bucket-name"
 # AWS_S3_CUSTOM_DOMAIN=None       # "example-bucket-name.s3.fr-par.scw.cloud"
+# AWS_S3_URL_PROTOCOL=None        # "http:"
 # AWS_S3_REGION_NAME=None         # "fr-par"
 # AWS_S3_ENDPOINT_URL=None        # "https://s3.fr-par.scw.cloud"
 
@@ -133,9 +144,9 @@ HTTP_X_FORWARDED_PROTO=false
 TWO_FACTOR_LOGIN_VALIDITY_WINDOW=2
 TWO_FACTOR_LOGIN_MAX_SECONDS=60
 
-# Additional hosts to allow in the Content-Security-Policy, "self" (should be DOMAIN)
-# and AWS_S3_CUSTOM_DOMAIN (if used) are added by default.
-# Value should be a comma-separated list of host names.
+# Additional hosts to allow in the Content-Security-Policy. "self" (should be
+# DOMAIN, optionally followed by ":" + PORT) and AWS_S3_CUSTOM_DOMAIN (if used)
+# are added by default.  Value should be a comma-separated list of host names.
 CSP_ADDITIONAL_HOSTS=
 
 # Time before being logged out (in seconds)

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -0,0 +1,78 @@
+<!--
+Thanks for contributing!
+
+Please ensure the name of your PR is written in imperative present tense. For example:
+
+- "fix color contrast on submit buttons"
+- "add 'favourite food' value to Author model"
+
+To check (tick) a list item, replace the space between square brackets with an x, like this:
+
+- [x] I have checked the box
+
+You can find more information and tips for BookWyrm contributors at https://docs.joinbookwyrm.com/contributing.html
+-->
+
+## Are you finished?
+
+### Linters
+<!--
+Please run linters on your code before submitting your PR.
+If you miss this step it is likely that the GitHub task runners will fail.
+-->
+
+- [ ] I have checked my code with `black`, `pylint`, and `mypy`, or `./bw-dev formatters`
+
+### Tests
+<!-- Check one -->
+
+- [ ] My changes do not need new tests
+- [ ] All tests I have added are passing
+- [ ] I have written tests but need help to make them pass
+- [ ] I have not written tests and need help to write them
+
+## What type of Pull Request is this?
+<!-- Check all that apply -->
+
+- [ ] Bug Fix
+- [ ] Enhancement
+- [ ] Plumbing / Internals / Dependencies
+- [ ] Refactor
+
+## Does this PR change settings or dependencies, or break something?
+<!-- Check all that apply -->
+
+- [ ] This PR changes or adds default settings, configuration, or .env values
+- [ ] This PR changes or adds dependencies
+- [ ] This PR introduces other breaking changes
+
+### Details of breaking or configuration changes (if any of above checked)
+
+## Description
+
+<!--
+Describe what your pull request does here.
+
+For pull requests that relate to or close an issue, please include them
+below.  We like to follow [GitHub's guidance on linking issues to pull requests](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue).
+
+For example, having the text "closes #1234" would connect the current pull
+request to issue 1234.  When we merge the pull request, GitHub will
+automatically close the issue.
+-->
+
+- Related Issue #
+- Closes #
+
+## Documentation
+<!--
+Documentation for users, admins, and developers is an important way to keep the BookWyrm community welcoming and make BookWyrm easy to use.
+Our documentation is maintained in a separate repository at https://github.com/bookwyrm-social/documentation
+-->
+
+<!-- Check all that apply -->
+
+- [ ] New or amended documentation will be required if this PR is merged
+- [ ] I have created a matching pull request in the Documentation repository
+- [ ] I intend to create a matching pull request in the Documentation repository after this PR is merged
+
diff --git a/.github/release.yml b/.github/release.yml
@@ -0,0 +1,26 @@
+changelog:
+  exclude:
+    labels:
+      - ignore-for-release
+  categories:
+    - title: ‼️ Breaking Changes & New Settings ⚙️
+      labels:
+        - breaking-change
+        - config-change
+    - title: Updated Dependencies 🧸
+      labels:
+        - dependencies
+    - title: New Features 🎉
+      labels:
+        - enhancement
+    - title: Bug Fixes 🐛
+      labels:
+        - fix
+        - bug
+    - title: Internals/Plumbing 👩‍🔧
+      labels:
+        - plumbing
+        - tests
+        - deployment
+    - title: Other Changes
+      labels:
+        - "*"
diff --git a/.github/workflows/lint-frontend.yaml b/.github/workflows/lint-frontend.yaml
@@ -22,7 +22,8 @@ jobs:
       - uses: actions/checkout@v4
 
       - name: Install modules
-        run: npm install stylelint stylelint-config-recommended stylelint-config-standard stylelint-order eslint
+      #  run: npm install stylelint stylelint-config-recommended stylelint-config-standard stylelint-order eslint
+        run: npm install eslint@^8.9.0
 
       # See .stylelintignore for files that are not linted.
       # - name: Run stylelint

diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,7 @@
 # BookWyrm
 .env
 /images/
+/exports/
 /static/
 bookwyrm/static/css/bookwyrm.css
 bookwyrm/static/css/themes/
@@ -37,3 +38,6 @@ nginx/default.conf
 
 #macOS
 **/.DS_Store
+
+# Docker
+docker-compose.override.yml
diff --git a/bookwyrm/activitypub/base_activity.py b/bookwyrm/activitypub/base_activity.py
@@ -250,7 +250,10 @@ def serialize(self, **kwargs):
                 pass
         data = {k: v for (k, v) in data.items() if v is not None and k not in omit}
         if "@context" not in omit:
-            data["@context"] = "https://www.w3.org/ns/activitystreams"
+            data["@context"] = [
+                "https://www.w3.org/ns/activitystreams",
+                {"Hashtag": "as:Hashtag"},
+            ]
         return data
 
 
@@ -400,11 +403,11 @@ def get_representative():
     to sign outgoing HTTP GET requests"""
     return models.User.objects.get_or_create(
         username=f"{INSTANCE_ACTOR_USERNAME}@{DOMAIN}",
-        defaults=dict(
-            email="bookwyrm@localhost",
-            local=True,
-            localname=INSTANCE_ACTOR_USERNAME,
-        ),
+        defaults={
+            "email": "bookwyrm@localhost",
+            "local": True,
+            "localname": INSTANCE_ACTOR_USERNAME,
+        },
     )[0]
 
 

diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py
@@ -118,20 +118,22 @@ def get_connectors() -> Iterator[abstract_connector.AbstractConnector]:
 def get_or_create_connector(remote_id: str) -> abstract_connector.AbstractConnector:
     """get the connector related to the object's server"""
     url = urlparse(remote_id)
-    identifier = url.netloc
+    identifier = url.hostname
     if not identifier:
-        raise ValueError("Invalid remote id")
+        raise ValueError(f"Invalid remote id: {remote_id}")
+
+    base_url = f"{url.scheme}://{url.netloc}"
 
     try:
         connector_info = models.Connector.objects.get(identifier=identifier)
     except models.Connector.DoesNotExist:
         connector_info = models.Connector.objects.create(
             identifier=identifier,
             connector_file="bookwyrm_connector",
-            base_url=f"https://{identifier}",
-            books_url=f"https://{identifier}/book",
-            covers_url=f"https://{identifier}/images/covers",
-            search_url=f"https://{identifier}/search?q=",
+            base_url=base_url,
+            books_url=f"{base_url}/book",
+            covers_url=f"{base_url}/images/covers",
+            search_url=f"{base_url}/search?q=",
             priority=2,
         )
 
@@ -143,7 +145,9 @@ def load_more_data(connector_id: str, book_id: str) -> None:
     """background the work of getting all 10,000 editions of LoTR"""
     connector_info = models.Connector.objects.get(id=connector_id)
     connector = load_connector(connector_info)
-    book = models.Book.objects.select_subclasses().get(id=book_id)
+    book = models.Book.objects.select_subclasses().get(  # type: ignore[no-untyped-call]
+        id=book_id
+    )
     connector.expand_book_data(book)
 
 
@@ -154,7 +158,9 @@ def create_edition_task(
     """separate task for each of the 10,000 editions of LoTR"""
     connector_info = models.Connector.objects.get(id=connector_id)
     connector = load_connector(connector_info)
-    work = models.Work.objects.select_subclasses().get(id=work_id)
+    work = models.Work.objects.select_subclasses().get(  # type: ignore[no-untyped-call]
+        id=work_id
+    )
     connector.create_edition_from_data(work, data)
 
 
@@ -188,8 +194,11 @@ def raise_not_valid_url(url: str) -> None:
     if not parsed.scheme in ["http", "https"]:
         raise ConnectorException("Invalid scheme: ", url)
 
+    if not parsed.hostname:
+        raise ConnectorException("Hostname missing: ", url)
+
     try:
-        ipaddress.ip_address(parsed.netloc)
+        ipaddress.ip_address(parsed.hostname)
         raise ConnectorException("Provided url is an IP address: ", url)
     except ValueError:
         # it's not an IP address, which is good

diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py
@@ -229,7 +229,7 @@ def get_description(self, links: JsonDict) -> str:
             data = get_data(url)
         except ConnectorException:
             return ""
-        return data.get("extract", "")
+        return str(data.get("extract", ""))
 
     def get_remote_id_from_model(self, obj: models.BookDataModel) -> str:
         """use get_remote_id to figure out the link from a model obj"""

diff --git a/bookwyrm/emailing.py b/bookwyrm/emailing.py
@@ -4,7 +4,7 @@
 
 from bookwyrm import models, settings
 from bookwyrm.tasks import app, EMAIL
-from bookwyrm.settings import DOMAIN
+from bookwyrm.settings import DOMAIN, BASE_URL
 
 
 def email_data():
@@ -14,6 +14,7 @@ def email_data():
         "site_name": site.name,
         "logo": site.logo_small_url,
         "domain": DOMAIN,
+        "base_url": BASE_URL,
         "user": None,
     }
 

diff --git a/bookwyrm/forms/links.py b/bookwyrm/forms/links.py
@@ -26,7 +26,7 @@ def clean(self):
         url = cleaned_data.get("url")
         filetype = cleaned_data.get("filetype")
         book = cleaned_data.get("book")
-        domain = urlparse(url).netloc
+        domain = urlparse(url).hostname
         if models.LinkDomain.objects.filter(domain=domain).exists():
             status = models.LinkDomain.objects.get(domain=domain).status
             if status == "blocked":

diff --git a/bookwyrm/importers/calibre_import.py b/bookwyrm/importers/calibre_import.py
@@ -14,15 +14,10 @@ class CalibreImporter(Importer):
     def __init__(self, *args: Any, **kwargs: Any):
         # Add timestamp to row_mappings_guesses for date_added to avoid
         # integrity error
-        row_mappings_guesses = []
-
-        for field, mapping in self.row_mappings_guesses:
-            if field in ("date_added",):
-                row_mappings_guesses.append((field, mapping + ["timestamp"]))
-            else:
-                row_mappings_guesses.append((field, mapping))
-
-        self.row_mappings_guesses = row_mappings_guesses
+        self.row_mappings_guesses = [
+            (field, mapping + (["timestamp"] if field == "date_added" else []))
+            for field, mapping in self.row_mappings_guesses
+        ]
         super().__init__(*args, **kwargs)
 
     def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]:

diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py
@@ -1,13 +1,14 @@
 """ PROCEED WITH CAUTION: uses deduplication fields to permanently
 merge book data objects """
+
 from django.core.management.base import BaseCommand
 from django.db.models import Count
 from bookwyrm import models
-from bookwyrm.management.merge import merge_objects
 
 
-def dedupe_model(model):
+def dedupe_model(model, dry_run=False):
     """combine duplicate editions and update related models"""
+    print(f"deduplicating {model.__name__}:")
     fields = model._meta.get_fields()
     dedupe_fields = [
         f for f in fields if hasattr(f, "deduplication_field") and f.deduplication_field
@@ -16,30 +17,42 @@ def dedupe_model(model):
         dupes = (
             model.objects.values(field.name)
             .annotate(Count(field.name))
-            .filter(**{"%s__count__gt" % field.name: 1})
+            .filter(**{f"{field.name}__count__gt": 1})
+            .exclude(**{field.name: ""})
+            .exclude(**{f"{field.name}__isnull": True})
         )
 
         for dupe in dupes:
             value = dupe[field.name]
-            if not value or value == "":
-                continue
             print("----------")
-            print(dupe)
             objs = model.objects.filter(**{field.name: value}).order_by("id")
             canonical = objs.first()
-            print("keeping", canonical.remote_id)
+            action = "would merge" if dry_run else "merging"
+            print(
+                f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:"
+            )
             for obj in objs[1:]:
-                print(obj.remote_id)
-                merge_objects(canonical, obj)
+                print(f"- {obj.remote_id}")
+                absorbed_fields = obj.merge_into(canonical, dry_run=dry_run)
+                print(f"  absorbed fields: {absorbed_fields}")
 
 
 class Command(BaseCommand):
     """deduplicate allllll the book data models"""
 
     help = "merges duplicate book data"
+
+    def add_arguments(self, parser):
+        """add the arguments for this command"""
+        parser.add_argument(
+            "--dry_run",
+            action="store_true",
+            help="don't actually merge, only print what would happen",
+        )
+
     # pylint: disable=no-self-use,unused-argument
     def handle(self, *args, **options):
         """run deduplications"""
-        dedupe_model(models.Edition)
-        dedupe_model(models.Work)
-        dedupe_model(models.Author)
+        dedupe_model(models.Edition, dry_run=options["dry_run"])
+        dedupe_model(models.Work, dry_run=options["dry_run"])
+        dedupe_model(models.Author, dry_run=options["dry_run"])