diff --git a/api/caching/tasks.py b/api/caching/tasks.py
index 0b7a4b6670f..3163fcf180e 100644
--- a/api/caching/tasks.py
+++ b/api/caching/tasks.py
@@ -1,11 +1,12 @@
+import logging
from urllib.parse import urlparse
+
+from django.apps import apps
+from django.contrib.contenttypes.models import ContentType
from django.db import connection
from django.db.models import Sum
-
import requests
-import logging
-from django.apps import apps
from api.caching.utils import storage_usage_cache
from framework.postcommit_tasks.handlers import enqueue_postcommit_task
@@ -114,32 +115,54 @@ def ban_url(instance):
def update_storage_usage_cache(target_id, target_guid, per_page=500000):
if not settings.ENABLE_STORAGE_USAGE_CACHE:
return
- sql = """
- SELECT count(size), sum(size) from
- (SELECT size FROM osf_basefileversionsthrough AS obfnv
- LEFT JOIN osf_basefilenode file ON obfnv.basefilenode_id = file.id
- LEFT JOIN osf_fileversion version ON obfnv.fileversion_id = version.id
- LEFT JOIN django_content_type type on file.target_content_type_id = type.id
+    from osf.models import Guid
+    target_obj = Guid.load(target_guid).referent  # query matches the model's own pk/content type; NOTE(review): assumes Guid.load returns the Guid row -- confirm
+    key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid)
+    storage_usage_cache.set(key, compute_storage_usage_total(target_obj, per_page=per_page), settings.STORAGE_USAGE_CACHE_TIMEOUT)  # per_page is forwarded, not dropped
+
+
+def compute_storage_usage_total(target_obj, per_page=500000):
+    """Return the summed version sizes of target_obj's non-deleted osfstorage files, reading per_page rows per query."""
+    sql = """SELECT count(*), sum(file_page.size) from (
+        SELECT version.size AS size
+        FROM osf_basefileversionsthrough AS obfnv
+        LEFT JOIN osf_basefilenode AS file ON obfnv.basefilenode_id = file.id
+        LEFT JOIN osf_fileversion AS version ON obfnv.fileversion_id = version.id
WHERE file.provider = 'osfstorage'
- AND type.model = 'abstractnode'
AND file.deleted_on IS NULL
- AND file.target_object_id=%s
+        AND file.target_object_id=%(target_id)s
+        AND file.target_content_type_id = %(target_content_type_id)s
ORDER BY version.id
- LIMIT %s OFFSET %s) file_page
+        LIMIT %(per_page)s OFFSET %(offset)s) file_page
"""
- count = per_page
+    # count(*) rather than count(size): OFFSET must advance by rows read, even when a size is NULL
offset = 0
storage_usage_total = 0
with connection.cursor() as cursor:
- while count:
- cursor.execute(sql, [target_id, per_page, offset])
- result = cursor.fetchall()
- storage_usage_total += int(result[0][1]) if result[0][1] else 0
- count = int(result[0][0]) if result[0][0] else 0
- offset += count
-
- key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid)
- storage_usage_cache.set(key, storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)
+        page_params = {  # loop-invariant; only the offset changes per page
+            'target_id': target_obj.pk,
+            'target_content_type_id': ContentType.objects.get_for_model(target_obj).pk,
+            'per_page': per_page,
+        }
+        while True:
+            cursor.execute(sql, {**page_params, 'offset': offset})
+            page_count, size_sum = cursor.fetchall()[0]
+            if not page_count:  # empty page: every row has been summed
+                return storage_usage_total
+            storage_usage_total += int(size_sum or 0)  # sum() is NULL when all sizes in the page are NULL
+            offset += page_count
+
+
+def get_storage_usage_total(target_obj):
+    """Return target_obj's storage usage total, served from (and written back to) the cache when it is enabled."""
+    if not settings.ENABLE_STORAGE_USAGE_CACHE:
+        return compute_storage_usage_total(target_obj)
+    _key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_obj._id)
+    _total = storage_usage_cache.get(_key)
+    if _total is None:  # cache miss: compute once and remember it
+        _total = compute_storage_usage_total(target_obj)
+        storage_usage_cache.set(_key, _total, settings.STORAGE_USAGE_CACHE_TIMEOUT)
+    return _total
def update_storage_usage(target):
diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py
index 97723e2e235..2456b412c68 100644
--- a/osf/metadata/osf_gathering.py
+++ b/osf/metadata/osf_gathering.py
@@ -8,6 +8,7 @@
from django import db
import rdflib
+from api.caching.tasks import get_storage_usage_total
from osf import models as osfdb
from osf.metadata import gather
from osf.metadata.rdfutils import (
@@ -213,19 +214,24 @@ def pls_get_magic_metadata_basket(osf_item) -> gather.Basket:
OSFMAP_SUPPLEMENT = {
OSF.Project: {
OSF.hasOsfAddon: None,
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.ProjectComponent: {
OSF.hasOsfAddon: None,
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Registration: {
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.RegistrationComponent: {
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Preprint: {
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.File: {
@@ -1149,3 +1155,13 @@ def gather_storage_region(focus):
_region_ref = rdflib.URIRef(_region.absolute_api_v2_url)
yield (OSF.storageRegion, _region_ref)
yield (_region_ref, SKOS.prefLabel, rdflib.Literal(_region.name, lang='en'))
+
+
+@gather.er(OSF.storageByteCount, focustype_iris=[
+    OSF.Project, OSF.ProjectComponent, OSF.Registration, OSF.RegistrationComponent, OSF.Preprint,
+])
+def gather_storage_byte_count(focus):
+    """Yield an (OSF.storageByteCount, total) pair for the focus's db model, unless the total is None."""
+    _byte_count = get_storage_usage_total(focus.dbmodel)
+    if _byte_count is not None:
+        yield (OSF.storageByteCount, _byte_count)
diff --git a/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle b/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle
index c30205f27c9..dd941d6092c 100644
--- a/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle
+++ b/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle
@@ -1,7 +1,8 @@
@prefix osf: .
@prefix skos: .
- osf:storageRegion .
+ osf:storageByteCount 1337 ;
+ osf:storageRegion .
a osf:Region ;
skos:prefLabel "United States"@en .
diff --git a/osf_tests/metadata/expected_metadata_files/project_supplement.turtle b/osf_tests/metadata/expected_metadata_files/project_supplement.turtle
index 41341cca00f..53ece58a049 100644
--- a/osf_tests/metadata/expected_metadata_files/project_supplement.turtle
+++ b/osf_tests/metadata/expected_metadata_files/project_supplement.turtle
@@ -3,6 +3,7 @@
@prefix skos: .
osf:hasOsfAddon ;
+ osf:storageByteCount 7 ;
osf:storageRegion .
a osf:AddonImplementation ;
diff --git a/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle b/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle
index ac9d0d08673..bc3d320771d 100644
--- a/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle
+++ b/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle
@@ -1,7 +1,8 @@
@prefix osf: .
@prefix skos: .
- osf:storageRegion .
+ osf:storageByteCount 17 ;
+ osf:storageRegion .
a osf:Region ;
skos:prefLabel "United States"@en .
diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py
index 790be8679db..afc73c17947 100644
--- a/osf_tests/metadata/test_osf_gathering.py
+++ b/osf_tests/metadata/test_osf_gathering.py
@@ -821,3 +821,14 @@ def test_gather_storage_region(self):
(self.preprintfocus.iri, OSF.storageRegion, _default_region_ref),
(_default_region_ref, SKOS.prefLabel, Literal('United States', lang='en')),
})
+
+    def test_gather_storage_byte_count(self):
+        """Each focus yields exactly one storageByteCount triple with its expected total."""
+        _expected_byte_counts = (
+            (self.projectfocus, 123456),
+            (self.registrationfocus, 0),
+            (self.preprintfocus, 1337),
+        )
+        for _focus, _byte_count in _expected_byte_counts:
+            _expected_triples = {(_focus.iri, OSF.storageByteCount, Literal(_byte_count))}
+            assert_triples(osf_gathering.gather_storage_byte_count(_focus), _expected_triples)