diff --git a/environmental_justice/models.py b/environmental_justice/models.py
index e4e6b515..936d698f 100644
--- a/environmental_justice/models.py
+++ b/environmental_justice/models.py
@@ -25,6 +25,8 @@ class EnvironmentalJusticeRow(models.Model):
temporal_extent = models.CharField("Temporal Extent")
temporal_resolution = models.CharField("Temporal Resolution")
+ sde_links = models.CharField("SDE Links")
+
class Meta:
verbose_name = "Environmental Justice Row"
verbose_name_plural = "Environmental Justice Rows"
diff --git a/sde_collections/migrations/0040_collection_url_count_prod_and_more.py b/sde_collections/migrations/0040_collection_url_count_prod_and_more.py
new file mode 100644
index 00000000..c106f522
--- /dev/null
+++ b/sde_collections/migrations/0040_collection_url_count_prod_and_more.py
@@ -0,0 +1,32 @@
+# Generated by Django 4.2.6 on 2023-12-01 16:29
+
+from django.db import migrations, models
+
+
+# Adds four integer URL counters (each default 0) to the "collection" model:
+# url_count_prod, url_count_secret_prod, url_count_secret_test, url_count_test.
+class Migration(migrations.Migration):
+ dependencies = [
+ ("sde_collections", "0039_includepattern"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="collection",
+ name="url_count_prod",
+ field=models.IntegerField(default=0),
+ ),
+ migrations.AddField(
+ model_name="collection",
+ name="url_count_secret_prod",
+ field=models.IntegerField(default=0),
+ ),
+ migrations.AddField(
+ model_name="collection",
+ name="url_count_secret_test",
+ field=models.IntegerField(default=0),
+ ),
+ migrations.AddField(
+ model_name="collection",
+ name="url_count_test",
+ field=models.IntegerField(default=0),
+ ),
+ ]
diff --git a/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py b/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py
new file mode 100644
index 00000000..719b04ac
--- /dev/null
+++ b/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py
@@ -0,0 +1,22 @@
+# Generated by Django 4.2.6 on 2023-12-01 17:40
+
+from django.db import migrations
+
+
+# Renames two "collection" counters to spell out "production":
+# url_count_prod -> url_count_production and
+# url_count_secret_prod -> url_count_secret_production.
+class Migration(migrations.Migration):
+ dependencies = [
+ ("sde_collections", "0040_collection_url_count_prod_and_more"),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name="collection",
+ old_name="url_count_prod",
+ new_name="url_count_production",
+ ),
+ migrations.RenameField(
+ model_name="collection",
+ old_name="url_count_secret_prod",
+ new_name="url_count_secret_production",
+ ),
+ ]
diff --git a/sde_collections/migrations/0042_merge_20240219_1422.py b/sde_collections/migrations/0042_merge_20240219_1422.py
new file mode 100644
index 00000000..fc60aeb7
--- /dev/null
+++ b/sde_collections/migrations/0042_merge_20240219_1422.py
@@ -0,0 +1,16 @@
+# Generated by Django 5.0.1 on 2024-02-19 20:22
+
+from django.db import migrations
+
+
+# Merge migration: depends on both 0041 migrations listed below and performs
+# no schema operations of its own.
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("sde_collections", "0041_alter_candidateurl_hash"),
+ (
+ "sde_collections",
+ "0041_rename_url_count_prod_collection_url_count_production_and_more",
+ ),
+ ]
+
+ operations = []
diff --git a/sde_collections/migrations/0043_rename_url_count_secret_production_collection_url_count_dev_and_more.py b/sde_collections/migrations/0043_rename_url_count_secret_production_collection_url_count_dev_and_more.py
new file mode 100644
index 00000000..9a8d81af
--- /dev/null
+++ b/sde_collections/migrations/0043_rename_url_count_secret_production_collection_url_count_dev_and_more.py
@@ -0,0 +1,22 @@
+# Generated by Django 5.0.1 on 2024-02-19 20:26
+
+from django.db import migrations
+
+
+# Renames url_count_secret_production to url_count_dev and drops
+# url_count_secret_test from the "collection" model.
+# NOTE(review): RenameField keeps the existing column values, so any
+# secret-production counts already stored would now be read as dev counts --
+# confirm that is intended (or that the column was never populated).
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("sde_collections", "0042_merge_20240219_1422"),
+ ]
+
+ operations = [
+ migrations.RenameField(
+ model_name="collection",
+ old_name="url_count_secret_production",
+ new_name="url_count_dev",
+ ),
+ migrations.RemoveField(
+ model_name="collection",
+ name="url_count_secret_test",
+ ),
+ ]
diff --git a/sde_collections/models/collection.py b/sde_collections/models/collection.py
index f930f5cb..4f276f3b 100644
--- a/sde_collections/models/collection.py
+++ b/sde_collections/models/collection.py
@@ -8,6 +8,7 @@
from config_generation.db_to_xml import XmlEditor
+from ..sinequa_api import server_configs
from ..utils.github_helper import GitHubHandler
from .collection_choice_fields import (
ConnectorChoices,
@@ -19,6 +20,9 @@
WorkflowStatusChoices,
)
+# from ..tasks import import_candidate_urls_counts_from_api
+
+
User = get_user_model()
@@ -94,6 +98,10 @@ class Collection(models.Model):
)
curation_started = models.DateTimeField("Curation Started", null=True, blank=True)
+ url_count_dev = models.IntegerField(default=0)
+ url_count_test = models.IntegerField(default=0)
+ url_count_production = models.IntegerField(default=0)
+
class Meta:
"""Meta definition for Collection."""
@@ -154,6 +162,101 @@ def workflow_status_button_color(self) -> str:
}
return color_choices[self.workflow_status]
+    def get_server_url(self, server_name) -> str:
+        """Return the search-app URL on ``server_name`` scoped to this collection.
+
+        The query is JSON-encoded, percent-quoted and placed in the URL fragment.
+        Raises ``KeyError`` when ``server_name`` is not in ``server_configs``.
+        """
+        config = server_configs[server_name]
+        base_url, app_name, query_name = (
+            config["base_url"],
+            config["app_name"],
+            config["query_name"],
+        )
+
+        query = {
+            "name": query_name,
+            "scope": "All",
+            "text": "",
+            "advanced": {"collection": f"/SMD/{self.config_folder}/"},
+        }
+        encoded_query = urllib.parse.quote(json.dumps(query))
+        return f"{base_url}/app/{app_name}/#/search?query={encoded_query}"
+
+ # def fetch_url_counts(self):
+ # for collection in Collection.objects.all():
+ # if not (
+ # collection.url_count_test == 0
+ # and collection.url_count_secret_test == 0
+ # and collection.url_count_production == 0
+ # and collection.url_count_secret_production == 0
+ # ):
+ # continue
+ # collection_ids = [collection.id]
+ # server_names = [
+ # # "dev",
+ # "test",
+ # "production",
+ # ]
+ # for server_name in server_names:
+ # count = import_candidate_urls_counts_from_api(server_name, collection_ids)
+ # setattr(collection, f"url_count_{server_name}", count)
+ # collection.save()
+
+ # def fetch_url_counts(self):
+ # """Fetch the URL counts from the production webapp."""
+ # ENVIRONMENTS = {
+ # "test": {
+ # "url": "https://sciencediscoveryengine.test.nasa.gov",
+ # "query": "query-smd-primary",
+ # "app": "nasa-sba-smd",
+ # "variable": "url_count_test",
+ # "folder": "SMD",
+ # },
+ # "secret_test": {
+ # "url": "https://sciencediscoveryengine.test.nasa.gov",
+ # "query": "query-sde-primary",
+ # "app": "nasa-sba-sde",
+ # "variable": "url_count_secret_test",
+ # "folder": "SDE",
+ # },
+ # "prod": {
+ # "url": "https://sciencediscoveryengine.nasa.gov",
+ # "query": "query-smd-primary",
+ # "app": "nasa-sba-smd",
+ # "variable": "url_count_prod",
+ # "folder": "SMD",
+ # },
+ # "secret_prod": {
+ # "url": "https://sciencediscoveryengine.nasa.gov",
+ # "query": "query-sde-primary",
+ # "app": "nasa-sba-sde",
+ # "variable": "url_count_secret_prod",
+ # "folder": "SDE",
+ # },
+ # }
+
+ # totals = []
+
+ # for environment_name, environment_config in ENVIRONMENTS.items():
+ # count = import_candidate_urls_counts_from_api(
+ # server_name=environment_name, collection_ids=[self.id]
+ # )
+
+ # # setattr(self, environment_config["variable"], response_json["total"])
+ # totals.append(count)
+ # # self.save()
+ # return totals
+
+ # from sde_collections.tasks import import_candidate_urls_counts_from_api
+ # for collection in Collection.objects.all():
+ # collection_ids = [collection.id]
+ # server_names = [
+ # "test",
+ # "secret_test",
+ # "production",
+ # "secret_production",
+ # ]
+ # for server_name in server_names:
+ # count = import_candidate_urls_counts_from_api(server_name, collection_ids)
+ # setattr(collection, f"url_count_{server_name}", count)
+ # collection.save()
+
def _process_exclude_list(self):
"""Process the exclude list."""
return [
@@ -423,3 +526,30 @@ class RequiredUrls(models.Model):
def __str__(self) -> str:
return self.url
+
+
+# One row per server. base_url / app_name / query_name use the same names as
+# the per-server keys of ``server_configs`` in sinequa_api.py; username and
+# password are optional (blank allowed, default "").
+class Server(models.Model):
+ name = models.CharField(max_length=255)
+ base_url = models.CharField(max_length=255)
+ app_name = models.CharField(max_length=255)
+ query_name = models.CharField(max_length=255)
+ username = models.CharField(max_length=255, blank=True, default="")
+ # NOTE(review): a CharField stores this value as plain text -- confirm that
+ # is acceptable for credentials.
+ password = models.CharField(max_length=255, blank=True, default="")
+
+ # Shown wherever the instance is rendered as text: just the server name.
+ def __str__(self) -> str:
+ return self.name
+
+
+class IndexingMetadata(models.Model):
+    """Last-indexed timestamp and count for one collection on one server."""
+
+    server = models.ForeignKey("Server", on_delete=models.CASCADE)
+    collection = models.ForeignKey("Collection", on_delete=models.CASCADE)
+    last_indexed = models.DateTimeField(null=True, blank=True)
+    last_indexed_count = models.IntegerField(null=True, blank=True)
+
+    def __str__(self) -> str:
+        """Return ``"<collection name> - <server name>"``."""
+        return "{} - {}".format(self.collection.name, self.server.name)
+
+    def update_last_indexed(self, last_indexed, last_indexed_count):
+        """Store both indexing values on this row and save it."""
+        self.last_indexed, self.last_indexed_count = last_indexed, last_indexed_count
+        self.save()
diff --git a/sde_collections/sinequa_api.py b/sde_collections/sinequa_api.py
index 483e21fe..8354853f 100644
--- a/sde_collections/sinequa_api.py
+++ b/sde_collections/sinequa_api.py
@@ -9,32 +9,22 @@
"dev": {
"app_name": "nasa-sba-smd",
"query_name": "query-smd-primary",
- "base_url": "http://sde-renaissance.nasa-impact.net",
+ "base_url": "http://sde-xli.nasa-impact.net",
},
- "test": {
+ "lis_server": {
"app_name": "nasa-sba-smd",
"query_name": "query-smd-primary",
- "base_url": "https://sciencediscoveryengine.test.nasa.gov",
+ "base_url": "http://sde-xli.nasa-impact.net",
},
- "production": {
+ "test": {
"app_name": "nasa-sba-smd",
"query_name": "query-smd-primary",
- "base_url": "https://sciencediscoveryengine.nasa.gov",
- },
- "secret_test": {
- "app_name": "nasa-sba-sde",
- "query_name": "query-sde-primary",
"base_url": "https://sciencediscoveryengine.test.nasa.gov",
},
- "secret_production": {
- "app_name": "nasa-sba-sde",
- "query_name": "query-sde-primary",
- "base_url": "https://sciencediscoveryengine.nasa.gov",
- },
- "lis_server": {
+ "production": {
"app_name": "nasa-sba-smd",
"query_name": "query-smd-primary",
- "base_url": "http://sde-xli.nasa-impact.net",
+ "base_url": "https://sciencediscoveryengine.nasa.gov",
},
}
@@ -45,6 +35,7 @@ def __init__(self, server_name: str) -> None:
self.app_name: str = server_configs[server_name]["app_name"]
self.query_name: str = server_configs[server_name]["query_name"]
self.base_url: str = server_configs[server_name]["base_url"]
+ self.folder: str = "SDE" if self.app_name == "nasa-sba-sde" else "SMD"
def process_response(self, url: str, payload: dict[str, Any]) -> Any:
response = requests.post(url, headers={}, json=payload, verify=False)
diff --git a/sde_collections/tasks.py b/sde_collections/tasks.py
index 761d92a1..73434b15 100644
--- a/sde_collections/tasks.py
+++ b/sde_collections/tasks.py
@@ -13,6 +13,10 @@
from .sinequa_api import Api
from .utils.github_helper import GitHubHandler
+# from django.apps import apps
+
+# Collection = apps.get_model("Collection")
+
def _get_data_to_import(collection, server_name):
# ignore these because they are API collections and don't have URLs
@@ -24,6 +28,13 @@ def _get_data_to_import(collection, server_name):
"/SMD/CASEI_Platform/",
"/SMD/CMR_API/",
"/SMD/PDS_API_Legacy_All/",
+ "/SDE/ASTRO_NAVO_HEASARC/",
+ "/SDE/CASEI_Campaign/",
+ "/SDE/CASEI_Deployment/",
+ "/SDE/CASEI_Instrument/",
+ "/SDE/CASEI_Platform/",
+ "/SDE/CMR_API/",
+ "/SDE/PDS_API_Legacy_All/",
]
data_to_import = []
@@ -97,6 +108,38 @@ def import_candidate_urls_from_api(server_name="test", collection_ids=[]):
shutil.rmtree(TEMP_FOLDER_NAME)
+@celery_app.task(soft_time_limit=10000)
+def import_candidate_urls_counts_from_api(server_name, collection_ids=()):
+    """Return how many URLs ``server_name`` reports for the given collections.
+
+    Counts are summed over every matching collection; 0 when none match.
+    """
+    collections = Collection.objects.filter(id__in=collection_ids)
+    counts = (len(_get_data_to_import(c, server_name)) for c in collections)
+    return sum(counts)
+
+
+# Fills in url_count_<server> on collections by calling
+# import_candidate_urls_counts_from_api for the "test" and "production" servers
+# and saving the collection.
+@celery_app.task(soft_time_limit=10000)
+def import_candidate_urls_counts_from_api_all_collections_all_servers():
+ for collection in Collection.objects.all():
+ # Only collections whose three stored counts are all zero are processed;
+ # any non-zero count skips the collection.
+ if not (
+ collection.url_count_dev == 0
+ and collection.url_count_test == 0
+ and collection.url_count_production == 0
+ ):
+ continue
+ collection_ids = [collection.id]
+ # NOTE(review): "dev" is disabled below, so url_count_dev is never
+ # refreshed here, yet a non-zero dev count still triggers the skip above.
+ server_names = [
+ # "dev",
+ "test",
+ "production",
+ ]
+ for server_name in server_names:
+ # Called directly (not via .delay), so the count is returned in-process.
+ count = import_candidate_urls_counts_from_api(server_name, collection_ids)
+ setattr(collection, f"url_count_{server_name}", count)
+ collection.save()
+
+
@celery_app.task()
def push_to_github_task(collection_ids):
collections = Collection.objects.filter(id__in=collection_ids)
diff --git a/sde_collections/templatetags/__init__.py b/sde_collections/templatetags/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sde_collections/templatetags/get_server_url.py b/sde_collections/templatetags/get_server_url.py
new file mode 100644
index 00000000..edc3c763
--- /dev/null
+++ b/sde_collections/templatetags/get_server_url.py
@@ -0,0 +1,8 @@
+from django import template
+
+register = template.Library()
+
+
+@register.simple_tag
+def get_server_url(collection, server_name):
+    """Template tag: delegate to ``collection.get_server_url(server_name)``."""
+    server_url = collection.get_server_url(server_name)
+    return server_url
diff --git a/sde_collections/urls.py b/sde_collections/urls.py
index 3953517e..cf09a28e 100644
--- a/sde_collections/urls.py
+++ b/sde_collections/urls.py
@@ -40,6 +40,11 @@
view=views.WebappGitHubConsolidationView.as_view(),
name="consolidate_db_and_github_configs",
),
+    path(
+        "url-counts/",
+        view=views.URLCountView.as_view(),
+        name="url_counts",  # route names must be unique for reverse()
+    ),
# List all CandidateURL instances: /candidate-urls/
# Retrieve a specific CandidateURL instance: /candidate-urls/{id}/
# Create a new CandidateURL instance: /candidate-urls/
diff --git a/sde_collections/views.py b/sde_collections/views.py
index 6d65d61e..f2b20c9c 100644
--- a/sde_collections/views.py
+++ b/sde_collections/views.py
@@ -101,8 +101,12 @@ def post(self, request, *args, **kwargs):
collection.github_issue_number = github_issue_number
collection.save()
else:
- github_form.add_error("github_issue_link", "Invalid GitHub issue link format")
- return self.render_to_response(self.get_context_data(form=form, github_form=github_form))
+ github_form.add_error(
+ "github_issue_link", "Invalid GitHub issue link format"
+ )
+ return self.render_to_response(
+ self.get_context_data(form=form, github_form=github_form)
+ )
return redirect("sde_collections:detail", pk=collection.pk)
else:
@@ -128,7 +132,9 @@ def get_context_data(self, **kwargs):
context["github_form"] = CollectionGithubIssueForm(
initial={"github_issue_link": self.get_object().github_issue_link}
)
- context["required_urls"] = RequiredUrls.objects.filter(collection=self.get_object())
+ context["required_urls"] = RequiredUrls.objects.filter(
+ collection=self.get_object()
+ )
context["segment"] = "collection-detail"
return context
@@ -137,7 +143,9 @@ class RequiredUrlsDeleteView(LoginRequiredMixin, DeleteView):
model = RequiredUrls
def get_success_url(self, *args, **kwargs):
- return reverse("sde_collections:detail", kwargs={"pk": self.object.collection.pk})
+ return reverse(
+ "sde_collections:detail", kwargs={"pk": self.object.collection.pk}
+ )
class CandidateURLsListView(LoginRequiredMixin, ListView):
@@ -341,7 +349,9 @@ class PushToGithubView(APIView):
def post(self, request):
collection_ids = request.POST.getlist("collection_ids[]", [])
if len(collection_ids) == 0:
- return Response("collection_ids can't be empty.", status=status.HTTP_400_BAD_REQUEST)
+ return Response(
+ "collection_ids can't be empty.", status=status.HTTP_400_BAD_REQUEST
+ )
push_to_github_task.delay(collection_ids)
@@ -363,7 +373,9 @@ def get(self, request, *args, **kwargs):
self.data = generate_db_github_metadata_differences()
else:
# this needs to be a celery task eventually
- self.data = generate_db_github_metadata_differences(reindex_configs_from_github=True)
+ self.data = generate_db_github_metadata_differences(
+ reindex_configs_from_github=True
+ )
return super().get(request, *args, **kwargs)
@@ -381,8 +393,12 @@ def post(self, request, *args, **kwargs):
elif field == "connector":
new_value = ConnectorChoices.lookup_by_text(new_value)
- Collection.objects.filter(config_folder=config_folder).update(**{field: new_value})
- messages.success(request, f"Successfully updated {field} of {config_folder}.")
+ Collection.objects.filter(config_folder=config_folder).update(
+ **{field: new_value}
+ )
+ messages.success(
+ request, f"Successfully updated {field} of {config_folder}."
+ )
else:
messages.error(
request,
@@ -396,3 +412,58 @@ def get_context_data(self, **kwargs):
context["differences"] = self.data
return context
+
+
+class URLCountView(LoginRequiredMixin, ListView):
+ """
+ List all collections with their stored URL counts per environment.
+
+ Renders ``url_counts_by_environment.html`` with the queryset exposed as
+ ``collections``; login is required.
+ """
+
+ template_name = "sde_collections/url_counts_by_environment.html"
+ model = Collection
+ context_object_name = "collections"
+
+ # def get(self, request, *args, **kwargs):
+ # if not request.GET.get("reindex") == "true":
+ # self.data = generate_db_github_metadata_differences()
+ # else:
+ # # this needs to be a celery task eventually
+ # self.data = generate_db_github_metadata_differences(
+ # reindex_configs_from_github=True
+ # )
+
+ # return super().get(request, *args, **kwargs)
+
+ # def post(self, request, *args, **kwargs):
+ # config_folder = self.request.POST.get("config_folder")
+ # field = self.request.POST.get("field")
+ # new_value = self.request.POST.get("github_value")
+
+ # if new_value and new_value != "None":
+ # new_value = new_value.strip()
+ # if field == "division":
+ # new_value = Divisions.lookup_by_text(new_value)
+ # elif field == "document_type":
+ # new_value = DocumentTypes.lookup_by_text(new_value)
+ # elif field == "connector":
+ # new_value = ConnectorChoices.lookup_by_text(new_value)
+
+ # Collection.objects.filter(config_folder=config_folder).update(
+ # **{field: new_value}
+ # )
+ # messages.success(
+ # request, f"Successfully updated {field} of {config_folder}."
+ # )
+ # else:
+ # messages.error(
+ # request,
+ # f"Can't update empty value from GitHub: {field} of {config_folder}.",
+ # )
+
+ # return redirect("sde_collections:consolidate_db_and_github_configs")
+
+ # def get_context_data(self, **kwargs):
+ # context = super().get_context_data(**kwargs)
+ # context["differences"] = self.data
+
+ # return context
diff --git a/sde_indexing_helper/static/js/url_counts.js b/sde_indexing_helper/static/js/url_counts.js
new file mode 100644
index 00000000..e4b15b90
--- /dev/null
+++ b/sde_indexing_helper/static/js/url_counts.js
@@ -0,0 +1,99 @@
+$(document).ready(function () {
+ let table = $('#url_counts_table').DataTable({
+ initComplete: function (settings, json) {
+ // calculate the sum when table is first created:
+ doSum();
+ },
+ "paging": false,
+ "stateSave": true,
+ "fixedHeader": true,
+ });
+
+ $('#url_counts_table').on('draw.dt', function () {
+ // re-calculate the sum whenever the table is re-displayed:
+ doSum();
+ });
+
+ // This provides the sum of all records:
+ function doSum() {
+ // get the DataTables API object:
+ var table = $('#url_counts_table').DataTable();
+ // set up the initial (unsummed) data array for the footer row:
+ var totals = ['Totals', '', 0, 0, 0, 0];
+ // iterate all rows - use table.rows( {search: 'applied'} ).data()
+ // if you want to sum only filtered (visible) rows:
+ totals = table.rows().data()
+ // sum the amounts:
+ .reduce(function (sum, record) {
+ for (let i = 2; i <= 8; i++) {
+ sum[i] = sum[i] + numberFromString(record[i]);
+ }
+ return sum;
+ }, totals);
+ // place the sum in the relevant footer cell:
+ for (let i = 1; i <= 8; i++) {
+ var column = table.column(i);
+ $(column.footer()).html(formatNumber(totals[i]));
+ }
+ }
+
+ function numberFromString(s) {
+ // Check if the input is a string
+ if (typeof s === 'string') {
+ // Create a temporary div element
+ var tempDiv = document.createElement('div');
+ // Set the inner HTML of the div to the input string
+ tempDiv.innerHTML = s;
+ // Extract the text content from the div
+ var text = tempDiv.textContent || tempDiv.innerText || "";
+
+ // Remove any non-numeric characters (except for the decimal point)
+ return text.replace(/[^\d.-]/g, '') * 1;
+ } else if (typeof s === 'number') {
+ // If it's already a number, return it as is
+ return s;
+ } else {
+ // If the input is neither a string nor a number, return 0
+ return 0;
+ }
+ }
+
+ function formatNumber(n) {
+ return n.toLocaleString(); // or whatever you prefer here
+ }
+
+});
+
+// let table = $('#url_counts_table').DataTable({
+// "paging": false,
+// "stateSave": true,
+// "fixedHeader": true,
+// initComplete: function (settings, json) {
+// // calculate the sum when table is first created:
+// doSum();
+// }
+
+// $('#url_counts_table').on('draw.dt', function () {
+// // re-calculate the sum whenever the table is re-displayed:
+// doSum();
+// })
+
+
+// // "footerCallback": function (row, data, start, end, display) {
+// // var api = this.api();
+
+// // // Calculate the total for the first column in the displayed data
+// // var total = api
+// // .column(2, { page: 'current' })
+// // .data()
+// // .reduce(function (a, b) {
+// // return a + b;
+// // }, 0);
+
+// // // Update the footer
+// // $(api.column(2).footer()).html(total);
+// // $(api.column(3).footer()).html(total);
+// // $(api.column(4).footer()).html(total);
+// // $(api.column(5).footer()).html(total);
+// // }
+// });
diff --git a/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html b/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html
new file mode 100644
index 00000000..2a095435
--- /dev/null
+++ b/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html
@@ -0,0 +1,60 @@
+{% extends "layouts/base.html" %}
+{% load static %}
+{% load i18n %}
+{% load get_server_url %}
+{% block title %}URL counts by environment{% endblock %}
+{% block stylesheets %}
+ {{ block.super }}
+
+{% endblock stylesheets %}
+{% block content %}
+ {% csrf_token %}
+
+
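+    <table id="url_counts_table">
+        <thead>
+            <tr>
+                <th>Folder</th>
+                <th>Name</th>
+                <th>Dev</th>
+                <th>Test</th>
+                <th>Production</th>
+            </tr>
+        </thead>
+        <tbody>
+            {% for collection in collections %}
+                <tr>
+                    <td>{{ collection.config_folder }}</td>
+                    <td>{{ collection.name }}</td>
+                    <td>{{ collection.url_count_dev }}</td>
+                    <td>{{ collection.url_count_test }}</td>
+                    <td>{{ collection.url_count_production }}</td>
+                </tr>
+            {% endfor %}
+        </tbody>
+        <tfoot>
+            <tr>
+                <th></th>
+                <th></th>
+                <th></th>
+                <th></th>
+                <th></th>
+            </tr>
+        </tfoot>
+    </table>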