diff --git a/environmental_justice/models.py b/environmental_justice/models.py index e4e6b515..936d698f 100644 --- a/environmental_justice/models.py +++ b/environmental_justice/models.py @@ -25,6 +25,8 @@ class EnvironmentalJusticeRow(models.Model): temporal_extent = models.CharField("Temporal Extent") temporal_resolution = models.CharField("Temporal Resolution") + sde_links = models.CharField("SDE Links") + class Meta: verbose_name = "Environmental Justice Row" verbose_name_plural = "Environmental Justice Rows" diff --git a/sde_collections/migrations/0040_collection_url_count_prod_and_more.py b/sde_collections/migrations/0040_collection_url_count_prod_and_more.py new file mode 100644 index 00000000..c106f522 --- /dev/null +++ b/sde_collections/migrations/0040_collection_url_count_prod_and_more.py @@ -0,0 +1,32 @@ +# Generated by Django 4.2.6 on 2023-12-01 16:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("sde_collections", "0039_includepattern"), + ] + + operations = [ + migrations.AddField( + model_name="collection", + name="url_count_prod", + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name="collection", + name="url_count_secret_prod", + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name="collection", + name="url_count_secret_test", + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name="collection", + name="url_count_test", + field=models.IntegerField(default=0), + ), + ] diff --git a/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py b/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py new file mode 100644 index 00000000..719b04ac --- /dev/null +++ b/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2.6 on 2023-12-01 17:40 + +from 
django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("sde_collections", "0040_collection_url_count_prod_and_more"), + ] + + operations = [ + migrations.RenameField( + model_name="collection", + old_name="url_count_prod", + new_name="url_count_production", + ), + migrations.RenameField( + model_name="collection", + old_name="url_count_secret_prod", + new_name="url_count_secret_production", + ), + ] diff --git a/sde_collections/migrations/0042_merge_20240219_1422.py b/sde_collections/migrations/0042_merge_20240219_1422.py new file mode 100644 index 00000000..fc60aeb7 --- /dev/null +++ b/sde_collections/migrations/0042_merge_20240219_1422.py @@ -0,0 +1,16 @@ +# Generated by Django 5.0.1 on 2024-02-19 20:22 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0041_alter_candidateurl_hash"), + ( + "sde_collections", + "0041_rename_url_count_prod_collection_url_count_production_and_more", + ), + ] + + operations = [] diff --git a/sde_collections/migrations/0043_rename_url_count_secret_production_collection_url_count_dev_and_more.py b/sde_collections/migrations/0043_rename_url_count_secret_production_collection_url_count_dev_and_more.py new file mode 100644 index 00000000..9a8d81af --- /dev/null +++ b/sde_collections/migrations/0043_rename_url_count_secret_production_collection_url_count_dev_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 5.0.1 on 2024-02-19 20:26 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0042_merge_20240219_1422"), + ] + + operations = [ + migrations.RenameField( + model_name="collection", + old_name="url_count_secret_production", + new_name="url_count_dev", + ), + migrations.RemoveField( + model_name="collection", + name="url_count_secret_test", + ), + ] diff --git a/sde_collections/models/collection.py b/sde_collections/models/collection.py index 
f930f5cb..4f276f3b 100644 --- a/sde_collections/models/collection.py +++ b/sde_collections/models/collection.py @@ -8,6 +8,7 @@ from config_generation.db_to_xml import XmlEditor +from ..sinequa_api import server_configs from ..utils.github_helper import GitHubHandler from .collection_choice_fields import ( ConnectorChoices, @@ -19,6 +20,9 @@ WorkflowStatusChoices, ) +# from ..tasks import import_candidate_urls_counts_from_api + + User = get_user_model() @@ -94,6 +98,10 @@ class Collection(models.Model): ) curation_started = models.DateTimeField("Curation Started", null=True, blank=True) + url_count_dev = models.IntegerField(default=0) + url_count_test = models.IntegerField(default=0) + url_count_production = models.IntegerField(default=0) + class Meta: """Meta definition for Collection.""" @@ -154,6 +162,29 @@ def workflow_status_button_color(self) -> str: } return color_choices[self.workflow_status]
+    def get_server_url(self, server_name) -> str:
+        """Return the search-app URL for this collection on a server.
+
+        ``server_name`` must be a key of ``server_configs``; an
+        unknown name raises ``KeyError``.
+        """
+        server = server_configs[server_name]
+        base_url = server["base_url"]
+        app_name = server["app_name"]
+        query_name = server["query_name"]
+
+        payload = {
+            "name": query_name,
+            "scope": "All",
+            "text": "",
+            "advanced": {
+                "collection": f"/SMD/{self.config_folder}/",
+            },
+        }
+        # JSON-encode, then percent-encode, the query for the URL fragment.
+        encoded_payload = urllib.parse.quote(json.dumps(payload))
+        return f"{base_url}/app/{app_name}/#/search?query={encoded_payload}"
+
def _process_exclude_list(self): """Process the exclude list.""" return [ @@ -423,3 +454,37 @@ class RequiredUrls(models.Model): def __str__(self) -> str: return self.url
+
+
+class Server(models.Model):
+    """Named server entry: base URL, app and query names, optional credentials."""
+
+    name = models.CharField(max_length=255)
+    base_url = models.CharField(max_length=255)
+    app_name = models.CharField(max_length=255)
+    query_name = models.CharField(max_length=255)
+    username = models.CharField(max_length=255, blank=True, default="")
+    # NOTE(review): the password is stored as plain text in this column;
+    # confirm that is acceptable or move it to a secrets store.
+    password = models.CharField(max_length=255, blank=True, default="")
+
+    def __str__(self) -> str:
+        return self.name
+
+
+class IndexingMetadata(models.Model):
+    """Last recorded indexing time and count for a collection on a server."""
+
+    server = models.ForeignKey("Server", on_delete=models.CASCADE)
+    collection = models.ForeignKey("Collection", on_delete=models.CASCADE)
+    last_indexed = models.DateTimeField(null=True, blank=True)
+    last_indexed_count = models.IntegerField(null=True, blank=True)
+
+    def __str__(self) -> str:
+        return f"{self.collection.name} - {self.server.name}"
+
+    def update_last_indexed(self, last_indexed, last_indexed_count):
+        """Store a new indexing timestamp and count, then save the row."""
+        self.last_indexed = last_indexed
+        self.last_indexed_count = last_indexed_count
+        self.save()
diff --git a/sde_collections/sinequa_api.py b/sde_collections/sinequa_api.py index 483e21fe..8354853f 100644 --- a/sde_collections/sinequa_api.py +++ b/sde_collections/sinequa_api.py @@ -9,32 +9,22 @@ "dev": { "app_name": "nasa-sba-smd", "query_name": "query-smd-primary", - "base_url": "http://sde-renaissance.nasa-impact.net", + "base_url": "http://sde-xli.nasa-impact.net", }, - "test": { + "lis_server": { "app_name": "nasa-sba-smd", "query_name": "query-smd-primary", - "base_url": "https://sciencediscoveryengine.test.nasa.gov", + "base_url": "http://sde-xli.nasa-impact.net", }, - "production": { + "test": { "app_name": "nasa-sba-smd", "query_name": "query-smd-primary", - "base_url": "https://sciencediscoveryengine.nasa.gov", - }, - "secret_test": { - "app_name": "nasa-sba-sde", - "query_name": "query-sde-primary", "base_url": "https://sciencediscoveryengine.test.nasa.gov", }, - "secret_production": { - "app_name": "nasa-sba-sde", - "query_name": "query-sde-primary", - "base_url": "https://sciencediscoveryengine.nasa.gov", - }, - "lis_server": { + "production": { "app_name": "nasa-sba-smd", "query_name":
"query-smd-primary", - "base_url": "http://sde-xli.nasa-impact.net", + "base_url": "https://sciencediscoveryengine.nasa.gov", }, } @@ -45,6 +35,7 @@ def __init__(self, server_name: str) -> None: self.app_name: str = server_configs[server_name]["app_name"] self.query_name: str = server_configs[server_name]["query_name"] self.base_url: str = server_configs[server_name]["base_url"] + self.folder: str = "SDE" if self.app_name == "nasa-sba-sde" else "SMD" def process_response(self, url: str, payload: dict[str, Any]) -> Any: response = requests.post(url, headers={}, json=payload, verify=False) diff --git a/sde_collections/tasks.py b/sde_collections/tasks.py index 761d92a1..73434b15 100644 --- a/sde_collections/tasks.py +++ b/sde_collections/tasks.py @@ -13,6 +13,10 @@ from .sinequa_api import Api from .utils.github_helper import GitHubHandler +# from django.apps import apps + +# Collection = apps.get_model("Collection") + def _get_data_to_import(collection, server_name): # ignore these because they are API collections and don't have URLs @@ -24,6 +28,13 @@ def _get_data_to_import(collection, server_name): "/SMD/CASEI_Platform/", "/SMD/CMR_API/", "/SMD/PDS_API_Legacy_All/", + "/SDE/ASTRO_NAVO_HEASARC/", + "/SDE/CASEI_Campaign/", + "/SDE/CASEI_Deployment/", + "/SDE/CASEI_Instrument/", + "/SDE/CASEI_Platform/", + "/SDE/CMR_API/", + "/SDE/PDS_API_Legacy_All/", ] data_to_import = [] @@ -97,6 +108,45 @@ def import_candidate_urls_from_api(server_name="test", collection_ids=[]): shutil.rmtree(TEMP_FOLDER_NAME)
+@celery_app.task(soft_time_limit=10000)
+def import_candidate_urls_counts_from_api(server_name, collection_ids=None):
+    """Return the number of records found on ``server_name`` for the collections.
+
+    The counts of all collections whose id is in ``collection_ids`` are
+    summed; a missing, empty or unmatched id list gives 0, so the result is
+    always an int.
+    """
+    collections = Collection.objects.filter(id__in=collection_ids or [])
+    return sum(
+        len(_get_data_to_import(server_name=server_name, collection=collection))
+        for collection in collections
+    )
+
+
+@celery_app.task(soft_time_limit=10000)
+def import_candidate_urls_counts_from_api_all_collections_all_servers():
+    """Fetch URL counts for every collection that has no count recorded yet.
+
+    A collection is skipped when any of its dev, test or production counts is
+    non-zero; otherwise each queried server's count is stored and saved.
+    """
+    # NOTE(review): "dev" is not queried although url_count_dev exists — confirm.
+    server_names = ("test", "production")
+
+    for collection in Collection.objects.all():
+        has_recorded_count = (
+            collection.url_count_dev != 0
+            or collection.url_count_test != 0
+            or collection.url_count_production != 0
+        )
+        if has_recorded_count:
+            continue
+        for server_name in server_names:
+            count = import_candidate_urls_counts_from_api(server_name, [collection.id])
+            setattr(collection, f"url_count_{server_name}", count)
+        collection.save()
+
+
@celery_app.task() def push_to_github_task(collection_ids): collections = Collection.objects.filter(id__in=collection_ids) diff --git a/sde_collections/templatetags/__init__.py b/sde_collections/templatetags/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sde_collections/templatetags/get_server_url.py b/sde_collections/templatetags/get_server_url.py new file mode 100644 index 00000000..edc3c763 --- /dev/null +++ b/sde_collections/templatetags/get_server_url.py @@ -0,0 +1,8 @@ +from django import template + +register = template.Library() + + +@register.simple_tag +def get_server_url(collection, server_name): + return collection.get_server_url(server_name) diff --git a/sde_collections/urls.py b/sde_collections/urls.py index 3953517e..cf09a28e 100644 --- a/sde_collections/urls.py +++ b/sde_collections/urls.py @@ -40,6 +40,11 @@ view=views.WebappGitHubConsolidationView.as_view(), name="consolidate_db_and_github_configs", ), + path( + "url-counts/", + view=views.URLCountView.as_view(), + name="url_counts", + ), # List all CandidateURL instances: /candidate-urls/ # Retrieve a specific CandidateURL instance: /candidate-urls/{id}/ # Create a new CandidateURL instance: /candidate-urls/ diff --git a/sde_collections/views.py b/sde_collections/views.py index 6d65d61e..f2b20c9c 100644 --- a/sde_collections/views.py +++ b/sde_collections/views.py @@ -101,8 +101,12 @@ def post(self, request, *args, **kwargs): collection.github_issue_number = github_issue_number collection.save() else: -
github_form.add_error("github_issue_link", "Invalid GitHub issue link format") - return self.render_to_response(self.get_context_data(form=form, github_form=github_form)) + github_form.add_error( + "github_issue_link", "Invalid GitHub issue link format" + ) + return self.render_to_response( + self.get_context_data(form=form, github_form=github_form) + ) return redirect("sde_collections:detail", pk=collection.pk) else: @@ -128,7 +132,9 @@ def get_context_data(self, **kwargs): context["github_form"] = CollectionGithubIssueForm( initial={"github_issue_link": self.get_object().github_issue_link} ) - context["required_urls"] = RequiredUrls.objects.filter(collection=self.get_object()) + context["required_urls"] = RequiredUrls.objects.filter( + collection=self.get_object() + ) context["segment"] = "collection-detail" return context @@ -137,7 +143,9 @@ class RequiredUrlsDeleteView(LoginRequiredMixin, DeleteView): model = RequiredUrls def get_success_url(self, *args, **kwargs): - return reverse("sde_collections:detail", kwargs={"pk": self.object.collection.pk}) + return reverse( + "sde_collections:detail", kwargs={"pk": self.object.collection.pk} + ) class CandidateURLsListView(LoginRequiredMixin, ListView): @@ -341,7 +349,9 @@ class PushToGithubView(APIView): def post(self, request): collection_ids = request.POST.getlist("collection_ids[]", []) if len(collection_ids) == 0: - return Response("collection_ids can't be empty.", status=status.HTTP_400_BAD_REQUEST) + return Response( + "collection_ids can't be empty.", status=status.HTTP_400_BAD_REQUEST + ) push_to_github_task.delay(collection_ids) @@ -363,7 +373,9 @@ def get(self, request, *args, **kwargs): self.data = generate_db_github_metadata_differences() else: # this needs to be a celery task eventually - self.data = generate_db_github_metadata_differences(reindex_configs_from_github=True) + self.data = generate_db_github_metadata_differences( + reindex_configs_from_github=True + ) return super().get(request, *args, 
**kwargs) @@ -381,8 +393,12 @@ def post(self, request, *args, **kwargs): elif field == "connector": new_value = ConnectorChoices.lookup_by_text(new_value) - Collection.objects.filter(config_folder=config_folder).update(**{field: new_value}) - messages.success(request, f"Successfully updated {field} of {config_folder}.") + Collection.objects.filter(config_folder=config_folder).update( + **{field: new_value} + ) + messages.success( + request, f"Successfully updated {field} of {config_folder}." + ) else: messages.error( request, @@ -396,3 +412,11 @@ def get_context_data(self, **kwargs): context["differences"] = self.data return context
+
+
+class URLCountView(LoginRequiredMixin, ListView):
+    """List every collection with its stored URL count per environment."""
+
+    template_name = "sde_collections/url_counts_by_environment.html"
+    model = Collection
+    context_object_name = "collections"
diff --git a/sde_indexing_helper/static/js/url_counts.js b/sde_indexing_helper/static/js/url_counts.js new file mode 100644 index 00000000..e4b15b90 --- /dev/null +++ b/sde_indexing_helper/static/js/url_counts.js @@ -0,0 +1,70 @@
+$(document).ready(function () {
+    // Columns before this index (folder, name) are text; the rest are counts.
+    var FIRST_COUNT_COLUMN = 2;
+
+    $('#url_counts_table').DataTable({
+        initComplete: function (settings, json) {
+            // calculate the sum when table is first created:
+            doSum();
+        },
+        "paging": false,
+        "stateSave": true,
+        "fixedHeader": true,
+    });
+
+    $('#url_counts_table').on('draw.dt', function () {
+        // re-calculate the sum whenever the table is re-displayed:
+        doSum();
+    });
+
+    // Sum each count column over all rows and write the totals to the footer.
+    function doSum() {
+        // get the DataTables API object:
+        var table = $('#url_counts_table').DataTable();
+        // Take the column count from the table itself: indexing past the last
+        // column adds `undefined` into the totals and turns them into NaN.
+        var columnCount = table.columns().header().length;
+        // initial (unsummed) footer values: a label, a blank, then zeros
+        var totals = ['Totals', ''];
+        for (let i = FIRST_COUNT_COLUMN; i < columnCount; i++) {
+            totals[i] = 0;
+        }
+        // iterate all rows - use table.rows( {search: 'applied'} ).data()
+        // if you want to sum only filtered (visible) rows:
+        totals = table.rows().data()
+            .reduce(function (sum, record) {
+                for (let i = FIRST_COUNT_COLUMN; i < columnCount; i++) {
+                    sum[i] = sum[i] + numberFromString(record[i]);
+                }
+                return sum;
+            }, totals);
+        // place each value in its footer cell (column 0 is not written):
+        for (let i = 1; i < columnCount; i++) {
+            $(table.column(i).footer()).html(formatNumber(totals[i]));
+        }
+    }
+
+    // Extract the number from a cell value that may contain markup.
+    function numberFromString(s) {
+        if (typeof s === 'number') {
+            return s;
+        }
+        if (typeof s !== 'string') {
+            // neither a string nor a number: contributes nothing to the sum
+            return 0;
+        }
+        // let the browser strip any tags, then keep only numeric characters
+        var tempDiv = document.createElement('div');
+        tempDiv.innerHTML = s;
+        var text = tempDiv.textContent || tempDiv.innerText || "";
+        var value = Number(text.replace(/[^\d.-]/g, ''));
+        // text such as "-" or "1.2.3" is not a number; count it as 0 rather
+        // than letting NaN spoil the whole column total
+        return isFinite(value) ? value : 0;
+    }
+
+    function formatNumber(n) {
+        return n.toLocaleString();
+    }
+
+});
diff --git a/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html b/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html new file mode 100644 index 00000000..2a095435 --- /dev/null +++ b/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html @@ -0,0 +1,60 @@ +{% extends "layouts/base.html" %} +{% load static %} +{% load i18n %} +{% load get_server_url %} +{% block title %}URL counts by environment{% endblock %} +{% block stylesheets %} + {{ block.super }} +
+{% endblock stylesheets %} +{% block content %} + {% csrf_token %} +
+

URL counts by environment

+ + + + + + + + + + + + + {% for collection in collections %} + + + + + + + + {% endfor %} + + + + + + + + + + +
FolderNameDevTestProduction
{{ collection.config_folder }}{{ collection.name }}{{ collection.url_count_dev }}{{ collection.url_count_test }}{{ collection.url_count_production }}
+{% endblock content %} +{% block javascripts %} + + + + + +{% endblock javascripts %}