From a157a20d6eac5fb435893ff45edab60c7b16db18 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 25 Jul 2021 18:37:08 -0400 Subject: [PATCH 01/42] First pass on LoD Reconciling Not perfect yet. Still some tiny issues missing and some re-reconciling/correcting UI needed. Should be ready tonight? Left on this pull still a few DPM()s for my own enjoyment. --- ami.links.task.yml | 6 + ami.routing.yml | 8 + ami.services.yml | 5 + composer.json | 2 +- src/AmiLoDBatchQueue.php | 145 ++++++ src/AmiLoDService.php | 336 ++++++++++++ src/AmiUtilityService.php | 30 +- src/Entity/amiSetEntity.php | 2 + src/Form/AmiMultiStepIngest.php | 8 +- src/Form/AmiMultiStepIngestBaseForm.php | 1 - src/Form/amiSetAdminOverview.php | 205 -------- src/Form/amiSetEntityForm.php | 1 - src/Form/amiSetEntityProcessForm.php | 2 +- src/Form/amiSetEntityReconcileForm.php | 493 ++++++++++++++++++ src/Plugin/ImporterAdapter/SolrImporter.php | 2 +- .../QueueWorker/IngestADOQueueWorker.php | 4 +- src/Plugin/QueueWorker/LoDQueueWorker.php | 177 +++++++ 17 files changed, 1198 insertions(+), 229 deletions(-) create mode 100644 src/AmiLoDBatchQueue.php create mode 100644 src/AmiLoDService.php delete mode 100644 src/Form/amiSetAdminOverview.php create mode 100644 src/Form/amiSetEntityReconcileForm.php create mode 100644 src/Plugin/QueueWorker/LoDQueueWorker.php diff --git a/ami.links.task.yml b/ami.links.task.yml index 10faba2..8d50a3f 100644 --- a/ami.links.task.yml +++ b/ami.links.task.yml @@ -38,3 +38,9 @@ ami_set_entity.delete_process_form: base_route: entity.ami_set_entity.canonical title: Delete Processed ADOs weight: 12 + +ami_set_entity.reconcile_form: + route_name: entity.ami_set_entity.reconcile_form + base_route: entity.ami_set_entity.canonical + title: Reconcile LoD + weight: 13 diff --git a/ami.routing.yml b/ami.routing.yml index d6b001c..36d5104 100644 --- a/ami.routing.yml +++ b/ami.routing.yml @@ -86,3 +86,11 @@ entity.ami_set_entity.delete_process_form: _title: 'Process Ami Set' 
requirements: _entity_access: 'ami_set_entity.deleteados' + +entity.ami_set_entity.reconcile_form: + path: '/amiset/{ami_set_entity}/reconcile' + defaults: + _entity_form: ami_set_entity.reconcile + _title: 'Reconcile LoD' + requirements: + _entity_access: 'ami_set_entity.process' \ No newline at end of file diff --git a/ami.services.yml b/ami.services.yml index 0666163..9594bc3 100644 --- a/ami.services.yml +++ b/ami.services.yml @@ -8,3 +8,8 @@ services: arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@entity_field.manager', '@entity_type.bundle.info', '@http_client'] tags: - { name: backend_overridable } + ami.lod: + class: Drupal\ami\AmiLoDService + arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@http_client', '@ami.utility' ] + tags: + - { name: backend_overridable } diff --git a/composer.json b/composer.json index 64e9459..1d0171a 100644 --- a/composer.json +++ b/composer.json @@ -24,7 +24,7 @@ "swaggest/json-diff": "^3.7.5", "phpoffice/phpspreadsheet": "^1.15.0", "maennchen/zipstream-php": "^1.2 || ^2.1", - "drupal/google_api_client": "^3.0", + "drupal/google_api_client": "^3.0 || ^4.0", "ramsey/uuid": "^4.1" }, "minimum-stability": "dev", diff --git a/src/AmiLoDBatchQueue.php b/src/AmiLoDBatchQueue.php new file mode 100644 index 0000000..1bff41d --- /dev/null +++ b/src/AmiLoDBatchQueue.php @@ -0,0 +1,145 @@ +get($queue_name)->createQueue(); + // The actual queue worker is the one from the general AMI ingest Queue + // That way this "per set" queue does not appear in any queue_ui listings + // not can be processed out of the 
context of a UI facing batch. + $queue_worker = $queue_manager->createInstance('ami_lod_ado'); + $queue = $queue_factory->get($queue_name); + + $num_of_items = $queue->numberOfItems(); + if (!array_key_exists('max', $context['sandbox']) + || $context['sandbox']['max'] < $num_of_items + ) { + $context['sandbox']['max'] = $num_of_items; + } + + $context['finished'] = 0; + $context['results']['queue_name'] = $queue_name; + $context['results']['queue_label'] = 'AMI Set '. ($set_id ?? '') .' LoD reconciling'; + + try { + // Only process Items of this Set if $context['set_id'] is set. + if ($item = $queue->claimItem()) { + $label = 'Reconciling '.$item->data->info['label']; + $title = t('For %name processing %label, %count items remaining', [ + '%name' => $context['results']['queue_label'], + '%label' => $label, + '%count' => $num_of_items, + ]); + $context['message'] = $title; + + // Process and delete queue item + $queue_worker->processItem($item->data); + $queue->deleteItem($item); + + $num_of_items = $queue->numberOfItems(); + + // Update context + $context['results']['processed'][] = $item->item_id; + $context['finished'] = ($context['sandbox']['max'] - $num_of_items) / $context['sandbox']['max']; + } + else { + // Done processing if can not claim. + $context['finished'] = 1; + } + } catch (RequeueException $e) { + if (isset($item)) { + $queue->releaseItem($item); + } + } catch (SuspendQueueException $e) { + if (isset($item)) { + $queue->releaseItem($item); + } + + watchdog_exception('ami', $e); + $context['results']['errors'][] = $e->getMessage(); + + // Marking the batch job as finished will stop further processing. + $context['finished'] = 1; + } catch (\Exception $e) { + // In case of any other kind of exception, log it and leave the item + // in the queue to be processed again later. + watchdog_exception('ami', $e); + $context['results']['errors'][] = $e->getMessage(); + } + } + + /** + * Callback when finishing a batch job. 
+ * + * @param $success + * @param $results + * @param $operations + */ + public static function finish($success, $results, $operations) { + // Display success of no results. + if (!empty($results['processed'])) { + \Drupal::messenger()->addMessage( + \Drupal::translation()->formatPlural( + count($results['processed']), + '%queue: One item processed.', + '%queue: @count items processed.', + ['%queue' => $results['queue_label']] + ) + ); + } + elseif (!isset($results['processed'])) { + \Drupal::messenger()->addMessage(\Drupal::translation() + ->translate("Items were not processed. Try to release existing items or add new items to the queues."), + 'warning' + ); + } + + if (!empty($results['errors'])) { + \Drupal::messenger()->addError( + \Drupal::translation()->formatPlural( + count($results['errors']), + 'Queue %queue error: @errors', + 'Queue %queue errors: ', + [ + '%queue' => $results['queue_label'], + '@errors' => Markup::create(implode('
  • ', $results['errors'])), + ] + ) + ); + } + // Cleanup and remove the queue. This is a live batch operation. + /** @var \Drupal\Core\Queue\QueueFactory $queue_factory */ + $queue_name = $results['queue_name']; + $queue_factory = \Drupal::service('queue'); + $queue_factory->get($queue_name)->deleteQueue(); + } + +} + diff --git a/src/AmiLoDService.php b/src/AmiLoDService.php new file mode 100644 index 0000000..200b6e7 --- /dev/null +++ b/src/AmiLoDService.php @@ -0,0 +1,336 @@ +fileSystem = $file_system; + $this->fileUsage = $file_usage; + $this->entityTypeManager = $entity_type_manager; + $this->streamWrapperManager = $stream_wrapper_manager; + $this->archiverManager = $archiver_manager; + //@TODO evaluate creating a ServiceFactory instead of reading this on construct. + $this->destinationScheme = $config_factory->get( + 'strawberryfield.storage_settings' + )->get('file_scheme'); + $this->config = $config_factory->get( + 'strawberryfield.filepersister_service_settings' + ); + $this->languageManager = $language_manager; + $this->transliteration = $transliteration; + $this->moduleHandler = $module_handler; + $this->loggerFactory = $logger_factory; + $this->strawberryfieldUtility = $strawberryfield_utility_service; + $this->currentUser = $current_user; + $this->httpClient = $http_client; + $this->AmiUtilityService = $ami_utility; + + } + + + public function invokeLoDRoute(string $domain, string $query, string $auth_type, $vocab = 'subjects', $rdftype = 'thing', $lang = 'en' , $count = 5):array { + + $current_laguage = $lang ?? 
\Drupal::languageManager() + ->getCurrentLanguage() + ->getId(); + + switch ($auth_type) { + case 'nominatim': + $controller_url = Url::fromRoute( + 'webform_strawberryfield.nominatim', + ['api_type' => 'search', 'count' => $count, 'lang' => $current_laguage]); + break; + default: + $controller_url = Url::fromRoute( + 'webform_strawberryfield.auth_autocomplete', + ['auth_type' => $auth_type, 'vocab' => $vocab, 'rdftype' => $rdftype, 'count' => $count] + ); + } + // When using this on localhost:8001/Docker the cookie domain won't match with the called one. + // That is expected and webform_strawberryfield will use instead the X-CSRF-TOKEN. + if ($domain == 'http://localhost:8001') { + $domain = $_SERVER['REQUEST_SCHEME'].'://'.$_SERVER['SERVER_ADDR'].':'.$_SERVER['SERVER_PORT']; + } + $cookieJar = CookieJar::fromArray($_COOKIE, $domain); + + + $controller_path = $controller_url->setAbsolute()->toString(TRUE)->getGeneratedUrl(); + error_log($controller_url->setAbsolute(FALSE)->toString(TRUE)->getGeneratedUrl()); + $csrf_token = \Drupal::csrfToken()->get($controller_url->setAbsolute(FALSE)->toString(TRUE)->getGeneratedUrl()); + $options = [ + 'headers' => [ + 'Content-Type' => 'application/json', + 'X-CSRF-Token' => $csrf_token, + ], + 'cookies' => $cookieJar, + ]; + // When o docker and running a local instance the server domain is localhost:8001 (normally in our ensemble) + // But localhost does not resolve internally to the right IP. + // @TODO make this configurable since we can also use esmero-web, but that won't work for multisites + // OR SSL certs. So better this way. We could also check if IP actually matches localhost? 
(127.0.0.1 or 0.0.0.0) + if (substr($controller_path, 0, 21 ) === "http://localhost:8001") { + $controller_path = str_replace("http://localhost:8001", $_SERVER['REQUEST_SCHEME'].'://'.$_SERVER['SERVER_ADDR'].':'.$_SERVER['SERVER_PORT'], $controller_path); + } + + $options = array_merge_recursive(['query' => ['_format' => 'json', 'q' => $query]], $options); + $response = $this->httpClient->request('GET', $controller_path, $options); + $sucessfull = $response->getStatusCode() >= 200 && $response->getStatusCode() < 300; + $response_encoded = $sucessfull ? json_decode($response->getBody()->getContents()) : []; + return $response_encoded; + } + + /** + * From a given CSV files returns different values for a list of columns + * + * @param \Drupal\file\Entity\File $file + * @param array $columns + * + * @return array + * An Associative Array keyed by Column name + */ + public function provideLoDColumnValues(File $file, array $columns):array { + $data = $this->AmiUtilityService->csv_read($file); + $column_keys = $data['headers'] ?? []; + $alldifferent = []; + foreach ($columns as $column) { + $column_index = array_search($column, $column_keys); + error_log($column_index); + if ($column_index !== FALSE) { + $alldifferent[$column] = $this->getDifferentValuesfromColumnSplit($data, + $column_index); + } + } + error_log(var_export($alldifferent, true)); + return $alldifferent; + } + + /** + * For a given Numeric Column index, get different/non json, split values + * + * @param array $data + * @param int $key + * + * @return array + */ + public function getDifferentValuesfromColumnSplit(array $data, int $key, array $delimiters = ['|@|', ';'] ): array { + $unique = []; + $all = array_column($data['data'], $key); + $all_notJson = array_filter($all, array($this, 'isNotJson')); + $all_entries = []; + // The difficulty. In case of multiple delimiters we need to see which one + // works/works better. But if none, assume it may be also right since a single + // Value is valid. 
So we need to accumulate, count and discern + foreach ($all_notJson as $entries) { + $current_entries = []; + foreach ($delimiters as $delimiter) { + $split_entries = explode($delimiter, $entries) ?? []; + $current_entries[$delimiter] = (array) $split_entries; + } + $chosen_entries = []; + foreach ($current_entries as $delimiter => $current_entry) { + $chosen_entries = $current_entry; + if (count($chosen_entries) > 1) { + break; + } + } + foreach ($chosen_entries as $chosen_entry) { + $all_entries[] = $chosen_entry; + } + } + $unique = array_map('trim', $all_entries); + $unique = array_unique(array_values($unique), SORT_STRING); + return $unique; + } + + /** + * Checks if a string is valid JSON + * + * @param $string + * + * @return bool + */ + public function isJson($string) { + json_decode($string); + return json_last_error() === JSON_ERROR_NONE; + } + + /** + * Helper function that negates ::isJson. + * @param $string + * + * @return bool + */ + public function isNotJson($string) { + return !$this->isJson($string); + } +} diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 1db0bab..1f165a3 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -762,7 +762,9 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { * * @param \Drupal\file\Entity\File $file * - * @param string $uuid_key + * @param string|null $uuid_key + * IF NULL then no attempt of using UUIDS will be made. 
+ * Needed for LoD Reconciling CSVs * @param bool $append_header * * @return int|string|null @@ -780,26 +782,28 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo } array_walk($data['headers'], 'htmlspecialchars'); // How we want to get the key number that contains the $uuid_key - $haskey = array_search($uuid_key, $data['headers']); - if ($haskey === FALSE) { - array_unshift($data['headers'], $uuid_key); + if ($uuid_key) { + $haskey = array_search($uuid_key, $data['headers']); + if ($haskey === FALSE) { + array_unshift($data['headers'], $uuid_key); + } } - if ($append_header) { $fh->fputcsv($data['headers']); } foreach ($data['data'] as $row) { - if ($haskey === FALSE) { - array_unshift($row, $uuid_key); - $row[0] = Uuid::uuid4(); - } - else { - if (empty(trim($row[$haskey])) || !Uuid::isValid(trim($row[$haskey]))) { - $row[$haskey] = Uuid::uuid4(); + if ($uuid_key) { + if ($haskey === FALSE) { + array_unshift($row, $uuid_key); + $row[0] = Uuid::uuid4(); + } + else { + if (empty(trim($row[$haskey])) || !Uuid::isValid(trim($row[$haskey]))) { + $row[$haskey] = Uuid::uuid4(); + } } } - //array_walk($row, 'htmlspecialchars'); array_walk($row,'htmlentities'); $fh->fputcsv($row); diff --git a/src/Entity/amiSetEntity.php b/src/Entity/amiSetEntity.php index b85b14b..42b0f32 100644 --- a/src/Entity/amiSetEntity.php +++ b/src/Entity/amiSetEntity.php @@ -83,6 +83,7 @@ * "delete" = "Drupal\ami\Form\amiSetEntityDeleteForm", * "process" = "Drupal\ami\Form\amiSetEntityProcessForm", * "deleteprocessed" = "Drupal\ami\Form\amiSetEntityDeleteProcessedForm", + * "reconcile" = "Drupal\ami\Form\amiSetEntityReconcileForm" * }, * "access" = "Drupal\ami\Entity\Controller\amiSetEntityAccessControlHandler", * }, @@ -100,6 +101,7 @@ * "edit-form" = "/amiset/{ami_set_entity}/edit", * "process-form" = "/amiset/{ami_set_entity}/process", * "delete-process-form" = "/amiset/{ami_set_entity}/deleteprocessed", + * "reconcile-form" = 
"/amiset/{ami_set_entity}/reconcile", * "delete-form" = "/amiset/{ami_set_entity}/delete", * "collection" = "/amiset/list" * }, diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 1c4a8f8..44f10a5 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -534,12 +534,12 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $amisetdata->csv = $fileid; if ($plugin_instance->getPluginDefinition()['batch']) { $data = $this->store->get('data'); - $amisetdata->column_keys = $data['headers']; + $amisetdata->column_keys = []; + $id = $this->AmiUtilityService->createAmiSet($amisetdata); $batch = $plugin_instance->getBatch($form_state, $this->store->get('pluginconfig'), $amisetdata); - $batch_data = $this->store->get('batch_finished'); - $amisetdata->total_rows = $batch_data['totalrows'] ?? 0; - $id = $this->AmiUtilityService->createAmiSet($amisetdata); + + if ($id) { $url = Url::fromRoute('entity.ami_set_entity.canonical', ['ami_set_entity' => $id]); diff --git a/src/Form/AmiMultiStepIngestBaseForm.php b/src/Form/AmiMultiStepIngestBaseForm.php index d08fec2..77f7ba0 100644 --- a/src/Form/AmiMultiStepIngestBaseForm.php +++ b/src/Form/AmiMultiStepIngestBaseForm.php @@ -192,7 +192,6 @@ public function validateForm(array &$form, FormStateInterface $form_state) { if ($form_state->getTriggeringElement()['#name'] == 'prev') { // No validation my friends. } else { - //@TODO each step has its own validation. 
return parent::validateForm($form, $form_state); } } diff --git a/src/Form/amiSetAdminOverview.php b/src/Form/amiSetAdminOverview.php deleted file mode 100644 index bac1fa1..0000000 --- a/src/Form/amiSetAdminOverview.php +++ /dev/null @@ -1,205 +0,0 @@ -entityTypeManager = $entity_type_manager; - $this->dateFormatter = $date_formatter; - $this->moduleHandler = $module_handler; - $this->tempStoreFactory = $temp_store_factory; - } - - /** - * {@inheritdoc} - */ - public static function create(ContainerInterface $container) { - return new static( - $container->get('entity_type.manager'), - $container->get('date.formatter'), - $container->get('module_handler'), - $container->get('tempstore.private') - ); - } - - /** - * {@inheritdoc} - */ - public function getFormId() { - return 'metadatadisplay_admin_overview'; - } - - /** - * Form constructor for the metadatadisplay overview administration form. - * - * @param array $form - * An associative array containing the structure of the form. - * @param \Drupal\Core\Form\FormStateInterface $form_state - * The current state of the form. - * @param string $type - * The type of the overview form ('approval' or 'new'). - * - * @return array - * The form structure. - */ - public function buildForm(array $form, FormStateInterface $form_state, $type = 'new') { - - // Build an 'Update options' form. 
- $form['options'] = [ - '#type' => 'details', - '#title' => $this->t('Update options'), - '#open' => TRUE, - '#attributes' => ['class' => ['container-inline']], - ]; - - if ($type == 'approval') { - $options['publish'] = $this->t('Publish the selected Ami Sets'); - } - else { - $options['unpublish'] = $this->t('Unpublish the selected Ami Sets'); - } - $options['delete'] = $this->t('Delete the selected Ami Sets'); - $options['process'] = $this->t('Process the selected Ami Sets'); - $options['deleteados'] = $this->t('Process the selected Ami Sets'); - - $form['options']['operation'] = [ - '#type' => 'select', - '#title' => $this->t('Action'), - '#title_display' => 'invisible', - '#options' => $options, - '#default_value' => 'process', - ]; - $form['options']['submit'] = [ - '#type' => 'submit', - '#value' => $this->t('Update'), - ]; - - // Load the comments that need to be displayed. - $header['id'] = $this->t('Ami Set ID'); - $header['name'] = $this->t('Name'); - $header['last update'] = $this->t('Last update'); - $header['operations'] = $this->t('Operations'); - - $cids = $this->entityTypeManager->getStorage('ami_set_entity')->getQuery() - ->tableSort($header) - ->pager(50) - ->execute(); - - /** @var $amisets \Drupal\comment\CommentInterface[] */ - $amisets = $this->entityTypeManager->getStorage('ami_set_entity')->loadMultiple($cids); - - // Build a table listing the appropriate comments. 
- $options = []; - - - foreach ($amisets as $amiset) { - /** @var $commented_entity \Drupal\Core\Entity\EntityInterface */ - - $options[$amiset->id()] = [ - 'title' => ['data' => ['#title' => $amiset->id()]], - 'name' => [ - 'data' => [ - '#type' => 'link', - '#title' => $amiset->name->value, - '#url' => $amiset->toUrl('edit-form'), - ], - ], - 'last update' => [ - 'data' => [ - '#theme' => 'username', - '#account' => \Drupal::service('date.formatter')->format($amiset->changed->value, 'custom', 'd/m/Y'), - ], - ], - - ]; - $links = []; - $links['edit'] = [ - 'title' => $this->t('Edit'), - 'url' => $amiset->toUrl('edit-form'), - ]; - if ($this->moduleHandler->moduleExists('content_translation') && $this->moduleHandler->invoke('content_translation', 'translate_access', [$amiset])->isAllowed()) { - $links['translate'] = [ - 'title' => $this->t('Translate'), - 'url' => $amiset->toUrl('drupal:content-translation-overview'), - ]; - } - $options[$amiset->id()]['operations']['data'] = [ - '#type' => 'operations', - '#links' => $links, - ]; - } - - $form['comments'] = [ - '#type' => 'tableselect', - '#header' => $header, - '#options' => $options, - '#empty' => $this->t('No Ami Sets available.'), - ]; - - $form['pager'] = ['#type' => 'pager']; - - return $form; - } - - public function submitForm(array &$form, FormStateInterface $form_state) { - // TODO: Implement submitForm() method. 
- } - - -} diff --git a/src/Form/amiSetEntityForm.php b/src/Form/amiSetEntityForm.php index 8b842f0..c6942d1 100644 --- a/src/Form/amiSetEntityForm.php +++ b/src/Form/amiSetEntityForm.php @@ -31,5 +31,4 @@ public function save(array $form, FormStateInterface $form_state) { $form_state->setRedirectUrl($entity->toUrl('collection')); return $status; } - } diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index 151e074..f306502 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -51,7 +51,7 @@ public static function create(ContainerInterface $container) { $container->get('entity_type.bundle.info'), $container->get('datetime.time'), $container->get('ami.utility'), - $container->get('strawberryfield.utility'), + $container->get('strawberryfield.utility') ); } diff --git a/src/Form/amiSetEntityReconcileForm.php b/src/Form/amiSetEntityReconcileForm.php new file mode 100644 index 0000000..7b8be81 --- /dev/null +++ b/src/Form/amiSetEntityReconcileForm.php @@ -0,0 +1,493 @@ +AmiUtilityService = $ami_utility; + $this->AmiLoDService = $ami_lod; + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container) { + return new static( + $container->get('entity.repository'), + $container->get('entity_type.bundle.info'), + $container->get('datetime.time'), + $container->get('ami.utility'), + $container->get('ami.lod'), + $container->get('strawberryfield.utility') + ); + } + + + public function getQuestion() { + return $this->t( + 'Are you sure you want to Reconcile Lod for %name?', + ['%name' => $this->entity->label()] + ); + } + + /** + * {@inheritdoc} + */ + public function getCancelUrl() { + return new Url('entity.ami_set_entity.collection'); + } + + /** + * {@inheritdoc} + */ + public function buildForm(array $form, FormStateInterface $form_state) { + // Read Config first to get the Selected Bundles based on the Config + // type selected. 
Based on that we can set Moderation Options here + + $data = new \stdClass(); + foreach ($this->entity->get('set') as $item) { + /** @var \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem $item */ + $data = $item->provideDecoded(FALSE); + } + $domain = $this->getRequest()->getSchemeAndHttpHost(); + + //$lod = $this->AmiLoDService->invokeLoDRoute($domain,'Diego', 'wikidata', 'subjects', 'thing', 'en', 5); + + + if ($data !== new \stdClass()) { + // Only Show this form if we got data from the SBF field. + // we can't assume the user did not mess with the AMI set data? + $op = $data->pluginconfig->op ?? NULL; + $ops = [ + 'create', + 'update', + 'patch', + ]; + if (!in_array($op, $ops)) { + $form['status'] = [ + '#tree' => TRUE, + '#type' => 'fieldset', + '#title' => $this->t( + 'Error' + ), + '#markup' => $this->t( + 'Sorry. This AMI set has no right Operation (Create, Update, Patch) set. Please fix this or contact your System Admin to fix it.' + ), + ]; + return $form; + } + $form['mapping'] = [ + '#tree' => TRUE, + '#type' => 'fieldset', + '#title' => $this->t('LoD reconciling'), + ]; + $access = TRUE; + $csv_file_reference = $this->entity->get('source_data')->getValue(); + if (isset($csv_file_reference[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file */ + $file = $this->entityTypeManager->getStorage('file')->load( + $csv_file_reference[0]['target_id'] + ); + if ($file) { + $reconcile_settings = $data->reconcile_settings->columns ?? []; + $file_data_all = $this->AmiUtilityService->csv_read($file); + $column_keys = $file_data_all['headers'] ?? 
[]; + + $form['mapping']['lod_columns'] = [ + '#type' => 'select', + '#title' => $this->t('Select which columns you want to reconcile against LoD providers'), + '#default_value' => $reconcile_settings, + '#options' => array_combine($column_keys, $column_keys), + '#size' => count($column_keys), + '#multiple' => TRUE, + '#description' => $this->t('Columns that contain data you want to reconcile against LoD providers'), + '#empty_option' => $this->t('- Please select columns -'), + '#ajax' => [ + 'callback' => [$this, 'lodOptionsAjaxCallback'], + 'wrapper' => 'lod-options-wrapper', + 'event' => 'change', + ], + ]; + $form['lod_options'] = [ + '#type' => 'hidden', + '#prefix' => '
    ', + '#suffix' => '
    ', + ]; + if ($form_state->getValue(['mapping', 'lod_columns'], NULL)) { + error_log(var_export($form_state->getValue([ + 'mapping', + 'lod_columns' + ]), TRUE)); + + $source_options = $form_state->getValue(['mapping', 'lod_columns']); + $column_options = [ + 'loc;subjects;thing' => 'LoC subjects(LCSH)', + 'loc;names;thing' => 'LoC Name Authority File (LCNAF)', + 'loc;genreForms;thing' => 'LoC Genre/Form Terms (LCGFT)', + 'loc;graphicMaterials;thing' => 'LoC Thesaurus of Graphic Materials (TGN)', + 'loc;geographicAreas;thing' => 'LoC MARC List for Geographic Areas', + 'loc;relators;thing' => 'LoC Relators Vocabulary (Roles)', + 'loc;rdftype;CorporateName' => 'LoC MADS RDF by type: Corporate Name', + 'loc;rdftype;PersonalName' => 'LoC MADS RDF by type: Personal Name', + 'loc;rdftype;FamilyName' => 'LoC MADS RDF by type: Family Name', + 'loc;rdftype;Topic' => 'LoC MADS RDF by type: Topic', + 'loc;rdftype;GenreForm' => 'LoC MADS RDF by type: Genre Form', + 'loc;rdftype;Geographic' => 'LoC MADS RDF by type: Geographic', + 'loc;rdftype;Temporal' => 'LoC MADS RDF by type: Temporal', + 'loc;rdftype;ExtraterrestrialArea' => 'LoC MADS RDF by type: Extraterrestrial Area', + 'viaf;subjects;thing' => 'Viaf', + 'getty;aat;fuzzy' => 'Getty aat Fuzzy', + 'getty;aat;terms' => 'Getty aat Terms', + 'getty;aat;exact' => 'Getty aat Exact Label Match', + 'wikidata;subjects;thing' => 'Wikidata Q Items' + ]; + $form['lod_options']['#type'] = 'fieldset'; + $form['lod_options']['#tree'] = TRUE; + + $form['lod_options']['mappings'] = [ + '#type' => 'webform_mapping', + '#title' => $this->t('LoD Sources'), + '#description' => $this->t( + 'Please select how your chosen Columns will be LoD reconciled' + ), + '#description_display' => 'before', + '#empty_option' => $this->t('- Let AMI decide -'), + '#empty_value' => NULL, + '#default_value' => [], + '#required' => TRUE, + '#destination__multiple' => TRUE, + '#source' => $source_options, + '#source__title' => $this->t('LoD reconcile 
options'), + '#destination__title' => $this->t('LoD Authority Sources'), + '#destination' => $column_options, + '#destination__size' => count($column_options), + ]; + $form['lod_options']['select_preview'] = [ + '#type' => 'select', + '#title' => $this->t('Choose a Column to Preview'), + '#options' => array_combine($source_options, $source_options), + '#default_value' => $form_state->getValue(['lod_options','select_preview']) + ]; + $form['lod_options']['preview'] = [ + '#type' => 'button', + '#op' => 'preview', + '#value' => $this->t('Inspect cleaned/split up column values'), + '#ajax' => [ + 'callback' => [$this, 'ajaxColumPreview'], + ], + /* '#states' => [ + 'visible' => ['input[name="ado_context_preview"' => ['filled' => true]], + ],*/ + ]; + } + } + } + + $notprocessnow = $form_state->getValue('not_process_now', NULL); + + $form['not_process_now'] = [ + '#type' => 'checkbox', + '#title' => $this->t( + 'Enqueue but do not process Batch in realtime.' + ), + '#description' => $this->t( + 'Check this to enqueue but not trigger the interactive Batch processing. Cron or any other mechanism you have enabled will do the actual operation. This queue is shared by all AMI Sets in this repository and will be processed on a First-In First-Out basis.' + ), + '#required' => FALSE, + '#default_value' => !empty($notprocessnow) ? 
$notprocessnow : FALSE, + ]; + } + $form = $form + parent::buildForm($form, $form_state); + return $form; + } + + /** + * {@inheritdoc} + */ + public function submitForm(array &$form, FormStateInterface $form_state) { + + $csv_file_reference = $this->entity->get('source_data')->getValue(); + if (isset($csv_file_reference[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file */ + $file = $this->entityTypeManager->getStorage('file')->load( + $csv_file_reference[0]['target_id'] + ); + } + + $csv_file_processed = $this->entity->get('processed_data')->getValue(); + if (isset($csv_file_processed[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file_lod */ + $file_lod = $this->entityTypeManager->getStorage('file')->load( + $csv_file_processed[0]['target_id']); + $file_lod_id = $file_lod->id(); + } else { + $file_lod_id = $this->AmiUtilityService->csv_touch(); + $file_lod = $file_lod_id ? $this->entityTypeManager->getStorage('file')->load( + $file_lod_id) : NULL; + if ($file_lod) { + $this->entity->set('processed_data', $file_lod_id); + $this->entity->save(); + } + else { + $this->messenger()->addError( + $this->t( + 'So Sorry. We could not create a new CSV to store your LoD Reconciled data for @label. Please check your filesystem permissions or contact your System Admin', + [ + '@label' => $this->entity->label(), + ] + ) + ); + $form_state->setRebuild(); + return; + } + } + + + $data = new \stdClass(); + foreach ($this->entity->get('set') as $item) { + /** @var \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem $item */ + $data = $item->provideDecoded(FALSE); + } + if ($file && $file_lod && $data !== new \stdClass()) { + $domain = $this->getRequest()->getSchemeAndHttpHost(); + $invalid = []; + $mappings = $form_state->getValue(['lod_options','mappings']); + $form_state->setRebuild(TRUE); + $file_data_all = $this->AmiUtilityService->csv_read($file); + $column_keys = $file_data_all['headers'] ?? 
[]; + $output = []; + $output['table'] = [ + '#type' => 'table', + '#caption' => t('Unique processed values for this column'), + ]; + $columns = array_keys($mappings) ?? []; + $values_per_column = $this->AmiLoDService->provideLoDColumnValues($file, + $columns); + $inverted = []; + $headers = ['original','csv_columns']; + foreach($values_per_column as $column => $labels) { + foreach($labels as $label) { + $inverted[$label] = $inverted[$label] ?? []; + $headers = array_unique(array_merge($headers,$mappings[$column])); + $inverted[$label] = array_unique(array_merge($inverted[$label], $mappings[$column])); + } + } + ksort($inverted,SORT_NATURAL); + foreach($headers as &$header) { + // same is done in \Drupal\ami\Plugin\QueueWorker\LoDQueueWorker::processItem + $exploded = explode(';', $header); + $header = implode('_', $exploded); + } + + if (!count($inverted)) { + $this->messenger()->addError( + $this->t( + 'So Sorry. Your Ami Set @label selected column(s) has(have) not values. Please select of Columns and inspect them before submitting', + [ + '@label' => $this->entity->label(), + ] + ) + ); + $form_state->setRebuild(); + return; + } + + $SetURL = $this->entity->toUrl('canonical', ['absolute' => TRUE]) + ->toString(); + $notprocessnow = $form_state->getValue('not_process_now', NULL); + $queue_name = 'ami_lod_ado'; + if (!$notprocessnow) { + // This queues have no queue workers. That is intended since they + // are always processed by the ami_ingest_ado one manually. + $queue_name = 'ami_ingest_lod_set_' . $this->entity->id(); + // Destroy first here. + \Drupal::queue($queue_name)->deleteQueue(); + \Drupal::queue($queue_name, TRUE)->createQueue(); + // @TODO acquire a Lock that is renewed for each queue item processing + // To avoid same batch to be send to processing by different users at + // the same time. 
+ } + $added = []; + foreach ($inverted as $label => $lodconfig) { + // We pass all reconciliation endpoints to a single queue item per label + // because we need to create a CSV row per label + // If we split it would get super messy? + // Or we could use key storage instead too (i guess) + // @TODO explore single LoD endpoint per queue item + $data->info = [ + 'label' => $label, + 'domain' => $domain, + 'headers' => $headers, + 'lodconfig' => $lodconfig, + 'set_id' => $this->entity->id(), + 'csv' => $file_lod_id, + 'uid' => $this->currentUser()->id(), + 'set_url' => $SetURL, + 'attempt' => 1, + ]; + $added[] = \Drupal::queue($queue_name) + ->createItem($data); + } + if ($notprocessnow) { + $this->messenger()->addMessage( + $this->t( + 'Set @label enqueued and processed .', + [ + '@label' => $this->entity->label(), + ] + ) + ); + $form_state->setRedirectUrl($this->getCancelUrl()); + } + else { + $count = count(array_filter($added)); + if ($count) { + $this->submitBatch($form_state, $queue_name, $count); + } + } + } + else { + $this->messenger()->addError( + $this->t( + 'So Sorry. Ami Set @label has incorrect Metadata and/or has its Source CSV file missing or its LoD Reconciled CSV file missing. Please correct or delete and generate a new AMI set.', + [ + '@label' => $this->entity->label(), + ] + ) + ); + $form_state->setRebuild(); + } + } + + /* + * Process queue(s) with batch. 
+ * + * @param \Drupal\Core\Form\FormStateInterface $form_state + * @param $queue + */ + public function submitBatch(FormStateInterface $form_state, $queue_name) { + $batch = [ + 'title' => $this->t('Batch processing LoD Reconciling'), + 'operations' => [], + 'finished' => ['\Drupal\ami\AmiLoDBatchQueue', 'finish'], + 'progress_message' => t('Processing Set @current of @total.'), + ]; + $batch['operations'][] = [ + '\Drupal\ami\AmiLoDBatchQueue::takeOne', + [$queue_name, $this->entity->id()], + ]; + batch_set($batch); + } + + + /** + * Ajax callback for the plugin configuration form elements. + * + * @param $form + * @param \Drupal\Core\Form\FormStateInterface $form_state + * + * @return array + */ + public function lodOptionsAjaxCallback($form, FormStateInterface $form_state) { + return $form['lod_options'] ?? []; + } + + /** + * AJAX callback. + */ + public function ajaxColumPreview($form, FormStateInterface $form_state) { + $response = new AjaxResponse(); + $form['#attached']['library'][] = 'core/drupal.dialog.off_canvas'; + $response->setAttachments($form['#attached']); + + if (!empty($form_state->getValue(['lod_options','select_preview']))) { + $entity = $form_state->getFormObject()->getEntity(); + $csv_file_reference = $entity->get('source_data')->getValue(); + if (isset($csv_file_reference[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file */ + $file = $this->entityTypeManager->getStorage('file')->load( + $csv_file_reference[0]['target_id'] + ); + if ($file) { + $file_data_all = $this->AmiUtilityService->csv_read($file); + $column_keys = $file_data_all['headers'] ?? []; + $output = []; + $output['table'] = [ + '#type' => 'table', + '#caption' => t('Unique processed values for this column'), + ]; + $column_preview = (array) $form_state->getValue(['lod_options','select_preview']) ?? 
[]; + $values_per_column = $this->AmiLoDService->provideLoDColumnValues($file, + $column_preview); + dpm($form_state->getValue(['lod_options','select_preview'])); + $rows = $values_per_column[$form_state->getValue(['lod_options','select_preview'])] ?? ['Emtpy Column']; + sort($rows, SORT_STRING); + + foreach ($rows as &$row) { + $row = [$row]; + } + $output['table']['#rows'] = $rows; + } + $response->addCommand(new OpenOffCanvasDialogCommand(t('Values for @column', [ + '@column' => reset($column_preview), + ]), + $output, ['width' => '30%'])); + if ($form_state->getErrors()) { + // Clear errors so the user does not get confused when reloading. + \Drupal::messenger()->deleteByType(MessengerInterface::TYPE_ERROR); + $form_state->clearErrors(); + } + } + } + return $response; + } + + +} + diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index cfc4b1d..f52cbf0 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -1131,7 +1131,7 @@ public static function fetchBatch(array $config, ImporterPluginAdapterInterface } public static function finishfetchFromSolr($success, $results, $operations) { - error_log('finished'); + $allheaders = $results['processed']['headers'] ?? []; $data['headers'] = array_values($allheaders); diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index e76da93..e075f0a 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -17,7 +17,7 @@ use Swaggest\JsonDiff\JsonPatch; /** - * Process the JSON payload provided by the webhook. + * Processes and Ingests each AMI Set CSV row. 
* * @QueueWorker( * id = "ami_ingest_ado", @@ -119,7 +119,7 @@ public static function create( * {@inheritdoc} */ public function processItem($data) { - /* Data info has this structire + /* Data info has this structure $data->info = [ 'row' => The actual data 'set_id' => The Set id diff --git a/src/Plugin/QueueWorker/LoDQueueWorker.php b/src/Plugin/QueueWorker/LoDQueueWorker.php new file mode 100644 index 0000000..7dc1a1c --- /dev/null +++ b/src/Plugin/QueueWorker/LoDQueueWorker.php @@ -0,0 +1,177 @@ +entityTypeManager = $entity_type_manager; + $this->loggerFactory = $logger_factory; + $this->strawberryfieldUtility = $strawberryfield_utility_service; + $this->AmiUtilityService = $ami_utility; + $this->AmiLoDService = $ami_lod; + $this->messenger = $messenger; + } + + /** + * Implementation of the container interface to allow dependency injection. + * + * @param \Symfony\Component\DependencyInjection\ContainerInterface $container + * @param array $configuration + * @param string $plugin_id + * @param mixed $plugin_definition + * + * @return static + */ + public static function create( + ContainerInterface $container, + array $configuration, + $plugin_id, + $plugin_definition + ) { + return new static( + empty($configuration) ? 
[] : $configuration, + $plugin_id, + $plugin_definition, + $container->get('entity_type.manager'), + $container->get('logger.factory'), + $container->get('strawberryfield.utility'), + $container->get('ami.utility'), + $container->get('ami.lod'), + $container->get('messenger') + ); + } + + /** + * {@inheritdoc} + */ + public function processItem($data) { + /* Data info has this structure + $data->info = [ + 'label' => The label passed to the Reconciling URL, + 'domain' => This Server's Domain name + 'headers' => All headers (LoD Routes) + 'lodconfig' => an array of LoD URL Route arguments separated by comma in the form of, + 0 => "loc;subjects;thing" + 1 => "loc;rdftype;GenreForm" + 2 => "getty;aat;exact" + 'set_id' => The Set id + 'csv' => The ID of the CSV file that will hold the results + 'uid' => The User ID that processed the Set + 'set_url' => A direct URL to the set. + 'attempt' => The number of attempts to process. We always start with a 1 + ]; + */ + + // Load the CSV + /** @var \Drupal\file\Entity\File $file_lod */ + $file_lod = $this->entityTypeManager->getStorage('file')->load( + $data->info['csv']); + + if (empty($data->info['label']) || empty($data->info['domain']) || empty ($data->info['lodconfig'])) { + // Exception, means we have no label, no domain or emprt lodconfig + return; + } + $newdata['headers'] = $data->info['headers']; + $newdata['data'][0] = array_fill_keys($newdata['headers'], ''); + if (isset($data->info['lodconfig']) && is_array($data->info['lodconfig']) && $file_lod) { + $lod_route_arguments = $data->info['lodconfig']; + + foreach ($lod_route_arguments as $lod_route_argument) { + $lod_route_argument_list = explode(';', $lod_route_argument); + //@TODO allow the number of results to be set on the \Drupal\ami\Form\amiSetEntityReconcileForm + // And passed as an argument. Same with Language? Not all LoD Routes can make use or more languages. 
+ $lod_route_column_name = implode('_', $lod_route_argument_list); + + + $lod = $this->AmiLoDService->invokeLoDRoute($data->info['domain'], + $data->info['label'], $lod_route_argument_list[0], + $lod_route_argument_list[1], $lod_route_argument_list[2], 'en', 1); + dpm($lod); + $newdata['data'][0][$lod_route_column_name] = json_encode($lod); + } + + $this->AmiUtilityService->csv_append($newdata, $file_lod,NULL, FALSE); + } + } + +} From cd1b043b0eeed3d62d98812c1bfcaefead94906e Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 26 Jul 2021 10:49:55 -0400 Subject: [PATCH 02/42] Push headers/erase old LoD reconciled when re-running --- src/AmiUtilityService.php | 7 ++++-- src/Form/AmiMultiStepIngest.php | 5 +---- src/Form/amiSetEntityReconcileForm.php | 27 +++++++++-------------- src/Plugin/QueueWorker/LoDQueueWorker.php | 12 +++++----- 4 files changed, 22 insertions(+), 29 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 1f165a3..074468f 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -615,12 +615,15 @@ public function create_file_from_uri($localpath) { /** * Creates an empty CSV returns file. * + * @param string|null $filename + * If given it will use that, if null will create a new one + * * @return int|string|null * @throws \Drupal\Core\Entity\EntityStorageException */ - public function csv_touch() { + public function csv_touch(string $filename = NULL) { $path = 'public://ami/csv'; - $filename = $this->currentUser->id() . '-' . uniqid() . '.csv'; + $filename = $filename ?? $this->currentUser->id() . '-' . uniqid() . 
'.csv'; // Ensure the directory if (!$this->fileSystem->prepareDirectory( $path, diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 44f10a5..2817fd0 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -536,10 +536,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $data = $this->store->get('data'); $amisetdata->column_keys = []; $id = $this->AmiUtilityService->createAmiSet($amisetdata); - $batch = $plugin_instance->getBatch($form_state, - $this->store->get('pluginconfig'), $amisetdata); - - + $batch = $plugin_instance->getBatch($form_state, $this->store->get('pluginconfig'), $amisetdata); if ($id) { $url = Url::fromRoute('entity.ami_set_entity.canonical', ['ami_set_entity' => $id]); diff --git a/src/Form/amiSetEntityReconcileForm.php b/src/Form/amiSetEntityReconcileForm.php index 7b8be81..3668420 100644 --- a/src/Form/amiSetEntityReconcileForm.php +++ b/src/Form/amiSetEntityReconcileForm.php @@ -95,10 +95,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { /** @var \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem $item */ $data = $item->provideDecoded(FALSE); } - $domain = $this->getRequest()->getSchemeAndHttpHost(); - - //$lod = $this->AmiLoDService->invokeLoDRoute($domain,'Diego', 'wikidata', 'subjects', 'thing', 'en', 5); - if ($data !== new \stdClass()) { // Only Show this form if we got data from the SBF field. @@ -135,10 +131,9 @@ public function buildForm(array $form, FormStateInterface $form_state) { $csv_file_reference[0]['target_id'] ); if ($file) { - $reconcile_settings = $data->reconcile_settings->columns ?? []; + $reconcile_settings = $data->reconcileconfig->columns ?? []; $file_data_all = $this->AmiUtilityService->csv_read($file); $column_keys = $file_data_all['headers'] ?? 
[]; - $form['mapping']['lod_columns'] = [ '#type' => 'select', '#title' => $this->t('Select which columns you want to reconcile against LoD providers'), @@ -160,10 +155,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#suffix' => '', ]; if ($form_state->getValue(['mapping', 'lod_columns'], NULL)) { - error_log(var_export($form_state->getValue([ - 'mapping', - 'lod_columns' - ]), TRUE)); $source_options = $form_state->getValue(['mapping', 'lod_columns']); $column_options = [ @@ -265,7 +256,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { /** @var \Drupal\file\Entity\File $file_lod */ $file_lod = $this->entityTypeManager->getStorage('file')->load( $csv_file_processed[0]['target_id']); - $file_lod_id = $file_lod->id(); + // Reset all values + $file_lod_id = $this->AmiUtilityService->csv_touch($file_lod->getFilename()); } else { $file_lod_id = $this->AmiUtilityService->csv_touch(); $file_lod = $file_lod_id ? $this->entityTypeManager->getStorage('file')->load( @@ -296,11 +288,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } if ($file && $file_lod && $data !== new \stdClass()) { $domain = $this->getRequest()->getSchemeAndHttpHost(); - $invalid = []; $mappings = $form_state->getValue(['lod_options','mappings']); $form_state->setRebuild(TRUE); - $file_data_all = $this->AmiUtilityService->csv_read($file); - $column_keys = $file_data_all['headers'] ?? []; $output = []; $output['table'] = [ '#type' => 'table', @@ -310,19 +299,23 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $values_per_column = $this->AmiLoDService->provideLoDColumnValues($file, $columns); $inverted = []; + $column_map_inverted = []; $headers = ['original','csv_columns']; foreach($values_per_column as $column => $labels) { foreach($labels as $label) { $inverted[$label] = $inverted[$label] ?? 
[]; $headers = array_unique(array_merge($headers,$mappings[$column])); $inverted[$label] = array_unique(array_merge($inverted[$label], $mappings[$column])); + $column_map_inverted[$label][] = $column; + $column_map_inverted[$label] = array_unique($column_map_inverted[$label]); } } + ksort($inverted,SORT_NATURAL); foreach($headers as &$header) { // same is done in \Drupal\ami\Plugin\QueueWorker\LoDQueueWorker::processItem $exploded = explode(';', $header); - $header = implode('_', $exploded); + $header = strtolower(implode('_', $exploded)); } if (!count($inverted)) { @@ -337,7 +330,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $form_state->setRebuild(); return; } - + // Append the header to the CSV + $file_lod_id = $this->AmiUtilityService->csv_append(['headers' => $headers, 'data' => []], $file_lod, NULL, TRUE ); $SetURL = $this->entity->toUrl('canonical', ['absolute' => TRUE]) ->toString(); $notprocessnow = $form_state->getValue('not_process_now', NULL); @@ -364,6 +358,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { 'label' => $label, 'domain' => $domain, 'headers' => $headers, + 'csv_columns' => $column_map_inverted[$label], 'lodconfig' => $lodconfig, 'set_id' => $this->entity->id(), 'csv' => $file_lod_id, diff --git a/src/Plugin/QueueWorker/LoDQueueWorker.php b/src/Plugin/QueueWorker/LoDQueueWorker.php index 7dc1a1c..379f244 100644 --- a/src/Plugin/QueueWorker/LoDQueueWorker.php +++ b/src/Plugin/QueueWorker/LoDQueueWorker.php @@ -148,7 +148,7 @@ public function processItem($data) { $data->info['csv']); if (empty($data->info['label']) || empty($data->info['domain']) || empty ($data->info['lodconfig'])) { - // Exception, means we have no label, no domain or emprt lodconfig + // Exception, means we have no label, no domain or empty lodconfig return; } $newdata['headers'] = $data->info['headers']; @@ -160,16 +160,14 @@ public function processItem($data) { $lod_route_argument_list = explode(';', 
$lod_route_argument); //@TODO allow the number of results to be set on the \Drupal\ami\Form\amiSetEntityReconcileForm // And passed as an argument. Same with Language? Not all LoD Routes can make use or more languages. - $lod_route_column_name = implode('_', $lod_route_argument_list); - - + $lod_route_column_name = strtolower(implode('_', $lod_route_argument_list)); $lod = $this->AmiLoDService->invokeLoDRoute($data->info['domain'], $data->info['label'], $lod_route_argument_list[0], $lod_route_argument_list[1], $lod_route_argument_list[2], 'en', 1); - dpm($lod); - $newdata['data'][0][$lod_route_column_name] = json_encode($lod); + $newdata['data'][0][$lod_route_column_name] = json_encode($lod, JSON_PRETTY_PRINT); + $newdata['data'][0]['original'] = $data->info['label']; + $newdata['data'][0]['csv_columns'] = json_encode((array)$data->info['csv_columns']); } - $this->AmiUtilityService->csv_append($newdata, $file_lod,NULL, FALSE); } } From 9909ff75268893ace6595f3a42afba9d1ba2412d Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 27 Jul 2021 23:08:24 -0400 Subject: [PATCH 03/42] Wow. We got this even with LoD Fix/Edit form!! @alliomeria gosh. I made it i think? OF course the UI can get some better things and we need to add a LOT of options to avoid "overwriting" all your lovely Manually LoD. But gosh. This is so good!! 
--- ami.links.task.yml | 6 + ami.routing.yml | 7 + ami.services.yml | 4 +- src/AmiLoDService.php | 66 ++--- src/AmiUtilityService.php | 175 ++++++++++-- src/Entity/amiSetEntity.php | 6 +- src/Form/AmiMultiStepIngest.php | 2 +- src/Form/amiSetEntityReconcileCleanUpForm.php | 263 ++++++++++++++++++ src/Form/amiSetEntityReconcileForm.php | 119 +++++++- .../AmiStrawberryfieldJsonAsWebform.php | 1 + src/Plugin/ImporterAdapter/SolrImporter.php | 8 +- .../QueueWorker/IngestADOQueueWorker.php | 25 +- src/Plugin/QueueWorker/LoDQueueWorker.php | 42 ++- 13 files changed, 636 insertions(+), 88 deletions(-) create mode 100644 src/Form/amiSetEntityReconcileCleanUpForm.php diff --git a/ami.links.task.yml b/ami.links.task.yml index 8d50a3f..702b196 100644 --- a/ami.links.task.yml +++ b/ami.links.task.yml @@ -44,3 +44,9 @@ ami_set_entity.reconcile_form: base_route: entity.ami_set_entity.canonical title: Reconcile LoD weight: 13 + +entity.ami_set_entity.reconcileedit_form: + route_name: entity.ami_set_entity.reconcileedit_form + base_route: entity.ami_set_entity.canonical + title: Edit Reconciled LoD + weight: 14 diff --git a/ami.routing.yml b/ami.routing.yml index 36d5104..11be50e 100644 --- a/ami.routing.yml +++ b/ami.routing.yml @@ -92,5 +92,12 @@ entity.ami_set_entity.reconcile_form: defaults: _entity_form: ami_set_entity.reconcile _title: 'Reconcile LoD' + requirements: + _entity_access: 'ami_set_entity.process' +entity.ami_set_entity.reconcileedit_form: + path: '/amiset/{ami_set_entity}/editreconcile' + defaults: + _entity_form: ami_set_entity.editreconcile + _title: 'Clean Reconciled LoD' requirements: _entity_access: 'ami_set_entity.process' \ No newline at end of file diff --git a/ami.services.yml b/ami.services.yml index 9594bc3..2b7503c 100644 --- a/ami.services.yml +++ b/ami.services.yml @@ -5,11 +5,11 @@ services: arguments: ['@entity_type.manager'] ami.utility: class: Drupal\ami\AmiUtilityService - arguments: [ '@file_system', '@file.usage', '@entity_type.manager', 
'@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@entity_field.manager', '@entity_type.bundle.info', '@http_client'] + arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@entity_field.manager', '@entity_type.bundle.info', '@http_client', '@keyvalue'] tags: - { name: backend_overridable } ami.lod: class: Drupal\ami\AmiLoDService - arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@http_client', '@ami.utility' ] + arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@http_client', '@ami.utility', '@keyvalue'] tags: - { name: backend_overridable } diff --git a/src/AmiLoDService.php b/src/AmiLoDService.php index 200b6e7..95fae3e 100644 --- a/src/AmiLoDService.php +++ b/src/AmiLoDService.php @@ -17,6 +17,7 @@ use \Drupal\Core\Entity\EntityFieldManagerInterface; use Drupal\Core\Extension\ModuleHandlerInterface; use Drupal\Core\File\FileSystemInterface; +use Drupal\Core\KeyValueStore\KeyValueFactoryInterface; use Drupal\Core\Language\LanguageManagerInterface; use Drupal\Core\Logger\LoggerChannelFactoryInterface; use Drupal\Core\Messenger\MessengerTrait; @@ -140,6 +141,14 @@ class AmiLoDService { */ protected $AmiUtilityService; + /** + * Key value service. 
+ * + * @var \Drupal\Core\KeyValueStore\KeyValueFactoryInterface + */ + protected $keyValue; + + /** * AmiLoDService constructor. * @@ -157,6 +166,7 @@ class AmiLoDService { * @param \Drupal\strawberryfield\StrawberryfieldUtilityService $strawberryfield_utility_service * @param \GuzzleHttp\ClientInterface $http_client * @param \Drupal\ami\AmiUtilityService $ami_utility + * @param \Drupal\Core\KeyValueStore\KeyValueFactoryInterface $key_value */ public function __construct( FileSystemInterface $file_system, @@ -172,7 +182,8 @@ public function __construct( LoggerChannelFactoryInterface $logger_factory, StrawberryfieldUtilityService $strawberryfield_utility_service, ClientInterface $http_client, - AmiUtilityService $ami_utility + AmiUtilityService $ami_utility, + KeyValueFactoryInterface $key_value ) { $this->fileSystem = $file_system; $this->fileUsage = $file_usage; @@ -194,6 +205,7 @@ public function __construct( $this->currentUser = $current_user; $this->httpClient = $http_client; $this->AmiUtilityService = $ami_utility; + $this->keyValue = $key_value; } @@ -223,9 +235,7 @@ public function invokeLoDRoute(string $domain, string $query, string $auth_type, } $cookieJar = CookieJar::fromArray($_COOKIE, $domain); - $controller_path = $controller_url->setAbsolute()->toString(TRUE)->getGeneratedUrl(); - error_log($controller_url->setAbsolute(FALSE)->toString(TRUE)->getGeneratedUrl()); $csrf_token = \Drupal::csrfToken()->get($controller_url->setAbsolute(FALSE)->toString(TRUE)->getGeneratedUrl()); $options = [ 'headers' => [ @@ -246,7 +256,13 @@ public function invokeLoDRoute(string $domain, string $query, string $auth_type, $response = $this->httpClient->request('GET', $controller_path, $options); $sucessfull = $response->getStatusCode() >= 200 && $response->getStatusCode() < 300; $response_encoded = $sucessfull ? 
json_decode($response->getBody()->getContents()) : []; - return $response_encoded; + // Removes desc , changes value for uri to make it SBF webform element compliant + $response_cleaned = []; + foreach ($response_encoded as $key => $entry) { + $response_cleaned[$key]['uri'] = $entry->value ?? ''; + $response_cleaned[$key]['label'] = !empty($entry->desc) ? substr($entry->label ?? '', 0, -strlen($entry->desc)) : $entry->label ?? ''; + } + return $response_cleaned; } /** @@ -264,54 +280,14 @@ public function provideLoDColumnValues(File $file, array $columns):array { $alldifferent = []; foreach ($columns as $column) { $column_index = array_search($column, $column_keys); - error_log($column_index); if ($column_index !== FALSE) { - $alldifferent[$column] = $this->getDifferentValuesfromColumnSplit($data, + $alldifferent[$column] = $this->AmiUtilityService->getDifferentValuesfromColumnSplit($data, $column_index); } } - error_log(var_export($alldifferent, true)); return $alldifferent; } - /** - * For a given Numeric Column index, get different/non json, split values - * - * @param array $data - * @param int $key - * - * @return array - */ - public function getDifferentValuesfromColumnSplit(array $data, int $key, array $delimiters = ['|@|', ';'] ): array { - $unique = []; - $all = array_column($data['data'], $key); - $all_notJson = array_filter($all, array($this, 'isNotJson')); - $all_entries = []; - // The difficulty. In case of multiple delimiters we need to see which one - // works/works better. But if none, assume it may be also right since a single - // Value is valid. So we need to accumulate, count and discern - foreach ($all_notJson as $entries) { - $current_entries = []; - foreach ($delimiters as $delimiter) { - $split_entries = explode($delimiter, $entries) ?? 
[]; - $current_entries[$delimiter] = (array) $split_entries; - } - $chosen_entries = []; - foreach ($current_entries as $delimiter => $current_entry) { - $chosen_entries = $current_entry; - if (count($chosen_entries) > 1) { - break; - } - } - foreach ($chosen_entries as $chosen_entry) { - $all_entries[] = $chosen_entry; - } - } - $unique = array_map('trim', $all_entries); - $unique = array_unique(array_values($unique), SORT_STRING); - return $unique; - } - /** * Checks if a string is valid JSON * diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 074468f..3bac299 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -17,6 +17,7 @@ use \Drupal\Core\Entity\EntityFieldManagerInterface; use Drupal\Core\Extension\ModuleHandlerInterface; use Drupal\Core\File\FileSystemInterface; +use Drupal\Core\KeyValueStore\KeyValueFactoryInterface; use Drupal\Core\Language\LanguageManagerInterface; use Drupal\Core\Logger\LoggerChannelFactoryInterface; use Drupal\Core\Messenger\MessengerTrait; @@ -148,6 +149,13 @@ class AmiUtilityService { */ protected $httpClient; + /** + * Key value service. + * + * @var \Drupal\Core\KeyValueStore\KeyValueFactoryInterface + */ + protected $keyValue; + /** * StrawberryfieldFilePersisterService constructor. 
* @@ -165,6 +173,8 @@ class AmiUtilityService { * @param StrawberryfieldUtilityService $strawberryfield_utility_service , * @param \Drupal\Core\Entity\EntityFieldManagerInterface $entity_field_manager * @param \Drupal\Core\Entity\EntityTypeBundleInfoInterface $entity_type_bundle_info + * @param \GuzzleHttp\ClientInterface $http_client + * @param \Drupal\Core\KeyValueStore\KeyValueFactoryInterface $key_value */ public function __construct( FileSystemInterface $file_system, @@ -181,7 +191,8 @@ public function __construct( StrawberryfieldUtilityService $strawberryfield_utility_service, EntityFieldManagerInterface $entity_field_manager, EntityTypeBundleInfoInterface $entity_type_bundle_info, - ClientInterface $http_client + ClientInterface $http_client, + KeyValueFactoryInterface $key_value ) { $this->fileSystem = $file_system; $this->fileUsage = $file_usage; @@ -204,6 +215,7 @@ public function __construct( $this->entityTypeBundleInfo = $entity_type_bundle_info; $this->currentUser = $current_user; $this->httpClient = $http_client; + $this->keyValue = $key_value; } @@ -807,8 +819,15 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo } } } - //array_walk($row, 'htmlspecialchars'); - array_walk($row,'htmlentities'); + if (is_array($row) && !empty($row)) { + foreach ($row as &$value) { + if (!is_array($value) && !empty($value)) { + if (!$this->isJson($value)) { + $value = htmlspecialchars($value, ENT_COMPAT, 'UTF-8', FALSE); + } + } + } + } $fh->fputcsv($row); } // PHP Bug! This should happen automatically @@ -1648,13 +1667,18 @@ public function expandJson(array $row) { * "uuid" => "5d4f8ed7-7471-4115-beed-39dc1e625180" * ] * + * @param array $additional_context + * Any additional $context that may be passed. This is appended to the + * Twig context but will never replace/override the one provided + * by this method. 
+ * * @return string|NULL * Either a valid JSON String or NULL if casting via Twig template failed * * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException */ - public function processMetadataDisplay(\stdClass $data) { + public function processMetadataDisplay(\stdClass $data, array $additional_context = []) { $op = $data->pluginconfig->op; $ophuman = [ 'create' => 'created', @@ -1680,11 +1704,28 @@ public function processMetadataDisplay(\stdClass $data) { } $jsonstring = NULL; if ($data->mapping->globalmapping == "custom") { - $metadatadisplay_id = $data->mapping->custommapping_settings->{$data->info['row']['type']}->metadata_config->template; + $metadatadisplay_id = $data->mapping->custommapping_settings->{$data->info['row']['type']}->metadata_config->template ?? NULL; } else { - $metadatadisplay_id = $data->mapping->globalmapping_settings->metadata_config->template; + $metadatadisplay_id = $data->mapping->globalmapping_settings->metadata_config->template ?? NULL; + } + if (!$metadatadisplay_id) { + if (!$data->info['row']['data']) { + $message = $this->t( + 'Ups. No template mapping for type @type. Skipping for AMI Set ID @setid, Row @row, future ADO with UUID @uuid.', + [ + '@uuid' => $data->info['row']['uuid'], + '@row' => $row_id, + '@setid' => $set_id, + '@type' => $data->info['row']['type'], + ] + ); + $this->loggerFactory->get('ami')->error($message); + return NULL; + } } + + $metadatadisplay_entity = $this->entityTypeManager->getStorage('metadatadisplay_entity') ->load($metadatadisplay_id); if ($metadatadisplay_entity) { @@ -1769,6 +1810,33 @@ public function processMetadataDisplay(\stdClass $data) { } $context['data'] = $this->expandJson($data->info['row']['data']); + $context_lod = []; + // get the mappings for this set if any + // @TODO Refactor into a Method? 
+ $lod_mappings = $this->getKeyValueValueMappingsPerAmiSet($set_id); + if ($lod_mappings) { + foreach($lod_mappings as $source_column => $destination) { + if (isset($context['data'][$source_column])) { + // sad here. Ok, this is a work around for our normally + // Strange CSV data structure + $data_to_clean['data'][0] = [$context['data'][$source_column]]; + $labels = $this->getDifferentValuesfromColumnSplit($data_to_clean, + 0); + foreach($labels as $label) { + $lod_for_label = $this->getKeyValuePerAmiSet($label, $set_id); + if (is_array($lod_for_label) && count($lod_for_label) > 0) { + foreach ($lod_for_label as $approach => $lod) { + if (isset($lod['lod'])) { + $context_lod[$source_column][$approach] = array_merge($context_lod[$source_column][$approach] ?? [], $lod['lod']); + } + } + } + } + } + } + } + + $context['data_lod'] = $context_lod; $context['dataOriginal'] = $original_value; $context['setURL'] = $setURL; $context['setId'] = $set_id; @@ -1776,7 +1844,8 @@ public function processMetadataDisplay(\stdClass $data) { $context['setOp'] = ucfirst($op); $context['node'] = $node; - + // Add any extras passed to the caller. + $context = $context + $additional_context; $original_context = $context; // Allow other modules to provide extra Context! // Call modules that implement the hook, and let them add items. @@ -1829,27 +1898,95 @@ function () use ($context, $metadatadisplay_entity) { } return $jsonstring; } - - public function processWebform($data, array $row) { - + public function setKeyValuePerAmiSet($label, $data, $set_id) { + // Too much trouble dealing with encodings/UTF-8 and MYSQL + // And drupal here. Simpler if the label is md5-ed + $label = md5($label); + $keyvalue_collection = 'ami_lod_temp_'. 
$set_id; + $this->keyValue->get($keyvalue_collection) + ->set($label, $data); } - - public function ingestAdo($data, array $row) { - + public function setKeyValueMappingsPerAmiSet($data, $set_id) { + $keyvalue_collection = 'ami_lod_temp_mappings'; + $this->keyValue->get($keyvalue_collection) + ->set($set_id, $data); } - - public function updateAdo($data) { - + public function getKeyValuePerAmiSet($label, $set_id) { + $label = md5($label); + $keyvalue_collection = 'ami_lod_temp_'. $set_id; + return $this->keyValue->get($keyvalue_collection) + ->get($label, NULL); } - public function patchAdo($data) { - + public function getKeyValueValueMappingsPerAmiSet($set_id) { + $keyvalue_collection = 'ami_lod_temp_mappings'; + return $this->keyValue->get($keyvalue_collection) + ->get($set_id, NULL); } - public function deleteAdo($data) { + public function cleanKeyValuesPerAmiSet($set_id) { + $keyvalue_collection = 'ami_lod_temp_'. $set_id; + $this->keyValue->get($keyvalue_collection)->deleteAll(); + } + /** + * For a given Numeric Column index, get different/non json, split values + * + * @param array $data + * @param int $key + * + * @return array + */ + public function getDifferentValuesfromColumnSplit(array $data, int $key, array $delimiters = ['|@|', ';'] ): array { + $unique = []; + $all = array_column($data['data'], $key); + $all_notJson = array_filter($all, array($this, 'isNotJson')); + $all_entries = []; + // The difficulty. In case of multiple delimiters we need to see which one + // works/works better. But if none, assume it may be also right since a single + // Value is valid. So we need to accumulate, count and discern + foreach ($all_notJson as $entries) { + $current_entries = []; + foreach ($delimiters as $delimiter) { + $split_entries = explode($delimiter, $entries) ?? 
[]; + $current_entries[$delimiter] = (array) $split_entries; + } + $chosen_entries = []; + foreach ($current_entries as $delimiter => $current_entry) { + $chosen_entries = $current_entry; + if (count($chosen_entries) > 1) { + break; + } + } + foreach ($chosen_entries as $chosen_entry) { + $all_entries[] = $chosen_entry; + } + } + $unique = array_map('trim', $all_entries); + $unique = array_unique(array_values($unique), SORT_STRING); + return $unique; + } + /** + * Checks if a string is valid JSON + * + * @param $string + * + * @return bool + */ + public function isJson($string) { + json_decode($string); + return json_last_error() === JSON_ERROR_NONE; } + /** + * Helper function that negates ::isJson. + * @param $string + * + * @return bool + */ + public function isNotJson($string) { + return !$this->isJson($string); + } } diff --git a/src/Entity/amiSetEntity.php b/src/Entity/amiSetEntity.php index 42b0f32..9b7bca1 100644 --- a/src/Entity/amiSetEntity.php +++ b/src/Entity/amiSetEntity.php @@ -83,7 +83,8 @@ * "delete" = "Drupal\ami\Form\amiSetEntityDeleteForm", * "process" = "Drupal\ami\Form\amiSetEntityProcessForm", * "deleteprocessed" = "Drupal\ami\Form\amiSetEntityDeleteProcessedForm", - * "reconcile" = "Drupal\ami\Form\amiSetEntityReconcileForm" + * "reconcile" = "Drupal\ami\Form\amiSetEntityReconcileForm", + * "editreconcile" = "Drupal\ami\Form\amiSetEntityReconcileCleanUpForm" * }, * "access" = "Drupal\ami\Entity\Controller\amiSetEntityAccessControlHandler", * }, @@ -102,6 +103,7 @@ * "process-form" = "/amiset/{ami_set_entity}/process", * "delete-process-form" = "/amiset/{ami_set_entity}/deleteprocessed", * "reconcile-form" = "/amiset/{ami_set_entity}/reconcile", + * "edit-reconcile-form" = "/amiset/{ami_set_entity}/editreconcile", * "delete-form" = "/amiset/{ami_set_entity}/delete", * "collection" = "/amiset/list" * }, @@ -379,7 +381,7 @@ public static function baseFieldDefinitions(EntityTypeInterface $entity_type) { 'weight' => -3, ]) 
->setDisplayConfigurable('view', TRUE) - ->setDisplayConfigurable('form', FALSE); + ->setDisplayConfigurable('form', TRUE); $validatorszip = [ 'file_validate_extensions' => ['zip'], 'file_validate_size' => [Environment::getUploadMaxSize()], diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 2817fd0..5ad66cc 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -295,7 +295,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { $mapping = $this->store->get('mapping'); $adomapping = $this->store->get('adomapping'); $required_maps = [ - 'sequence' => 'Sequence Order', 'label' => 'Ado Label', ]; $form['ingestsetup']['adomapping'] = [ @@ -535,6 +534,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { if ($plugin_instance->getPluginDefinition()['batch']) { $data = $this->store->get('data'); $amisetdata->column_keys = []; + $amisetdata->total_rows = NULL; // because we do not know yet $id = $this->AmiUtilityService->createAmiSet($amisetdata); $batch = $plugin_instance->getBatch($form_state, $this->store->get('pluginconfig'), $amisetdata); if ($id) { diff --git a/src/Form/amiSetEntityReconcileCleanUpForm.php b/src/Form/amiSetEntityReconcileCleanUpForm.php new file mode 100644 index 0000000..eaef01d --- /dev/null +++ b/src/Form/amiSetEntityReconcileCleanUpForm.php @@ -0,0 +1,263 @@ +AmiUtilityService = $ami_utility; + $this->AmiLoDService = $ami_lod; + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container) { + return new static( + $container->get('entity.repository'), + $container->get('entity_type.bundle.info'), + $container->get('datetime.time'), + $container->get('ami.utility'), + $container->get('ami.lod'), + $container->get('strawberryfield.utility') + ); + } + + public function getConfirmText() { + return $this->t('Save LoD'); + } + + + public function getQuestion() { + return $this->t( + 'Are you sure you want to 
Save Modified Reconcile Lod for %name?', + ['%name' => $this->entity->label()] + ); + } + + /** + * {@inheritdoc} + */ + public function getCancelUrl() { + return new Url('entity.ami_set_entity.collection'); + } + + /** + * {@inheritdoc} + */ + public function buildForm(array $form, FormStateInterface $form_state) { + // Read Config first to get the Selected Bundles based on the Config + // type selected. Based on that we can set Moderation Options here + + $data = new \stdClass(); + foreach ($this->entity->get('set') as $item) { + /** @var \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem $item */ + $data = $item->provideDecoded(FALSE); + } + $csv_file_processed = $this->entity->get('processed_data')->getValue(); + if (isset($csv_file_processed[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file_lod */ + $file_lod = $this->entityTypeManager->getStorage('file')->load( + $csv_file_processed[0]['target_id']); + + if ($data !== new \stdClass()) { + // Only Show this form if we got data from the SBF field. + // we can't assume the user did not mess with the AMI set data? + $op = $data->pluginconfig->op ?? NULL; + $ops = [ + 'create', + 'update', + 'patch', + ]; + if (!in_array($op, $ops)) { + $form['status'] = [ + '#tree' => TRUE, + '#type' => 'fieldset', + '#title' => $this->t( + 'Error' + ), + '#markup' => $this->t( + 'Sorry. This AMI set has no right Operation (Create, Update, Patch) set. Please fix this or contact your System Admin to fix it.' + ), + ]; + return $form; + } + $form['lod_cleanup'] = [ + '#tree' => TRUE, + '#type' => 'fieldset', + '#title' => $this->t('LoD reconciled Clean Up'), + ]; + $access = TRUE; + + if ($file_lod) { + $file_data_all = $this->AmiUtilityService->csv_read($file_lod); + $column_keys = $file_data_all['headers'] ?? []; + + $form['lod_cleanup']['table-row'] = [ + '#type' => 'table', + '#tree' => TRUE, + '#prefix' => '
    ', + '#suffix' => '
    ', + '#header' => $column_keys, + '#empty' => $this->t('Sorry, There are LoD no items!'), + ]; + foreach ($column_keys as $column) { + $arguments = explode('_', $column); + if ($column !== 'original' && $column != 'csv_columns') { + $elements[$column] = [ + '#type' => 'webform_metadata_' . $arguments[0], + '#title' => implode(' ' , $arguments), + ]; + if ($arguments[1] == 'rdftype') { + $elements[$column]['#rdftype'] = $arguments[2] ?? ''; + $elements[$column]['#vocab'] = 'rdftype'; + } + } + } + + foreach ($file_data_all['data'] as $index => $row) { + foreach($file_data_all['headers'] as $key => $header) { + if ($header == 'original' || $header == 'csv_columns') { + $form['lod_cleanup']['table-row'][$index - 1][$header.'-'.$index] = [ + '#type' => 'markup', + '#markup' => $row[$key], + ]; + } + else { + $form['lod_cleanup']['table-row'][$index - 1][$header.'-'.$index] = [ + '#multiple' => 5, + '#multiple__header' => FALSE, + '#multiple__no_items_message' => '', + '#multiple__min_items' => 1, + '#multiple__empty_items' => 0, + '#multiple__sorting' => FALSE, + '#multiple__add_more' => FALSE, + '#multiple__add_more_input' => FALSE, + '#label__title' => 'Label', + '#default_value' => json_decode($row[$key], TRUE), + ] + $elements[$header]; + + } + } + } + \Drupal::service('plugin.manager.webform.element')->processElements($form); + // Attach the webform library. + $form['#attached']['library'][] = 'webform/webform.form'; + } + } + $form = $form + parent::buildForm($form, $form_state); + return $form; + } + else { + $form['status'] = [ + '#tree' => TRUE, + '#type' => 'fieldset', + '#title' => $this->t( + 'No Reconciled LoD Found.' + ), + '#markup' => $this->t( + 'Start by visiting the LoD Reconcile tab and running a reconciliation. Once done you can come back here.' 
+ ), + ]; + return $form; + } + } + + /** + * {@inheritdoc} + */ + public function submitForm(array &$form, FormStateInterface $form_state) { + $csv_file_processed = $this->entity->get('processed_data')->getValue(); + if (isset($csv_file_processed[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file_lod */ + $file_lod = $this->entityTypeManager->getStorage('file')->load( + $csv_file_processed[0]['target_id']); + if ($file_lod) { + $file_data_all = $this->AmiUtilityService->csv_read($file_lod); + $column_keys = $file_data_all['headers'] ?? []; + foreach ($file_data_all['data'] as $id => &$row) { + foreach ($file_data_all['headers'] as $index => $column) { + if ($column !== 'original' && $column != 'csv_columns') { + $lod = $form_state->getValue($column . '-' . ((int)$id), NULL); + $row[$index] = json_encode($lod, + JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE) ?? ''; + $context_data[$column]['lod'] = $lod; + $context_data[$column]['columns'] = json_decode($row[1], TRUE); + $this->AmiUtilityService->setKeyValuePerAmiSet($row[0], $context_data, $this->entity->id()); + } + } + } + $file_lod_id = $this->AmiUtilityService->csv_touch($file_lod->getFilename()); + $success = $this->AmiUtilityService->csv_append($file_data_all, $file_lod,NULL, TRUE); + if (!$success) { + $this->messenger()->addError( + $this->t( + 'So Sorry. We could not update the CSV to store your Fixed LoD Reconciled data for @label. Please check your filesystem permissions or contact your System Admin', + [ + '@label' => $this->entity->label(), + ] + ) + ); + } + else { + $this->messenger()->addMessage( + $this->t( + 'Success. 
your Fixed LoD Reconciled data for @label was updated and is ready to be used.', + [ + '@label' => $this->entity->label(), + ] + ) + ); + } + } + } + $form_state->setRebuild(TRUE); + } +} + diff --git a/src/Form/amiSetEntityReconcileForm.php b/src/Form/amiSetEntityReconcileForm.php index 3668420..454958b 100644 --- a/src/Form/amiSetEntityReconcileForm.php +++ b/src/Form/amiSetEntityReconcileForm.php @@ -68,6 +68,9 @@ public static function create(ContainerInterface $container) { ); } + public function getConfirmText() { + return $this->t('Process LoD from Source'); + } public function getQuestion() { return $this->t( @@ -118,6 +121,29 @@ public function buildForm(array $form, FormStateInterface $form_state) { ]; return $form; } + $csv_file_processed = $this->entity->get('processed_data')->getValue(); + if (isset($csv_file_processed[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file */ + $lod_file = $this->entityTypeManager->getStorage('file')->load( + $csv_file_processed[0]['target_id'] + ); + if ($lod_file) { + $form['status'] = [ + '#tree' => TRUE, + '#type' => 'fieldset', + '#title' => $this->t( + 'You have LoD reconciled data!' + ), + '#markup' => $this->t( + 'Please use the Edit Reconciled LoD tab to Fix/Correct/Enhance or ' + ), + ]; + $form['status']['download'] = Url::fromUri(file_create_url($lod_file->getFileUri()))->toRenderArray(); + $form['status']['download']['#type'] = 'link'; + $form['status']['download']['#title'] = $this->t('Download LoD CSV'); + + } + } $form['mapping'] = [ '#tree' => TRUE, '#type' => 'fieldset', @@ -131,13 +157,15 @@ public function buildForm(array $form, FormStateInterface $form_state) { $csv_file_reference[0]['target_id'] ); if ($file) { - $reconcile_settings = $data->reconcileconfig->columns ?? []; + + $reconcile_column_settings = $form_state->getValue(['mapping', 'lod_columns'], NULL) ?? ($data->reconcileconfig->columns ?? 
[]); + $reconcile_column_settings = (array) $reconcile_column_settings; $file_data_all = $this->AmiUtilityService->csv_read($file); $column_keys = $file_data_all['headers'] ?? []; $form['mapping']['lod_columns'] = [ '#type' => 'select', '#title' => $this->t('Select which columns you want to reconcile against LoD providers'), - '#default_value' => $reconcile_settings, + '#default_value' => $reconcile_column_settings, '#options' => array_combine($column_keys, $column_keys), '#size' => count($column_keys), '#multiple' => TRUE, @@ -154,9 +182,11 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#prefix' => '
    ', '#suffix' => '
    ', ]; - if ($form_state->getValue(['mapping', 'lod_columns'], NULL)) { + $reconcile_mapping_settings = $form_state->getValue(['mapping', 'lod_columns'], NULL) ?? ($data->reconcileconfig->mappings ?? NULL); + $reconcile_mapping_settings = (array) $reconcile_mapping_settings; + if ($reconcile_column_settings) { - $source_options = $form_state->getValue(['mapping', 'lod_columns']); + $source_options = $reconcile_column_settings; $column_options = [ 'loc;subjects;thing' => 'LoC subjects(LCSH)', 'loc;names;thing' => 'LoC Name Authority File (LCNAF)', @@ -190,7 +220,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#description_display' => 'before', '#empty_option' => $this->t('- Let AMI decide -'), '#empty_value' => NULL, - '#default_value' => [], + '#default_value' => $reconcile_mapping_settings, '#required' => TRUE, '#destination__multiple' => TRUE, '#source' => $source_options, @@ -242,7 +272,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { * {@inheritdoc} */ public function submitForm(array &$form, FormStateInterface $form_state) { - $csv_file_reference = $this->entity->get('source_data')->getValue(); if (isset($csv_file_reference[0]['target_id'])) { /** @var \Drupal\file\Entity\File $file */ @@ -251,6 +280,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { ); } + + $csv_file_processed = $this->entity->get('processed_data')->getValue(); if (isset($csv_file_processed[0]['target_id'])) { /** @var \Drupal\file\Entity\File $file_lod */ @@ -280,11 +311,29 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } } - $data = new \stdClass(); foreach ($this->entity->get('set') as $item) { /** @var \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem $item */ $data = $item->provideDecoded(FALSE); + // Set also the new config back + $data->reconcileconfig = new \stdClass(); + $data->reconcileconfig->columns = $form_state->getValue(['mapping', 
'lod_columns'], NULL); + $data->reconcileconfig->mappings = $form_state->getValue(['lod_options','mappings'], NULL); + $jsonvalue = json_encode($data, JSON_PRETTY_PRINT); + $this->entity->set('set', $jsonvalue); + try { + $this->entity->save(); + } + catch (\Exception $exception) { + $this->messenger()->addError( + t( + 'Ami Set LoD Settings Failed to be persisted because of @message', + ['@message' => $exception->getMessage()] + ) + ); + $form_state->setRebuild(TRUE); + return; + } } if ($file && $file_lod && $data !== new \stdClass()) { $domain = $this->getRequest()->getSchemeAndHttpHost(); @@ -310,6 +359,19 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $column_map_inverted[$label] = array_unique($column_map_inverted[$label]); } } + $normalized_mapping = []; + foreach($mappings as $source_column => $approaches) { + foreach($approaches as $approach) { + $exploded = explode(';', $approach); + $normalized_mapping[$source_column][] = strtolower(implode('_', $exploded)); + } + } + + // This will be used to fetch the right values when passing to the twig template + // Could be read from the config but this is faster during process. + $this->AmiUtilityService->setKeyValueMappingsPerAmiSet($normalized_mapping, $this->entity->id()); + // Clears old values before processing new ones. 
+ $this->AmiUtilityService->cleanKeyValuesPerAmiSet($this->entity->id()); ksort($inverted,SORT_NATURAL); foreach($headers as &$header) { @@ -334,6 +396,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $file_lod_id = $this->AmiUtilityService->csv_append(['headers' => $headers, 'data' => []], $file_lod, NULL, TRUE ); $SetURL = $this->entity->toUrl('canonical', ['absolute' => TRUE]) ->toString(); + $notprocessnow = $form_state->getValue('not_process_now', NULL); $queue_name = 'ami_lod_ado'; if (!$notprocessnow) { @@ -359,6 +422,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { 'domain' => $domain, 'headers' => $headers, 'csv_columns' => $column_map_inverted[$label], + 'normalized_mappings' => $normalized_mapping, 'lodconfig' => $lodconfig, 'set_id' => $this->entity->id(), 'csv' => $file_lod_id, @@ -383,6 +447,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { else { $count = count(array_filter($added)); if ($count) { + $form_state->setRebuild(); $this->submitBatch($form_state, $queue_name, $count); } } @@ -460,7 +525,6 @@ public function ajaxColumPreview($form, FormStateInterface $form_state) { $column_preview = (array) $form_state->getValue(['lod_options','select_preview']) ?? []; $values_per_column = $this->AmiLoDService->provideLoDColumnValues($file, $column_preview); - dpm($form_state->getValue(['lod_options','select_preview'])); $rows = $values_per_column[$form_state->getValue(['lod_options','select_preview'])] ?? ['Emtpy Column']; sort($rows, SORT_STRING); @@ -483,6 +547,45 @@ public function ajaxColumPreview($form, FormStateInterface $form_state) { return $response; } + /** + * AJAX callback. 
+ */ + public function ajaxLoDPreview($form, FormStateInterface $form_state) { + return $form['lod_cleanup']; + + + + $response = new AjaxResponse(); + $form['#attached']['library'][] = 'core/drupal.dialog.off_canvas'; + $response->setAttachments($form['#attached']); + + if (!empty($form_state->getValue(['edit']))) { + $entity = $form_state->getFormObject()->getEntity(); + $csv_file_reference = $entity->get('source_data')->getValue(); + if (isset($csv_file_reference[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file */ + $file = $this->entityTypeManager->getStorage('file')->load( + $csv_file_reference[0]['target_id'] + ); + if ($file) { + $form = \Drupal::service('entity.form_builder')->getForm($entity, 'editreconcile', []);; + } + $response->addCommand(new OpenOffCanvasDialogCommand(t('Lod for @label', [ + '@label' => $this->entity->label(), + ]), + $form, ['width' => '70%'])); + if ($form_state->getErrors()) { + // Clear errors so the user does not get confused when reloading. 
+ \Drupal::messenger()->deleteByType(MessengerInterface::TYPE_ERROR); + $form_state->clearErrors(); + } + } + } + return $response; + } + + + } diff --git a/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php b/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php index 3600015..267fff4 100644 --- a/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php +++ b/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php @@ -130,6 +130,7 @@ public function buildConfigurationForm(array $form, FormStateInterface $form_sta $cleanelement['#submit'] = [[$this, 'dynamic_field_submit']]; $cleanelement['#executes_submit_callback'] = TRUE; $form['elements_rendered']['jsonfind_element']= $cleanelement; + dpm($cleanelement); dpm($form['elements_rendered']['jsonfind_element']); } diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index f52cbf0..030f4e6 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -1146,7 +1146,13 @@ public static function finishfetchFromSolr($success, $results, $operations) { public function provideTypes(array $config, array $data): array { $keys = $config['solarium_mapping']['cmodel_mapping'] ?? []; - $keys_children = $config['solarium_mapping']['cmodel_children'] ?? []; + // Remove children types if collapse is enabled + if ($config['solarium_mapping']['collapse'] == 0) { + $keys_children = $config['solarium_mapping']['cmodel_children'] ?? 
[]; + } + else { + $keys_children = []; + } $keys = array_unique(array_merge(array_values($keys), array_values($keys_children))); unset($keys_children); return $keys; diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index e075f0a..e9392c1 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -2,6 +2,7 @@ namespace Drupal\ami\Plugin\QueueWorker; +use Drupal\ami\AmiLoDService; use Drupal\ami\AmiUtilityService; use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\Core\Logger\LoggerChannelFactoryInterface; @@ -61,13 +62,23 @@ class IngestADOQueueWorker extends QueueWorkerBase implements ContainerFactoryPl */ protected $messenger; + /** + * @var \Drupal\ami\AmiLoDService + */ + protected $AmiLoDService; + /** * Constructor. * * @param array $configuration * @param string $plugin_id * @param mixed $plugin_definition - * @param \Drupal\Core\Entity\EntityTypeManager $entity_field_manager + * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager + * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory + * @param \Drupal\strawberryfield\StrawberryfieldUtilityService $strawberryfield_utility_service + * @param \Drupal\ami\AmiUtilityService $ami_utility + * @param \Drupal\ami\AmiLoDService $ami_lod + * @param \Drupal\Core\Messenger\MessengerInterface $messenger */ public function __construct( array $configuration, @@ -77,6 +88,7 @@ public function __construct( LoggerChannelFactoryInterface $logger_factory, StrawberryfieldUtilityService $strawberryfield_utility_service, AmiUtilityService $ami_utility, + AmiLoDService $ami_lod, MessengerInterface $messenger ) { parent::__construct($configuration, $plugin_id, $plugin_definition); @@ -85,6 +97,7 @@ public function __construct( $this->strawberryfieldUtility = $strawberryfield_utility_service; $this->AmiUtilityService = $ami_utility; $this->messenger = 
$messenger; + $this->AmiLoDService = $ami_lod; } /** @@ -111,6 +124,7 @@ public static function create( $container->get('logger.factory'), $container->get('strawberryfield.utility'), $container->get('ami.utility'), + $container->get('ami.lod'), $container->get('messenger') ); } @@ -170,6 +184,12 @@ public function processItem($data) { } } } + // let's attach the LoD Context here + // - We need the columns that were Reconciled from keystore + // - We need to fetch for this row the unreconciled columns and split them into labels + // - We need to fetch for every label from keystore the reconciled values + // - Push them into $additional_context keyed by vocab and column + $processed_metadata = $this->AmiUtilityService->processMetadataDisplay($data); if (!$processed_metadata) { @@ -330,7 +350,8 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { } $label_column = $data->adomapping->base->label ?? 'label'; - $label = $processed_metadata[$label_column] ?? NULL; + // Always (because of processed metadata via template) try to fetch again the mapped version + $label = $processed_metadata[$label_column] ?? ($processed_metadata['label'] ?? 
NULL); $property_path_split = explode(':', $property_path); if (!$property_path_split || count($property_path_split) < 2 ) { diff --git a/src/Plugin/QueueWorker/LoDQueueWorker.php b/src/Plugin/QueueWorker/LoDQueueWorker.php index 379f244..51bc32e 100644 --- a/src/Plugin/QueueWorker/LoDQueueWorker.php +++ b/src/Plugin/QueueWorker/LoDQueueWorker.php @@ -70,7 +70,12 @@ class LoDQueueWorker extends QueueWorkerBase implements ContainerFactoryPluginIn * @param array $configuration * @param string $plugin_id * @param mixed $plugin_definition - * @param \Drupal\Core\Entity\EntityTypeManager $entity_field_manager + * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager + * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory + * @param \Drupal\strawberryfield\StrawberryfieldUtilityService $strawberryfield_utility_service + * @param \Drupal\ami\AmiUtilityService $ami_utility + * @param \Drupal\ami\AmiLoDService $ami_lod + * @param \Drupal\Core\Messenger\MessengerInterface $messenger */ public function __construct( array $configuration, @@ -130,6 +135,19 @@ public function processItem($data) { 'label' => The label passed to the Reconciling URL, 'domain' => This Server's Domain name 'headers' => All headers (LoD Routes) + 'normalized_mappings' => an array with source columns and where to find the results + like + array:2 [▼ + "mods_name_personal_namepart" => array:2 [▼ + 0 => "loc_names_thing" + 1 => "loc_rdftype_personalname" + ] + "mods_genre" => array:3 [▼ + 0 => "loc_rdftype_genreform" + 1 => "getty_aat_fuzzy" + 2 => "wikidata_subjects_thing" + ] + ] 'lodconfig' => an array of LoD URL Route arguments separated by comma in the form of, 0 => "loc;subjects;thing" 1 => "loc;rdftype;GenreForm" @@ -141,7 +159,6 @@ public function processItem($data) { 'attempt' => The number of attempts to process. 
We always start with a 1 ]; */ - // Load the CSV /** @var \Drupal\file\Entity\File $file_lod */ $file_lod = $this->entityTypeManager->getStorage('file')->load( @@ -153,22 +170,31 @@ public function processItem($data) { } $newdata['headers'] = $data->info['headers']; $newdata['data'][0] = array_fill_keys($newdata['headers'], ''); + $context_data = []; if (isset($data->info['lodconfig']) && is_array($data->info['lodconfig']) && $file_lod) { $lod_route_arguments = $data->info['lodconfig']; - foreach ($lod_route_arguments as $lod_route_argument) { $lod_route_argument_list = explode(';', $lod_route_argument); //@TODO allow the number of results to be set on the \Drupal\ami\Form\amiSetEntityReconcileForm // And passed as an argument. Same with Language? Not all LoD Routes can make use or more languages. $lod_route_column_name = strtolower(implode('_', $lod_route_argument_list)); $lod = $this->AmiLoDService->invokeLoDRoute($data->info['domain'], - $data->info['label'], $lod_route_argument_list[0], - $lod_route_argument_list[1], $lod_route_argument_list[2], 'en', 1); - $newdata['data'][0][$lod_route_column_name] = json_encode($lod, JSON_PRETTY_PRINT); - $newdata['data'][0]['original'] = $data->info['label']; - $newdata['data'][0]['csv_columns'] = json_encode((array)$data->info['csv_columns']); + $data->info['label'], $lod_route_argument_list[0], + $lod_route_argument_list[1], $lod_route_argument_list[2], 'en', 1); + + $newdata['data'][0][$lod_route_column_name] = json_encode($lod, JSON_PRETTY_PRINT|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE) ?? ''; + $newdata['data'][0]['original'] = (string) $data->info['label']; + $newdata['data'][0]['csv_columns'] = json_encode((array)$data->info['csv_columns']) ?? 
''; + // Context data is simpler + $context_data[$lod_route_column_name]['lod'] = $lod; + $context_data[$lod_route_column_name]['columns'] = $data->info['csv_columns']; } + $this->AmiUtilityService->csv_append($newdata, $file_lod,NULL, FALSE); + // Sets the same data, per label (as key) into keystore so we can fetch it as Twig Context when needed. + //@TODO also do similar if going for a "direct" in that case we replace the columns found in the original data + + $this->AmiUtilityService->setKeyValuePerAmiSet($data->info['label'], $context_data, $data->info['set_id']); } } From 95922b1a591d467d16fd63d0b030dc7a1605c303 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 27 Jul 2021 23:59:06 -0400 Subject: [PATCH 04/42] Because MADS RDF is case Sensitive.. add CONST mapping And our CSV headers are not (lowercase/normalized) And gosh. Well. But this seems to work. Last commit for tonight. --- src/Form/amiSetEntityReconcileCleanUpForm.php | 53 ++++++++++++++++--- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/src/Form/amiSetEntityReconcileCleanUpForm.php b/src/Form/amiSetEntityReconcileCleanUpForm.php index eaef01d..1358bf7 100644 --- a/src/Form/amiSetEntityReconcileCleanUpForm.php +++ b/src/Form/amiSetEntityReconcileCleanUpForm.php @@ -23,6 +23,28 @@ */ class amiSetEntityReconcileCleanUpForm extends ContentEntityConfirmFormBase { + + CONST LOD_COLUMN_TO_ARGUMENTS = [ + 'loc_subjects_thing' => 'loc;subjects;thing', + 'loc_names_thing' => 'loc;names;thing', + 'loc_genreforms_thing' => 'loc;genreForms;thing', + 'loc_graphicmaterials_thing' => 'loc;graphicMaterials;thing', + 'loc_geographicareas_thing' => 'loc;geographicAreas;thing', + 'loc_relators_thing' => 'loc;relators;thing', + 'loc_rdftype_corporatename' => 'loc;rdftype;CorporateName', + 'loc_rdftype_personalname' => 'loc;rdftype;PersonalName', + 'loc_rdftype_familyname' => 'loc;rdftype;FamilyName', + 'loc_rdftype_topic' => 'loc;rdftype;Topic', + 'loc_rdftype_genreform' => 
'loc;rdftype;GenreForm', + 'loc_rdftype_geographic' => 'loc;rdftype;Geographic', + 'loc_rdftype_temporal' => 'loc;rdftype;Temporal', + 'loc_rdftype_extraterrestrialarea' => 'loc;rdftype;ExtraterrestrialArea', + 'viaf_subjects_thing' => 'viaf;subjects;thing', + 'getty_aat_fuzzy' => 'getty;aat;fuzzy', + 'getty_aat_terms' => 'getty;aat;terms', + 'getty_aat_exact' => 'getty;aat;exact', + 'wikidata_subjects_thing' => 'wikidata;subjects;thing' + ]; /** * @var \Drupal\ami\AmiUtilityService */ @@ -136,6 +158,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { $access = TRUE; if ($file_lod) { + $file_data_all = $this->AmiUtilityService->csv_read($file_lod); $column_keys = $file_data_all['headers'] ?? []; @@ -147,16 +170,30 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#header' => $column_keys, '#empty' => $this->t('Sorry, There are LoD no items!'), ]; + $elements = []; foreach ($column_keys as $column) { - $arguments = explode('_', $column); if ($column !== 'original' && $column != 'csv_columns') { - $elements[$column] = [ - '#type' => 'webform_metadata_' . $arguments[0], - '#title' => implode(' ' , $arguments), - ]; - if ($arguments[1] == 'rdftype') { - $elements[$column]['#rdftype'] = $arguments[2] ?? ''; - $elements[$column]['#vocab'] = 'rdftype'; + $argument_string = static::LOD_COLUMN_TO_ARGUMENTS[$column] ?? NULL; + if ($argument_string) { + + $arguments = explode(';', $argument_string); + $elements[$column] = [ + '#type' => 'webform_metadata_' . $arguments[0], + '#title' => implode(' ', $arguments), + ]; + + if ($arguments[1] == 'rdftype') { + $elements[$column]['#rdftype'] = $arguments[2] ?? ''; + $elements[$column]['#vocab'] = 'rdftype'; + } + else { + $elements[$column]['#vocab'] = $arguments[1] ?? 
''; + } + + } + else { + // Fallback to WIKIDATA + $elements[$column] = ['#type' => 'webform_metadata_wikidata']; } } } From 07bd252a2e736459f1d7cfb569d2b16744fc2d5c Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 30 Jul 2021 09:54:01 -0400 Subject: [PATCH 05/42] Make sure sort by PID ::getData is given so offset is consistent --- src/Plugin/ImporterAdapter/SolrImporter.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index 030f4e6..7e772c5 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -688,7 +688,7 @@ public function getData(array $config, $page = 0, $per_page = 20): array { $query->setQuery('RELS_EXT_isMemberOfCollection_uri_s:' . $helper->escapePhrase($input)); // PLEASE REMOVE Collection Objects that ARE ALSO part of a compound. WE DO NOT WANT THOSE $query->createFilterQuery('notconstituent')->setQuery('-RELS_EXT_isConstituentOf_uri_ms:[ * TO * ]'); - + $query->addSort('PID', 'asc'); $query->setStart($offset)->setRows($per_page); $query->setFields([ 'PID', @@ -974,7 +974,6 @@ protected function buildDatastreamURL(array $config, \Solarium\QueryType\Select\ // Calculate the destination json key $dsid = 'OBJ'; $mime = $document->fedora_datastream_latest_OBJ_MIMETYPE_ms[0]; - } elseif (!empty($document->fedora_datastream_latest_PDF_MIMETYPE_ms)) { $dsid = 'PDF'; From 579d4b541d3ad5d483f6c0b97545e3a67e0b889c Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 3 Aug 2021 18:28:55 -0400 Subject: [PATCH 06/42] Header normalization finally done the right way @alliomeria i will need you to test this when you can. This is finally working as expected. For DCMNY this gives us initially 714 headers! but then the cleanup reduces things significantly to lower numbers (e.g for gsmt:icc to 87 or so) We get more data of course now!! 
(because of children really providing their correct data when not collapsing). Uff. Was super hard to find my own error but the gist for my own future me: Batch driven Plugins like the SolrImport one provide the headers via their ::getInfo method but because we do not really know until the batch ends how many of the headers will really survive the cleanup we do not set them into the actual Config (config that is stored in each AMI set) which really means we have to manually pass them to the batch! Good, so good --- src/Form/AmiMultiStepIngest.php | 9 +- .../ImporterAdapter/JsonAPIImporter.php | 160 ------------------ src/Plugin/ImporterAdapter/SolrImporter.php | 2 +- 3 files changed, 9 insertions(+), 162 deletions(-) delete mode 100644 src/Plugin/ImporterAdapter/JsonAPIImporter.php diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 5ad66cc..59d25a9 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -533,10 +533,17 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $amisetdata->csv = $fileid; if ($plugin_instance->getPluginDefinition()['batch']) { $data = $this->store->get('data'); + $config = $this->store->get('pluginconfig'); $amisetdata->column_keys = []; $amisetdata->total_rows = NULL; // because we do not know yet $id = $this->AmiUtilityService->createAmiSet($amisetdata); - $batch = $plugin_instance->getBatch($form_state, $this->store->get('pluginconfig'), $amisetdata); + // Batch Plugins may provide their Headers in the ::getInfo method by returning + // A 'headers' key. That may not be in the actual config! + if (empty($config['headers']) || is_array($config['headers']) && count($config['headers']) == 0 ) { + $config['headers'] = $data['headers'] ?? 
[]; + } + + $batch = $plugin_instance->getBatch($form_state, $config, $amisetdata); if ($id) { $url = Url::fromRoute('entity.ami_set_entity.canonical', ['ami_set_entity' => $id]); diff --git a/src/Plugin/ImporterAdapter/JsonAPIImporter.php b/src/Plugin/ImporterAdapter/JsonAPIImporter.php deleted file mode 100644 index ac28a55..0000000 --- a/src/Plugin/ImporterAdapter/JsonAPIImporter.php +++ /dev/null @@ -1,160 +0,0 @@ -getPluginConfiguration(); - $form['url'] = [ - '#type' => 'url', - '#default_value' => isset($config['url']) ? $config['url'] : '', - '#title' => $this->t('Url'), - '#description' => $this->t('The URL to the import resource'), - '#required' => TRUE, - ]; - return $form; - } - - - /** - * {@inheritdoc} - */ - public function import() { - $data = $this->getData(); - if (!$data) { - return FALSE; - } - - if (!isset($data->nodes)) { - return FALSE; - } - - $nodes = $data->nodes; - - $batch_builder = (new BatchBuilder()) - ->setTitle($this->t('Importing ADOs')) - ->setFinishCallback([$this, 'importNodesFinished']); - - $batch_builder->addOperation([$this, 'importNodes'], [$nodes]); - batch_set($batch_builder->toArray()); - - if (PHP_SAPI == 'cli') { - drush_backend_batch_process(); - } - - return TRUE; - } - - - /** - * Batch operation to import the products from the JSON file. 
- * - * @param $nodes - * @param $context - */ - public function importNodes($nodes, &$context) { - if (!isset($context['results']['imported'])) { - $context['results']['imported'] = []; - } - - if (!$nodes) { - return; - } - - $sandbox = &$context['sandbox']; - if (!$sandbox) { - $sandbox['progress'] = 0; - $sandbox['max'] = count($nodes); - $sandbox['products'] = $nodes; - } - - $slice = array_splice($sandbox['products'], 0, 3); - foreach ($slice as $node) { - $context['message'] = $this->t('Importing product @name', ['@name' => $node->name]); - $this->persistEntity($node); - $context['results']['imported'][] = $node->name; - $sandbox['progress']++; - } - - $context['finished'] = $sandbox['progress'] / $sandbox['max']; - } - - /** - * Callback for when the batch processing completes. - * - * @param $success - * @param $results - * @param $operations - */ - public function importNodesFinished($success, $results, $operations) { - if (!$success) { - $this->messenger()->addmessage($this->t('There was a problem with the batch'), 'error'); - return; - } - - $imported = count($results['imported']); - if ($imported == 0) { - $this->messenger()->addmessage($this->t('No ADOs found to be imported.')); - } - else { - $this->messenger()->addmessage($this->formatPlural($imported, '1 ADO imported.', '@count ADOs imported.')); - } - } - - /** - * Loads the product data from the remote URL. - * - * @return \stdClass - */ - public function getData(array $config, $page = 0, $per_page = 20):array { - - /** @var \Drupal\ami\Entity\ImporterAdapterInterface $importer_config */ - $importer_config = $this->configuration['config']; - $config = $importer_config->getPluginConfiguration(); - $getArguments = $url = isset($config['getargs']) ? $config['getargs'] : NULL; - $url = isset($config['url']) ? 
$config['url'] : NULL; - if (!$url) { - return []; - } - $default_bundles = $importer_config->getTargetEntityTypes(); - $default_bundle = reset($default_bundles); - // If we have no default bundle setup do not process anything - if (!$default_bundle) { return []; }; - - // Super naive really. - $request = $this->httpClient->get($url); - $json_string = $request->getBody()->getContents(); - //@TODO here is where the Twig template gets applied? - //OR do we do it on Ingest time? (QueueWorker?) - $json = json_decode($json_string, TRUE); - $json_error = json_last_error(); - if ($json_error == JSON_ERROR_NONE) { - $count = $this->enqueue($json); - } - else { - // ERROR - } - return []; - } -} diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index 7e772c5..2f4a216 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -1101,7 +1101,7 @@ public static function fetchBatch(array $config, ImporterPluginAdapterInterface $config['prev_index'] = $context['sandbox']['prev_index']; // Pass the headers into the config so we have a unified/normalized version // And not the mess each doc returns - $config['headers'] = $amisetdata->column_keys ?? []; + $config['headers'] = !empty($amisetdata->column_keys) ? $amisetdata->column_keys : (!empty($config['headers']) ? $config['headers'] : []); $config['headerswithdata'] = $context['results']['processed']['headerswithdata'] ?? 
[]; $data = $plugin_instance->getData($config, $context['sandbox']['progress'] + $offset, $increment); From 36f53a8abc94e8719284c8653f8096f9ce78c296 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 4 Aug 2021 10:15:35 -0400 Subject: [PATCH 07/42] use RELS_EXT_isPageNumber_literal_intDerivedFromString_l for page ordering On Books with pages --- src/Plugin/ImporterAdapter/SolrImporter.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index 2f4a216..81e706e 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -877,6 +877,7 @@ protected function getDataChildren(array $config, SolariumClient $client, string $escaped = $helper->escapePhrase('info:fedora/' . $input); $escaped_pid = str_replace(':', '_', $input); $query->addSort("RELS_EXT_isSequenceNumberOf{$escaped_pid}_literal_intDerivedFromString_l", 'asc'); + $query->addSort("RELS_EXT_isPageNumber_literal_intDerivedFromString_l", 'asc'); $query->createFilterQuery('constituent')->setQuery('RELS_EXT_isConstituentOf_uri_ms:'.$escaped .' OR RELS_EXT_isPageOf_uri_ms:'.$escaped .' OR RELS_EXT_isMemberOf_uri_ms:'.$escaped ); $query->setQuery('*:*'); $query->setStart(0)->setRows(3000); From fd52fb6899c510cb7c4632deb1f35e1046d7e559 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 19 Aug 2021 18:05:43 -0400 Subject: [PATCH 08/42] Does less manual cleaning and trusts fputcsv() more for escaping @alliomeria this should fix the double encoding issue, but just to be sure, could you try Solr import 7 with a collection that has complex weird characters in its descriptions/titles (maybe NYHS?) and also with a google sheet using, e.g Japanese + "" and & and ' somewhere? We can test tomorrow together. Thing is i removed a lot of code that i "for some reason" had added, so wonder if i will not break now something somewhere else, and thus "testing" is required!
--- src/AmiUtilityService.php | 13 +------------ src/Form/AmiMultiStepIngest.php | 8 ++++---- src/Form/amiSetEntityProcessForm.php | 2 +- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 3bac299..7a80b51 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -721,7 +721,6 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { ); return NULL; } - array_walk($data['headers'], 'htmlspecialchars'); // How we want to get the key number that contains the $uuid_key $haskey = array_search($uuid_key, $data['headers']); if ($haskey === FALSE) { @@ -741,7 +740,6 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { } } - array_walk($row, 'htmlspecialchars'); $fh->fputcsv($row); } // PHP Bug! This should happen automatically @@ -795,7 +793,6 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo ); return NULL; } - array_walk($data['headers'], 'htmlspecialchars'); // How we want to get the key number that contains the $uuid_key if ($uuid_key) { $haskey = array_search($uuid_key, $data['headers']); @@ -819,15 +816,7 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo } } } - if (is_array($row) && !empty($row)) { - foreach ($row as &$value) { - if (!is_array($value) && !empty($value)) { - if (!$this->isJson($value)) { - $value = htmlspecialchars($value, ENT_COMPAT, 'UTF-8', FALSE); - } - } - } - } + $fh->fputcsv($row); } // PHP Bug! This should happen automatically diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 59d25a9..6f5ab9f 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -81,7 +81,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { ]) . 
'', ]; if ($this->step == 1) { - $pluginValue = $this->store->get('plugin', NULL); + $pluginValue = $this->store->get('plugin'); $definitions = $this->importerManager->getDefinitions(); $options = []; foreach ($definitions as $id => $definition) { @@ -100,8 +100,8 @@ public function buildForm(array $form, FormStateInterface $form_state) { } if ($this->step == 2) { $parents = ['pluginconfig']; - $form_state->setValue('pluginconfig', $this->store->get('pluginconfig',[])); - $pluginValue = $this->store->get('plugin', NULL); + $form_state->setValue('pluginconfig', $this->store->get('pluginconfig')); + $pluginValue = $this->store->get('plugin'); // Only create a new instance if we do not have the PluginInstace around /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface | NULL */ $plugin_instance = $this->store->get('plugininstance'); @@ -227,7 +227,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#open' => TRUE, // Controls the HTML5 'open' attribute. Defaults to FALSE. ]; $form['ingestsetup']['custommapping'][$type]['metadata'] = [ - //'#name' => 'metadata_'.$machine_type, + '#name' => 'metadata_'.$machine_type, '#type' => 'select', '#title' => $this->t('Select the data transformation approach for @type', ['@type' => $type]), '#default_value' => isset($mapping['custommapping_settings'][$type]['metadata']) ? $mapping['custommapping_settings'][$type]['metadata'] : reset($metadata), diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index f306502..116b01b 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -256,7 +256,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { 'Error' ), '#markup' => $this->t( - 'Sorry. You have either no permissions to create ADOs of some configured bundles (Content Types) or the bundles are non existent in this system. Correct your CSV data or ask for access. 
You can also ask an administrator to process the set for you.', + 'Sorry. You have either no permissions to create ADOs of some configured bundles (Content Types) or the bundles are non existent in this system. Correct your CSV data or ask for access. You can also ask an administrator to process the set for you.' ), ]; return $form; From fb8cd1762796e4a75a022c234e9ee9d800558a9c Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 19 Aug 2021 18:06:01 -0400 Subject: [PATCH 09/42] weird left over ; --- src/Plugin/Action/AmiStrawberryfieldJsonAsText.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php b/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php index e365dd9..54faf4e 100644 --- a/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php +++ b/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php @@ -88,7 +88,7 @@ public function executeMultiple(array $objects) { * {@inheritdoc} */ public function execute($entity = NULL) { - ; + /** @var \Drupal\Core\Entity\EntityInterface $entity */ $patched = FALSE; if ($entity) { From 8f1cba735dfe71a5404dd65aad0173700c9b8bb3 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 Sep 2021 18:15:04 -0400 Subject: [PATCH 10/42] Fix some silly select defaults I was resetting the array to give this a default value. happens that i needed the first key, not the first value and now that our "labels" are Capitalized and not == to the value (e.g Direct v/s direct) i was breaking the Form State. What a mess! 
--- src/Form/AmiMultiStepIngest.php | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 6f5ab9f..3698c5e 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -125,15 +125,13 @@ public function buildForm(array $form, FormStateInterface $form_state) { $column_keys = $plugin_instance->provideKeys($pluginconfig, $data); $mapping = $this->store->get('mapping'); $metadata = [ - 'direct' => 'Direct ', + 'direct' => 'Direct', 'template' => 'Template', - //'webform' => 'Webform', ]; $template = $this->getMetadatadisplays(); - $webform = $this->getWebforms(); + // $webform = $this->getWebforms(); $bundle = $this->getBundlesAndFields(); - $global_metadata_options = $metadata + ['custom' => 'Custom (Expert Mode)']; //Each row (based on its type column) can have its own approach setup(expert mode) $element_conditional = []; @@ -152,24 +150,24 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#description' => $this->t('Columns will be casted to ADO metadata (JSON) using a Twig template setup for JSON output'), ]; - $element_conditional['webform'] =[ + /* $element_conditional['webform'] =[ '#type' => 'select', '#title' => $this->t('Webform'), '#options' => $webform, '#description' => $this->t('Columns are casted to ADO metadata (JSON) by passing/validating Data through an existing Webform'), - ]; + ];*/ $form['ingestsetup']['globalmapping'] = [ '#type' => 'select', '#title' => $this->t('Select the data transformation approach'), - '#default_value' => isset($mapping['globalmapping']) && !empty($mapping['globalmapping']) ? $mapping['globalmapping'] : reset($global_metadata_options), + '#default_value' => isset($mapping['globalmapping']) && !empty($mapping['globalmapping']) ? 
$mapping['globalmapping'] : key($global_metadata_options), '#options' => $global_metadata_options, '#description' => $this->t('How your source data will be transformed into ADOs Metadata.'), '#required' => TRUE, ]; $newelements_global = $element_conditional; foreach ($newelements_global as $key => &$subelement) { - $subelement['#default_value'] = isset($mapping['globalmapping_settings']['metadata_config'][$key]) ? $mapping['globalmapping_settings']['metadata_config'][$key]: reset(${$key}); + $subelement['#default_value'] = isset($mapping['globalmapping_settings']['metadata_config'][$key]) ? $mapping['globalmapping_settings']['metadata_config'][$key]: key(${$key}); $subelement['#states'] = [ 'visible' => [ ':input[name*="globalmapping"]' => ['value' => $key], @@ -195,7 +193,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { ]; $form['ingestsetup']['bundle'] = $element['bundle']; - $form['ingestsetup']['bundle']['#default_value'] = isset($mapping['globalmapping_settings']['bundle']) ? $mapping['globalmapping_settings']['bundle'] : reset($bundle); + $form['ingestsetup']['bundle']['#default_value'] = isset($mapping['globalmapping_settings']['bundle']) ? $mapping['globalmapping_settings']['bundle'] : key($bundle); $form['ingestsetup']['bundle']['#states'] = [ 'visible' => [ ':input[name*="globalmapping"]' => ['!value' => 'custom'], @@ -230,7 +228,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#name' => 'metadata_'.$machine_type, '#type' => 'select', '#title' => $this->t('Select the data transformation approach for @type', ['@type' => $type]), - '#default_value' => isset($mapping['custommapping_settings'][$type]['metadata']) ? $mapping['custommapping_settings'][$type]['metadata'] : reset($metadata), + '#default_value' => isset($mapping['custommapping_settings'][$type]['metadata']) ? $mapping['custommapping_settings'][$type]['metadata'] : (key($metadata) ?? 
NULL), '#options' => $metadata, '#description' => $this->t('How your source data will be transformed into ADOs (JSON) Metadata.'), '#required' => TRUE, @@ -241,7 +239,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { // We need to reassign or if not circular references mess with the render array $newelements = $element_conditional; foreach ($newelements as $key => &$subelement) { - $subelement['#default_value'] = isset($mapping['custommapping_settings'][$type]['metadata_config'][$key]) ? $mapping['custommapping_settings'][$type]['metadata_config'][$key] : reset(${$key}); + $subelement['#default_value'] = isset($mapping['custommapping_settings'][$type]['metadata_config'][$key]) ? $mapping['custommapping_settings'][$type]['metadata_config'][$key] : key(${$key}); $subelement['#states'] = [ 'visible' => [ ':input[data-adotype="metadata_'.$machine_type.'"]' => ['value' => $key], @@ -255,7 +253,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { $form['ingestsetup']['custommapping'][$type]['metadata_config'] = $newelements; $form['ingestsetup']['custommapping'][$type]['bundle'] = $element['bundle']; - $form['ingestsetup']['custommapping'][$type]['bundle']['#default_value'] = isset($mapping['custommapping_settings'][$type]['bundle']) ? $mapping['custommapping_settings'][$type]['bundle'] : reset($bundle); + $form['ingestsetup']['custommapping'][$type]['bundle']['#default_value'] = isset($mapping['custommapping_settings'][$type]['bundle']) ? 
$mapping['custommapping_settings'][$type]['bundle'] : key($bundle); if ($op == 'update' || $op == 'patch') { $files_title = $this->t('Select which columns contain filenames or URLs where we can fetch the files for @type replacing/clearing existing ones if there is already data in the same key in your ADO.', ['@type' => $type]); From a1472beaf2301b183d099f85d628922a9151d095 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 Sep 2021 18:15:18 -0400 Subject: [PATCH 11/42] New Permissions to allow export to CSV --- ami.permissions.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ami.permissions.yml b/ami.permissions.yml index 6a94ed3..19ba982 100644 --- a/ami.permissions.yml +++ b/ami.permissions.yml @@ -1,6 +1,9 @@ Bulk Patch Strawberryfield: title: 'JSON Patch Archipelago Digital Objects in Bulk using Views Batch Operations' description: 'Allows JSON Patch actions to be executed against a "Strawberryfield" bearing Entity. Still on Patch time, individual permissions may apply and/or override this permission' +CSV Export Archipelago Digital Objects: + title: 'Export Archipelago Digital Objects in Bulk using Views Batch Operations to CSV' + description: 'CSV Export actions to be executed against a "Strawberryfield" bearing Entity. Still on export time, individual permissions may apply and/or override this permission' Multi Import Digital Objects: title: 'Import or Update assets from remote sources or files using Archipelago Multi Importer' description: 'Allows AMI to be used to import or update Digital Objects and assets. Still on import(create) and update time, individual permissions may apply and/or override this permission' From d70a72622c04e4d6c3b9f8aab517e9ed6b89b6d2 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 Sep 2021 18:16:59 -0400 Subject: [PATCH 12/42] Small Improvements to Ami Utility Service - Can now handle CSV rows that are either indexed arrays (default) or associative ones.
- ::createAmiSet() can be passed a Name (title/label) via the \stdClass $data using $data->name. used for now on the CSV export but as easy-peasy as it gets to be added to the AMI normal Multi Step one (next pull?) --- src/AmiUtilityService.php | 44 ++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 7a80b51..a502e6f 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -27,6 +27,7 @@ use Drupal\Core\StringTranslation\StringTranslationTrait; use Drupal\file\Entity\File; use Drupal\file\FileUsage\FileUsageInterface; +use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; use GuzzleHttp\ClientInterface; use Drupal\strawberryfield\StrawberryfieldUtilityService; use Ramsey\Uuid\Uuid; @@ -442,10 +443,11 @@ public function retrieve_remote_file( catch (\Exception $exception) { $this->messenger()->addError( $this->t( - 'Unable to download remote file from @uri to local @path. Verify URL exists, its openly accessible and destination is writable.', + 'Unable to download remote file from @uri to local @path with error: @error. Verify URL exists, its openly accessible and destination is writable.', [ '@uri' => $uri, '@path' => $path, + '@error' => $exception->getMessage() ] ) ); @@ -671,6 +673,8 @@ public function csv_touch(string $filename = NULL) { * * @param array $data * Same as import form handles, to be dumped to CSV. + * $data should contain two keys, 'headers' and 'data' + * 'data' will be rows and may/not be associative. 
* * @param string $uuid_key * @@ -735,8 +739,16 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { $row[0] = Uuid::uuid4(); } else { - if (empty(trim($row[$haskey])) || !Uuid::isValid(trim($row[$haskey]))) { - $row[$haskey] = Uuid::uuid4(); + // In case Data is passed as an associative Array + if (StrawberryfieldJsonHelper::arrayIsMultiSimple($row)) { + if (!isset($row[$uuid_key]) || empty(trim($row[$uuid_key])) || !Uuid::isValid(trim($row[$uuid_key]))) { + $row[$uuid_key] = Uuid::uuid4(); + } + } + else { + if (empty(trim($row[$haskey])) || !Uuid::isValid(trim($row[$haskey]))) { + $row[$haskey] = Uuid::uuid4(); + } } } @@ -768,7 +780,7 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { /** - * Creates an CSV from array and returns file. + * Appends CSV from array and returns file. * * @param array $data * Same as import form handles, to be dumped to CSV. @@ -811,8 +823,16 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo $row[0] = Uuid::uuid4(); } else { - if (empty(trim($row[$haskey])) || !Uuid::isValid(trim($row[$haskey]))) { - $row[$haskey] = Uuid::uuid4(); + // In case Data is passed as an associative Array + if (StrawberryfieldJsonHelper::arrayIsMultiSimple($row)) { + if (!isset($row[$uuid_key]) || empty(trim($row[$uuid_key])) || !Uuid::isValid(trim($row[$uuid_key]))) { + $row[$uuid_key] = Uuid::uuid4(); + } + } + else { + if (empty(trim($row[$haskey])) || !Uuid::isValid(trim($row[$haskey]))) { + $row[$haskey] = Uuid::uuid4(); + } } } } @@ -1219,6 +1239,15 @@ public function checkFieldAccess($bundle, AccountInterface $account = NULL) { return $fields; } + /** + * Creates an AMI Set using a stdClass Object + * + * @param \stdClass $data + * + * @return int|mixed|string|null + * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException + * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException + */ public function createAmiSet(\stdClass $data) { // See 
\Drupal\ami\Entity\amiSetEntity $current_user_name = $this->currentUser->getDisplayName(); @@ -1232,10 +1261,11 @@ public function createAmiSet(\stdClass $data) { 'column_keys' => $data->column_keys, 'total_rows' => $data->total_rows, ]; + $name = $data->name ?? 'AMI Set of ' . $current_user_name; $jsonvalue = json_encode($set, JSON_PRETTY_PRINT); /* @var \Drupal\ami\Entity\amiSetEntity $entity */ $entity = $this->entityTypeManager->getStorage('ami_set_entity')->create( - ['name' => 'AMI Set of ' . $current_user_name] + ['name' => $name] ); $entity->set('set', $jsonvalue); $entity->set('source_data', [$data->csv]); From 28969c62f72a32851d98a2db29b699b14e9bafdb Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 Sep 2021 18:19:32 -0400 Subject: [PATCH 13/42] This still needs more work. Webform find and replace breaks Form State And mostly the logic generated by \Drupal\views_bulk_operations\Form\ConfigureAction::buildForm where form_state does not survive the ajax triggered dynamic fields that have internally a submit (e.g OpenStreetmaps). So will mark as experimental until i find a strange/dark hack to get this rolling. 
webform elements, gosh --- .../AmiStrawberryfieldJsonAsWebform.php | 329 +++++++++++++----- 1 file changed, 242 insertions(+), 87 deletions(-) diff --git a/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php b/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php index 267fff4..3c29d21 100644 --- a/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php +++ b/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php @@ -2,6 +2,8 @@ namespace Drupal\ami\Plugin\Action; +use Drupal\Component\Diff\Diff; +use Drupal\Component\Diff\DiffFormatter; use Drupal\Core\Ajax\AjaxResponse; use Drupal\Core\Ajax\HtmlCommand; use Drupal\Core\Form\FormStateInterface; @@ -10,7 +12,10 @@ use Drupal\views\ViewExecutable; use Drupal\webform\Plugin\WebformElement\WebformManagedFileBase; use Drupal\webform\Plugin\WebformElementEntityReferenceInterface; +use Swaggest\JsonDiff\Exception as JsonDiffException; +use Swaggest\JsonDiff\JsonDiff; use Symfony\Component\DependencyInjection\ContainerInterface; +use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; /** @@ -80,22 +85,25 @@ public function buildConfigurationForm(array $form, FormStateInterface $form_sta '#prefix' => '
    ', '#suffix' => '
    ', ]; + $webform_element_options = []; - dpm($form_state->getValues()); $webform_entity = NULL; + foreach($form_state->getStorage() as $prop => $value) { + $form_state->set($prop, $value); + } if (!empty($form_state->getValues()) && !empty($form_state->getValue('webform'))) { + /* @var \Drupal\webform\Entity\Webform $webform_entity */ $webform_entity = $this->entityTypeManager->getStorage('webform')->load($form_state->getValue('webform')); $anyelement = $webform_entity->getElementsInitializedAndFlattened(); foreach ($anyelement as $elementkey => $element) { $element_plugin = $this->webformElementManager->getElementInstance($element); - if (($element_plugin->getTypeName() != 'webform_wizard_page') && !($element_plugin instanceof WebformManagedFileBase)) { - $webform_element_options[$elementkey] = $element['#title']; + if (($element_plugin->getTypeName() != 'webform_wizard_page') && !($element_plugin instanceof WebformManagedFileBase) && $element_plugin->isInput($element)) { + $webform_element_options[$elementkey] = ($element['#title'] ?? ' Unamed ') . $this->t('(@elementkey JSON key )',[ + '@elementkey' => $elementkey + ]); } } - //dpm($webform_entity->getElementsDecodedAndFlattened()); - //dpm($webform_entity->getElementInitialized($form_state->getValue('elements'))); - //$form['webform_elements']['elements_render']['jsonfind2']= $webform_entity->getElementInitialized($form_state->getValue('elements')); } $form['webform_elements']['elements_for_this_form'] = [ '#type' => 'select', @@ -104,120 +112,257 @@ public function buildConfigurationForm(array $form, FormStateInterface $form_sta '#title' => $this->t('Select which Form Element you want to use'), '#options' => count($webform_element_options) ? 
$webform_element_options: [], '#default_value' => NULL, - '#submit' => [[$this, 'field_submit']], - '#executes_submit_callback' => TRUE, '#empty_option' => $this->t('- Please select an element -'), '#ajax' => array( - 'callback' => [get_class($this),'webformElementAjaxCallback'], + 'callback' => [$this,'webformElementAjaxCallback'], 'wrapper' => 'webform-elements-render-wrapper', 'event' => 'change', ), ]; - dpm($form_state->getValues()); - dpm($form_state->getValue(['webform_elements,elements_for_this_form'])); - if ($webform_entity && $form_state->getValue(['webform_elements','elements_for_this_form'])){ - $myelement = $webform_entity->getElement($form_state->getValue(['webform_elements','elements_for_this_form'])); - $cleanelement = []; - foreach($myelement as $key => $value) { - if (strpos($key, '#webform') === FALSE && strpos($key, '#access_') === FALSE) { - $cleanelement[$key] = $value; - } - } - $cleanelement['#element_validate'][] = [get_class($this),'elementDynamicValidate']; - $cleanelement['#required'] = FALSE; - $cleanelement['#validated'] = FALSE; - $cleanelement['#default_value'] = NULL; - $cleanelement['#submit'] = [[$this, 'dynamic_field_submit']]; - $cleanelement['#executes_submit_callback'] = TRUE; - $form['elements_rendered']['jsonfind_element']= $cleanelement; - dpm($cleanelement); - dpm($form['elements_rendered']['jsonfind_element']); - } + $chosen_element = $form_state->getValue(['webform_elements','elements_for_this_form'], NULL); + if ($webform_entity && $chosen_element) { + //$myelement1 = $webform_entity->getElementsDecodedAndFlattened(); + $myelement = $webform_entity->getElementDecoded($form_state->getValue(['webform_elements','elements_for_this_form'])); + //$myelement2 = \Drupal::service('plugin.manager.webform.element')->processElements($myelement); + $libraries = $webform_entity->getSubmissionForm()['#attached']['library'] ?? []; + $form['#attached']['library'] = ($form['#attached']['library'] ?? 
[]) + $libraries; + $cleanelement = []; + foreach($myelement as $key => $value) { + if (strpos($key, '#webform') === FALSE && strpos($key, '#access_') === FALSE) { + $cleanelement[$key] = $value; + } + } + //$cleanelement['#element_validate'][] = [$this,'elementDynamicValidate']; + $cleanelement['#required'] = FALSE; + $cleanelement['#validated'] = FALSE; + //$cleanelement['#default_value'] = NULL; + $form['elements_rendered']['jsonfind_element']= $cleanelement; - $form['jsonfind'] = [ - '#type' => 'textfield', - '#title' => t('JSON Search String'), - '#default_value' => $this->configuration['jsonfind'], - '#size' => '40', - '#description' => t('A string you want to find inside your JSON.'), - ]; - $form['jsonreplace'] = [ - '#type' => 'textfield', - '#title' => t('JSON Replacement String'), - '#default_value' => $this->configuration['jsonreplace'], - '#size' => '40', - '#description' => t('Replacement string for the matched search'), - ]; + $form['elements_rendered']['jsonfind_element']['#title'] = $this->t('Value to Search for in @elementkey JSON key', [ '@elementkey' => $chosen_element]); + $form['elements_rendered']['jsonreplace_element']= $cleanelement; + $form['elements_rendered']['jsonreplace_element']['#title'] = $this->t('Value to replace with in @elementkey JSON key', [ '@elementkey' => $chosen_element]); + } $form['simulate'] = [ '#title' => $this->t('only simulate and debug affected JSON'), '#type' => 'checkbox', '#default_value' => ($this->configuration['simulate'] === FALSE) ? 
FALSE : TRUE, ]; + $form['actions']['submit']['#ajax'] = [ + 'callback' => 'configureActionAjaxCallback', + ]; return $form; } - /** - * Submission handler for condition changes in - */ - public function field_submit(array &$form, FormStateInterface $form_state) { - - if (empty($form_state->getValue('webform'))) { - $form_state->unsetValue(['webform_elements','elements_for_this_form']); - } - if (empty($form_state->getValue(['webform_elements','elements_for_this_form']))) { - - } - // We have to unset this is a form element's needed value may not match - // a new one's need. E.g textfield v/s entity autocomplete - //$form_state->unsetValue(['elements_rendered','jsonfind_element']); - $form_state->setRebuild(TRUE); - } - /** - * Submission handler for condition changes in - */ - public function dynamic_field_submit(array &$form, FormStateInterface $form_state) { + public function elementDynamicValidate(&$element, FormStateInterface $form_state) { + $form_state->set('holi','chao'); } - public static function elementDynamicValidate(&$element, FormStateInterface $form_state) { - - //$element['#value'] = array_filter($element['#value']); - error_log(var_export($element,true)); - error_log(var_export($form_state->getValue('elements_rendered'),true)); - $form_state->set('holi','chao'); - // $form_state->setValueForElement($element, $element['#value']); + public function configureActionAjaxCallback(array $form, FormStateInterface $form_state) { + //$form_state->setRebuild(TRUE); + return $form; } public function webformAjaxCallback(array $form, FormStateInterface $form_state) { return $form['webform_elements']; } - public static function webformElementAjaxCallback(array $form, FormStateInterface $form_state) { + + + public function webformElementAjaxCallback(array $form, FormStateInterface $form_state) { return $form['elements_rendered']; - /*$item = [ - '#type' => 'item', - '#title' => $this->t('Ajax value'), - '#markup' => microtime(), - ]; - $response = new 
AjaxResponse(); - $response->addCommand(new HtmlCommand('#form-elements-render-wrapper', $item)); - return $response;*/ } public function submitConfigurationForm(array &$form, FormStateInterface $form_state) { - dpm($form_state->getValues()); - $this->configuration['jsonfind'] = $form_state->getValue('jsonfind'); - $this->configuration['jsonreplace'] = $form_state->getValue('jsonreplace'); - $this->configuration['simulate'] = $form_state->getValue('simulate'); + // Hacky but its the way we can do this dynamically + $jsonfind = $form_state->getUserInput()['elements_rendered']['jsonfind_element'] ?? []; + $jsonreplace = $form_state->getUserInput()['elements_rendered']['jsonreplace_element'] ?? []; + $chosen_element = $form_state->getValue(['webform_elements','elements_for_this_form'], NULL); + // $form_state->setRebuild(TRUE); + if ($chosen_element) { + $jsonfind_ready[$chosen_element] = $jsonfind; + $jsonreplace_ready[$chosen_element] = $jsonreplace; + $this->configuration['jsonfind'] = json_encode($jsonfind_ready) ?? '{}'; + $this->configuration['jsonreplace'] = json_encode($jsonreplace_ready) ?? 
'{}'; + $this->configuration['simulate'] = $form_state->getValue('simulate'); + } } /** * {@inheritdoc} */ - public function validateConfigurationForm(array &$form, FormStateInterface $form_state) { + public function execute($entity = NULL) { + /** @var \Drupal\Core\Entity\EntityInterface $entity */ + $patched = FALSE; + if ($entity) { + if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $entity + )) { + foreach ($sbf_fields as $field_name) { + /* @var $field \Drupal\Core\Field\FieldItemInterface */ + $field = $entity->get($field_name); + /* @var \Drupal\strawberryfield\Field\StrawberryFieldItemList $field */ + $entity = $field->getEntity(); + /** @var $field \Drupal\Core\Field\FieldItemList */ + $patched = FALSE; + foreach ($field->getIterator() as $delta => $itemfield) { + /** @var $itemfield \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem */ + $main_prop = $itemfield->mainPropertyName(); + $fullvaluesoriginal = $itemfield->provideDecoded(TRUE); + $count = 0; + $fullvaluesjson = []; + // This is how it goes. 
+ // First we get the original key from $this->configuration['jsonfind'] + // Then we search inside the original data and see if its single valued + // or multivalued + // If we find the match (which means for each property of the jsonfind there + // needs to be a match in the original + // we replace the existing value with the following condition + // - If jsonreplace is empty, we delete the original + // - If not we replace the found one + $decoded_jsonfind = json_decode($this->configuration['jsonfind'], TRUE); + $key = reset(array_keys($decoded_jsonfind)); + if ($key) { + if (!empty($fullvaluesoriginal[$key])) { + $isAssociativeOriginal = StrawberryfieldJsonHelper::arrayIsMultiSimple($fullvaluesoriginal[$key]); + if (!$isAssociative) { + foreach($fullvaluesoriginal[$key] as &$item) { + if ($item == $decoded_jsonfind[$key]) { + // Exact Array to Array 1:1 match + $item = $decoded_jsonfind[$key]; + $patched = TRUE; + } + } + } + else { + // Means we have a single Object not a list in the source. + if ($fullvaluesoriginal[$key] == $decoded_jsonfind[$key]) { + $fullvaluesoriginal[$key] = $decoded_jsonfind[$key]; + $patched = TRUE; + } + } + } + } + + + + + + // Now try to decode fullvalues + $fullvaluesjson = json_decode($fullvalues, TRUE, 50); + $json_error = json_last_error(); + if ($json_error != JSON_ERROR_NONE) { + $visualjsondiff = new Diff(explode(PHP_EOL,$stringvalues), explode(PHP_EOL,$fullvalues)); + $formatter = new DiffFormatter(); + $output = $formatter->format($visualjsondiff); + //$this->messenger()->addMessage($output); + $this->messenger()->addError( + $this->t( + 'We could not safely find and replace metadata for @entity. 
Your result after the replacement may not be a valid JSON.', + [ + '@entity' => $entity->label() + ] + )); + $this->messenger()->addMessage($output); + return $patched; + } + try { + if ($this->configuration['simulate']) { + $this->messenger()->addMessage('In simulation Mode'); + if ($fullvalues == $stringvalues) { + $patched = FALSE; + $this->messenger()->addStatus($this->t( + 'No Match for @entity, so skipping', + [ + '@entity' => $entity->label() + ] + )); + return $patched; + } + $r = new JsonDiff( + $fullvaluesoriginal, + $fullvaluesjson, + JsonDiff::REARRANGE_ARRAYS + JsonDiff::SKIP_JSON_MERGE_PATCH + JsonDiff::COLLECT_MODIFIED_DIFF + ); + // We just keep track of the changes. If none! Then we do not set + // the formstate flag. + $message = $this->formatPlural($r->getDiffCnt(), + 'Simulated patch: Digital Object @label would get one modification', + 'Simulated patch: Digital Object @label would get @count modifications', + ['@label' => $entity->label()]); + + $this->messenger()->addMessage($message); + /*$modified_diff = $r->getModifiedDiff(); + foreach ($modified_diff as $modifiedPathDiff) { + $this->messenger()->addMessage($modifiedPathDiff->path); + $this->messenger()->addMessage($modifiedPathDiff->original); + $this->messenger()->addMessage($modifiedPathDiff->new); + }*/ + + } else { + if ($fullvalues == $stringvalues) { + $patched = FALSE; + $this->messenger()->addStatus($this->t( + 'No change for @entity, so skipping', + [ + '@entity' => $entity->label() + ] + )); + return $patched; + } + $patched = TRUE; + if (!$itemfield->setMainValueFromArray((array) $fullvaluesjson)) { + $this->messenger()->addError( + $this->t( + 'We could not persist the metadata for @entity. Your result after the replacement may not be a valid JSON. 
Please contact your Site Admin.', + [ + '@entity' => $entity->label() + ] + ) + ); + $patched = FALSE; + }; + } + } catch (JsonDiffException $exception) { + $patched = FALSE; + $this->messenger()->addWarning( + $this->t( + 'Patch could not be applied for @entity', + [ + '@entity' => $entity->label() + ] + ) + ); + } + } + } + if ($patched) { + $this->logger->notice('%label had the following find: @jsonsearch and replace:@jsonreplace applied', [ + '%label' => $entity->label(), + '@jsonsearch' => '
    '.$this->configuration['jsonfind'].'
    ', + '@jsonreplace' => '
    '.$this->configuration['jsonreplace'].'
    ', + + ]); + if (!$this->configuration['simulate']) { + $entity->save(); + } + } + return $patched; + } + } } + + /** + * {@inheritdoc} + */ + public function validateConfigurationForm(array &$form, FormStateInterface $form_state) { + foreach($form_state->getStorage() as $prop => $value) { + $form_state->set($prop, $value); + } + } + /** * {@inheritdoc} */ @@ -244,7 +389,17 @@ public static function customAccess(AccountInterface $account, ViewExecutable $v return TRUE; } - - - + /** + * {@inheritdoc} + */ + public function __sleep() { + $obj_vars = get_object_vars($this); + $vars = parent::__sleep(); + // Well why? Because of loggers include Request Stack and this fails + // @see https://www.drupal.org/project/drupal/issues/3055287 + // Not the same but close. + $unserializable[] = 'logger'; + $vars = array_diff($vars, $unserializable); + return $vars; + } } From 84ed6ab988e6813e82f2c5afc52851b240acbcda Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 2 Sep 2021 18:20:02 -0400 Subject: [PATCH 14/42] Full Export Action to CSV Quite smart may i say! So much and so little code really. 
Test it, let me know how it goes --- .../Action/AmiStrawberryfieldCSVexport.php | 559 ++++++++++++++++++ 1 file changed, 559 insertions(+) create mode 100644 src/Plugin/Action/AmiStrawberryfieldCSVexport.php diff --git a/src/Plugin/Action/AmiStrawberryfieldCSVexport.php b/src/Plugin/Action/AmiStrawberryfieldCSVexport.php new file mode 100644 index 0000000..9a7f47b --- /dev/null +++ b/src/Plugin/Action/AmiStrawberryfieldCSVexport.php @@ -0,0 +1,559 @@ +entityTypeManager = $entity_type_manager; + $this->currentUser = $current_user; + $this->tempStore = $temp_store_factory->get('amiaction_csv_export'); + $this->strawberryfieldUtility = $strawberryfield_utility_service; + $this->logger = $logger; + $this->renderer = $renderer; + $this->streamWrapperManager = $streamWrapperManager; + $this->AmiUtilityService = $ami_utility; + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { + return new static( + $configuration, + $plugin_id, + $plugin_definition, + $container->get('entity_type.manager'), + $container->get('tempstore.private'), + $container->get('current_user'), + $container->get('strawberryfield.utility'), + $container->get('logger.factory')->get('action'), + $container->get('renderer'), + $container->get('stream_wrapper_manager'), + $container->get('ami.utility') + ); + } + + + /** + * {@inheritdoc} + */ + public function setContext(array &$context) { + $this->context['sandbox'] = &$context['sandbox']; + foreach ($context as $key => $item) { + if ($key === 'sandbox') { + continue; + } + $this->context[$key] = $item; + } + } + + /** + * {@inheritdoc} + */ + public function setView(ViewExecutable $view) { + $this->view = $view; + } + + /** + * {@inheritdoc} + */ + public function executeMultiple(array $objects) { + $results = $response = $errors = []; + foreach ($objects as $entity) { + $result = $this->execute($entity); + if ($result) { + 
$this->processHeader(array_keys($result)); + } else { + $errors[] = $this->t("Errors on: item @label, could not be exported
    ", + ['@label' => $entity->label()]); + } + $results[] = $result; + //$this->context['sandbox']['processed']++; + } + $this->saveRows($results); + $response[] = $this->t("@total successfully processed items in batch @batch", + [ + '@total' => count(array_filter($results)), + '@batch' => $this->context['sandbox']['current_batch'] + ]); + // Generate the output file if the last row has been processed. + if (!isset($this->context['sandbox']['total']) || ($this->context['sandbox']['processed'] + $this->context['sandbox']['batch_size']) >= $this->context['sandbox']['total']) { + $output = $this->generateOutput(); + $this->sendToFile($output); + $response[] = $this->t("CSV export done: @total items processed.", + ['@total' => $this->context['sandbox']['total']]); + } + return array_merge($response, $errors); + } + + /** + * {@inheritdoc} + */ + public function execute($entity = NULL) { + + /** @var \Drupal\Core\Entity\EntityInterface $entity */ + $row = []; + + if ($entity) { + if ($sbf_fields = $this->strawberryfieldUtility->bearsStrawberryfield( + $entity + )) { + foreach ($sbf_fields as $field_name) { + /* @var $field \Drupal\Core\Field\FieldItemInterface */ + $field = $entity->get($field_name); + /* @var \Drupal\strawberryfield\Field\StrawberryFieldItemList $field */ + $entity = $field->getEntity(); + /** @var $field \Drupal\Core\Field\FieldItemList */ + foreach ($field->getIterator() as $delta => $itemfield) { + /** @var $itemfield \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem */ + $fullvalues = $itemfield->provideDecoded(TRUE); + $row['node_uuid'] = $entity->uuid(); + if ($this->configuration['expand_nodes_to_uuids']) { + // UUID-dify the mappings here + if (isset($fullvalues['ap:entitymapping']['entity:node']) && is_array($fullvalues['ap:entitymapping']['entity:node'])) { + foreach ($fullvalues['ap:entitymapping']['entity:node'] as $jsonkey_with_nodeids) { + if ($this->configuration['create_ami_set']) { + 
$this->context['sandbox']['parent_columns'] = $this->context['sandbox']['parent_columns'] ?? []; + $this->context['sandbox']['parent_columns'][] = $jsonkey_with_nodeids; + $this->context['sandbox']['parent_columns'] = array_unique($this->context['sandbox']['parent_columns']); + } + if (isset($fullvalues[$jsonkey_with_nodeids])) { + if (is_array($fullvalues[$jsonkey_with_nodeids])) { + foreach ($fullvalues[$jsonkey_with_nodeids] as $key => $nodeid) { + $related_entity = $this->entityTypeManager->getStorage('node') + ->load($nodeid); + if ($related_entity) { + $fullvalues[$jsonkey_with_nodeids][$key] = $related_entity->uuid(); + } + } + // This will string-ify multiple related NODES to a single ; separated list of UUIDs. + $fullvalues[$jsonkey_with_nodeids] = implode(";", + $fullvalues[$jsonkey_with_nodeids]); + } + else { + $related_entity = $this->entityTypeManager->getStorage('node') + ->load($fullvalues[$jsonkey_with_nodeids]); + if ($related_entity) { + $fullvalues[$jsonkey_with_nodeids] = $related_entity->uuid(); + } + } + } + } + } + } + // If two types have different bundles only one will win. Do not do that ok? + if ($this->configuration['create_ami_set']) { + $this->context['sandbox']['type_bundle'] = $this->context['sandbox']['type_bundle'] ?? 
[]; + $this->context['sandbox']['type_bundle'][$fullvalues['type']] = $entity->bundle().':'.$field_name; + } + + if ($this->configuration['no_media']) { + // Remove all as:type keys and keys with files + if (isset($fullvalues['ap:entitymapping']['entity:file']) && is_array($fullvalues['ap:entitymapping']['entity:file'])) { + $fullvalues = array_diff_key($fullvalues, array_flip(array_merge($fullvalues['ap:entitymapping']['entity:file'], StrawberryfieldJsonHelper::AS_FILE_TYPE))); + } + } + if ($this->configuration['migrate_media'] && !$this->configuration['no_media']) { + $ordersubkey = 'sequence'; + if (isset($fullvalues['ap:entitymapping']['entity:file']) && is_array($fullvalues['ap:entitymapping']['entity:file'])) { + // Clear the original Keys (with File ids out first) + $fullvalues = array_diff_key($fullvalues, array_flip(array_merge($fullvalues['ap:entitymapping']['entity:file']))); + foreach (StrawberryfieldJsonHelper::AS_FILE_TYPE as $as_key) { + if (isset($fullvalues[$as_key]) && is_array($fullvalues[$as_key])) { + StrawberryfieldJsonHelper::orderSequence($fullvalues, $as_key, + $ordersubkey); + foreach ($fullvalues[$as_key] as $mediaentry) { + if ($mediaentry['dr:uuid'] && $mediaentry['name'] && $mediaentry['dr:for']) { + $link = Url::fromRoute('format_strawberryfield.iiifbinary', + [ + 'node' => $entity->id(), + 'uuid' => $mediaentry['dr:uuid'], + 'format' => $mediaentry['name'] + ], + ['absolute' => TRUE] + )->toString(); + $fullvalues[$mediaentry['dr:for']] = empty($fullvalues[$mediaentry['dr:for']]) ? $link : $fullvalues[$mediaentry['dr:for']] . ';' . $link; + if ($this->configuration['create_ami_set']) { + // set context for all accumulated file columns + $this->context['sandbox']['file_columns'] = $this->context['sandbox']['file_columns'] ?? 
[]; + $this->context['sandbox']['file_columns'][] = $mediaentry['dr:for']; + $this->context['sandbox']['file_columns'] = array_unique($this->context['sandbox']['file_columns']); + } + } + } + } + } + // Now remove technical metadata + $fullvalues = array_diff_key($fullvalues, array_flip(StrawberryfieldJsonHelper::AS_FILE_TYPE)); + } + } + foreach($fullvalues as $key => $fullvalue) { + $row[$key] = is_array($fullvalue) ? json_encode($fullvalue, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_HEX_QUOT ,512) : $fullvalue; + } + } + } + } + } + return $row; + } + + + protected function generateOutput() { + $rows = []; + for ($i = 1; $i <= $this->context['sandbox']['current_batch']; $i++) { + $chunk = $this->tempStore->get($this->context['sandbox']['cid_prefix'] . $i); + if ($chunk) { + $keys = $this->context['sandbox']['headers']; + $template = array_fill_keys($keys, NULL); + $new_chunk = array_map(function($item) use ($template) { + return array_merge($template, $item); + }, $chunk); + $rows = array_merge($rows, $new_chunk); + $this->tempStore->delete($this->context['sandbox']['cid_prefix'] . $i); + } + } + return $rows; + } + + + + /** + * Output generated string to file. Message user. + * + * @param string $output + * The string that will be saved to a file. + */ + protected function sendToFile($output) { + if (!empty($output)) { + $data['data'] = $output; + $data['headers'] = $this->context['sandbox']['headers']; + $file_id = $this->AmiUtilityService->csv_save($data, 'node_uuid'); + if ($file_id && $this->configuration['create_ami_set']) { + $amisetdata = new \stdClass(); + $amisetdata->plugin = 'spreadsheet'; + $amisetdata->pluginconfig->op = 'update'; + $amisetdata->pluginconfig->file = [$file_id]; + $amisetdata->adomapping->base->label = "label"; + $amisetdata->adomapping->uuid->uuid = "node_uuid"; + $amisetdata->adomapping->parents = $this->context['sandbox']['parent_columns'] ?? 
[]; + $amisetdata->adomapping->autouuid = 0; + $amisetdata->mapping->globalmapping = "custom"; + // Set by ::execute() + foreach ($this->context['sandbox']['type_bundle'] as $type => $bundle_field) { + $amisetdata->mapping->custommapping_settings->{$type}->files = $this->context['sandbox']['file_columns'] ?? []; + $amisetdata->mapping->custommapping_settings->{$type}->bundle = $bundle_field; + $amisetdata->mapping->custommapping_settings->{$type}->metadata = "direct"; + } + $amisetdata->csv = $file_id; + $amisetdata->column_keys = $this->context['sandbox']['headers'] ?? []; + $amisetdata->total_rows = count($output); + $amisetdata->zip = null; + $amisetdata->name = trim($this->configuration['create_ami_set_name']); + $amisetdata->name = strlen($amisetdata->name) > 0 ? $amisetdata->name : NULL; + $amiset_id = $this->AmiUtilityService->createAmiSet($amisetdata); + if ($amiset_id) { + $url = Url::fromRoute('entity.ami_set_entity.canonical', + ['ami_set_entity' => $amiset_id]); + $this->messenger() + ->addStatus($this->t('Well Done! New AMI Set was created and you can see it here', + ['@url' => $url->toString()])); + } + } + } + } + + + public function buildPreConfigurationForm(array $element, array $values, FormStateInterface $form_state) { + } + + public function buildConfigurationForm(array $form, FormStateInterface $form_state) { + $form['expand_nodes_to_uuids'] = [ + '#type' => 'checkbox', + '#title' => $this->t('Expand related ADOs to UUIDs'), + '#default_value' => ($this->configuration['expand_nodes_to_uuids'] === FALSE) ? FALSE : TRUE, + '#size' => '40', + '#description' => t('When enabled all related ADOs (ismemberof, etc) are going to be expandad to their UUIDs. This allows changes to parentship to be made on the CSV.'), + ]; + $form['no_media'] = [ + '#type' => 'checkbox', + '#title' => $this->t('Do not export Media/Files'), + '#default_value' => ($this->configuration['no_media'] === FALSE) ? 
FALSE : TRUE, + '#size' => '40', + '#description' => t('When enabled File references and their associated technical Metadata (as:filetype JSON keys, e.g as:image) will be skipped. This allows pure Descriptive Metadata to be exported to CSV'), + ]; + + $form['migrate_media'] = [ + '#title' => $this->t('Convert Media to Portable Absolute URLs.'), + '#description' => $this->t('When enabled all File references will be converted to absolute URLs and their associated technical Metadata (as:filetype JSON keys, e.g as:image) will be skipped. This allows CSVs to be used to ingest new ADOs in other repositories.'), + '#type' => 'checkbox', + '#default_value' => ($this->configuration['migrate_media'] === FALSE) ? FALSE : TRUE, + ]; + $form['create_ami_set'] = [ + '#title' => $this->t('Attach CSV to a new AMI Set.'), + '#description' => $this->t('When checked a new AMI set with the exported data will be created and configured for "Updating" existing ADOs.'), + '#type' => 'checkbox', + '#default_value' => ($this->configuration['create_ami_set'] === FALSE) ? FALSE : TRUE, + ]; + $form['create_ami_set_name'] = [ + '#title' => $this->t('Please Give your AMI Set a name. 
If empty we will create a (quite) generic one for you'), + '#type' => 'textfield', + '#size' => '40', + '#default_value' => $this->configuration['create_ami_set_name'] + ]; + return $form; + } + + public function submitConfigurationForm(array &$form, FormStateInterface $form_state) { + $this->configuration['expand_nodes_to_uuids'] = $form_state->getValue('expand_nodes_to_uuids'); + $this->configuration['no_media'] = $form_state->getValue('no_media'); + $this->configuration['migrate_media'] = $form_state->getValue('migrate_media'); + $this->configuration['create_ami_set'] = $form_state->getValue('create_ami_set'); + $this->configuration['create_ami_set_name'] = $form_state->getValue('create_ami_set_name'); + } + + /** + * {@inheritdoc} + */ + public function validateConfigurationForm(array &$form, FormStateInterface $form_state) { + + } + + /** + * {@inheritdoc} + */ + public function defaultConfiguration() { + return [ + 'expand_nodes_to_uuids' => FALSE, + 'no_media' => FALSE, + 'migrate_media' => FALSE, + 'create_ami_set' => FALSE, + 'create_ami_set_name' => 'CSV Export/Import AMI Set', + ]; + } + + /** + * Default custom access callback. + * + * @param \Drupal\Core\Session\AccountInterface $account + * The user the access check needs to be preformed against. + * @param \Drupal\views\ViewExecutable $view + * The View Bulk Operations view data. + * + * @return bool + * Has access. + */ + public static function customAccess(AccountInterface $account, ViewExecutable $view) { + return TRUE; + } + + /** + * {@inheritdoc} + */ + public function access($object, AccountInterface $account = NULL, $return_as_object = FALSE) { + + /** @var \Drupal\Core\Entity\EntityInterface $object */ + $result = $object->access('view', $account, TRUE) + ->andIf(AccessResult::allowedIfHasPermission($account, 'CSV Export Archipelago Digital Objects')); + return $return_as_object ? 
$result : $result->isAllowed(); + } + + /** + * {@inheritdoc} + */ + public function calculateDependencies() { + $module_name = $this->entityTypeManager + ->getDefinition($this->getPluginDefinition()['type']) + ->getProvider(); + return ['module' => [$module_name]]; + } + + /** + * Saves batch data into Private storage. + * + * @param array $rows + * Rows from batch. + */ + protected function saveRows(array &$rows) { + $this->tempStore->set($this->getCid(), $rows); + unset($rows); + } + /** + * Saves combined header data into the batch context + * + * @param array $rows + * Rows from batch. + */ + protected function processHeader(array $header) { + if (!isset($this->context['sandbox']['headers'])) { + $this->context['sandbox']['headers'] = $header; + } + else { + $this->context['sandbox']['headers'] = array_unique(array_merge($this->context['sandbox']['headers'], $header)); + } + } + + /** + * Gets Cache ID for current batch. + * + * @return string + * Cache unique ID for Temporary storage. + */ + protected function getCid() { + if (!isset($this->context['sandbox']['cid_prefix'])) { + $this->context['sandbox']['cid_prefix'] = $this->context['view_id'] . ':' + . $this->context['display_id'] . ':' . $this->context['action_id'] . ':' + . md5(serialize(array_keys($this->context['list']))) . ':'; + } + + return $this->context['sandbox']['cid_prefix'] . $this->context['sandbox']['current_batch']; + } + + /** + * Prepares sandbox data (header and cache ID). + * + * @return array + * Table header. + */ + protected function getHeader() { + // Build output header array. 
+ $header = &$this->context['sandbox']['header']; + if (!empty($header)) { + return $header; + } + return $this->setHeader(); + } + + +} From 8fe9a340987e831a8644aae5bfb8eb4e352277f8 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 20 Sep 2021 22:45:06 -0400 Subject: [PATCH 15/42] Another pass on making LoD reconciling better - This adds new methods for offsetting and paging CSV data (was a pain because JSON pretty printed has breaklines). Since i was here i also added options for getting headers (or not) for offset and paged CSV access - Adds an actual pager to the Edit Reconciling page - Nothing of this is Done-done. I need to now change the "saving" algorithm to allow partial / offset saves to happen. Probably using the already created KeyValue for the set instead of reading the actual Form data as we did with large full sets and also need the "this was fixed" checkbox to be added to a new Column More tomorrow @alliomeria ! --- src/AmiUtilityService.php | 79 ++++++++++++++++--- src/Form/amiSetEntityReconcileCleanUpForm.php | 10 ++- src/Form/amiSetEntityReconcileForm.php | 2 - 3 files changed, 75 insertions(+), 16 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index a502e6f..e888ec7 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -32,6 +32,7 @@ use Drupal\strawberryfield\StrawberryfieldUtilityService; use Ramsey\Uuid\Uuid; use Drupal\Core\File\Exception\FileException; +use SplFileObject; class AmiUtilityService { @@ -853,10 +854,15 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo /** * @param \Drupal\file\Entity\File $file - * + * @param int $offset + * Where to start to read the file + * @param int $count + * Number of results, 0 will fetch all + * @param bool $always_include_header + * Always return header even with an offset. 
* @return array|null */ - public function csv_read(File $file) { + public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = true) { $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); if (!$wrapper) { @@ -865,23 +871,50 @@ public function csv_read(File $file) { $url = $wrapper->realpath(); $spl = new \SplFileObject($url, 'r'); + if ($offset > 0) { + // We only set this flags when an offset is present. + // Because if not fgetcsv is already dealing with multi line CSV rows. + $spl->setFlags( + SplFileObject::READ_CSV | + SplFileObject::READ_AHEAD | + SplFileObject::SKIP_EMPTY | + SplFileObject::DROP_NEW_LINE + ); + } + + if ($offset > 0 && !$always_include_header) { + // If header needs to be included then we offset later on + $spl->seek($offset); + } $data = []; - while (!$spl->eof()) { + while (!$spl->eof() && ($count == 0 || $spl->key() < ($offset + $count))) { $data[] = $spl->fgetcsv(); + if ($offset > 0 && $always_include_header) { + $spl->seek($offset); + $offset = $offset + 1; + // So we do not process this again. + $always_include_header = false; + } } $table = []; $maxRow = 0; $highestRow = count($data); + if ($always_include_header) { + $rowHeaders = $data[0]; + $rowHeaders_utf8 = array_map('stripslashes', $rowHeaders); + $rowHeaders_utf8 = array_map('utf8_encode', $rowHeaders_utf8); + $rowHeaders_utf8 = array_map('strtolower', $rowHeaders_utf8); + $rowHeaders_utf8 = array_map('trim', $rowHeaders_utf8); + $headercount = count($rowHeaders); + } + else { + $rowHeaders = $rowHeaders_utf8 = []; + $not_a_header = $data[0] ?? 
[]; + $headercount = count($not_a_header); + } - $rowHeaders = $data[0]; - $rowHeaders_utf8 = array_map('stripslashes', $rowHeaders); - $rowHeaders_utf8 = array_map('utf8_encode', $rowHeaders_utf8); - $rowHeaders_utf8 = array_map('strtolower', $rowHeaders_utf8); - $rowHeaders_utf8 = array_map('trim', $rowHeaders_utf8); - - $headercount = count($rowHeaders); if (($highestRow) >= 1) { // Returns Row Headers. @@ -892,6 +925,8 @@ public function csv_read(File $file) { // Skip header continue; } + // Ensure row is always an array. + $row = $row ?? []; $flat = trim(implode('', $row)); //check for empty row...if found stop there. if (strlen($flat) == 0) { @@ -917,7 +952,6 @@ public function csv_read(File $file) { ]; return $tabdata; - } /** @@ -990,7 +1024,30 @@ public function csv_clean(File $file, array $headerwithdata = []) { return $file->id(); } + /** + * @param \Drupal\file\Entity\File $file + * + * @return int + */ + public function csv_count(File $file) { + $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); + if (!$wrapper) { + return NULL; + } + $url = $wrapper->realpath(); + $spl = new \SplFileObject($url, 'r'); + $spl->setFlags( + SplFileObject::READ_CSV | + SplFileObject::READ_AHEAD | + SplFileObject::SKIP_EMPTY | + SplFileObject::DROP_NEW_LINE + ); + $spl->seek(PHP_INT_MAX); + $key = $spl->key() + 1; + $spl = NULL; + return $key; + } /** * Deal with different sized arrays for combining diff --git a/src/Form/amiSetEntityReconcileCleanUpForm.php b/src/Form/amiSetEntityReconcileCleanUpForm.php index 1358bf7..a39921e 100644 --- a/src/Form/amiSetEntityReconcileCleanUpForm.php +++ b/src/Form/amiSetEntityReconcileCleanUpForm.php @@ -158,8 +158,13 @@ public function buildForm(array $form, FormStateInterface $form_state) { $access = TRUE; if ($file_lod) { + $num_per_page = 10; + $total_rows = $this->AmiUtilityService->csv_count($file_lod); + $pager = \Drupal::service('pager.manager')->createPager($total_rows, $num_per_page); + $page = 
$pager->getCurrentPage(); + $offset = $num_per_page * $page; + $file_data_all = $this->AmiUtilityService->csv_read($file_lod, $offset, $num_per_page); - $file_data_all = $this->AmiUtilityService->csv_read($file_lod); $column_keys = $file_data_all['headers'] ?? []; $form['lod_cleanup']['table-row'] = [ @@ -175,7 +180,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { if ($column !== 'original' && $column != 'csv_columns') { $argument_string = static::LOD_COLUMN_TO_ARGUMENTS[$column] ?? NULL; if ($argument_string) { - $arguments = explode(';', $argument_string); $elements[$column] = [ '#type' => 'webform_metadata_' . $arguments[0], @@ -219,13 +223,13 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#label__title' => 'Label', '#default_value' => json_decode($row[$key], TRUE), ] + $elements[$header]; - } } } \Drupal::service('plugin.manager.webform.element')->processElements($form); // Attach the webform library. $form['#attached']['library'][] = 'webform/webform.form'; + $form['lod_cleanup']['pager'] = ['#type' => 'pager']; } } $form = $form + parent::buildForm($form, $form_state); diff --git a/src/Form/amiSetEntityReconcileForm.php b/src/Form/amiSetEntityReconcileForm.php index 454958b..cf18308 100644 --- a/src/Form/amiSetEntityReconcileForm.php +++ b/src/Form/amiSetEntityReconcileForm.php @@ -280,8 +280,6 @@ public function submitForm(array &$form, FormStateInterface $form_state) { ); } - - $csv_file_processed = $this->entity->get('processed_data')->getValue(); if (isset($csv_file_processed[0]['target_id'])) { /** @var \Drupal\file\Entity\File $file_lod */ From afffc75d79b468c28bd6d977dc386ae1e6551c0f Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 21 Sep 2021 13:41:47 -0400 Subject: [PATCH 16/42] Now that was a mistake! @alliomeria paging is now working again. I was doing a wrong "check if this needs a header row" thing and ending without a header row which the Reconcile form needed. Good! 
--- src/AmiUtilityService.php | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index e888ec7..5a9eb15 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -860,9 +860,14 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo * Number of results, 0 will fetch all * @param bool $always_include_header * Always return header even with an offset. + * * @return array|null + * Returning array will be in this form: + * 'headers' => $rowHeaders_utf8 or [] if $always_include_header == FALSE + * 'data' => $table, + * 'totalrows' => $maxRow, */ - public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = true) { + public function csv_read(File $file, int $offset = 0, int $count = 0, bool $always_include_header = TRUE) { $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); if (!$wrapper) { @@ -887,13 +892,14 @@ public function csv_read(File $file, int $offset = 0, int $count = 0, bool $alwa $spl->seek($offset); } $data = []; - while (!$spl->eof() && ($count == 0 || $spl->key() < ($offset + $count))) { + $seek_to_offset = ($offset > 0 && $always_include_header); + while (!$spl->eof() && ($count == 0 || ($spl->key() < ($offset + $count)))) { $data[] = $spl->fgetcsv(); - if ($offset > 0 && $always_include_header) { + if ($seek_to_offset) { $spl->seek($offset); $offset = $offset + 1; // So we do not process this again. - $always_include_header = false; + $seek_to_offset = FALSE; } } From 6f516810b0f36f0b4364121443157f7214837f83 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 Sep 2021 14:10:40 -0400 Subject: [PATCH 17/42] Fix direct Ingest @patdunlavey not the last pull of this branch (LOD had to be postponed for tonight) but direct should work now. Please give it a spin? (i will do some extra double checking of data after things, but should "mostly" workout. 
--- src/AmiUtilityService.php | 39 ++++++++++++ src/Form/amiSetEntityReconcileCleanUpForm.php | 25 ++++++-- src/Form/amiSetEntityReconcileForm.php | 2 +- .../QueueWorker/IngestADOQueueWorker.php | 63 +++++++++++++------ src/Plugin/QueueWorker/LoDQueueWorker.php | 5 +- 5 files changed, 105 insertions(+), 29 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 5a9eb15..d3d7965 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -1719,6 +1719,45 @@ public function expandJson(array $row) { if ($json_error == JSON_ERROR_NONE) { $value = $expanded; } + else { + // Check if this may even be a JSON someone messed up + + if (is_string($value) && + ( + (strpos(ltrim($value), '{') === 0) || + (strpos(ltrim($value), '[') === 0) + ) + ) { + // Ok, it actually starts with a {} + // try to clean up. + $quotes = array( + "\xC2\xAB" => '"', // « (U+00AB) in UTF-8 + "\xC2\xBB" => '"', // » (U+00BB) in UTF-8 + "\xE2\x80\x98" => "'", // ‘ (U+2018) in UTF-8 + "\xE2\x80\x99" => "'", // ’ (U+2019) in UTF-8 + "\xE2\x80\x9A" => "'", // ‚ (U+201A) in UTF-8 + "\xE2\x80\x9B" => "'", // ‛ (U+201B) in UTF-8 + "\xE2\x80\x9C" => '"', // “ (U+201C) in UTF-8 + "\xE2\x80\x9D" => '"', // ” (U+201D) in UTF-8 + "\xE2\x80\x9E" => '"', // „ (U+201E) in UTF-8 + "\xE2\x80\x9F" => '"', // ‟ (U+201F) in UTF-8 + "\xE2\x80\xB9" => "'", // ‹ (U+2039) in UTF-8 + "\xE2\x80\xBA" => "'", // › (U+203A) in UTF-8 + ); + $possiblejson = strtr($value, $quotes); + $expanded = json_decode($possiblejson, TRUE); + $json_error = json_last_error(); + // Last chance to be JSON. + if ($json_error == JSON_ERROR_NONE) { + $value = $expanded; + } + else { + // But here we do unset the value + // Because it may really mess things out. 
+ $value = NULL; + } + } + } } return $row; } diff --git a/src/Form/amiSetEntityReconcileCleanUpForm.php b/src/Form/amiSetEntityReconcileCleanUpForm.php index a39921e..eca8dc8 100644 --- a/src/Form/amiSetEntityReconcileCleanUpForm.php +++ b/src/Form/amiSetEntityReconcileCleanUpForm.php @@ -166,16 +166,29 @@ public function buildForm(array $form, FormStateInterface $form_state) { $file_data_all = $this->AmiUtilityService->csv_read($file_lod, $offset, $num_per_page); $column_keys = $file_data_all['headers'] ?? []; - + $form['lod_cleanup']['pager_top'] = ['#type' => 'pager']; $form['lod_cleanup']['table-row'] = [ '#type' => 'table', '#tree' => TRUE, '#prefix' => '
    ', '#suffix' => '
    ', '#header' => $column_keys, - '#empty' => $this->t('Sorry, There are LoD no items!'), + '#empty' => $this->t('Sorry, There are LoD no items or you have not a header column. Check your CSV for errors.'), ]; $elements = []; + $form['lod_cleanup']['offset'] = [ + '#type' => 'value', + '#value' => $offset, + ]; + $form['lod_cleanup']['num_per_page'] = [ + '#type' => 'value', + '#value' => $num_per_page, + ]; + $form['lod_cleanup']['column_keys'] = [ + '#type' => 'value', + '#value' => $column_keys, + ]; + foreach ($column_keys as $column) { if ($column !== 'original' && $column != 'csv_columns') { $argument_string = static::LOD_COLUMN_TO_ARGUMENTS[$column] ?? NULL; @@ -205,13 +218,13 @@ public function buildForm(array $form, FormStateInterface $form_state) { foreach ($file_data_all['data'] as $index => $row) { foreach($file_data_all['headers'] as $key => $header) { if ($header == 'original' || $header == 'csv_columns') { - $form['lod_cleanup']['table-row'][$index - 1][$header.'-'.$index] = [ + $form['lod_cleanup']['table-row'][($index - 1)][$header.'-'.($index-1)] = [ '#type' => 'markup', '#markup' => $row[$key], ]; } else { - $form['lod_cleanup']['table-row'][$index - 1][$header.'-'.$index] = [ + $form['lod_cleanup']['table-row'][($index - 1)][$header.'-'.($index-1)] = [ '#multiple' => 5, '#multiple__header' => FALSE, '#multiple__no_items_message' => '', @@ -274,8 +287,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } } } - $file_lod_id = $this->AmiUtilityService->csv_touch($file_lod->getFilename()); - $success = $this->AmiUtilityService->csv_append($file_data_all, $file_lod,NULL, TRUE); + //$file_lod_id = $this->AmiUtilityService->csv_touch($file_lod->getFilename()); + //$success = $this->AmiUtilityService->csv_append($file_data_all, $file_lod,NULL, TRUE); if (!$success) { $this->messenger()->addError( $this->t( diff --git a/src/Form/amiSetEntityReconcileForm.php b/src/Form/amiSetEntityReconcileForm.php index cf18308..b7c32ba 
100644 --- a/src/Form/amiSetEntityReconcileForm.php +++ b/src/Form/amiSetEntityReconcileForm.php @@ -347,7 +347,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $columns); $inverted = []; $column_map_inverted = []; - $headers = ['original','csv_columns']; + $headers = ['original','csv_columns', 'checked']; foreach($values_per_column as $column => $labels) { foreach($labels as $label) { $inverted[$label] = $inverted[$label] ?? []; diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index e9392c1..b5557a4 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -184,21 +184,34 @@ public function processItem($data) { } } } - // let's attach the LoD Context here - // - We need the columns that were Reconciliated from keystore - // - We need to fetch for this row the un-reconciliated columns and split them into labels - // - We need to fetch for every label from keystore the reconciliated values - // - Push them into $additional_context keyed by vocab and column - - $processed_metadata = $this->AmiUtilityService->processMetadataDisplay($data); - if (!$processed_metadata) { - $this->messenger->addWarning($this->t('Sorry, we can not cast ADO with @uuid into proper Metadata. Check the Metadata Display Template used, your permissions and/or your data ROW in your CSV for set @setid.',[ - '@uuid' => $data->info['row']['uuid'], - '@setid' => $data->info['set_id'] - ])); - return; + $method = $data->mapping->globalmapping ?? "direct"; + if ($method == 'custom') { + $method = $data->mapping->custommapping_settings->{$data->info['row']['type']}->metadata ?? "direct"; + } + if ($method == "metadata") { + $processed_metadata = $this->AmiUtilityService->processMetadataDisplay($data); + if (!$processed_metadata) { + $this->messenger->addWarning($this->t('Sorry, we can not cast ADO with @uuid into proper Metadata. 
Check the Metadata Display Template used, your permissions and/or your data ROW in your CSV for set @setid.',[ + '@uuid' => $data->info['row']['uuid'], + '@setid' => $data->info['set_id'] + ])); + return; + } } + if ($method == "direct") { + $processed_metadata = $this->AmiUtilityService->expandJson($data->info['row']['data']); + $processed_metadata = !empty($processed_metadata) ? json_encode($processed_metadata) : NULL; + $json_error = json_last_error(); + if ($json_error !== JSON_ERROR_NONE || !$processed_metadata) { + $this->messenger->addWarning($this->t('Sorry, we can not cast ADO with @uuid directly into proper Metadata. Check your data ROW in your CSV for set @setid for invalid JSON data.',[ + '@uuid' => $data->info['row']['uuid'], + '@setid' => $data->info['set_id'] + ])); + return; + } + } + $cleanvalues = []; // Now process Files and Nodes $ado_object = $data->adomapping->parents ?? NULL; @@ -220,13 +233,23 @@ public function processItem($data) { $ado_columns = array_values(get_object_vars($ado_object)); } - $entity_mapping_structure['entity:file'] = $file_columns; - $entity_mapping_structure['entity:node'] = $ado_columns; + // deal with possible overrides from either Direct ingest of + // A Smart twig template that adds extra mappings + $processed_metadata = json_decode($processed_metadata, true); + + $custom_file_mapping = $processed_metadata['entity:file'] ?? []; + $custom_node_mapping = $processed_metadata['entity:node'] ?? []; + + $entity_mapping_structure['entity:file'] = array_unique(array_merge($custom_file_mapping,$file_columns)); + $entity_mapping_structure['entity:node'] = array_unique(array_merge($custom_node_mapping,$ado_columns)); + // Unset so we do not lose our merge after '+' both arrays + unset($processed_metadata['entity:file']); + unset($processed_metadata['entity:node']); + $cleanvalues['ap:entitymapping'] = $entity_mapping_structure; $processed_metadata = $processed_metadata + $cleanvalues; // Assign parents as NODE Ids. 
- foreach ($parent_nodes as $parent_property => $node_ids) { $processed_metadata[$parent_property] = $node_ids; } @@ -350,7 +373,7 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { } $label_column = $data->adomapping->base->label ?? 'label'; - // Always (becaye of processed metadata via template) try to fetch again the mapped version + // Always (because of processed metadata via template) try to fetch again the mapped version $label = $processed_metadata[$label_column] ?? ($processed_metadata['label'] ?? NULL); $property_path_split = explode(':', $property_path); @@ -371,8 +394,8 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { $field_name_offset = $property_path_split[2] ?? 0; // Fall back to not published in case no status was passed. $status = $data->info['status'][$bundle] ?? 0; - // default Sortfile which will respect the ingest order. - $processed_metadata['ap:tasks']['ap:sortfiles'] = 'index'; + // default Sortfile which will respect the ingest order. If there was already one set, preserve. + $processed_metadata['ap:tasks']['ap:sortfiles'] = $processed_metadata['ap:tasks']['ap:sortfiles'] ?? 'index'; // JSON_ENCODE AGAIN $jsonstring = json_encode($processed_metadata, JSON_PRETTY_PRINT, 50); @@ -443,7 +466,7 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { } } // Finally set the original ap task or index as default. - $processed_metadata['ap:tasks']['ap:sortfiles'] = $original_value['ap:tasks']['ap:sortfiles'] ?? 'index'; + $processed_metadata['ap:tasks']['ap:sortfiles'] = $processed_metadata['ap:tasks']['ap:sortfiles'] ?? 
'index'; $this->patchJson($original_value, $processed_metadata); $itemfield->setMainValueFromArray($processed_metadata); break; diff --git a/src/Plugin/QueueWorker/LoDQueueWorker.php b/src/Plugin/QueueWorker/LoDQueueWorker.php index 51bc32e..f9cd766 100644 --- a/src/Plugin/QueueWorker/LoDQueueWorker.php +++ b/src/Plugin/QueueWorker/LoDQueueWorker.php @@ -134,7 +134,7 @@ public function processItem($data) { $data->info = [ 'label' => The label passed to the Reconciling URL, 'domain' => This Server's Domain name - 'headers' => All headers (LoD Routes) + 'headers' => All headers (LoD Routes) as key => value pairs 'normalized_mappings' => an array with source columns and where to find the results like array:2 [▼ @@ -185,6 +185,8 @@ public function processItem($data) { $newdata['data'][0][$lod_route_column_name] = json_encode($lod, JSON_PRETTY_PRINT|JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE) ?? ''; $newdata['data'][0]['original'] = (string) $data->info['label']; $newdata['data'][0]['csv_columns'] = json_encode((array)$data->info['csv_columns']) ?? ''; + // Adds a "Checked" column used to mark manually reconciliated elements. + $newdata['data'][0]['checked'] = FALSE; // Context data is simpler $context_data[$lod_route_column_name]['lod'] = $lod; $context_data[$lod_route_column_name]['columns'] = $data->info['csv_columns']; @@ -193,7 +195,6 @@ public function processItem($data) { $this->AmiUtilityService->csv_append($newdata, $file_lod,NULL, FALSE); // Sets the same data, per label (as key) into keystore so we can fetch it as Twig Context when needed. //@TODO also do similar if going for a "direct" in that case we replace the columns found in the original data - $this->AmiUtilityService->setKeyValuePerAmiSet($data->info['label'], $context_data, $data->info['set_id']); } } From a25920de7b7b6922ff2d3f964cb31018fdb412e4 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 23 Sep 2021 15:18:54 -0400 Subject: [PATCH 18/42] Not be super-smart. 
Just smart-ish with JSON attempts of decoding Allison says.. --- src/AmiUtilityService.php | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index d3d7965..7a179cb 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -1747,15 +1747,11 @@ public function expandJson(array $row) { $possiblejson = strtr($value, $quotes); $expanded = json_decode($possiblejson, TRUE); $json_error = json_last_error(); - // Last chance to be JSON. + // Last chance to be JSON. Allison says e.g EDTF may start with [] + // So do not nullify. Simply keep the mess. if ($json_error == JSON_ERROR_NONE) { $value = $expanded; } - else { - // But here we do unset the value - // Because it may really mess things out. - $value = NULL; - } } } } From 3758608251be9b7c5c9484194b127d87abe1b3e5 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 24 Sep 2021 17:20:31 -0400 Subject: [PATCH 19/42] Persist LoD Per Page and also Globally into a CSV --- src/AmiUtilityService.php | 16 ++- src/Form/amiSetEntityReconcileCleanUpForm.php | 136 ++++++++++++++++-- 2 files changed, 136 insertions(+), 16 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 7a179cb..7b093f8 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -292,6 +292,7 @@ public function file_get($uri, File $zip_file = NULL) { ) { // Now that we know its not remote, try with our registered schemas // means its either private/public/s3, etc + $scheme = $this->streamWrapperManager->getScheme($uri); if ($scheme) { if (!file_exists($uri)) { @@ -855,7 +856,7 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo /** * @param \Drupal\file\Entity\File $file * @param int $offset - * Where to start to read the file + * Where to start to read the file, starting from 0. 
* @param int $count * Number of results, 0 will fetch all * @param bool $always_include_header @@ -897,7 +898,6 @@ public function csv_read(File $file, int $offset = 0, int $count = 0, bool $alwa $data[] = $spl->fgetcsv(); if ($seek_to_offset) { $spl->seek($offset); - $offset = $offset + 1; // So we do not process this again. $seek_to_offset = FALSE; } @@ -1050,7 +1050,7 @@ public function csv_count(File $file) { SplFileObject::DROP_NEW_LINE ); $spl->seek(PHP_INT_MAX); - $key = $spl->key() + 1; + $key = $spl->key(); $spl = NULL; return $key; } @@ -1930,7 +1930,7 @@ public function processMetadataDisplay(\stdClass $data, array $additional_contex $context_lod = []; // get the mappings for this set if any // @TODO Refactor into a Method? - $lod_mappings = $this->getKeyValueValueMappingsPerAmiSet($set_id); + $lod_mappings = $this->getKeyValueMappingsPerAmiSet($set_id); if ($lod_mappings) { foreach($lod_mappings as $source_column => $destination) { if (isset($context['data'][$source_column])) { @@ -2036,7 +2036,13 @@ public function getKeyValuePerAmiSet($label, $set_id) { ->get($label, NULL); } - public function getKeyValueValueMappingsPerAmiSet($set_id) { + public function getAllKeyValuesPerAmiSet($set_id) { + $keyvalue_collection = 'ami_lod_temp_'. 
$set_id; + return $this->keyValue->get($keyvalue_collection) + ->getAll(); + } + + public function getKeyValueMappingsPerAmiSet($set_id) { $keyvalue_collection = 'ami_lod_temp_mappings'; return $this->keyValue->get($keyvalue_collection) ->get($set_id, NULL); diff --git a/src/Form/amiSetEntityReconcileCleanUpForm.php b/src/Form/amiSetEntityReconcileCleanUpForm.php index eca8dc8..fe9ca12 100644 --- a/src/Form/amiSetEntityReconcileCleanUpForm.php +++ b/src/Form/amiSetEntityReconcileCleanUpForm.php @@ -92,7 +92,7 @@ public static function create(ContainerInterface $container) { } public function getConfirmText() { - return $this->t('Save LoD'); + return $this->t('Save Current LoD Page'); } @@ -110,6 +110,22 @@ public function getCancelUrl() { return new Url('entity.ami_set_entity.collection'); } + /** + * {@inheritdoc} + */ + protected function actions(array $form, FormStateInterface $form_state) { + $actions = parent::actions($form, $form_state); + $actions['submit_csv'] = [ + '#type' => 'submit', + '#value' => t('Save all LoD back to CSV File'), + '#submit' => [ + [$this, 'submitFormPersistCSV'], + ], + ]; + return $actions; + + } + /** * {@inheritdoc} */ @@ -160,6 +176,8 @@ public function buildForm(array $form, FormStateInterface $form_state) { if ($file_lod) { $num_per_page = 10; $total_rows = $this->AmiUtilityService->csv_count($file_lod); + // Remove the header in the calculations. 
+ $total_rows = $total_rows - 1; $pager = \Drupal::service('pager.manager')->createPager($total_rows, $num_per_page); $page = $pager->getCurrentPage(); $offset = $num_per_page * $page; @@ -188,6 +206,11 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#type' => 'value', '#value' => $column_keys, ]; + $form['lod_cleanup']['total_rows'] = [ + '#type' => 'value', + '#value' => $total_rows, + ]; + foreach ($column_keys as $column) { if ($column !== 'original' && $column != 'csv_columns') { @@ -214,16 +237,28 @@ public function buildForm(array $form, FormStateInterface $form_state) { } } } - + $original_index = array_search('original', $column_keys); foreach ($file_data_all['data'] as $index => $row) { + // Find the label first + $label = $row[$original_index]; + $persisted_lod_reconciliation = $this->AmiUtilityService->getKeyValuePerAmiSet($label, $this->entity->id()); foreach($file_data_all['headers'] as $key => $header) { if ($header == 'original' || $header == 'csv_columns') { $form['lod_cleanup']['table-row'][($index - 1)][$header.'-'.($index-1)] = [ '#type' => 'markup', '#markup' => $row[$key], + $header.'-'.($index-1) => [ + '#tree' => true, + '#type' => 'hidden', + '#value' => $row[$key], + ] ]; } else { + // Given the incremental save option we have now + // We need to load check first if there is + // A Key Value equivalent of the row + $form['lod_cleanup']['table-row'][($index - 1)][$header.'-'.($index-1)] = [ '#multiple' => 5, '#multiple__header' => FALSE, @@ -234,7 +269,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#multiple__add_more' => FALSE, '#multiple__add_more_input' => FALSE, '#label__title' => 'Label', - '#default_value' => json_decode($row[$key], TRUE), + '#default_value' => $persisted_lod_reconciliation[$header]['lod'] ?? 
json_decode($row[$key], TRUE), ] + $elements[$header]; } } @@ -267,6 +302,82 @@ public function buildForm(array $form, FormStateInterface $form_state) { * {@inheritdoc} */ public function submitForm(array &$form, FormStateInterface $form_state) { + $csv_file_processed = $this->entity->get('processed_data')->getValue(); + if (isset($csv_file_processed[0]['target_id'])) /** @var \Drupal\file\Entity\File $file_lod */ { + $file_lod = $this->entityTypeManager->getStorage('file')->load( + $csv_file_processed[0]['target_id']); + } + if ($file_lod) { + /* $lod_settings will contain this + 'lod_cleanup' => [ + 'offset' => 0 + 'num_per_page' => 10 + 'column_keys' = > [ + "original", + "csv_columns", + "any_lod_vocabulary_selected", + ... + */ + $lod_settings = $form_state->getValue('lod_cleanup', []); + $column_keys = $lod_settings['column_keys'] ?? []; + $iterations = min($lod_settings['total_rows'] - $lod_settings['offset'], + $lod_settings['num_per_page']); + for ($id = 1; $id <= $iterations ?? 10; $id++) { + $label = $form_state->getValue('original-' . $id, NULL); + $csv_columns = $form_state->getValue('csv_columns-' . $id, NULL); + $csv_columns = json_decode($csv_columns, TRUE); + // If these do not exist, we can not process. + if ($label && $csv_columns) { + foreach ($column_keys as $index => $column) { + if ($column !== 'original' && $column != 'csv_columns') { + $lod = $form_state->getValue($column . '-' . $id, NULL); + $context_data[$column]['lod'] = $lod; + $context_data[$column]['columns'] = $csv_columns; + $this->AmiUtilityService->setKeyValuePerAmiSet($label, + $context_data, $this->entity->id()); + } + } + } + else { + $this->messenger()->addError( + $this->t( + 'So Sorry. We can not process row @row. 
Check for missing "column_keys", wrong JSON for column_keys and/or "original" values.', + [ + '@row' => $id, + ] + ) + ); + } + } + $this->messenger()->addMessage( + $this->t( + 'LoD Reconciled data for @label was updated.', + [ + '@label' => $this->entity->label(), + ] + ) + ); + } + else { + $this->messenger()->addError( + $this->t( + 'LoD Reconciled source CSV for @label was not found. Please attach one or run Reconciliation again to generate on from your Source data', + [ + '@label' => $this->entity->label(), + ] + ) + ); + } + + $form_state->setRebuild(TRUE); + } + + /** + * {@inheritdoc} + */ + public function submitFormPersistCSV(array &$form, FormStateInterface $form_state) { + // Call the parent one so we also persist the current page + $this->submitForm($form, $form_state); $csv_file_processed = $this->entity->get('processed_data')->getValue(); if (isset($csv_file_processed[0]['target_id'])) { /** @var \Drupal\file\Entity\File $file_lod */ @@ -275,20 +386,23 @@ public function submitForm(array &$form, FormStateInterface $form_state) { if ($file_lod) { $file_data_all = $this->AmiUtilityService->csv_read($file_lod); $column_keys = $file_data_all['headers'] ?? []; + $original_index = array_search('original', $column_keys); foreach ($file_data_all['data'] as $id => &$row) { + $label = $row[$original_index]; + $persisted_lod_reconciliation = $this->AmiUtilityService->getKeyValuePerAmiSet($label, $this->entity->id()); foreach ($file_data_all['headers'] as $index => $column) { if ($column !== 'original' && $column != 'csv_columns') { - $lod = $form_state->getValue($column . '-' . ((int)$id), NULL); - $row[$index] = json_encode($lod, - JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE) ?? 
''; - $context_data[$column]['lod'] = $lod; - $context_data[$column]['columns'] = json_decode($row[1], TRUE); - $this->AmiUtilityService->setKeyValuePerAmiSet($row[0], $context_data, $this->entity->id()); + $lod = $persisted_lod_reconciliation[$column]['lod'] ?? NULL; + if ($lod) { + $row[$index] = json_encode($lod, + JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE) ?? ''; + + } } } } - //$file_lod_id = $this->AmiUtilityService->csv_touch($file_lod->getFilename()); - //$success = $this->AmiUtilityService->csv_append($file_data_all, $file_lod,NULL, TRUE); + $file_lod_id = $this->AmiUtilityService->csv_touch($file_lod->getFilename()); + $success = $this->AmiUtilityService->csv_append($file_data_all, $file_lod,NULL, TRUE); if (!$success) { $this->messenger()->addError( $this->t( From 583e6bcfad0b535d71747a8c020dc9be21776e6d Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 24 Sep 2021 17:21:00 -0400 Subject: [PATCH 20/42] Small cleanup and better checks on passed for IngestADOQueueWorker --- .../QueueWorker/IngestADOQueueWorker.php | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index b5557a4..053c267 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -162,7 +162,6 @@ public function processItem($data) { // Pushing to the end of the queue. 
$data->info['attempt']++; if ($data->info['attempt'] < 3) { - error_log('Re-enqueueing'); \Drupal::queue('ami_ingest_ado') ->createItem($data); return; @@ -236,13 +235,13 @@ public function processItem($data) { // deal with possible overrides from either Direct ingest of // A Smart twig template that adds extra mappings - $processed_metadata = json_decode($processed_metadata, true); + $processed_metadata = json_decode($processed_metadata, TRUE); - $custom_file_mapping = $processed_metadata['entity:file'] ?? []; - $custom_node_mapping = $processed_metadata['entity:node'] ?? []; + $custom_file_mapping = isset($processed_metadata['entity:file']) && is_array($processed_metadata['entity:file']) ? $processed_metadata['entity:file'] : []; + $custom_node_mapping = isset($processed_metadata['entity:node']) && is_array($processed_metadata['entity:node']) ? $processed_metadata['entity:node'] : []; - $entity_mapping_structure['entity:file'] = array_unique(array_merge($custom_file_mapping,$file_columns)); - $entity_mapping_structure['entity:node'] = array_unique(array_merge($custom_node_mapping,$ado_columns)); + $entity_mapping_structure['entity:file'] = array_unique(array_merge($custom_file_mapping, $file_columns)); + $entity_mapping_structure['entity:node'] = array_unique(array_merge($custom_node_mapping, $ado_columns)); // Unset so we do not lose our merge after '+' both arrays unset($processed_metadata['entity:file']); unset($processed_metadata['entity:node']); @@ -250,6 +249,8 @@ public function processItem($data) { $cleanvalues['ap:entitymapping'] = $entity_mapping_structure; $processed_metadata = $processed_metadata + $cleanvalues; // Assign parents as NODE Ids. + // @TODO if we decide to allow multiple parents this is a place that + // Needs change. 
foreach ($parent_nodes as $parent_property => $node_ids) { $processed_metadata[$parent_property] = $node_ids; } @@ -395,8 +396,10 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { // Fall back to not published in case no status was passed. $status = $data->info['status'][$bundle] ?? 0; // default Sortfile which will respect the ingest order. If there was already one set, preserve. - $processed_metadata['ap:tasks']['ap:sortfiles'] = $processed_metadata['ap:tasks']['ap:sortfiles'] ?? 'index'; - // JSON_ENCODE AGAIN + $sort_files = isset($processed_metadata['ap:tasks']) && isset($processed_metadata['ap:tasks']['ap:sortfiles']) ? $processed_metadata['ap:tasks']['ap:sortfiles'] : 'index'; + + $processed_metadata['ap:tasks']['ap:sortfiles'] = $sort_files; + // JSON_ENCODE AGAIN! $jsonstring = json_encode($processed_metadata, JSON_PRETTY_PRINT, 50); if ($jsonstring) { @@ -465,8 +468,6 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { $processed_metadata[$as_file_type] = $original_value[$as_file_type]; } } - // Finally set the original ap task or index as default. - $processed_metadata['ap:tasks']['ap:sortfiles'] = $processed_metadata['ap:tasks']['ap:sortfiles'] ?? 'index'; $this->patchJson($original_value, $processed_metadata); $itemfield->setMainValueFromArray($processed_metadata); break; From c59d1709935ead4b42e4a08e7f0fb066f4cc11fc Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sat, 25 Sep 2021 16:41:06 -0400 Subject: [PATCH 21/42] Gosh. Restored Twig Template processing After dealing with Direct Ingest i broke Twig templates.... @patdunlavey ... you will see this fail... should back again to its normal behavior. 
--- .../QueueWorker/IngestADOQueueWorker.php | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 053c267..b29ee2f 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -153,7 +153,6 @@ public function processItem($data) { $parent_uuids = (array) $parent_uuid; $existing = $this->entityTypeManager->getStorage('node')->loadByProperties(['uuid' => $parent_uuids]); if (count($existing) != count($parent_uuids)) { - $this->messenger->addWarning($this->t('Sorry, we can not process ADO with @uuid from Set @setid yet, there are missing parents with UUID(s) @parent_uuids. We will retry.',[ '@uuid' => $data->info['row']['uuid'], '@setid' => $data->info['set_id'], @@ -184,11 +183,13 @@ public function processItem($data) { } } + $processed_metadata = NULL; + $method = $data->mapping->globalmapping ?? "direct"; if ($method == 'custom') { $method = $data->mapping->custommapping_settings->{$data->info['row']['type']}->metadata ?? "direct"; } - if ($method == "metadata") { + if ($method == 'template') { $processed_metadata = $this->AmiUtilityService->processMetadataDisplay($data); if (!$processed_metadata) { $this->messenger->addWarning($this->t('Sorry, we can not cast ADO with @uuid into proper Metadata. Check the Metadata Display Template used, your permissions and/or your data ROW in your CSV for set @setid.',[ @@ -211,6 +212,22 @@ public function processItem($data) { } } + // If at this stage $processed_metadata is empty or Null there was a wrong + // Manual added wrong mapping or any other User input induced error + // We do not process further + // Maybe someone wants to ingest FILES only without any Metadata? + // Not a good use case so let's stop that non sense here. 
+ + if (empty($processed_metadata)) { + $message = $this->t('Sorry, ADO with @uuid is empty or has wrong data/metadata. Check your data ROW in your CSV for set @setid or your Set Configuration for manually entered JSON that may break your setup.',[ + '@uuid' => $data->info['row']['uuid'], + '@setid' => $data->info['set_id'] + ]); + $this->messenger->addWarning($message); + $this->loggerFactory->get('ami')->error($message); + return; + } + $cleanvalues = []; // Now process Files and Nodes $ado_object = $data->adomapping->parents ?? NULL; @@ -234,7 +251,7 @@ public function processItem($data) { // deal with possible overrides from either Direct ingest of // A Smart twig template that adds extra mappings - + // This decode will always work because we already decoded and encoded again. $processed_metadata = json_decode($processed_metadata, TRUE); $custom_file_mapping = isset($processed_metadata['entity:file']) && is_array($processed_metadata['entity:file']) ? $processed_metadata['entity:file'] : []; From a1619396fc599ea24d0b45f052c4deead2e2627b Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 27 Sep 2021 14:43:08 -0400 Subject: [PATCH 22/42] Allow (enforce) AMI Set Label to be provided during Setup --- src/Form/AmiMultiStepIngest.php | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 3698c5e..37274d3 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -392,7 +392,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { $fileid = $this->store->get('zip'); $form['zip'] = [ '#type' => 'managed_file', - '#title' => $this->t('Provide an ZIP file'), + '#title' => $this->t('Provide an ZIP file.'), '#required' => false, '#multiple' => false, '#default_value' => isset($fileid) ? 
[$fileid] : NULL, @@ -402,6 +402,15 @@ public function buildForm(array $form, FormStateInterface $form_state) { 'file_validate_extensions' => ['zip'], ], ]; + + $form['ami_set_label'] = [ + '#type' => 'textfield', + '#title' => $this->t('Please Name your AMI Set.'), + '#required' => true, + '#size' => 64, + '#maxlength' => 255, + '#default_value' => 'AMI Set of ' . $this->currentUser()->getDisplayName() + ]; } return $form; } @@ -428,9 +437,6 @@ public function submitForm(array &$form, FormStateInterface $form_state) { /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ $plugin_instance = $this->store->get('plugininstance'); if ($plugin_instance) { - // We may want to run a batch here? - // @TODO investigate how to run a batch and end in the same form different step? - // Idea is batch is only needed if there is a certain max number, e.g 5000 rows? $data = $plugin_instance->getInfo($this->store->get('pluginconfig'), $form_state,0,-1); // Check if the Plugin is ready processing or needs more data $ready = $form_state->getValue('pluginconfig')['ready'] ?? TRUE; @@ -498,7 +504,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { } else { $this->store->set('zip', NULL); } - + $ami_set_label = $form_state->getValue('ami_set_label', NULL); + $ami_set_label = $ami_set_label ? 
trim($ami_set_label) : $ami_set_label; $amisetdata = new \stdClass(); $amisetdata->plugin = $this->store->get('plugin'); @@ -506,7 +513,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $amisetdata->mapping = $this->store->get('mapping'); $amisetdata->adomapping = $this->store->get('adomapping'); $amisetdata->zip = $this->store->get('zip'); - + $amisetdata->name = $ami_set_label; /* @var $plugin_instance \Drupal\ami\Plugin\ImporterAdapterInterface| NULL */ $plugin_instance = $this->store->get('plugininstance'); if ($plugin_instance) { @@ -531,6 +538,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $amisetdata->csv = $fileid; if ($plugin_instance->getPluginDefinition()['batch']) { $data = $this->store->get('data'); + // Set A name if any given via the form $config = $this->store->get('pluginconfig'); $amisetdata->column_keys = []; $amisetdata->total_rows = NULL; // because we do not know yet From bd71514c51ed1ab4d372be752bd0d418779d2edd Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 27 Sep 2021 14:46:25 -0400 Subject: [PATCH 23/42] Complete refactor of co-dependent Symfony Services It was getting weird to have such cross dependencies. Now both deal with their own concerns and there is complementation. This changes a lot Also: - Json Decoding Cells now deals with Smart Quote exceptions and tries to be as smart as possible in case of errors. But if the key is one of our core ones ap: or as: then in case of failure we reset to NULL value. This allows us to avoid breaking code in case one our controlled values via a Twig template OR a Direct ingest ends with wrong data. @TODO. Implement a JSON-SCHEMA level validation for our own internal control vocabulary (WIP!) 
--- ami.services.yml | 4 +- src/AmiLoDService.php | 117 ++++++++++++----- src/AmiUtilityService.php | 120 ++++++++---------- src/Form/amiSetEntityDeleteForm.php | 50 +++++++- src/Form/amiSetEntityReconcileCleanUpForm.php | 39 +++--- src/Form/amiSetEntityReconcileForm.php | 61 ++------- src/Plugin/QueueWorker/LoDQueueWorker.php | 4 +- 7 files changed, 224 insertions(+), 171 deletions(-) diff --git a/ami.services.yml b/ami.services.yml index 2b7503c..a77e291 100644 --- a/ami.services.yml +++ b/ami.services.yml @@ -5,11 +5,11 @@ services: arguments: ['@entity_type.manager'] ami.utility: class: Drupal\ami\AmiUtilityService - arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@entity_field.manager', '@entity_type.bundle.info', '@http_client', '@keyvalue'] + arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@entity_field.manager', '@entity_type.bundle.info', '@http_client', '@ami.lod', '@keyvalue'] tags: - { name: backend_overridable } ami.lod: class: Drupal\ami\AmiLoDService - arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@http_client', '@ami.utility', '@keyvalue'] + arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', 
'@http_client', '@keyvalue'] tags: - { name: backend_overridable } diff --git a/src/AmiLoDService.php b/src/AmiLoDService.php index 95fae3e..aa8a055 100644 --- a/src/AmiLoDService.php +++ b/src/AmiLoDService.php @@ -12,9 +12,7 @@ use Drupal\Component\Transliteration\TransliterationInterface; use Drupal\Core\Archiver\ArchiverManager; use Drupal\Core\Config\ConfigFactoryInterface; -use Drupal\Core\Entity\EntityTypeBundleInfoInterface; use Drupal\Core\Entity\EntityTypeManagerInterface; -use \Drupal\Core\Entity\EntityFieldManagerInterface; use Drupal\Core\Extension\ModuleHandlerInterface; use Drupal\Core\File\FileSystemInterface; use Drupal\Core\KeyValueStore\KeyValueFactoryInterface; @@ -25,7 +23,6 @@ use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface; use Drupal\Core\StringTranslation\StringTranslationTrait; use Drupal\Core\Url; -use Drupal\file\Entity\File; use Drupal\file\FileUsage\FileUsageInterface; use GuzzleHttp\ClientInterface; use Drupal\strawberryfield\StrawberryfieldUtilityService; @@ -136,11 +133,6 @@ class AmiLoDService { */ protected $httpClient; - /** - * @var \Drupal\ami\AmiUtilityService - */ - protected $AmiUtilityService; - /** * Key value service. 
* @@ -165,7 +157,6 @@ class AmiLoDService { * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory * @param \Drupal\strawberryfield\StrawberryfieldUtilityService $strawberryfield_utility_service * @param \GuzzleHttp\ClientInterface $http_client - * @param \Drupal\ami\AmiUtilityService $ami_utility * @param \Drupal\Core\KeyValueStore\KeyValueFactoryInterface $key_value */ public function __construct( @@ -182,7 +173,6 @@ public function __construct( LoggerChannelFactoryInterface $logger_factory, StrawberryfieldUtilityService $strawberryfield_utility_service, ClientInterface $http_client, - AmiUtilityService $ami_utility, KeyValueFactoryInterface $key_value ) { $this->fileSystem = $file_system; @@ -204,14 +194,92 @@ public function __construct( $this->strawberryfieldUtility = $strawberryfield_utility_service; $this->currentUser = $current_user; $this->httpClient = $http_client; - $this->AmiUtilityService = $ami_utility; $this->keyValue = $key_value; + } + /** + * Deletes All LoD KeyValues for a given AMI Set ID. + * + * @param $set_id + */ + public function cleanKeyValuesPerAmiSet($set_id) { + $keyvalue_collection = 'ami_lod_temp_' . $set_id; + $this->keyValue->get($keyvalue_collection)->deleteAll(); + $keyvalue_collection_mappings = 'ami_lod_temp_mappings'; + $this->keyValue->get($keyvalue_collection_mappings)->delete($set_id); } + /** + * Inserts a new LoD KeyValue for a given Label/AMI Set ID pair. + * + * @param $label + * @param $data + * @param $set_id + */ + public function setKeyValuePerAmiSet($label, $data, $set_id) { + // Too much trouble dealing with encodings/UTF-8 and MYSQL + // And drupal here. Simpler if the label is md5-ed + $label = md5($label); + $keyvalue_collection = 'ami_lod_temp_'. 
$set_id; + $this->keyValue->get($keyvalue_collection) + ->set($label, $data); + } - public function invokeLoDRoute(string $domain, string $query, string $auth_type, $vocab = 'subjects', $rdftype = 'thing', $lang = 'en' , $count = 5):array { + /** + * Sets the JSON Key Mappings (original) for a given AMI Set ID. + * + * @param $data + * @param $set_id + */ + public function setKeyValueMappingsPerAmiSet($data, $set_id) { + $keyvalue_collection = 'ami_lod_temp_mappings'; + $this->keyValue->get($keyvalue_collection) + ->set($set_id, $data); + } + + /** + * Gets the LoD KeyValue for a given Label/AMI Set ID pair. + * + * @param $label + * @param $set_id + * + * @return mixed + */ + public function getKeyValuePerAmiSet($label, $set_id) { + $label = md5($label); + $keyvalue_collection = 'ami_lod_temp_'. $set_id; + return $this->keyValue->get($keyvalue_collection) + ->get($label, NULL); + } + + /** + * Gets all LoD KeyValue for a given AMI Set ID. + * + * @param $set_id + * + * @return array + * Each Entry is keyed by the MD5 of the label. + */ + public function getAllKeyValuesPerAmiSet($set_id) { + $keyvalue_collection = 'ami_lod_temp_'. $set_id; + return $this->keyValue->get($keyvalue_collection) + ->getAll(); + } + + /** + * Gets the JSON Key Mappings (original) for a given AMI Set ID. + * + * @param $set_id + * + * @return mixed + */ + public function getKeyValueMappingsPerAmiSet($set_id) { + $keyvalue_collection = 'ami_lod_temp_mappings'; + return $this->keyValue->get($keyvalue_collection) + ->get($set_id, NULL); + } + public function invokeLoDRoute(string $domain, string $query, string $auth_type, $vocab = 'subjects', $rdftype = 'thing', $lang = 'en' , $count = 5):array { $current_laguage = $lang ?? 
\Drupal::languageManager() ->getCurrentLanguage() ->getId(); @@ -265,29 +333,6 @@ public function invokeLoDRoute(string $domain, string $query, string $auth_type, return $response_cleaned; } - /** - * From a given CSV files returns different values for a list of columns - * - * @param \Drupal\file\Entity\File $file - * @param array $columns - * - * @return array - * An Associative Array keyed by Column name - */ - public function provideLoDColumnValues(File $file, array $columns):array { - $data = $this->AmiUtilityService->csv_read($file); - $column_keys = $data['headers'] ?? []; - $alldifferent = []; - foreach ($columns as $column) { - $column_index = array_search($column, $column_keys); - if ($column_index !== FALSE) { - $alldifferent[$column] = $this->AmiUtilityService->getDifferentValuesfromColumnSplit($data, - $column_index); - } - } - return $alldifferent; - } - /** * Checks if a string is valid JSON * @@ -309,4 +354,6 @@ public function isJson($string) { public function isNotJson($string) { return !$this->isJson($string); } + + } diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 7b093f8..7df3c58 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -158,6 +158,11 @@ class AmiUtilityService { */ protected $keyValue; + /** + * @var \Drupal\ami\AmiLoDService + */ + protected $AmiLoDService; + /** * StrawberryfieldFilePersisterService constructor. 
* @@ -194,6 +199,7 @@ public function __construct( EntityFieldManagerInterface $entity_field_manager, EntityTypeBundleInfoInterface $entity_type_bundle_info, ClientInterface $http_client, + AmiLoDService $ami_lod, KeyValueFactoryInterface $key_value ) { $this->fileSystem = $file_system; @@ -217,6 +223,7 @@ public function __construct( $this->entityTypeBundleInfo = $entity_type_bundle_info; $this->currentUser = $current_user; $this->httpClient = $http_client; + $this->AmiLoDService = $ami_lod; $this->keyValue = $key_value; } @@ -238,17 +245,6 @@ public function isEntityId($val) { )); } - /** - * Array value callback. True if value is not an array. - * - * @param mixed $val - * - * @return bool - */ - private function isNotArray($val) { - return !is_array($val); - } - /** * Array value callback. True if $key starts with Entity * @@ -418,7 +414,6 @@ public function retrieve_remote_file( $localfile = FALSE; $md5uri = md5($uri); $parsed_url = parse_url($uri); - $mime = 'application/octet-stream'; if (!isset($destination)) { $path = file_build_uri($this->fileSystem->basename($parsed_url['path'])); } @@ -530,6 +525,7 @@ public function retrieve_remote_file( * - If it fails, FALSE. */ public function retrieve_fromzip_file($uri, $destination = NULL, $replace = FileSystemInterface::EXISTS_RENAME, File $zip_file) { + $zip_realpath = NULL; $md5uri = md5($uri); $parsed_url = parse_url($uri); if (!isset($destination)) { @@ -572,7 +568,8 @@ public function retrieve_fromzip_file($uri, $destination = NULL, $replace = File // Opening the ZIP file failed. return FALSE; } - } catch (\Exception $exception) { + } + catch (\Exception $exception) { $this->messenger()->addError( $this->t( 'Unable to extract file @uri from ZIP @zip to local @path. 
Verify ZIP exists, its readable and destination is writable.', @@ -583,8 +580,8 @@ public function retrieve_fromzip_file($uri, $destination = NULL, $replace = File ] ) ); - return FALSE; } + return FALSE; } /** @@ -720,7 +717,7 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { return NULL; } $realpath = $this->fileSystem->realpath($file->getFileUri()); - $fh = new \SplFileObject($realpath, 'w'); + $fh = new SplFileObject($realpath, 'w'); if (!$fh) { $this->messenger()->addError( $this->t('Error reading back the just written file!.') @@ -797,7 +794,7 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { * @return int|string|null * @throws \Drupal\Core\Entity\EntityStorageException */ - public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header) { + public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header = TRUE) { $realpath = $this->fileSystem->realpath($file->getFileUri()); $fh = new \SplFileObject($realpath, 'a'); @@ -926,6 +923,7 @@ public function csv_read(File $file, int $offset = 0, int $count = 0, bool $alwa // Returns Row Headers. $maxRow = 1; // at least until here. + $rowindex = 0; foreach ($data as $rowindex => $row) { if ($rowindex == 0) { // Skip header @@ -940,7 +938,7 @@ public function csv_read(File $file, int $offset = 0, int $count = 0, bool $alwa break; } // This was done already by the Import Plugin but since users - // Could eventually reupload the spreadsheet better so + // Could eventually re upload the spreadsheet better so $row = $this->arrayEquallySeize( $headercount, $row @@ -948,16 +946,15 @@ public function csv_read(File $file, int $offset = 0, int $count = 0, bool $alwa // Offsetting all rows by 1. That way we do not need to remap numeric parents $table[$rowindex + 1] = $row; } - $maxRow = $rowindex; + $maxRow = $maxRow ?? 
$rowindex; } - $tabdata = [ + return [ 'headers' => $rowHeaders_utf8, 'data' => $table, 'totalrows' => $maxRow, ]; - return $tabdata; } /** @@ -1135,6 +1132,31 @@ public function getDifferentValuesfromColumn(array $data, int $key): array { return $unique; } + /** + * From a given CSV files returns different values for a list of columns + * + * @param \Drupal\file\Entity\File $file + * @param array $columns + * + * @return array + * An Associative Array keyed by Column name + */ + public function provideDifferentColumnValuesFromCSV(File $file, array $columns):array { + $data = $this->csv_read($file); + $column_keys = $data['headers'] ?? []; + $alldifferent = []; + foreach ($columns as $column) { + $column_index = array_search($column, $column_keys); + if ($column_index !== FALSE) { + $alldifferent[$column] = $this->getDifferentValuesfromColumnSplit($data, + $column_index); + } + } + return $alldifferent; + } + + + /** * Returns a list Metadata Displays. * @@ -1168,8 +1190,6 @@ public function getMetadataDisplays() { * Returns WebformOptions marked as Archipelago * * @return array - * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException - * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException */ public function getWebformOptions():array { try { @@ -1257,8 +1277,6 @@ public function getBundlesAndFields() { * @param \Drupal\Core\Session\AccountInterface|null $account * * @return \Drupal\Core\Access\AccessResultInterface|bool - * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException - * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException */ public function checkBundleAccess(string $bundle, AccountInterface $account = NULL) { try { @@ -1467,7 +1485,7 @@ public function preprocessAmiSet(File $file, \stdClass $data, array &$invalid = // SO WE ARE OFFSET by 1, substract 1 $parent_numeric = intval(trim($parent_ado)); $parent_hash[$parent_key][$parent_numeric][$index] = $index; - $parentchilds = []; + 
// Lets check if the index actually exists before going crazy. // If parent is empty that is OK here. WE are Ok with no membership! @@ -1481,7 +1499,6 @@ public function preprocessAmiSet(File $file, \stdClass $data, array &$invalid = // Only traverse if we don't have this index or the parent one // in the invalid register. $parentchilds = []; - $i = 0; while (!$rootfound) { $parentup = $file_data_all['data'][$parent_numeric][$parent_to_index[$parent_key]]; if ($this->isRootParent($parentup)) { @@ -1752,6 +1769,14 @@ public function expandJson(array $row) { if ($json_error == JSON_ERROR_NONE) { $value = $expanded; } + elseif (substr( $colum, 0, 3 ) === "as:" || + substr( $colum, 0, 3 ) === "ap:" + ) { + // We can not allow wrong JSON to permeate into controlled + // by us properties + // @TODO apply a JSON Schema validator at the end. + $value = NULL; + } } } } @@ -1930,7 +1955,7 @@ public function processMetadataDisplay(\stdClass $data, array $additional_contex $context_lod = []; // get the mappings for this set if any // @TODO Refactor into a Method? 
- $lod_mappings = $this->getKeyValueMappingsPerAmiSet($set_id); + $lod_mappings = $this->AmiLoDService->getKeyValueMappingsPerAmiSet($set_id); if ($lod_mappings) { foreach($lod_mappings as $source_column => $destination) { if (isset($context['data'][$source_column])) { @@ -1940,7 +1965,7 @@ public function processMetadataDisplay(\stdClass $data, array $additional_contex $labels = $this->getDifferentValuesfromColumnSplit($data_to_clean, 0); foreach($labels as $label) { - $lod_for_label = $this->getKeyValuePerAmiSet($label, $set_id); + $lod_for_label = $this->AmiLoDService->getKeyValuePerAmiSet($label, $set_id); if (is_array($lod_for_label) && count($lod_for_label) > 0) { foreach ($lod_for_label as $approach => $lod) { if (isset($lod['lod'])) { @@ -2015,43 +2040,6 @@ function () use ($context, $metadatadisplay_entity) { } return $jsonstring; } - public function setKeyValuePerAmiSet($label, $data, $set_id) { - // Too much trouble dealing with encodings/UTF-8 and MYSQL - // And drupal here. Simpler if the label is md5-ed - $label = md5($label); - $keyvalue_collection = 'ami_lod_temp_'. $set_id; - $this->keyValue->get($keyvalue_collection) - ->set($label, $data); - } - public function setKeyValueMappingsPerAmiSet($data, $set_id) { - $keyvalue_collection = 'ami_lod_temp_mappings'; - $this->keyValue->get($keyvalue_collection) - ->set($set_id, $data); - } - - public function getKeyValuePerAmiSet($label, $set_id) { - $label = md5($label); - $keyvalue_collection = 'ami_lod_temp_'. $set_id; - return $this->keyValue->get($keyvalue_collection) - ->get($label, NULL); - } - - public function getAllKeyValuesPerAmiSet($set_id) { - $keyvalue_collection = 'ami_lod_temp_'. 
$set_id; - return $this->keyValue->get($keyvalue_collection) - ->getAll(); - } - - public function getKeyValueMappingsPerAmiSet($set_id) { - $keyvalue_collection = 'ami_lod_temp_mappings'; - return $this->keyValue->get($keyvalue_collection) - ->get($set_id, NULL); - } - - public function cleanKeyValuesPerAmiSet($set_id) { - $keyvalue_collection = 'ami_lod_temp_'. $set_id; - $this->keyValue->get($keyvalue_collection)->deleteAll(); - } /** * For a given Numeric Column index, get different/non json, split values @@ -2059,6 +2047,8 @@ public function cleanKeyValuesPerAmiSet($set_id) { * @param array $data * @param int $key * + * @param array $delimiters + * * @return array */ public function getDifferentValuesfromColumnSplit(array $data, int $key, array $delimiters = ['|@|', ';'] ): array { diff --git a/src/Form/amiSetEntityDeleteForm.php b/src/Form/amiSetEntityDeleteForm.php index 8ccef07..3a3ab46 100644 --- a/src/Form/amiSetEntityDeleteForm.php +++ b/src/Form/amiSetEntityDeleteForm.php @@ -1,9 +1,15 @@ AmiLoDService = $ami_lod; + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container) { + return new static( + $container->get('entity.repository'), + $container->get('entity_type.bundle.info'), + $container->get('datetime.time'), + $container->get('ami.lod'), + ); + } + + public function getQuestion() { return $this->t('Are you sure you want to delete %name?', ['%name' => $this->entity->label()]); } @@ -28,8 +73,9 @@ public function getCancelUrl() { * {@inheritdoc} */ public function submitForm(array &$form, FormStateInterface $form_state) { - // @TODO We should here make sure we get rid of any files and that - // But if the queue has elements from this Set we should not be able to delete? + // @TODO We should here make sure we get rid of any files? Not good. + // If the queue has elements from this Set we should not be able to delete. 
+ $this->AmiLoDService->cleanKeyValuesPerAmiSet($this->entity->id()); $this->entity->delete(); $this->messenger()->addMessage( diff --git a/src/Form/amiSetEntityReconcileCleanUpForm.php b/src/Form/amiSetEntityReconcileCleanUpForm.php index fe9ca12..7c1c953 100644 --- a/src/Form/amiSetEntityReconcileCleanUpForm.php +++ b/src/Form/amiSetEntityReconcileCleanUpForm.php @@ -5,14 +5,10 @@ use Drupal\ami\AmiLoDService; use Drupal\ami\AmiUtilityService; use Drupal\Component\Datetime\TimeInterface; -use Drupal\Core\Ajax\AjaxResponse; -use Drupal\Core\Ajax\OpenOffCanvasDialogCommand; use Drupal\Core\Entity\ContentEntityConfirmFormBase; -use Drupal\Core\Entity\ContentEntityForm; use Drupal\Core\Entity\EntityRepositoryInterface; use Drupal\Core\Entity\EntityTypeBundleInfoInterface; use Drupal\Core\Form\FormStateInterface; -use Drupal\Core\Messenger\MessengerInterface; use Drupal\Core\Url; use Symfony\Component\DependencyInjection\ContainerInterface; @@ -86,8 +82,7 @@ public static function create(ContainerInterface $container) { $container->get('entity_type.bundle.info'), $container->get('datetime.time'), $container->get('ami.utility'), - $container->get('ami.lod'), - $container->get('strawberryfield.utility') + $container->get('ami.lod') ); } @@ -171,7 +166,6 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#type' => 'fieldset', '#title' => $this->t('LoD reconciled Clean Up'), ]; - $access = TRUE; if ($file_lod) { $num_per_page = 10; @@ -213,7 +207,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { foreach ($column_keys as $column) { - if ($column !== 'original' && $column != 'csv_columns') { + if ($column !== 'original' && $column != 'csv_columns' && $column !='checked') { $argument_string = static::LOD_COLUMN_TO_ARGUMENTS[$column] ?? 
NULL; if ($argument_string) { $arguments = explode(';', $argument_string); @@ -241,7 +235,7 @@ public function buildForm(array $form, FormStateInterface $form_state) { foreach ($file_data_all['data'] as $index => $row) { // Find the label first $label = $row[$original_index]; - $persisted_lod_reconciliation = $this->AmiUtilityService->getKeyValuePerAmiSet($label, $this->entity->id()); + $persisted_lod_reconciliation = $this->AmiLoDService->getKeyValuePerAmiSet($label, $this->entity->id()); foreach($file_data_all['headers'] as $key => $header) { if ($header == 'original' || $header == 'csv_columns') { $form['lod_cleanup']['table-row'][($index - 1)][$header.'-'.($index-1)] = [ @@ -254,6 +248,14 @@ public function buildForm(array $form, FormStateInterface $form_state) { ] ]; } + elseif ($header == 'checked') { + $checked = $persisted_lod_reconciliation[$header] ?? $row[$key]; + $checked = (bool) $checked; + $form['lod_cleanup']['table-row'][($index - 1)][$header.'-'.($index-1)] = [ + '#type' => 'checkbox', + '#default_value' => $checked + ]; + } else { // Given the incremental save option we have now // We need to load check first if there is @@ -325,15 +327,17 @@ public function submitForm(array &$form, FormStateInterface $form_state) { for ($id = 1; $id <= $iterations ?? 10; $id++) { $label = $form_state->getValue('original-' . $id, NULL); $csv_columns = $form_state->getValue('csv_columns-' . $id, NULL); + $checked = $form_state->getValue('checked-' . $id, FALSE); $csv_columns = json_decode($csv_columns, TRUE); // If these do not exist, we can not process. if ($label && $csv_columns) { foreach ($column_keys as $index => $column) { - if ($column !== 'original' && $column != 'csv_columns') { + if ($column !== 'original' && $column !== 'csv_columns' && $column !== 'checked') { $lod = $form_state->getValue($column . '-' . 
$id, NULL); $context_data[$column]['lod'] = $lod; $context_data[$column]['columns'] = $csv_columns; - $this->AmiUtilityService->setKeyValuePerAmiSet($label, + $context_data['checked'] = $checked; + $this->AmiLoDService->setKeyValuePerAmiSet($label, $context_data, $this->entity->id()); } } @@ -389,20 +393,23 @@ public function submitFormPersistCSV(array &$form, FormStateInterface $form_stat $original_index = array_search('original', $column_keys); foreach ($file_data_all['data'] as $id => &$row) { $label = $row[$original_index]; - $persisted_lod_reconciliation = $this->AmiUtilityService->getKeyValuePerAmiSet($label, $this->entity->id()); + $persisted_lod_reconciliation = $this->AmiLoDService->getKeyValuePerAmiSet($label, $this->entity->id()); + $checked = $persisted_lod_reconciliation['checked'] ?? NULL; foreach ($file_data_all['headers'] as $index => $column) { - if ($column !== 'original' && $column != 'csv_columns') { + if ($column !== 'original' && $column !== 'csv_columns' && $column!== 'checked') { $lod = $persisted_lod_reconciliation[$column]['lod'] ?? NULL; if ($lod) { $row[$index] = json_encode($lod, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE) ?? 
''; - } } + elseif ($column === 'checked') { + $row[$index] = $checked; + } } } - $file_lod_id = $this->AmiUtilityService->csv_touch($file_lod->getFilename()); - $success = $this->AmiUtilityService->csv_append($file_data_all, $file_lod,NULL, TRUE); + $this->AmiUtilityService->csv_touch($file_lod->getFilename()); + $success = $this->AmiUtilityService->csv_append($file_data_all, $file_lod, NULL, TRUE); if (!$success) { $this->messenger()->addError( $this->t( diff --git a/src/Form/amiSetEntityReconcileForm.php b/src/Form/amiSetEntityReconcileForm.php index b7c32ba..50096bb 100644 --- a/src/Form/amiSetEntityReconcileForm.php +++ b/src/Form/amiSetEntityReconcileForm.php @@ -286,8 +286,11 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $file_lod = $this->entityTypeManager->getStorage('file')->load( $csv_file_processed[0]['target_id']); // Reset all values - $file_lod_id = $this->AmiUtilityService->csv_touch($file_lod->getFilename()); - } else { + if ($file_lod) { + $this->AmiUtilityService->csv_touch($file_lod->getFilename()); + } + } + else { $file_lod_id = $this->AmiUtilityService->csv_touch(); $file_lod = $file_lod_id ? $this->entityTypeManager->getStorage('file')->load( $file_lod_id) : NULL; @@ -343,7 +346,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { '#caption' => t('Unique processed values for this column'), ]; $columns = array_keys($mappings) ?? []; - $values_per_column = $this->AmiLoDService->provideLoDColumnValues($file, + $values_per_column = $this->AmiUtilityService->provideDifferentColumnValuesFromCSV($file, $columns); $inverted = []; $column_map_inverted = []; @@ -367,9 +370,11 @@ public function submitForm(array &$form, FormStateInterface $form_state) { // This will be used to fetch the right values when passing to the twig template // Could be read from the config but this is faster during process. 
- $this->AmiUtilityService->setKeyValueMappingsPerAmiSet($normalized_mapping, $this->entity->id()); + // Clears old values before processing new ones. - $this->AmiUtilityService->cleanKeyValuesPerAmiSet($this->entity->id()); + $this->AmiLoDService->cleanKeyValuesPerAmiSet($this->entity->id()); + $this->AmiLoDService->setKeyValueMappingsPerAmiSet($normalized_mapping, $this->entity->id()); + ksort($inverted,SORT_NATURAL); foreach($headers as &$header) { @@ -446,7 +451,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $count = count(array_filter($added)); if ($count) { $form_state->setRebuild(); - $this->submitBatch($form_state, $queue_name, $count); + $this->submitBatch($form_state, $queue_name); } } } @@ -514,14 +519,13 @@ public function ajaxColumPreview($form, FormStateInterface $form_state) { ); if ($file) { $file_data_all = $this->AmiUtilityService->csv_read($file); - $column_keys = $file_data_all['headers'] ?? []; $output = []; $output['table'] = [ '#type' => 'table', '#caption' => t('Unique processed values for this column'), ]; $column_preview = (array) $form_state->getValue(['lod_options','select_preview']) ?? []; - $values_per_column = $this->AmiLoDService->provideLoDColumnValues($file, + $values_per_column = $this->AmiUtilityService->provideDifferentColumnValuesFromCSV($file, $column_preview); $rows = $values_per_column[$form_state->getValue(['lod_options','select_preview'])] ?? ['Emtpy Column']; sort($rows, SORT_STRING); @@ -544,46 +548,5 @@ public function ajaxColumPreview($form, FormStateInterface $form_state) { } return $response; } - - /** - * AJAX callback. 
- */ - public function ajaxLoDPreview($form, FormStateInterface $form_state) { - return $form['lod_cleanup']; - - - - $response = new AjaxResponse(); - $form['#attached']['library'][] = 'core/drupal.dialog.off_canvas'; - $response->setAttachments($form['#attached']); - - if (!empty($form_state->getValue(['edit']))) { - $entity = $form_state->getFormObject()->getEntity(); - $csv_file_reference = $entity->get('source_data')->getValue(); - if (isset($csv_file_reference[0]['target_id'])) { - /** @var \Drupal\file\Entity\File $file */ - $file = $this->entityTypeManager->getStorage('file')->load( - $csv_file_reference[0]['target_id'] - ); - if ($file) { - $form = \Drupal::service('entity.form_builder')->getForm($entity, 'editreconcile', []);; - } - $response->addCommand(new OpenOffCanvasDialogCommand(t('Lod for @label', [ - '@label' => $this->entity->label(), - ]), - $form, ['width' => '70%'])); - if ($form_state->getErrors()) { - // Clear errors so the user does not get confused when reloading. - \Drupal::messenger()->deleteByType(MessengerInterface::TYPE_ERROR); - $form_state->clearErrors(); - } - } - } - return $response; - } - - - - } diff --git a/src/Plugin/QueueWorker/LoDQueueWorker.php b/src/Plugin/QueueWorker/LoDQueueWorker.php index f9cd766..6305f40 100644 --- a/src/Plugin/QueueWorker/LoDQueueWorker.php +++ b/src/Plugin/QueueWorker/LoDQueueWorker.php @@ -192,10 +192,10 @@ public function processItem($data) { $context_data[$lod_route_column_name]['columns'] = $data->info['csv_columns']; } - $this->AmiUtilityService->csv_append($newdata, $file_lod,NULL, FALSE); + $this->AmiUtilityService->csv_append($newdata, $file_lod, NULL, FALSE); // Sets the same data, per label (as key) into keystore so we can fetch it as Twig Context when needed. 
//@TODO also do similar if going for a "direct" in that case we replace the columns found in the original data - $this->AmiUtilityService->setKeyValuePerAmiSet($data->info['label'], $context_data, $data->info['set_id']); + $this->AmiLoDService->setKeyValuePerAmiSet($data->info['label'], $context_data, $data->info['set_id']); } } From f280dad7662a3aba44a91108dfb91fa574c4779a Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 27 Sep 2021 16:05:18 -0400 Subject: [PATCH 24/42] Fix Batch Size Issue capping to hundreds and not respecting exact row numbers @alliomeria when you come back. Solr exact fetch is fixed, also fixes the real time report to the number of rows requested. Tested on collapsed/uncollapsed and with offsets to check if a set of consequent harvests with offsets was correct and not missing anything. All checks out! --- src/Plugin/ImporterAdapter/SolrImporter.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index 81e706e..163ff57 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -880,7 +880,7 @@ protected function getDataChildren(array $config, SolariumClient $client, string $query->addSort("RELS_EXT_isPageNumber_literal_intDerivedFromString_l", 'asc'); $query->createFilterQuery('constituent')->setQuery('RELS_EXT_isConstituentOf_uri_ms:'.$escaped .' OR RELS_EXT_isPageOf_uri_ms:'.$escaped .' 
OR RELS_EXT_isMemberOf_uri_ms:'.$escaped ); $query->setQuery('*:*'); - $query->setStart(0)->setRows(3000); + $query->setStart(0)->setRows(5000); $query->setFields([ 'PID', 'fgs_label_s', @@ -1090,9 +1090,12 @@ public static function fetchBatch(array $config, ImporterPluginAdapterInterface } $context['finished'] = 0; try { + // Increment constantly by static::BATCH_INCREMENTS except when what is left < static::BATCH_INCREMENTS + $next_increment = ($context['sandbox']['progress'] + $increment > $rows) ? ($rows - $context['sandbox']['progress']) : $increment; + $title = t('Processing %progress of %count', [ '%count' => $rows, - '%progress' => $context['sandbox']['progress'] + $increment + '%progress' => $context['sandbox']['progress'] + $next_increment ]); $context['message'] = $title; // WE keep track in the AMI set Config of the previous total rows @@ -1105,7 +1108,7 @@ public static function fetchBatch(array $config, ImporterPluginAdapterInterface $config['headers'] = !empty($amisetdata->column_keys) ? $amisetdata->column_keys : (!empty($config['headers']) ? $config['headers'] : []); $config['headerswithdata'] = $context['results']['processed']['headerswithdata'] ?? []; $data = $plugin_instance->getData($config, $context['sandbox']['progress'] + $offset, - $increment); + $next_increment); if ($data['totalrows'] == 0) { $context['finished'] = 1; } From 58b08112118d2a0756b23ee5338cd1db13f92f54 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 30 Sep 2021 15:37:00 -0400 Subject: [PATCH 25/42] replace EntityChangedActionDeriver to avoid other entities to pop on the options This is basically the same as setting type = "node" but allows us in the future to also allow certain Actions to run on e.g. AMI Sets @alliomeria how to test? - git pull this branch - Clear Caches - Edit your Search and replace View.
Remove the "Global: Views bulk operations" field (instead of installing/reinstalling the module, this seems to be the best option a.k.a as TRICK to make actions show again) - Add it again. The list should be refreshed. Check the ones you want to use - Save the view - test! --- .../Action/AmiStrawberryfieldCSVexport.php | 2 +- .../Action/AmiStrawberryfieldJsonAsText.php | 2 +- .../AmiStrawberryfieldJsonAsWebform.php | 2 +- .../Action/AmiStrawberryfieldJsonPatch.php | 2 +- .../Derivative/EntitySbfActionDeriver.php | 23 +++++++++++++++++++ 5 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 src/Plugin/Action/Derivative/EntitySbfActionDeriver.php diff --git a/src/Plugin/Action/AmiStrawberryfieldCSVexport.php b/src/Plugin/Action/AmiStrawberryfieldCSVexport.php index 9a7f47b..e406d03 100644 --- a/src/Plugin/Action/AmiStrawberryfieldCSVexport.php +++ b/src/Plugin/Action/AmiStrawberryfieldCSVexport.php @@ -31,7 +31,7 @@ * id = "entity:ami_csvexport_action", * action_label = @Translation("Export Archipelago Digital Objects to CSV"), * category = @Translation("AMI Metadata"), - * deriver = "Drupal\Core\Action\Plugin\Action\Derivative\EntityChangedActionDeriver", + * deriver = "Drupal\ami\Plugin\Action\Derivative\EntitySbfActionDeriver", * type = "node", * pass_context = TRUE, * pass_view = TRUE, diff --git a/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php b/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php index 54faf4e..94f5e77 100644 --- a/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php +++ b/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php @@ -23,7 +23,7 @@ * id = "entity:ami_jsontext_action", * action_label = @Translation("Text based find and replace Metadata for Archipelago Digital Objects"), * category = @Translation("AMI Metadata"), - * deriver = "Drupal\Core\Action\Plugin\Action\Derivative\EntityChangedActionDeriver", + * deriver = "Drupal\ami\Plugin\Action\Derivative\EntitySbfActionDeriver", * type = "node", * confirm = "true" * ) diff 
--git a/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php b/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php index 3c29d21..580d12c 100644 --- a/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php +++ b/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php @@ -25,7 +25,7 @@ * id = "entity:ami_jsonwebform_action", * action_label = @Translation("Webform based find and replace Metadata for Archipelago Digital Objects"), * category = @Translation("AMI Metadata"), - * deriver = "Drupal\Core\Action\Plugin\Action\Derivative\EntityChangedActionDeriver", + * deriver = "Drupal\ami\Plugin\Action\Derivative\EntitySbfActionDeriver", * type = "node", * confirm = "true" * ) diff --git a/src/Plugin/Action/AmiStrawberryfieldJsonPatch.php b/src/Plugin/Action/AmiStrawberryfieldJsonPatch.php index c691e7d..7504dec 100644 --- a/src/Plugin/Action/AmiStrawberryfieldJsonPatch.php +++ b/src/Plugin/Action/AmiStrawberryfieldJsonPatch.php @@ -17,7 +17,7 @@ * id = "entity:ami_jsonpatch_action", * action_label = @Translation("JSON Patch Metadata for Archipelago Digital Objects"), * category = @Translation("AMI Metadata"), - * deriver = "Drupal\Core\Action\Plugin\Action\Derivative\EntityChangedActionDeriver", + * deriver = "Drupal\ami\Plugin\Action\Derivative\EntitySbfActionDeriver", * type = "node", * confirm = "true" * ) diff --git a/src/Plugin/Action/Derivative/EntitySbfActionDeriver.php b/src/Plugin/Action/Derivative/EntitySbfActionDeriver.php new file mode 100644 index 0000000..84bdb25 --- /dev/null +++ b/src/Plugin/Action/Derivative/EntitySbfActionDeriver.php @@ -0,0 +1,23 @@ +entityClassImplements(NodeInterface::class); + } + +} From 31e70a797a9ff6745cc3968a4f9532ef7d08544a Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 4 Oct 2021 20:53:31 -0400 Subject: [PATCH 26/42] This one was driving me crazy. Stuck "Direct" for ever Happens that i uncommented a line i had commented because i used to be smart. Not anymore. 
If you add a #name property to an AJAX driven form with select, the original value gets cached forever and never changes even if you submit it differently. How do I know this? Just because I tested it. There is NO documentation. Wonder if that is also the issue with Webform based find and replace?? --- src/Form/AmiMultiStepIngest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Form/AmiMultiStepIngest.php b/src/Form/AmiMultiStepIngest.php index 37274d3..531395f 100644 --- a/src/Form/AmiMultiStepIngest.php +++ b/src/Form/AmiMultiStepIngest.php @@ -224,8 +224,8 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#description' => t('Choose your transformation option'), '#open' => TRUE, // Controls the HTML5 'open' attribute. Defaults to FALSE. ]; + // NEVER ADD A #NAME To dynamic/ajax select. It will get stuck in its default value. $form['ingestsetup']['custommapping'][$type]['metadata'] = [ - '#name' => 'metadata_'.$machine_type, '#type' => 'select', '#title' => $this->t('Select the data transformation approach for @type', ['@type' => $type]), '#default_value' => isset($mapping['custommapping_settings'][$type]['metadata']) ? $mapping['custommapping_settings'][$type]['metadata'] : (key($metadata) ?? NULL), From 9a7aecc08780f290ea9735da3b68461dd6f078a9 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 4 Oct 2021 20:55:28 -0400 Subject: [PATCH 27/42] Consistently use only lower case columns Internally our CSV to data structure is already normalized for lower case (we need to document this better). So for Solr import, also immediately set the column names to lower case and in general enforce checking against lowercase names when doing validation. Does not require any change really on Twig templates because for Twig "data.HoLa" is the same as "data.hola".
Just saying @alliomeria --- src/AmiUtilityService.php | 2 + src/Plugin/ImporterAdapter/SolrImporter.php | 41 ++++++++++----------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 7df3c58..1bdc109 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -1637,6 +1637,8 @@ protected function validateAmiSet(array $file_data_all, \stdClass $data, $strict // But safer to check both in case someone manually edited the set. $required_headers = array_merge($required_headers, array_values((array)$data->column_keys)); } + // We use internally Lower case Headers. + $required_headers = array_map('strtolower', $required_headers); $headers_missing = array_diff(array_unique($required_headers), $file_data_all['headers']); if (count($headers_missing)) { $message = $this->t( diff --git a/src/Plugin/ImporterAdapter/SolrImporter.php b/src/Plugin/ImporterAdapter/SolrImporter.php index 163ff57..a1de974 100644 --- a/src/Plugin/ImporterAdapter/SolrImporter.php +++ b/src/Plugin/ImporterAdapter/SolrImporter.php @@ -528,7 +528,7 @@ public function getInfo(array $config, FormStateInterface $form_state, $page = 0 $headers['type'] = 'type'; $headers['ismemberof'] = 'ismemberof'; $headers['ispartof'] = 'ispartof'; - + $allheaders_array = array_map('strtolower', $allheaders_array); foreach ($allheaders_array as $headerkey) { $headers[$headerkey] = $headerkey; } @@ -560,7 +560,7 @@ public function getInfo(array $config, FormStateInterface $form_state, $page = 0 foreach (static::FILE_COLUMNS as $column) { $sp_data[$resultset_iterator->key()][$column] = ''; } - $sp_data[$resultset_iterator->key()]['type'] = $config['solarium_mapping']['cmodel_mapping'][$sp_data[$resultset_iterator->key()]['RELS_EXT_hasModel_uri']] ?? 'Thing'; + $sp_data[$resultset_iterator->key()]['type'] = $config['solarium_mapping']['cmodel_mapping'][$sp_data[$resultset_iterator->key()]['rels_ext_hasmodel_uri']] ?? 
'Thing'; } catch (\Exception $exception) { continue; @@ -760,7 +760,7 @@ public function getData(array $config, $page = 0, $per_page = 20): array { if ($parent_ado) { $sp_data[$resultset_iterator->key()]['ismemberof'] = $parent_ado; } - $sp_data[$resultset_iterator->key()]['type'] = $config['solarium_mapping']['cmodel_mapping'][$sp_data[$resultset_iterator->key()]['RELS_EXT_hasModel_uri']] ?? 'Thing'; + $sp_data[$resultset_iterator->key()]['type'] = $config['solarium_mapping']['cmodel_mapping'][$sp_data[$resultset_iterator->key()]['rels_ext_hasmodel_uri']] ?? 'Thing'; $datastream = $this->buildDatastreamURL($config, $document); if (count($datastream)) { @@ -768,8 +768,8 @@ public function getData(array $config, $page = 0, $per_page = 20): array { $sp_data[$resultset_iterator->key()][key($datastream)] = $first_datastream; } // Fetch Children - if (in_array($sp_data[$resultset_iterator->key()]['RELS_EXT_hasModel_uri'], static::MULTICHILDREN_CMODELS)) { - $pids_to_fetch[$resultset_iterator->key()] = $sp_data[$resultset_iterator->key()]['PID']; + if (in_array($sp_data[$resultset_iterator->key()]['rels_ext_hasmodel_uri'], static::MULTICHILDREN_CMODELS)) { + $pids_to_fetch[$resultset_iterator->key()] = $sp_data[$resultset_iterator->key()]['pid']; } } catch (\Exception $exception) { continue; @@ -917,27 +917,20 @@ protected function getDataChildren(array $config, SolariumClient $client, string foreach ($document as $field => $value) { $fieldname = $this->multipleToSingleFieldName($field); // Exclude this non-sense fields - if (strpos($field, '_roleTerm_', 0) !== FALSE) { + if (strpos($fieldname, '_roleTerm_', 0) !== FALSE) { continue; } if ($this->endsWith($fieldname, 'authority_marcrelator')) { continue; } - // this converts multi valued fields to a comma-separated string - foreach (static::SOLR_FIELD_SUFFIX as $suffix) { - $suffix_offset = strpos($field , $suffix , strlen($field) - strlen($suffix) -1); - if ($suffix_offset!== false) { - $fieldname = substr($field, 0, 
$suffix_offset); - break 1; - } - } + $headers[$fieldname] = $fieldname; if (is_array($value)) { if (!empty($sp_data[$resultset_iterator->key()][$fieldname])) { - $original_value = explode(' |@| ', $sp_data[$resultset_iterator->key()][$fieldname]) ?? []; + $original_value = explode('|@|', $sp_data[$resultset_iterator->key()][$fieldname]) ?? []; $value = array_unique(array_merge($original_value, $value)); } - $value = implode(' |@| ', array_unique($value)); + $value = implode('|@|', array_unique($value)); } $sp_data[$resultset_iterator->key()][$fieldname] = $value; } @@ -947,8 +940,8 @@ protected function getDataChildren(array $config, SolariumClient $client, string $sp_data[$resultset_iterator->key()][$column] = ''; } // Try with both main mapping and children mapping - $type = $config['solarium_mapping']['cmodel_children'][$sp_data[$resultset_iterator->key()]['RELS_EXT_hasModel_uri']] ?? NULL; - $type2 = $type ?? ($config['solarium_mapping']['cmodel_mapping'][$sp_data[$resultset_iterator->key()]['RELS_EXT_hasModel_uri']] ?? NULL); + $type = $config['solarium_mapping']['cmodel_children'][$sp_data[$resultset_iterator->key()]['rels_ext_hasmodel_uri']] ?? NULL; + $type2 = $type ?? ($config['solarium_mapping']['cmodel_mapping'][$sp_data[$resultset_iterator->key()]['rels_ext_hasmodel_uri']] ?? NULL); $sp_data[$resultset_iterator->key()]['type'] = $type2 ?? 'Thing'; // Get me the datastream $datastream = $this->buildDatastreamURL($config, $document); @@ -996,6 +989,7 @@ protected function buildDatastreamURL(array $config, \Solarium\QueryType\Select\ /** * This function normalizes field names to join _ms, _s etc without prefixes. * + * Also lower cases every field name. 
* @param $field * * @return false|string @@ -1009,7 +1003,7 @@ protected function multipleToSingleFieldName($field) { break 1; } } - return $field; + return strtolower($field); } @@ -1030,6 +1024,8 @@ protected function endsWith($haystack, $needle) { /** * Implodes array and concatenates to existing string using common delimiter. * + * Will also remove whitespaces from start/end of each value. + * * @param array $value * @param string|null $oldvalue * @@ -1037,10 +1033,11 @@ protected function endsWith($haystack, $needle) { */ protected function concatValues(array $value, string $original_value = NULL): string { if (!empty($oldvalue)) { - $original_value = explode(' |@| ', $original_value) ?? []; + $original_value = explode('|@|', $original_value) ?? []; $value = array_unique(array_merge($original_value, $value)); } - return implode(' |@| ', array_unique($value)); + $value = array_map('trim', $value); + return implode('|@|', array_unique($value)); } @@ -1163,7 +1160,7 @@ public function provideTypes(array $config, array $data): array { public function provideKeys(array $config, array $data): array { if (count($data) > 0) { - $columns = array_merge(['type','node_uuid','ismemberof','ispartof','fgs_label','mods_titleInfo_title'], static::FILE_COLUMNS); + $columns = array_merge(['type','node_uuid','ismemberof','ispartof','fgs_label','mods_titleinfo_title'], static::FILE_COLUMNS); return $columns; } return []; From 19ea03b39ffbbd4a619da31e88ecd88a2b181746 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 12 Oct 2021 11:25:28 -0400 Subject: [PATCH 28/42] Small typo on Docs --- src/AmiUtilityService.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 1bdc109..a9217ff 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -177,7 +177,7 @@ class AmiUtilityService { * @param \Drupal\Component\Transliteration\TransliterationInterface $transliteration * @param 
\Drupal\Core\Extension\ModuleHandlerInterface $module_handler * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory - * @param StrawberryfieldUtilityService $strawberryfield_utility_service , + * @param StrawberryfieldUtilityService $strawberryfield_utility_service * @param \Drupal\Core\Entity\EntityFieldManagerInterface $entity_field_manager * @param \Drupal\Core\Entity\EntityTypeBundleInfoInterface $entity_type_bundle_info * @param \GuzzleHttp\ClientInterface $http_client From fffc05903c458a75432ba9804ed4aad1abc82b80 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 19 Oct 2021 11:12:32 -0400 Subject: [PATCH 29/42] Small fail safe update in case the queue fails for not-us reasons --- src/AmiBatchQueue.php | 16 +++++++++++----- src/Form/amiSetEntityProcessForm.php | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/AmiBatchQueue.php b/src/AmiBatchQueue.php index f55958d..738f8db 100644 --- a/src/AmiBatchQueue.php +++ b/src/AmiBatchQueue.php @@ -136,11 +136,17 @@ public static function finish($success, $results, $operations) { ) ); } - // Cleanup and remove the queue. This is a live batch operation. - /** @var \Drupal\Core\Queue\QueueFactory $queue_factory */ - $queue_name = $results['queue_name']; - $queue_factory = \Drupal::service('queue'); - $queue_factory->get($queue_name)->deleteQueue(); + // If the queue fails for whatever reason the $context may be lost + if (isset($results['queue_name'])) { + // Cleanup and remove the queue. This is a live batch operation. + /** @var \Drupal\Core\Queue\QueueFactory $queue_factory */ + $queue_name = $results['queue_name']; + $queue_factory = \Drupal::service('queue'); + $queue_factory->get($queue_name)->deleteQueue(); + } + else { + \Drupal::messenger()->addError(\Drupal::translation('The Batch Operation failed. 
Please check your logs, available Filesystem space and try again')); + } } } diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index 116b01b..818096f 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -129,7 +129,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { $notprocessnow = $form_state->getValue('not_process_now', NULL); $queue_name = 'ami_ingest_ado'; if (!$notprocessnow) { - // This queues have no queue workers. That is intended since they + // These queues have no queue workers. That is intended since they // are always processed by the ami_ingest_ado one manually. $queue_name = 'ami_ingest_ado_set_' . $this->entity->id(); \Drupal::queue($queue_name, TRUE)->createQueue(); From d71bb086c23946ec217ced8b6e413cb2cef4cfa6 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 3 Nov 2021 15:03:56 -0400 Subject: [PATCH 30/42] Make ZIP upload field Private instead of tmp all ZIP files will go eventually (once the AMI set entity is saved or when uploaded directly via "Edit" of a Set, into private://ami/zip --- ami.install | 19 +++++++++++++++++-- src/Entity/amiSetEntity.php | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/ami.install b/ami.install index ff7e39f..1214a3c 100644 --- a/ami.install +++ b/ami.install @@ -53,7 +53,9 @@ function ami_update_8902() { ->setDescription(new TranslatableMarkup('A Zip file containing accompanying Files for the Source Data')) ->setSetting('file_extensions', 'zip') ->setSetting('upload_validators', $validators) - ->setRequired(TRUE) + ->setSetting('uri_scheme', 'private') + ->setSetting('file_directory', '/ami/zip') + ->setRequired(FALSE) ->setDisplayOptions('view', [ 'label' => 'above', 'type' => 'file', @@ -100,4 +102,17 @@ function ami_update_8903() { "plugin_configuration" ], ])); -} \ No newline at end of file +} + +/** + * Update 8903 - Make Private default upload location for ami Set 
ZIP files. + */ +function ami_update_8904() { + $field_storage_definition = \Drupal::entityDefinitionUpdateManager()->getFieldStorageDefinition('zip_file', 'ami_set_entity'); + $field_storage_definition->setSetting('uri_scheme', 'private'); + $field_storage_definition->setSetting('file_directory', '/ami/zip'); + \Drupal::entityDefinitionUpdateManager()->updateFieldStorageDefinition($field_storage_definition); +} + + + diff --git a/src/Entity/amiSetEntity.php b/src/Entity/amiSetEntity.php index 9b7bca1..fba5679 100644 --- a/src/Entity/amiSetEntity.php +++ b/src/Entity/amiSetEntity.php @@ -390,6 +390,8 @@ public static function baseFieldDefinitions(EntityTypeInterface $entity_type) { ->setLabel(t('Attached ZIP file')) ->setDescription(t('A Zip file containing accompanying Files for the Source Data')) ->setSetting('file_extensions', 'zip') + ->setSetting('uri_scheme', 'private') + ->setSetting('file_directory', '/ami/zip') ->setSetting('upload_validators', $validatorszip) ->setRequired(FALSE) ->setDisplayOptions('view', [ From d9ba478139f7c33f6df78e34b19ce69766a256e6 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 3 Nov 2021 15:06:19 -0400 Subject: [PATCH 31/42] New thing for me Since we cannot predict the final extension of a download upfront without downloading, I now check using glob() for the first part of the possible filename. If there is already a single (only a single one) File with the same starting future name (which is consistent since we use the URL of the file to generate it) then we reuse the already downloaded one. Will add also a button that deletes all files for an AMI set in the future and one that forces the download even if the file is already there.
Also this piece of code ensures ZIP files (After first upload) are moved into private://ami/zip once the AMI set is saved --- src/AmiUtilityService.php | 52 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index a9217ff..635e0a5 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -336,15 +336,28 @@ public function file_get($uri, File $zip_file = NULL) { } } else { - // This is remote! + // This may be remote! // Simulate what could be the final path of a remote download. // to avoid re downloading. $localfile = file_build_uri( $this->fileSystem->basename($parsed_url['path']) ); + $md5uri = md5($uri); + $path = str_replace( + '///', + '//', + "{$destination}/" + ) . $md5uri . '_' . $this->fileSystem->basename( + $parsed_url['path'] + ); + if ($isthere = glob($this->fileSystem->realpath($path).'.*')) { +// Ups its here + if (count($isthere) == 1) { + $localfile = $isthere[0]; + } + } + // Actual remote heavy lifting only if not present. if (!file_exists($localfile)) { - // Actual remote heavy lifting only if not present. - if (!$this->fileSystem->prepareDirectory( $destination, FileSystemInterface::CREATE_DIRECTORY @@ -1342,6 +1355,7 @@ public function createAmiSet(\stdClass $data) { 'column_keys' => $data->column_keys, 'total_rows' => $data->total_rows, ]; + $zipfail = FALSE; $name = $data->name ?? 'AMI Set of ' . 
$current_user_name; $jsonvalue = json_encode($set, JSON_PRETTY_PRINT); /* @var \Drupal\ami\Entity\amiSetEntity $entity */ @@ -1354,10 +1368,40 @@ public function createAmiSet(\stdClass $data) { $entity->set('status', 'ready'); try { $result = $entity->save(); + // Now ensure we move the Zip file if any to private + if ($this->streamWrapperManager->isValidScheme('private') && $data->zip) { + $target_directory = 'private://ami/zip'; + // Ensure the directory + if (!$this->fileSystem->prepareDirectory( + $target_directory, + FileSystemInterface::CREATE_DIRECTORY + | FileSystemInterface::MODIFY_PERMISSIONS + )) { + $zipfail = TRUE; + } + else { + $zipfile = $this->entityTypeManager->getStorage('file') + ->load($data->zip); + if (!$zipfile) { + $zipfail = TRUE; + } else { + $zipfile = file_move($zipfile, $target_directory, FileSystemInterface::EXISTS_REPLACE); + if (!$zipfile) { + $zipfail = TRUE; + } + } + } + } + if ($zipfail) { + $this->messenger()->addError( + $this->t( + 'ZIP file attached to Ami Set entity could not be moved to Private storage. Please check with your system admin if you have permissions.', + )); + } } catch (\Exception $exception) { $this->messenger()->addError( - t( + $this->t( 'Ami Set entity Failed to be persisted because of @message', ['@message' => $exception->getMessage()] ) From 2a470a43db090af75a770a6976040bfa0455ab54 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 3 Nov 2021 18:19:58 -0400 Subject: [PATCH 32/42] Files distributed across multiple Queue Entries First pass And first pass works, like a charm! Ok, need to add 1.- Config entry to set "what is many files" and what not. 2.- Better reuse of as:technical metadata, so we do not reprocess that (file re use after download is already in place). Probably the best thing ever here. 
@aksm @alliomeria --- src/Form/amiSetEntityProcessForm.php | 1 + .../QueueWorker/IngestADOQueueWorker.php | 158 ++++++++++++++++-- 2 files changed, 142 insertions(+), 17 deletions(-) diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index 818096f..9f2ff99 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -149,6 +149,7 @@ public function submitForm(array &$form, FormStateInterface $form_state) { 'status' => $statuses, 'set_url' => $SetURL, 'attempt' => 1, + 'queue_name' => $queue_name, ]; $added[] = \Drupal::queue($queue_name) ->createItem($data); diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index b29ee2f..75a8c7f 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -10,12 +10,14 @@ use Drupal\Core\Plugin\ContainerFactoryPluginInterface; use Drupal\Core\Queue\QueueWorkerBase; use Drupal\Core\StringTranslation\StringTranslationTrait; +use Drupal\strawberryfield\StrawberryfieldFilePersisterService; use Drupal\strawberryfield\StrawberryfieldUtilityService; use Symfony\Component\DependencyInjection\ContainerInterface; use Swaggest\JsonDiff\JsonDiff; use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; use Swaggest\JsonDiff\Exception as JsonDiffException; use Swaggest\JsonDiff\JsonPatch; +use \Drupal\Core\TempStore\PrivateTempStoreFactory; /** * Processes and Ingests each AMI Set CSV row. @@ -67,6 +69,18 @@ class IngestADOQueueWorker extends QueueWorkerBase implements ContainerFactoryPl */ protected $AmiLoDService; + /** + * The Strawberryfield File Persister Service + * + * @var \Drupal\strawberryfield\StrawberryfieldFilePersisterService + */ + protected $strawberryfilepersister; + + /** + * @var \Drupal\user\PrivateTempStore + */ + protected $store; + /** * Constructor. 
* @@ -79,6 +93,8 @@ class IngestADOQueueWorker extends QueueWorkerBase implements ContainerFactoryPl * @param \Drupal\ami\AmiUtilityService $ami_utility * @param \Drupal\ami\AmiLoDService $ami_lod * @param \Drupal\Core\Messenger\MessengerInterface $messenger + * @param \Drupal\strawberryfield\StrawberryfieldFilePersisterService $strawberry_filepersister + * @param \Drupal\Core\TempStore\PrivateTempStoreFactory $temp_store_factory */ public function __construct( array $configuration, @@ -89,7 +105,10 @@ public function __construct( StrawberryfieldUtilityService $strawberryfield_utility_service, AmiUtilityService $ami_utility, AmiLoDService $ami_lod, - MessengerInterface $messenger + MessengerInterface $messenger, + StrawberryfieldFilePersisterService $strawberry_filepersister, + PrivateTempStoreFactory $temp_store_factory + ) { parent::__construct($configuration, $plugin_id, $plugin_definition); $this->entityTypeManager = $entity_type_manager; @@ -98,6 +117,8 @@ public function __construct( $this->AmiUtilityService = $ami_utility; $this->messenger = $messenger; $this->AmiLoDService = $ami_lod; + $this->strawberryfilepersister = $strawberry_filepersister; + $this->store = $temp_store_factory->get('ami_queue_worker_file'); } /** @@ -125,7 +146,10 @@ public static function create( $container->get('strawberryfield.utility'), $container->get('ami.utility'), $container->get('ami.lod'), - $container->get('messenger') + $container->get('messenger'), + $container->get('strawberryfield.file_persister'), + $container->get('tempstore.private'), + ); } @@ -133,16 +157,38 @@ public static function create( * {@inheritdoc} */ public function processItem($data) { - /* Data info has this structure + /* Data info for an ADO has this structure $data->info = [ 'row' => The actual data 'set_id' => The Set id 'uid' => The User ID that processed the Set 'set_url' => A direct URL to the set. 'attempt' => The number of attempts to process. 
We always start with a 1 + 'zip_file' => File ID of a zip file if a any + 'waiting_for_files' => will only exist and TRUE if we re-enqueued this ADO after figuring out we had too many Files. + 'queue_name' => because well ... we use Hydroponics too + ]; + */ + /* Data info for a File has this structure + $data->info = [ + 'set_id' => The Set id + 'uid' => The User ID that processed the Set + 'attempt' => The number of attempts to process. We always start with a 1 + 'filename' => The File name + 'file_column' => The File column where the file needs to be saved. + 'zip_file' => File ID of a zip file if a any, + 'processed_row' => Full metadata of the ADO holding the file processed and ready as an array + 'queue_name' => because well ... we use Hydroponics too ]; */ + // This will simply go to an alternate processing on this same Queue Worker + // Just for files. + if (!empty($data->info['filename']) && !empty($data->info['file_column']) && !empty($data->info['processed_row'])) { + $this->processFile($data); + return; + } + // Before we do any processing. Check if Parent(s) exists? // If not, re-enqueue: we try twice only. Should we try more? $parent_nodes = []; @@ -161,7 +207,7 @@ public function processItem($data) { // Pushing to the end of the queue. $data->info['attempt']++; if ($data->info['attempt'] < 3) { - \Drupal::queue('ami_ingest_ado') + \Drupal::queue($data->info['queue_name']) ->createItem($data); return; } @@ -271,6 +317,8 @@ public function processItem($data) { foreach ($parent_nodes as $parent_property => $node_ids) { $processed_metadata[$parent_property] = $node_ids; } + $processed_files = 0; + $process_files_via_queue = FALSE; // Now do heavy file lifting foreach($file_columns as $file_column) { @@ -282,20 +330,69 @@ public function processItem($data) { // From the template neither. // @TODO ask users. 
$processed_metadata[$file_column] = []; - foreach($filenames as $filename) { - $file = $this->AmiUtilityService->file_get($filename, $data->info['zip_file']); - if ($file) { - $processed_metadata[$file_column][] = (int) $file->id(); - } - else { - $this->messenger->addWarning($this->t('Sorry, for ADO with UUID:@uuid, File @filename at column @filecolumn was not found. Skipping. Please check your CSV for set @setid.',[ - '@uuid' => $data->info['row']['uuid'], - '@setid' => $data->info['set_id'], - '@filename' => $filename, - '@filecolumn' => $file_column, - ])); - } + + // Now the hard part. Do we have too many files? + $file_limit = 10; + if ((count($filenames) + $processed_files > $file_limit) && empty($data->info['waiting_for_files'])) { + // We will add future files to the queue... + // accumulating all the ones we need + // and at the end + // re-enque this little one + $process_files_via_queue = TRUE; } + foreach ($filenames as $filename) { + if (!empty($data->info['waiting_for_files'])) { + $processed_file_data = $this->store->get('set_' . $data->info['set_id'] . '-' . 
md5($filename)); + if (!empty($processed_file_data['as_data']) && !empty($processed_file_data['file_id'])) { + $processed_metadata[$file_column][] = (int) $processed_file_data['file_id']; + } + } + else { + if ($process_files_via_queue) { + $data_file = new \stdClass(); + $data_file->info = [ + 'zip_file' => $data->info['zip_file'], + 'set_id' => $data->info['set_id'], + 'uid' => $data->info['uid'], + 'processed_row' => $processed_metadata, + 'file_column' => $file_column, + 'filename' => $filename, + 'attempt' => 1, + 'queue_name' => $data->info['queue_name'], + ]; + \Drupal::queue($data->info['queue_name']) + ->createItem($data_file); + } + else { + $file = $this->AmiUtilityService->file_get($filename, + $data->info['zip_file']); + + if ($file) { + $processed_files++; + $processed_metadata[$file_column][] = (int) $file->id(); + } + else { + $this->messenger->addWarning($this->t('Sorry, for ADO with UUID:@uuid, File @filename at column @filecolumn was not found. Skipping. Please check your CSV for set @setid.', + [ + '@uuid' => $data->info['row']['uuid'], + '@setid' => $data->info['set_id'], + '@filename' => $filename, + '@filecolumn' => $file_column, + ])); + } + } + } + } + + } + if ($process_files_via_queue) { + // If so we need to push this one to the end.. + // Reset the attempts + $data->info['waiting_for_files'] = TRUE; + $data->info['attempt'] = 0; + \Drupal::queue($data->info['queue_name']) + ->createItem($data); + return; } } @@ -556,4 +653,31 @@ protected function patchJson(array $original, array $new) { //error_log(print_r($r->getMergePatch(),true)); } } + + /** + * Processes a File and technical metadata to avoid congestion. 
+ * + * @param mixed $data + */ + protected function processFile($data) { + $file = $this->AmiUtilityService->file_get($data->info['filename'], + $data->info['zip_file']); + if ($file) { + $processedAsValuesForKey = $this->strawberryfilepersister + ->generateAsFileStructure( + [$file->id()], + $data->info['file_column'], + $data->info['processed_row'] + ); + $data_to_store['as_data'] = $processedAsValuesForKey; + $data_to_store['file_id'] = $file->id(); + $this->store->set('set_'.$data->info['set_id'].'-'.md5($data->info['filename']), $data_to_store); + } + else { + $this->messenger->addWarning($this->t('Sorry, we really tried to process File @filename from Set @setid yet. Giving up',[ + '@setid' => $data->info['set_id'], + '@filename' => $data->info['filename'] + ])); + } + } } From 45815699d7d411fe76844b3d24ef55ad7494c187 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 4 Nov 2021 10:25:48 -0400 Subject: [PATCH 33/42] This looks better @alliomeria can you test with the same CSV with 387 objects? And please twice? First time may be slow-ish, second time quite fast --- src/AmiBatchQueue.php | 10 ++++- .../QueueWorker/IngestADOQueueWorker.php | 45 +++++++++++-------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/src/AmiBatchQueue.php b/src/AmiBatchQueue.php index 738f8db..ebf6414 100644 --- a/src/AmiBatchQueue.php +++ b/src/AmiBatchQueue.php @@ -53,7 +53,15 @@ public static function takeOne(string $queue_name, string $set_id, array &$conte try { // Only process Items of this Set if $context['set_id'] is set. if ($item = $queue->claimItem()) { - $ado_title = isset($item->data->info['row']['uuid']) ? 'ADO with UUID '.$item->data->info['row']['uuid'] : 'Unidentifed ADO without UUID'; + // Let's figure out the type of queue running here, ADO or Attached file + if (!empty($item->data->info['filename']) && !empty($item->data->info['file_column']) && !empty($item->data->info['processed_row'])) { + $ado_title = 'File ' . 
$item->data->info['filename']; + $ado_title .= isset($item->data->info['uuid']) ? ' for ADO with UUID ' . $item->data->info['uuid'] : 'for Unidentifed ADO without UUID '; + } + else { + $ado_title = isset($item->data->info['row']['uuid']) ? 'ADO with UUID ' . $item->data->info['row']['uuid'] : 'Unidentifed ADO without UUID'; + } + $title = t('For %name processing %adotitle, %count items remaining', [ '%name' => $context['results']['queue_label'], '%adotitle' => $ado_title, diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 75a8c7f..9bdcded 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -173,6 +173,7 @@ public function processItem($data) { $data->info = [ 'set_id' => The Set id 'uid' => The User ID that processed the Set + 'uuid' => The uuid of the ADO that needs this file 'attempt' => The number of attempts to process. We always start with a 1 'filename' => The File name 'file_column' => The File column where the file needs to be saved. @@ -345,6 +346,7 @@ public function processItem($data) { $processed_file_data = $this->store->get('set_' . $data->info['set_id'] . '-' . 
md5($filename)); if (!empty($processed_file_data['as_data']) && !empty($processed_file_data['file_id'])) { $processed_metadata[$file_column][] = (int) $processed_file_data['file_id']; + $processed_metadata = array_merge_recursive($processed_metadata, (array) $processed_file_data['as_data']); } } else { @@ -359,6 +361,7 @@ public function processItem($data) { 'filename' => $filename, 'attempt' => 1, 'queue_name' => $data->info['queue_name'], + 'uuid' => $data->info['row']['uuid'] ]; \Drupal::queue($data->info['queue_name']) ->createItem($data_file); @@ -660,24 +663,30 @@ protected function patchJson(array $original, array $new) { * @param mixed $data */ protected function processFile($data) { - $file = $this->AmiUtilityService->file_get($data->info['filename'], - $data->info['zip_file']); - if ($file) { - $processedAsValuesForKey = $this->strawberryfilepersister - ->generateAsFileStructure( - [$file->id()], - $data->info['file_column'], - $data->info['processed_row'] - ); - $data_to_store['as_data'] = $processedAsValuesForKey; - $data_to_store['file_id'] = $file->id(); - $this->store->set('set_'.$data->info['set_id'].'-'.md5($data->info['filename']), $data_to_store); - } - else { - $this->messenger->addWarning($this->t('Sorry, we really tried to process File @filename from Set @setid yet. Giving up',[ - '@setid' => $data->info['set_id'], - '@filename' => $data->info['filename'] - ])); + + // First check if we already have the info here, if so do nothing. + if (!$this->store->get('set_' . $data->info['set_id'] . '-' . md5($data->info['filename']))) { + $file = $this->AmiUtilityService->file_get($data->info['filename'], + $data->info['zip_file']); + if ($file) { + $processedAsValuesForKey = $this->strawberryfilepersister + ->generateAsFileStructure( + [$file->id()], + $data->info['file_column'], + $data->info['processed_row'] + ); + $data_to_store['as_data'] = $processedAsValuesForKey; + $data_to_store['file_id'] = $file->id(); + $this->store->set('set_' . 
$data->info['set_id'] . '-' . md5($data->info['filename']), + $data_to_store); + } + else { + $this->messenger->addWarning($this->t('Sorry, we really tried to process File @filename from Set @setid yet. Giving up', + [ + '@setid' => $data->info['set_id'], + '@filename' => $data->info['filename'] + ])); + } } } } From f629a7ccf6ec673e3c3d7b906f25a92eef633603 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 4 Nov 2021 16:37:53 -0400 Subject: [PATCH 34/42] New options. Force file processing as separate item can be set per SET also reprocessing of cached TECHMD, etc --- src/Form/amiSetEntityProcessForm.php | 29 +++++++++++++++++++ .../QueueWorker/IngestADOQueueWorker.php | 21 ++++++++++---- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index 9f2ff99..1f426a5 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -75,6 +75,7 @@ public function getCancelUrl() { * {@inheritdoc} */ public function submitForm(array &$form, FormStateInterface $form_state) { + $manyfiles = $this->configFactory()->get('strawberryfield.filepersister_service_settings')->get('manyfiles') ?? 0; $statuses = $form_state->getValue('status', []); $csv_file_reference = $this->entity->get('source_data')->getValue(); if (isset($csv_file_reference[0]['target_id'])) { @@ -132,6 +133,8 @@ public function submitForm(array &$form, FormStateInterface $form_state) { // These queues have no queue workers. That is intended since they // are always processed by the ami_ingest_ado one manually. $queue_name = 'ami_ingest_ado_set_' . $this->entity->id(); + // Clear the queue in case there is already data there from a failed one. 
+ \Drupal::queue($queue_name)->deleteQueue(); \Drupal::queue($queue_name, TRUE)->createQueue(); // @TODO acquire a Lock that is renewed for each queue item processing // To avoid same batch to be send to processing by different users at @@ -150,6 +153,9 @@ public function submitForm(array &$form, FormStateInterface $form_state) { 'set_url' => $SetURL, 'attempt' => 1, 'queue_name' => $queue_name, + 'force_file_queue' => (bool) $form_state->getValue('force_file_queue', FALSE), + 'force_file_process' => (bool) $form_state->getValue('force_file_process', FALSE), + 'manyfiles' => $manyfiles, ]; $added[] = \Drupal::queue($queue_name) ->createItem($data); @@ -275,6 +281,29 @@ public function buildForm(array $form, FormStateInterface $form_state) { '#required' => FALSE, '#default_value' => !empty($notprocessnow) ? $notprocessnow : FALSE, ]; + $form['force_file_queue'] = [ + '#type' => 'checkbox', + '#title' => $this->t( + 'Force every File attached to an ADO to be processed in its own Queue item.' + ), + '#description' => $this->t( + 'Warning: This may make your ingest slower. Check this to force every file attached to an ADO to be downloaded and characterized as an independent process. This bypasses the Number of files Global setting that would otherwise trigger this behavior.', + ['@url' => Url::fromRoute('strawberryfield.file_persister_settings_form')->toString()] + ), + '#required' => FALSE, + '#default_value' => FALSE, + ]; + $form['force_file_process'] = [ + '#type' => 'checkbox', + '#title' => $this->t( + 'Re download and reprocess every file' + ), + '#description' => $this->t( + 'Check this to force every file attached to an ADO to be downloaded and characterized again, even if on a previous Batch run that data was already generated for reuse. Needed if e.g the URL of a file is the same but the remote source changed.' 
+ ), + '#required' => FALSE, + '#default_value' => FALSE, + ]; } return $form + parent::buildForm($form, $form_state); } diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index 9bdcded..c80138e 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -167,6 +167,9 @@ public function processItem($data) { 'zip_file' => File ID of a zip file if a any 'waiting_for_files' => will only exist and TRUE if we re-enqueued this ADO after figuring out we had too many Files. 'queue_name' => because well ... we use Hydroponics too + 'force_file_queue' => defaults to false, will always treat files as separate queue items. + 'force_file_process' => defaults to false, will force all techmd and file fetching to happen from scratch instead of using cached versions. + 'manyfiles' => Number of files (passed by \Drupal\ami\Form\amiSetEntityProcessForm::submitForm) that will trigger queue processing for files ]; */ /* Data info for a File has this structure @@ -180,6 +183,8 @@ public function processItem($data) { 'zip_file' => File ID of a zip file if a any, 'processed_row' => Full metadata of the ADO holding the file processed and ready as an array 'queue_name' => because well ... we use Hydroponics too + 'force_file_process' => defaults to false, will force all techmd and file fetching to happen from scratch instead of using cached versions. + 'reduced' => If reduced EXIF or not should be generated ]; */ @@ -333,8 +338,8 @@ public function processItem($data) { $processed_metadata[$file_column] = []; // Now the hard part. Do we have too many files? - $file_limit = 10; - if ((count($filenames) + $processed_files > $file_limit) && empty($data->info['waiting_for_files'])) { + $file_limit = $data->info['manyfiles'] ?? 0; + if (($data->info['force_file_queue'] ?? 
FALSE) && empty($data->info['waiting_for_files']) || (($file_limit != 0) && (count($filenames) + $processed_files > $file_limit) && empty($data->info['waiting_for_files']))) { // We will add future files to the queue... // accumulating all the ones we need // and at the end @@ -351,6 +356,7 @@ public function processItem($data) { } else { if ($process_files_via_queue) { + $reduced = (count($filenames) + $processed_files >= $file_limit) && ($file_limit != 0); $data_file = new \stdClass(); $data_file->info = [ 'zip_file' => $data->info['zip_file'], @@ -361,7 +367,9 @@ public function processItem($data) { 'filename' => $filename, 'attempt' => 1, 'queue_name' => $data->info['queue_name'], - 'uuid' => $data->info['row']['uuid'] + 'uuid' => $data->info['row']['uuid'], + 'force_file_process' => $data->info['force_file_process'] ?? FALSE, + 'reduced' => $reduced, ]; \Drupal::queue($data->info['queue_name']) ->createItem($data_file); @@ -665,15 +673,18 @@ protected function patchJson(array $original, array $new) { protected function processFile($data) { // First check if we already have the info here, if so do nothing. - if (!$this->store->get('set_' . $data->info['set_id'] . '-' . md5($data->info['filename']))) { + if (($data->info['force_file_process'] ?? FALSE) || (!$this->store->get('set_' . $data->info['set_id'] . '-' . md5($data->info['filename'])))) { $file = $this->AmiUtilityService->file_get($data->info['filename'], $data->info['zip_file']); if ($file) { + $reduced = $data->info['reduced'] ?? FALSE; $processedAsValuesForKey = $this->strawberryfilepersister ->generateAsFileStructure( [$file->id()], $data->info['file_column'], - $data->info['processed_row'] + $data->info['processed_row'], + FALSE, + $reduced ); $data_to_store['as_data'] = $processedAsValuesForKey; $data_to_store['file_id'] = $file->id(); From 054c668a39a33c26339c165a0f11373688d7f0b5 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 10 Nov 2021 17:23:51 -0500 Subject: [PATCH 35/42] Enforces revisions? 
@dmer @patdunlavey can you check this code please (as you know release process so as soon as you can). I wonder if you removed moderation workflows which enables by default revisions or if you have not set revisions at the bundle level?) But this will force that. --- src/Plugin/QueueWorker/IngestADOQueueWorker.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index c80138e..f66511b 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -604,6 +604,13 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { // Applies to Patch/Update. $field->setValue($jsonstring); } + if ($node->getEntityType()->isRevisionable()) { + // Forces a New Revision for Not-create Operations. + $node->setNewRevision(TRUE); + // Set data for the revision + $node->setRevisionLogMessage('ADO modified via AMI Set ' . $data->info['set_id']); + $node->setRevisionUserId($data->info['uid']); + } } // In case $status was not moderated. if ($status) { From e62e6ce320df9a6afadb538eb9c839dcf0c9130e Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 11 Nov 2021 18:30:04 -0500 Subject: [PATCH 36/42] AMI version of Entity Preview for Format Strawberryfield Works. Looks good. I'm good. A good person right @alliomeria and @aksm ? Revisit CSV offsets once we are on PHP 8.1 I found a PHP BUG!! --- ami.module | 106 ++++++ ami.routing.yml | 16 +- src/Controller/AmiRowAutocompleteHandler.php | 348 ++++++++++++++++++ src/Form/amiSetEntityReconcileCleanUpForm.php | 6 + 4 files changed, 475 insertions(+), 1 deletion(-) create mode 100644 src/Controller/AmiRowAutocompleteHandler.php diff --git a/ami.module b/ami.module index 17be731..c2f228c 100644 --- a/ami.module +++ b/ami.module @@ -3,7 +3,14 @@ * @file * Contains ami.module. 
*/ + +use Drupal\Core\Language\Language; use Drupal\Core\Routing\RouteMatchInterface; +use Drupal\Core\Form\FormStateInterface; +use Drupal\Core\Url; +use Drupal\format_strawberryfield\Event\FormatStrawberryfieldFormAlterEvent; +use Drupal\format_strawberryfield\FormatStrawberryfieldEventType; + /** * Implements hook_help(). */ @@ -24,3 +31,102 @@ function ami_help($route_name, RouteMatchInterface $route_match) { } } } + +function ami_form_metadatadisplay_entity_edit_form_alter(&$form,FormStateInterface $form_state, $form_id) { + // Implements hook_form_FORM_ID_alter() for metadatadisplay_entity_edit_form. + // Adds AMI Set based preview controls next to the ADO based ones. + $form['preview']['entity_type'] = [ + '#type' => 'radios', + '#weight' => -10, + '#title' => t('Entity type'), + '#description' => t('The Entity Type you want to preview.'), + '#options' => [ + 'ado' => t('Archipelago Digital Objects'), + 'ami' => t('AMI Sets'), + ], + '#default_value' => $form_state->getValue('entity_type', NULL) ?? 'ado' + ]; + + + $form['preview']['ado_context_preview']['#states'] = [ + 'visible' => [ + ':input[name="entity_type"]' => ['value' => 'ado'], + ], + ]; + + $form['preview']['ado_amiset_preview'] = [ + '#type' => 'entity_autocomplete', + '#weight' => -9, + '#title' => t('Ami Set to preview'), + '#description' => t('The AMI Set to be used to preview the data.'), + '#target_type' => 'ami_set_entity', + '#maxlength' => 1024, + '#ajax' => [ + 'callback' => '\Drupal\ami\Controller\AmiRowAutocompleteHandler::rowAjaxCallback', + 'event' => 'autocompleteclose change', + ], + '#states' => [ + 'visible' => [':input[name="entity_type"]' => ['value' => 'ami']], + ], + ]; + + $form['preview']['ado_amiset_row_context_preview'] = [ + '#type' => 'textfield', + '#weight' => -8, + '#title' => t('Row to preview'), + '#states' => [ + 'visible' => [ + 'input[name="entity_type"]' => ['value' => 'ami'], + 'input[name="ado_amiset_preview"]' => ['filled' => true], + ], + ], + ]; + $ami_set = $form_state->getValue('ado_amiset_preview', NULL); + if (is_scalar($ami_set)) { + 
$form['preview']['ado_amiset_row_context_preview']['#autocomplete_route_name'] = 'ami.rowsbylabel.autocomplete'; + $form['preview']['ado_amiset_row_context_preview']['#autocomplete_route_parameters'] = [ + 'ami_set_entity' => $ami_set + ]; + } + + $form['preview']['button_preview'][ + '#states'] = [ + 'visible' => [ + 'input[name="ado_context_preview"]' => ['filled' => true], + 'input[name="entity_type"]' => ['value' => 'ado'], + ], + ]; + + $form['preview']['button_preview_amiset'] = [ + '#type' => 'button', + '#op' => 'preview', + '#weight' => -7, + '#value' => t('Show preview for AMI Set'), + '#ajax' => [ + 'callback' => '\Drupal\ami\Controller\AmiRowAutocompleteHandler::ajaxPreviewAmiSet', + ], + '#states' => [ + 'visible' => [ + 'input[name="ado_amiset_preview"]' => ['filled' => true], + 'input[name="entity_type"]' => ['value' => 'ami'] + ], + ], + ]; + + $form['preview']['render_native'] = [ + '#type' => 'checkbox', + '#weight' => 10, + '#default_value' => FALSE, + '#title' => t('Show Preview using native Output Format (e.g HTML)'), + '#states' => [ + 'visible' => [ + ['input[name="ado_context_preview"]' => ['filled' => true]], + 'or', + ['input[name="ado_amiset_preview"]' => ['filled' => true], + 'input[name="ado_amiset_row_context_preview"]' => ['filled' => true]] + ], + ], + ]; + + return $form; +} \ No newline at end of file diff --git a/ami.routing.yml b/ami.routing.yml index 11be50e..ea754e7 100644 --- a/ami.routing.yml +++ b/ami.routing.yml @@ -100,4 +100,18 @@ entity.ami_set_entity.reconcileedit_form: _entity_form: ami_set_entity.editreconcile _title: 'Clean Reconciled LoD' requirements: - _entity_access: 'ami_set_entity.process' \ No newline at end of file + _entity_access: 'ami_set_entity.process' + +ami.rowsbylabel.autocomplete: + path: '/admin/amiset/autocomplete/{ami_set_entity}/rowsbylabel' + options: + parameters: + ami_set_entity: + type: 'entity:ami_set_entity' + defaults: + _controller: 
'\Drupal\ami\Controller\AmiRowAutocompleteHandler::handleAutocomplete' + _format: json + requirements: + _entity_access: 'ami_set_entity.view' + _permission: 'access content' + _csrf_token: 'TRUE' \ No newline at end of file diff --git a/src/Controller/AmiRowAutocompleteHandler.php b/src/Controller/AmiRowAutocompleteHandler.php new file mode 100644 index 0000000..3cc205b --- /dev/null +++ b/src/Controller/AmiRowAutocompleteHandler.php @@ -0,0 +1,348 @@ +currentUser = $current_user; + $this->entityTypeManager = $entity_type_manager; + $this->AmiUtilityService = $ami_utility; + + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container) { + return new static( + $container->get('current_user'), + $container->get('ami.utility'), + $container->get('entity_type.manager') + ); + } + /** + * Handler for AMI Set CSV autocomplete request. + * + * Filters against Labels + * + */ + public function handleAutocomplete(Request $request, amiSetEntity $ami_set_entity) { + $results = []; + $input = $request->query->get('q'); + + // Get the typed string from the URL, if it exists. + if (!$input) { + return new JsonResponse($results); + } + + + if (!$ami_set_entity) { + return new JsonResponse($results); + } + $input = Xss::filter($input); + $csv_file_reference = $ami_set_entity->get('source_data')->getValue(); + if (isset($csv_file_reference[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file */ + $file = $this->entityTypeManager->getStorage('file')->load( + $csv_file_reference[0]['target_id'] + ); + $data = new \stdClass(); + foreach ($ami_set_entity->get('set') as $item) { + /** @var \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem $item */ + $data = $item->provideDecoded(FALSE); + } + $label_column = $data->adomapping->base->label ?? 'label'; + $uuid_column = $data->adomapping->uuid->uuid ?? 
'node_uuid'; + $file_data_all = $this->AmiUtilityService->csv_read($file, 0, 0, TRUE); + $column_keys = $file_data_all['headers'] ?? []; + $label_original_index = array_search($label_column, $column_keys); + $uuid_original_index = array_search($uuid_column, $column_keys); + $i = 0; + if ($label_original_index !== FALSE) { + foreach ($file_data_all['data'] as $id => &$row) { + if (isset($row[$label_original_index]) && stripos($row[$label_original_index], $input) === 0) { + $i++; + $label = [ + $row[$label_original_index], + '(' . $id . ')', + $row[$uuid_original_index] ?? 'NO UUID Assigned', + ]; + + $results[] = [ + 'value' => $id, + 'label' => implode(' ', $label), + ]; + if ($i == 10) { + break; + } + } + } + } + } + + return new JsonResponse($results); + } + + /** + * AJAX callback. + */ + public static function ajaxPreviewAmiSet($form, FormStateInterface $form_state) { + $response = new AjaxResponse(); + + /** @var \Drupal\format_strawberryfield\MetadataDisplayInterface $entity */ + $entity = $form_state->getFormObject()->getEntity(); + + // Attach the library necessary for using the OpenOffCanvasDialogCommand and + // set the attachments for this Ajax response. + $form['#attached']['library'][] = 'core/drupal.dialog.off_canvas'; + $form['#attached']['library'][] = 'codemirror_editor/editor'; + $response->setAttachments($form['#attached']); + $row = $form_state->getValues()['ado_amiset_row_context_preview'] ?? 1; + $row = (int) $row; + if (!empty($form_state->getValues()['ado_amiset_preview'])) { + $form_state->setValue('ado_amiset_row_context_preview', $row); + $id = $form_state->getValues()['ado_amiset_preview'] ?? NULL; + $id = $id ?? 
EntityAutocomplete::extractEntityIdFromAutocompleteInput($form_state->getUserInput()['ado_amiset_preview']); + $form_state->setValue('ado_amiset_preview', $id); + /** @var \Drupal\node\NodeInterface $preview_node */ + $preview_ami_set = \Drupal::entityTypeManager() + ->getStorage('ami_set_entity') + ->load($id); + if (empty($preview_ami_set)) { + return $response; + } + // Now get the row, if not passed we will get the first because we are + // weird. + error_log(PHP_VERSION_ID); + $csv_file_reference = $preview_ami_set->get('source_data')->getValue(); + if (isset($csv_file_reference[0]['target_id'])) { + /** @var \Drupal\file\Entity\File $file */ + $file = \Drupal::entityTypeManager()->getStorage('file')->load( + $csv_file_reference[0]['target_id'] + ); + if (PHP_VERSION_ID < 80000) { + //@TODO fgetcsv has a bug when called after a seek, offsets on 1 always. + // We are trying to skip the header too (but get it) + $row = $row - 2; + } + $file_data_all = \Drupal::service('ami.utility') + ->csv_read($file, $row, 1, TRUE); + $jsondata = array_combine($file_data_all['headers'], reset($file_data_all['data'])); + $jsondata = \Drupal::service('ami.utility')->expandJson($jsondata); + // Check if render native is requested and get mimetype + $mimetype = $form_state->getValue('mimetype'); + $mimetype = !empty($mimetype) ? $mimetype[0]['value'] : 'text/html'; + $show_render_native = $form_state->getValue('render_native'); + + // Set initial context. 
+ $context = [ + 'node' => NULL, + 'iiif_server' => \Drupal::service('config.factory') + ->get('format_strawberryfield.iiif_settings') + ->get('pub_server_url'), + ]; + + $context['data'] = $jsondata; + + $output = []; + $output['json'] = [ + '#type' => 'details', + '#title' => t('JSON Data'), + '#open' => FALSE, + ]; + $output['json']['data'] = [ + '#type' => 'codemirror', + '#rows' => 60, + '#value' => json_encode($context['data'], JSON_PRETTY_PRINT), + '#codemirror' => [ + 'lineNumbers' => FALSE, + 'toolbar' => FALSE, + 'readOnly' => TRUE, + 'mode' => 'application/json', + ], + ]; + + // Try to Ensure we're using the twig from user's input instead of the entity's + // default. + try { + $input = $form_state->getUserInput(); + $entity->set('twig', $input['twig'][0], FALSE); + $render = $entity->renderNative($context); + if ($show_render_native) { + $message = ''; + switch ($mimetype) { + case 'application/ld+json': + case 'application/json': + json_decode((string) $render); + if (JSON_ERROR_NONE !== json_last_error()) { + throw new \Exception( + 'Error parsing JSON: ' . 
json_last_error_msg(), + 0, + NULL + ); + } + break; + case 'text/html': + libxml_use_internal_errors(TRUE); + $dom = new \DOMDocument('1.0', 'UTF-8'); + if ($dom->loadHTML((string) $render)) { + if ($error = libxml_get_last_error()) { + libxml_clear_errors(); + $message = $error->message; + } + break; + } + else { + throw new \Exception( + 'Error parsing HTML', + 0, + NULL + ); + } + case 'application/xml': + libxml_use_internal_errors(TRUE); + try { + libxml_clear_errors(); + $dom = new \SimpleXMLElement((string) $render); + if ($error = libxml_get_last_error()) { + $message = $error->message; + } + } catch (\Exception $e) { + throw new \Exception( + "Error parsing XML: {$e->getMessage()}", + 0, + NULL + ); + } + break; + } + } + if (!$show_render_native || ($show_render_native && $mimetype != 'text/html')) { + $output['preview'] = [ + '#type' => 'codemirror', + '#rows' => 60, + '#value' => $render, + '#codemirror' => [ + 'lineNumbers' => FALSE, + 'toolbar' => FALSE, + 'readOnly' => TRUE, + 'mode' => $mimetype, + ], + ]; + } + else { + $output['preview'] = [ + '#type' => 'details', + '#open' => TRUE, + '#title' => 'HTML Output', + 'messages' => [ + '#markup' => $message, + '#attributes' => [ + 'class' => ['error'], + ], + ], + 'render' => [ + '#markup' => $render, + ], + ]; + } + } catch (\Exception $exception) { + // Make the Message easier to read for the end user + if ($exception instanceof TwigError) { + $message = $exception->getRawMessage() . ' at line ' . $exception->getTemplateLine(); + } + else { + $message = $exception->getMessage(); + } + + $output['preview'] = [ + '#type' => 'details', + '#open' => TRUE, + '#title' => t('Syntax error'), + 'error' => [ + '#markup' => $message, + ] + ]; + } + $response->addCommand(new OpenOffCanvasDialogCommand(t('Preview'), + $output, ['width' => '50%'])); + } + } + // Always refresh the Preview Element too. 
+ $form['preview']['#open'] = TRUE; + $response->addCommand(new ReplaceCommand('#metadata-preview-container', $form['preview'])); + \Drupal::messenger()->deleteByType(MessengerInterface::TYPE_STATUS); + if ($form_state->getErrors()) { + // Clear errors so the user does not get confused when reloading. + \Drupal::messenger()->deleteByType(MessengerInterface::TYPE_ERROR); + $form_state->clearErrors(); + } + $form_state->setRebuild(TRUE); + return $response; + } + + /** + * AJAX callback. + */ + public static function rowAjaxCallback($form, FormStateInterface $form_state) { + $response = new AjaxResponse(); + $id = $form_state->getValues()['ado_amiset_preview'] ?? NULL; + $id = $id ?? EntityAutocomplete::extractEntityIdFromAutocompleteInput($form_state->getUserInput()['ado_amiset_preview']); + $form['preview']['#open'] = TRUE; + if ($id) { + $form['preview']['ado_amiset_row_context_preview']['#autocomplete_route_parameters'] = ['ami_set_entity' => $id]; + } + //$form_state->getUserInput()['ado_amiset_preview'] == name (id) + //$form_state->getValues()['ado_amiset_preview'] == id + \Drupal::messenger()->deleteByType(MessengerInterface::TYPE_STATUS); + $response->addCommand(new ReplaceCommand('#metadata-preview-container', $form['preview'])); + return $response; + } + +} \ No newline at end of file diff --git a/src/Form/amiSetEntityReconcileCleanUpForm.php b/src/Form/amiSetEntityReconcileCleanUpForm.php index 7c1c953..1e2b18e 100644 --- a/src/Form/amiSetEntityReconcileCleanUpForm.php +++ b/src/Form/amiSetEntityReconcileCleanUpForm.php @@ -175,6 +175,12 @@ public function buildForm(array $form, FormStateInterface $form_state) { $pager = \Drupal::service('pager.manager')->createPager($total_rows, $num_per_page); $page = $pager->getCurrentPage(); $offset = $num_per_page * $page; + if (PHP_VERSION_ID > 80000) { + // @TODO fgetcsv has a bug when called after a seek, offsets on 1 always. 
+ // We are trying to skip the header too (but get it) + $offset = $offset + 2; + // @TODO CHECK IF THIS WILL WORK ON PHP 8.x when we get there. + } $file_data_all = $this->AmiUtilityService->csv_read($file_lod, $offset, $num_per_page); $column_keys = $file_data_all['headers'] ?? []; From 7faea54ecbfe2dbaa49bf9bccb237001eab7d273 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 15 Nov 2021 14:50:32 -0500 Subject: [PATCH 37/42] In case the key is not present or the set is "type" less default to thing --- src/AmiUtilityService.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 635e0a5..8bf9ffc 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -1457,7 +1457,7 @@ public function preprocessAmiSet(File $file, \stdClass $data, array &$invalid = // Each row will be an object. $ado = []; $ado['type'] = trim( - $row[$data->mapping->type_key] + $row[$data->mapping->type_key] ?? 'Thing' ); // Lets start by grouping by parents, namespaces and generate uuids // namespaces are inherited, so we just need to find collection From 7edc6f8615aec63e8df13a8d3d4cd06c2eae4c47 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 16 Nov 2021 13:52:49 -0500 Subject: [PATCH 38/42] Fix remote CSV load Need to check now CSV write! 
--- src/Plugin/ImporterAdapter/SpreadsheetImporter.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Plugin/ImporterAdapter/SpreadsheetImporter.php b/src/Plugin/ImporterAdapter/SpreadsheetImporter.php index 8a71656..752d9e4 100644 --- a/src/Plugin/ImporterAdapter/SpreadsheetImporter.php +++ b/src/Plugin/ImporterAdapter/SpreadsheetImporter.php @@ -93,7 +93,7 @@ public function getData(array $config, $page = 0, $per_page = 20): array { /* @var File $file */ $file = $this->entityTypeManager->getStorage('file') ->load($config['file'][0]); - $file_path = $this->streamWrapperManager->getViaUri($file->getFileUri())->realpath(); + $file_path = $this->streamWrapperManager->getViaUri($file->getFileUri())->getUri(); $offset = $page * $per_page; $tabdata = ['headers' => [], 'data' => [], 'totalrows' => 0]; @@ -103,7 +103,7 @@ public function getData(array $config, $page = 0, $per_page = 20): array { $objReader = IOFactory::createReader($inputFileType); $objReader->setReadDataOnly(TRUE); $objPHPExcel = $objReader->load($file_path); - } catch (Exception $e) { + } catch (\Exception $e) { $this->messenger()->addMessage( t( 'Could not parse file with error: @error', From 88ba765ebc1fa1dc45774c914dc15af2d4b8f9fd Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 16 Nov 2021 15:47:58 -0500 Subject: [PATCH 39/42] Allows CSV to work from S3 too --- src/AmiUtilityService.php | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 8bf9ffc..37941ea 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -809,8 +809,12 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { */ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', bool $append_header = TRUE) { - $realpath = $this->fileSystem->realpath($file->getFileUri()); - $fh = new \SplFileObject($realpath, 'a'); + $wrapper = 
$this->streamWrapperManager->getViaUri($file->getFileUri()); + if (!$wrapper) { + return NULL; + } + $url = $wrapper->getUri(); + $fh = new \SplFileObject($url, 'a'); if (!$fh) { $this->messenger()->addError( $this->t('Error reading the CSV file!.') @@ -852,7 +856,7 @@ public function csv_append(array $data, File $file, $uuid_key = 'node_uuid', boo $fh->fputcsv($row); } // PHP Bug! This should happen automatically - clearstatcache(TRUE, $realpath); + clearstatcache(TRUE, $url); $size = $fh->getSize(); // This is how you close a \SplFileObject $fh = NULL; @@ -885,7 +889,7 @@ public function csv_read(File $file, int $offset = 0, int $count = 0, bool $alwa return NULL; } - $url = $wrapper->realpath(); + $url = $wrapper->getUri(); $spl = new \SplFileObject($url, 'r'); if ($offset > 0) { // We only set this flags when an offset is present. @@ -984,7 +988,7 @@ public function csv_clean(File $file, array $headerwithdata = []) { if (!$wrapper) { return NULL; } - $url = $wrapper->realpath(); + $url = $wrapper->getUri(); // New temp file for the output $path = 'public://ami/csv'; $filenametemp = $this->currentUser->id() . '-' . uniqid() . '_clean.csv'; @@ -1051,7 +1055,7 @@ public function csv_count(File $file) { return NULL; } - $url = $wrapper->realpath(); + $url = $wrapper->getUri(); $spl = new \SplFileObject($url, 'r'); $spl->setFlags( SplFileObject::READ_CSV | From 2166ce4ba8717ffb6d956846a8a1ea4635e5130b Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 16 Nov 2021 15:53:55 -0500 Subject: [PATCH 40/42] Another one. 
ZIP loading from S3 still needed --- src/AmiUtilityService.php | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index 37941ea..b5cb753 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -729,8 +729,12 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { ); return NULL; } - $realpath = $this->fileSystem->realpath($file->getFileUri()); - $fh = new SplFileObject($realpath, 'w'); + $wrapper = $this->streamWrapperManager->getViaUri($file->getFileUri()); + if (!$wrapper) { + return NULL; + } + $url = $wrapper->getUri(); + $fh = new SplFileObject($url, 'w'); if (!$fh) { $this->messenger()->addError( $this->t('Error reading back the just written file!.') @@ -767,7 +771,7 @@ public function csv_save(array $data, $uuid_key = 'node_uuid') { $fh->fputcsv($row); } // PHP Bug! This should happen automatically - clearstatcache(TRUE, $realpath); + clearstatcache(TRUE, $url); $size = $fh->getSize(); // This is how you close a \SplFileObject $fh = NULL; From d800b3e1957bf6a77c662da799c9edce88086c87 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 16 Nov 2021 21:47:53 -0500 Subject: [PATCH 41/42] C'mon ZipArchive WHY! You can not stream from remote I have been willing to write the first PHP remote streamer library from forever and i never have the time. So for now, for the people that decide to put ALLL on S3 we have to download the file.. no other option. So we inject the StrawberryfieldFileMetadataService to reuse the ::ensureFileAvailability function. But we can not delete the file here since we may need it for the rest of our times. 
--- ami.services.yml | 2 +- src/AmiUtilityService.php | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/ami.services.yml b/ami.services.yml index a77e291..1dd7925 100644 --- a/ami.services.yml +++ b/ami.services.yml @@ -5,7 +5,7 @@ services: arguments: ['@entity_type.manager'] ami.utility: class: Drupal\ami\AmiUtilityService - arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@entity_field.manager', '@entity_type.bundle.info', '@http_client', '@ami.lod', '@keyvalue'] + arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory', '@current_user', '@language_manager', '@transliteration', '@module_handler', '@logger.factory', '@strawberryfield.utility', '@entity_field.manager', '@entity_type.bundle.info', '@http_client', '@ami.lod', '@keyvalue', '@strawberryfield.file_metadata_extractor'] tags: - { name: backend_overridable } ami.lod: diff --git a/src/AmiUtilityService.php b/src/AmiUtilityService.php index b5cb753..97e7bb1 100644 --- a/src/AmiUtilityService.php +++ b/src/AmiUtilityService.php @@ -27,6 +27,7 @@ use Drupal\Core\StringTranslation\StringTranslationTrait; use Drupal\file\Entity\File; use Drupal\file\FileUsage\FileUsageInterface; +use Drupal\strawberryfield\StrawberryfieldFileMetadataService; use Drupal\strawberryfield\Tools\StrawberryfieldJsonHelper; use GuzzleHttp\ClientInterface; use Drupal\strawberryfield\StrawberryfieldUtilityService; @@ -163,6 +164,13 @@ class AmiUtilityService { */ protected $AmiLoDService; + /** + * The Strawberry Field File Metadata Service. 
+ * + * @var \Drupal\strawberryfield\StrawberryfieldFileMetadataService + */ + protected $strawberryfieldFileMetadataService; + /** * StrawberryfieldFilePersisterService constructor. * @@ -182,6 +190,7 @@ class AmiUtilityService { * @param \Drupal\Core\Entity\EntityTypeBundleInfoInterface $entity_type_bundle_info * @param \GuzzleHttp\ClientInterface $http_client * @param \Drupal\Core\KeyValueStore\KeyValueFactoryInterface $key_value + * @param \Drupal\strawberryfield\StrawberryfieldFileMetadataService $strawberryfield_file_metadata_service */ public function __construct( FileSystemInterface $file_system, @@ -200,7 +209,8 @@ public function __construct( EntityTypeBundleInfoInterface $entity_type_bundle_info, ClientInterface $http_client, AmiLoDService $ami_lod, - KeyValueFactoryInterface $key_value + KeyValueFactoryInterface $key_value, + StrawberryfieldFileMetadataService $strawberryfield_file_metadata_service ) { $this->fileSystem = $file_system; $this->fileUsage = $file_usage; @@ -225,6 +235,7 @@ public function __construct( $this->httpClient = $http_client; $this->AmiLoDService = $ami_lod; $this->keyValue = $key_value; + $this->strawberryfieldFileMetadataService = $strawberryfield_file_metadata_service; } @@ -562,6 +573,13 @@ public function retrieve_fromzip_file($uri, $destination = NULL, $replace = File try { $realpath = $this->fileSystem->realpath($path); $zip_realpath = $this->fileSystem->realpath($zip_file->getFileUri()); + // Means Mr. Zip is in S3 or who knows where + // And ZipArchive (Why!!) can not stream from remote + // @TODO write once for all a remote ZIP file streamer DIEGO + if (!$zip_realpath) { + // This will add a delay once... 
+ $zip_realpath = $this->strawberryfieldFileMetadataService->ensureFileAvailability($zip_file, NULL); + } $z = new \ZipArchive(); $contents = NULL; if ($z->open($zip_realpath)) { From 2b92b72431a918ed55fe60f5ccbd132528f71999 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 17 Nov 2021 19:16:54 -0500 Subject: [PATCH 42/42] Webform based Search and Replace is working! But.. VBO actions is not respecting the Facet selection when using the "Select all" option... We should make sure that is clear? Documentation? Eventually patch VBO or, alter the form and remove Select All completely? --- src/Form/amiSetEntityProcessForm.php | 2 +- .../Action/AmiStrawberryfieldJsonAsText.php | 8 + .../AmiStrawberryfieldJsonAsWebform.php | 211 +++++++++++------- .../QueueWorker/IngestADOQueueWorker.php | 1 + 4 files changed, 137 insertions(+), 85 deletions(-) diff --git a/src/Form/amiSetEntityProcessForm.php b/src/Form/amiSetEntityProcessForm.php index 1f426a5..d5e492b 100644 --- a/src/Form/amiSetEntityProcessForm.php +++ b/src/Form/amiSetEntityProcessForm.php @@ -319,7 +319,7 @@ public function submitBatch(FormStateInterface $form_state, $queue_name) { 'title' => $this->t('Batch processing your Set'), 'operations' => [], 'finished' => ['\Drupal\ami\AmiBatchQueue', 'finish'], - 'progress_message' => t('Processing Set @current of @total.'), + 'progress_message' => t('Processing Set @current of @total. 
Estimated time left: @estimate, elapsed: @elapsed.'), ]; $batch['operations'][] = [ '\Drupal\ami\AmiBatchQueue::takeOne', diff --git a/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php b/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php index 94f5e77..0be776f 100644 --- a/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php +++ b/src/Plugin/Action/AmiStrawberryfieldJsonAsText.php @@ -209,6 +209,14 @@ public function execute($entity = NULL) { ]); if (!$this->configuration['simulate']) { + if ($entity->getEntityType()->isRevisionable()) { + // Forces a New Revision for Not-create Operations. + $entity->setNewRevision(TRUE); + $entity->setRevisionCreationTime(\Drupal::time()->getRequestTime()); + // Set data for the revision + $entity->setRevisionLogMessage('ADO modified via Json as Text Search And Replace with search token:' . $this->configuration['jsonfind'] .' and replace token:' .$this->configuration['jsonreplace']); + $entity->setRevisionUserId($this->currentUser->id()); + } $entity->save(); } } diff --git a/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php b/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php index 580d12c..6740c23 100644 --- a/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php +++ b/src/Plugin/Action/AmiStrawberryfieldJsonAsWebform.php @@ -23,7 +23,7 @@ * * @Action( * id = "entity:ami_jsonwebform_action", - * action_label = @Translation("Webform based find and replace Metadata for Archipelago Digital Objects"), + * action_label = @Translation("Webform find-and-replace Metadata for Archipelago Digital Objects"), * category = @Translation("AMI Metadata"), * deriver = "Drupal\ami\Plugin\Action\Derivative\EntitySbfActionDeriver", * type = "node", @@ -57,6 +57,7 @@ public function buildPreConfigurationForm(array $element, array $values, FormSta } public function buildConfigurationForm(array $form, FormStateInterface $form_state) { + $form_state->setAlwaysProcess(TRUE); $webform = $this->AmiUtilityService->getWebforms(); 
$form_state->disableCache(); $form['#tree'] = TRUE; @@ -91,14 +92,25 @@ public function buildConfigurationForm(array $form, FormStateInterface $form_sta foreach($form_state->getStorage() as $prop => $value) { $form_state->set($prop, $value); } - if (!empty($form_state->getValues()) && !empty($form_state->getValue('webform'))) { + $webform_id = $form_state->getValue('webform'); + if (!$webform_id) { + $input = $form_state->getUserInput(); + $webform_id = $input['webform'] ?? NULL; + } + + if ($webform_id) { /* @var \Drupal\webform\Entity\Webform $webform_entity */ - $webform_entity = $this->entityTypeManager->getStorage('webform')->load($form_state->getValue('webform')); - $anyelement = $webform_entity->getElementsInitializedAndFlattened(); + $webform_entity = $this->entityTypeManager->getStorage('webform')->load($webform_id); + $anyelement = $webform_entity->getElementsInitializedAndFlattened('update'); foreach ($anyelement as $elementkey => $element) { $element_plugin = $this->webformElementManager->getElementInstance($element); - if (($element_plugin->getTypeName() != 'webform_wizard_page') && !($element_plugin instanceof WebformManagedFileBase) && $element_plugin->isInput($element)) { + if (($element_plugin->getTypeName() != 'webform_wizard_page') && + !($element_plugin instanceof WebformManagedFileBase) && + ($element_plugin->getTypeName() != 'webform_metadata_nominatim') && + ($element_plugin->getTypeName() != 'webform_metadata_multiagent') && + ($element_plugin->getTypeName() != 'webform_metadata_panoramatour') && + $element_plugin->isInput($element)) { $webform_element_options[$elementkey] = ($element['#title'] ?? ' Unamed ') . 
$this->t('(@elementkey JSON key )',[ '@elementkey' => $elementkey ]); @@ -120,9 +132,14 @@ public function buildConfigurationForm(array $form, FormStateInterface $form_sta ), ]; $chosen_element = $form_state->getValue(['webform_elements','elements_for_this_form'], NULL); + if (!$chosen_element) { + $input = $form_state->getUserInput(); + $chosen_element = $input['webform_elements']['elements_for_this_form'] ?? NULL; + } + if ($webform_entity && $chosen_element) { - //$myelement1 = $webform_entity->getElementsDecodedAndFlattened(); - $myelement = $webform_entity->getElementDecoded($form_state->getValue(['webform_elements','elements_for_this_form'])); + $myelement = $webform_entity->getElementDecoded($chosen_element); + //$myelement2 = \Drupal::service('plugin.manager.webform.element')->processElements($myelement); $libraries = $webform_entity->getSubmissionForm()['#attached']['library'] ?? []; $form['#attached']['library'] = ($form['#attached']['library'] ?? []) + $libraries; @@ -132,34 +149,26 @@ public function buildConfigurationForm(array $form, FormStateInterface $form_sta $cleanelement[$key] = $value; } } - //$cleanelement['#element_validate'][] = [$this,'elementDynamicValidate']; $cleanelement['#required'] = FALSE; $cleanelement['#validated'] = FALSE; - //$cleanelement['#default_value'] = NULL; - $form['elements_rendered']['jsonfind_element']= $cleanelement; - $form['elements_rendered']['jsonfind_element']['#title'] = $this->t('Value to Search for in @elementkey JSON key', [ '@elementkey' => $chosen_element]); - $form['elements_rendered']['jsonreplace_element']= $cleanelement; - $form['elements_rendered']['jsonreplace_element']['#title'] = $this->t('Value to replace with in @elementkey JSON key', [ '@elementkey' => $chosen_element]); + $cleanelement['#default_value'] = $form_state->getValue('jsonfind_element', NULL); + $form['jsonfind_element']= $cleanelement; + + $form['jsonfind_element']['#title'] = $this->t('Value to Search for in @elementkey JSON key', [ 
'@elementkey' => $chosen_element]); + $form['jsonreplace_element']['#title'] = $this->t('Value to replace with in @elementkey JSON key', [ '@elementkey' => $chosen_element]); + $form['jsonreplace_element']= $cleanelement; + $form['jsonreplace_element']['#name'] = 'jsonreplace_element'; + $form['jsonfind_element']['#name'] = 'jsonfind_element'; + $form['jsonfind_element']['#default_value'] = $form_state->getValue('jsonreplace_element', NULL); } + $form['simulate'] = [ '#title' => $this->t('only simulate and debug affected JSON'), '#type' => 'checkbox', '#default_value' => ($this->configuration['simulate'] === FALSE) ? FALSE : TRUE, ]; - $form['actions']['submit']['#ajax'] = [ - 'callback' => 'configureActionAjaxCallback', - ]; - return $form; - } - - public function elementDynamicValidate(&$element, FormStateInterface $form_state) { - $form_state->set('holi','chao'); - } - - public function configureActionAjaxCallback(array $form, FormStateInterface $form_state) { - //$form_state->setRebuild(TRUE); return $form; } @@ -167,17 +176,23 @@ public function webformAjaxCallback(array $form, FormStateInterface $form_state) return $form['webform_elements']; } - public function webformElementAjaxCallback(array $form, FormStateInterface $form_state) { - return $form['elements_rendered']; + $element['elements_rendered'] = [ + '#tree' => TRUE, + '#type' => 'fieldset', + '#prefix' => '
    ', + '#suffix' => '
    ', + ]; + $element['elements_rendered']['jsonfind_element'] = $form['jsonfind_element']; + $element['elements_rendered']['jsonreplace_element'] = $form['jsonreplace_element']; + return $element; } public function submitConfigurationForm(array &$form, FormStateInterface $form_state) { // Hacky but its the way we can do this dynamically - $jsonfind = $form_state->getUserInput()['elements_rendered']['jsonfind_element'] ?? []; - $jsonreplace = $form_state->getUserInput()['elements_rendered']['jsonreplace_element'] ?? []; + $jsonfind = $form_state->getValue('jsonfind_element', NULL) ?? ($form_state->getUserInput()['jsonfind_element'] ?? []); + $jsonreplace = $form_state->getValue('jsonreplace_element', NULL) ?? ($form_state->getUserInput()['jsonreplace_element'] ?? []); $chosen_element = $form_state->getValue(['webform_elements','elements_for_this_form'], NULL); - // $form_state->setRebuild(TRUE); if ($chosen_element) { $jsonfind_ready[$chosen_element] = $jsonfind; $jsonreplace_ready[$chosen_element] = $jsonreplace; @@ -185,6 +200,9 @@ public function submitConfigurationForm(array &$form, FormStateInterface $form_s $this->configuration['jsonreplace'] = json_encode($jsonreplace_ready) ?? '{}'; $this->configuration['simulate'] = $form_state->getValue('simulate'); } + else { + $form_state->setRebuild(TRUE); + } } /** @@ -209,6 +227,7 @@ public function execute($entity = NULL) { /** @var $itemfield \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem */ $main_prop = $itemfield->mainPropertyName(); $fullvaluesoriginal = $itemfield->provideDecoded(TRUE); + $fullvaluesmodified = $fullvaluesoriginal; $count = 0; $fullvaluesjson = []; // This is how it goes. 
@@ -221,41 +240,43 @@ public function execute($entity = NULL) { // - If jsonreplace is empty, we delete the original // - If not we replace the found one $decoded_jsonfind = json_decode($this->configuration['jsonfind'], TRUE); + $decoded_jsonreplace = json_decode($this->configuration['jsonreplace'], TRUE); $key = reset(array_keys($decoded_jsonfind)); if ($key) { - if (!empty($fullvaluesoriginal[$key])) { - $isAssociativeOriginal = StrawberryfieldJsonHelper::arrayIsMultiSimple($fullvaluesoriginal[$key]); - if (!$isAssociative) { - foreach($fullvaluesoriginal[$key] as &$item) { - if ($item == $decoded_jsonfind[$key]) { - // Exact Array to Array 1:1 match - $item = $decoded_jsonfind[$key]; - $patched = TRUE; - } - } + $isAssociativeOriginal = FALSE; + if (!empty($fullvaluesmodified[$key])) { + if (is_array($fullvaluesmodified[$key])) { + $isAssociativeOriginal = StrawberryfieldJsonHelper::arrayIsMultiSimple($fullvaluesmodified[$key]); + } + // If none are arrays we treat them like objects 1:1 comparisson. + if (!is_array($fullvaluesmodified[$key]) && !is_array($decoded_jsonfind[$key])) { + $isAssociativeOriginal = TRUE; } - else { - // Means we have a single Object not a list in the source. - if ($fullvaluesoriginal[$key] == $decoded_jsonfind[$key]) { - $fullvaluesoriginal[$key] = $decoded_jsonfind[$key]; + } + if (!$isAssociativeOriginal) { + foreach($fullvaluesmodified[$key] as &$item) { + if ($item == $decoded_jsonfind[$key]) { + // Exact Array to Array 1:1 match + $item = $decoded_jsonreplace[$key]; $patched = TRUE; } } } + else { + // Means we have a single Object not a list in the source. 
+ if ($fullvaluesmodified[$key] == $decoded_jsonfind[$key]) { + $fullvaluesmodified[$key] = $decoded_jsonreplace[$key]; + $patched = TRUE; + } + } } - - - - - // Now try to decode fullvalues - $fullvaluesjson = json_decode($fullvalues, TRUE, 50); - $json_error = json_last_error(); + $fullvaluesmodified_string = json_encode($fullvaluesmodified); + $fullvaluesoriginal_string = json_encode($fullvaluesoriginal); if ($json_error != JSON_ERROR_NONE) { - $visualjsondiff = new Diff(explode(PHP_EOL,$stringvalues), explode(PHP_EOL,$fullvalues)); + $visualjsondiff = new Diff(explode(PHP_EOL, $fullvaluesmodified_string), explode(PHP_EOL,$fullvaluesoriginal_string)); $formatter = new DiffFormatter(); $output = $formatter->format($visualjsondiff); - //$this->messenger()->addMessage($output); $this->messenger()->addError( $this->t( 'We could not safely find and replace metadata for @entity. Your result after the replacement may not be a valid JSON.', @@ -263,25 +284,28 @@ public function execute($entity = NULL) { '@entity' => $entity->label() ] )); - $this->messenger()->addMessage($output); + $this->messenger()->addError($output); return $patched; } try { if ($this->configuration['simulate']) { $this->messenger()->addMessage('In simulation Mode'); - if ($fullvalues == $stringvalues) { + if ($fullvaluesoriginal_string == $fullvaluesmodified_string) { $patched = FALSE; $this->messenger()->addStatus($this->t( - 'No Match for @entity, so skipping', + 'No Match for search:@jsonsearch and replace:@jsonreplace on @entity, so skipping', [ - '@entity' => $entity->label() + '@entity' => $entity->label(), + '@jsonsearch' => '
    '.$this->configuration['jsonfind'].'
    ', + '@jsonreplace' => '
    '.$this->configuration['jsonreplace'].'
    ', + ] )); return $patched; } $r = new JsonDiff( $fullvaluesoriginal, - $fullvaluesjson, + $fullvaluesmodified, JsonDiff::REARRANGE_ARRAYS + JsonDiff::SKIP_JSON_MERGE_PATCH + JsonDiff::COLLECT_MODIFIED_DIFF ); // We just keep track of the changes. If none! Then we do not set @@ -292,38 +316,41 @@ public function execute($entity = NULL) { ['@label' => $entity->label()]); $this->messenger()->addMessage($message); - /*$modified_diff = $r->getModifiedDiff(); + $modified_diff = $r->getModifiedDiff(); foreach ($modified_diff as $modifiedPathDiff) { $this->messenger()->addMessage($modifiedPathDiff->path); $this->messenger()->addMessage($modifiedPathDiff->original); $this->messenger()->addMessage($modifiedPathDiff->new); - }*/ - - } else { - if ($fullvalues == $stringvalues) { + } + } + else { + if ($fullvaluesoriginal_string == $fullvaluesmodified_string) { $patched = FALSE; $this->messenger()->addStatus($this->t( - 'No change for @entity, so skipping', + 'No change for @entity, skipping.', [ '@entity' => $entity->label() ] )); return $patched; } - $patched = TRUE; - if (!$itemfield->setMainValueFromArray((array) $fullvaluesjson)) { - $this->messenger()->addError( - $this->t( - 'We could not persist the metadata for @entity. Your result after the replacement may not be a valid JSON. Please contact your Site Admin.', - [ - '@entity' => $entity->label() - ] - ) - ); - $patched = FALSE; - }; + + if ($patched) { + if (!$itemfield->setMainValueFromArray((array) $fullvaluesmodified)) { + $this->messenger()->addError( + $this->t( + 'We could not persist the metadata for @entity. Your result after the replacement may not be a valid JSON. 
Please contact your Site Admin.', + [ + '@entity' => $entity->label() + ] + ) + ); + $patched = FALSE; + }; + } } - } catch (JsonDiffException $exception) { + } + catch (JsonDiffException $exception) { $patched = FALSE; $this->messenger()->addWarning( $this->t( @@ -333,23 +360,39 @@ public function execute($entity = NULL) { ] ) ); + return $patched; } } } if ($patched) { - $this->logger->notice('%label had the following find: @jsonsearch and replace:@jsonreplace applied', [ - '%label' => $entity->label(), - '@jsonsearch' => '
    '.$this->configuration['jsonfind'].'
    ', - '@jsonreplace' => '
    '.$this->configuration['jsonreplace'].'
    ', - - ]); if (!$this->configuration['simulate']) { + // In case after saving the Label changes we keep the original one here + // For reporting/messaging + $label = $entity->label(); + $this->logger->notice('%label had the following find: @jsonsearch and replace:@jsonreplace applied', [ + '%label' => $label, + '@jsonsearch' => '
    '.$this->configuration['jsonfind'].'
    ', + '@jsonreplace' => '
    '.$this->configuration['jsonreplace'].'
    ', + ]); + if ($entity->getEntityType()->isRevisionable()) { + // Forces a New Revision for Not-create Operations. + $entity->setNewRevision(TRUE); + $entity->setRevisionCreationTime(\Drupal::time()->getRequestTime()); + // Set data for the revision + $entity->setRevisionLogMessage('ADO modified via Webform Search And Replace with search token:' . $this->configuration['jsonfind'] .' and replace token:' .$this->configuration['jsonreplace']); + $entity->setRevisionUserId($this->currentUser->id()); + } $entity->save(); + $link = $entity->toUrl()->toString(); + $this->messenger()->addStatus($this->t('ADO %title was successfully patched.',[ + ':link' => $link, + '%title' => $label, + ])); } } - return $patched; } } + return $patched; } diff --git a/src/Plugin/QueueWorker/IngestADOQueueWorker.php b/src/Plugin/QueueWorker/IngestADOQueueWorker.php index f66511b..5b57813 100644 --- a/src/Plugin/QueueWorker/IngestADOQueueWorker.php +++ b/src/Plugin/QueueWorker/IngestADOQueueWorker.php @@ -607,6 +607,7 @@ private function persistEntity(\stdClass $data, array $processed_metadata) { if ($node->getEntityType()->isRevisionable()) { // Forces a New Revision for Not-create Operations. $node->setNewRevision(TRUE); + $node->setRevisionCreationTime(\Drupal::time()->getRequestTime()); // Set data for the revision $node->setRevisionLogMessage('ADO modified via AMI Set ' . $data->info['set_id']); $node->setRevisionUserId($data->info['uid']);