diff --git a/src/Console/AddRecordCommand.php b/src/Console/AddRecordCommand.php index 47f92a7..95137fa 100644 --- a/src/Console/AddRecordCommand.php +++ b/src/Console/AddRecordCommand.php @@ -40,6 +40,14 @@ )] class AddRecordCommand extends Command { + /** + * Executes the current command. + * + * @param InputInterface $input The input + * @param OutputInterface $output The output + * + * @return int 0 if everything went fine, or an error code + */ protected function execute(InputInterface $input, OutputInterface $output): int { Database::getInstance()->pruneOrphanSets(); diff --git a/src/Console/CsvImportCommand.php b/src/Console/CsvImportCommand.php index 311a388..23a89b7 100644 --- a/src/Console/CsvImportCommand.php +++ b/src/Console/CsvImportCommand.php @@ -24,8 +24,10 @@ use DateTime; use OCC\OaiPmh2\Database; +use OCC\OaiPmh2\Database\Format; use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Command\Command; +use Symfony\Component\Console\Helper\ProgressIndicator; use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputOption; @@ -43,6 +45,11 @@ )] class CsvImportCommand extends Command { + /** + * Configures the current command. + * + * @return void + */ protected function configure(): void { $this->addArgument( @@ -61,28 +68,28 @@ function (): array { ); $this->addOption( 'idColumn', - null, + 'i', InputOption::VALUE_OPTIONAL, 'Name of the CSV column which holds the records\' identifier.', 'identifier' ); $this->addOption( 'contentColumn', - null, + 'c', InputOption::VALUE_OPTIONAL, 'Name of the CSV column which holds the records\' content.', 'content' ); $this->addOption( 'dateColumn', - null, + 'd', InputOption::VALUE_OPTIONAL, 'Name of the CSV column which holds the records\' datetime of last change.', 'lastChanged' ); $this->addOption( 'setColumn', - null, + 's', InputOption::VALUE_OPTIONAL, 'Name of the CSV column which holds the records\' sets list.', 'sets' @@ -90,77 +97,66 @@ function (): array { parent::configure(); } + /** + * Executes the current command. + * + * @param InputInterface $input The input + * @param OutputInterface $output The output + * + * @return int 0 if everything went fine, or an error code + */ protected function execute(InputInterface $input, OutputInterface $output): int { - /** @var array */ - $arguments = $input->getArguments(); - /** @var array */ - $options = $input->getOptions(); - - $formats = Database::getInstance()->getMetadataFormats()->getQueryResult(); - if (!in_array($arguments['format'], array_keys($formats), true)) { - // Error: Invalid metadata prefix - echo 1; + if (!$this->validateInput($input, $output)) { return Command::INVALID; } + $memoryLimit = $this->getMemoryLimit(); + /** @var array */ + $arguments = $input->getArguments(); + /** @var Format */ + $format = Database::getInstance()->getEntityManager()->getReference(Format::class, $arguments['format']); + /** @var resource */ $file = fopen($arguments['file'], 'r'); - if ($file === false) { - // Error: File not found or not readable - echo 2; - return Command::INVALID; - } - $headers = fgetcsv($file); - if (!is_array($headers)) { - // Error: No CSV - echo 3; - return Command::INVALID; - } else { - $headers = array_flip($headers); - } - - $column = []; - foreach ($options as $option => $value) { - if (isset($headers[$value])) { - $column[$option] = $headers[$value]; - } - } - if (!isset($column['idColumn']) || !isset($column['contentColumn'])) { - // Error: Required columns missing - echo 4; + $columns = $this->getColumnNames($input, $output, $file); + if (count($columns) === 0) { return Command::INVALID; } - $lastChanged = new DateTime(); $count = 0; + $progressIndicator = new ProgressIndicator($output, 'verbose', 200, ['⠏', '⠛', '⠹', '⢸', '⣰', '⣤', '⣆', '⡇']); + $progressIndicator->start('Importing...'); + while ($record = fgetcsv($file)) { - $identifier = $record[$column['idColumn']]; - $content = $record[$column['contentColumn']]; - if ($content === '') { - $content = null; - } - if (isset($column['dateColumn'])) { - $lastChanged = new DateTime($record[$column['dateColumn']]); - } - // TODO: Complete support for sets. - $sets = null; Database::getInstance()->addOrUpdateRecord( - $identifier, - $arguments['format'], - $content, - $lastChanged, - $sets, + $record[$columns['idColumn']], + $format, + trim($record[$columns['contentColumn']]), + new DateTime($record[$columns['dateColumn']] ?? 'now'), + // TODO: Complete support for sets. + /* $record[$columns['setColumn']] ?? */ null, true ); + ++$count; - if ($count % 500 === 0) { + $progressIndicator->advance(); + $progressIndicator->setMessage((string) $count . ' done.'); + + // Flush to database if memory usage reaches 90% of available limit. + if (memory_get_usage() / $memoryLimit > 0.9) { Database::getInstance()->flush(true); + /** @var Format */ + $format = Database::getInstance()->getEntityManager()->getReference(Format::class, $arguments['format']); } } Database::getInstance()->flush(true); Database::getInstance()->pruneOrphanSets(); + $progressIndicator->finish('All done!'); + + fclose($file); + $output->writeln([ '', sprintf( @@ -172,4 +168,117 @@ protected function execute(InputInterface $input, OutputInterface $output): int ]); return Command::SUCCESS; } + + /** + * Get the column names of CSV. + * + * @param InputInterface $input The inputs + * @param OutputInterface $output The output interface + * @param resource $file The handle for the CSV file + * + * @return array The mapped column names + */ + protected function getColumnNames(InputInterface $input, OutputInterface $output, $file): array + { + /** @var array */ + $options = $input->getOptions(); + + $columns = []; + + $headers = fgetcsv($file); + if (!is_array($headers)) { + $output->writeln([ + '', + sprintf( + ' [ERROR] File "%s" does not contain valid CSV. ', + stream_get_meta_data($file)['uri'] + ), + '' + ]); + return []; + } else { + $headers = array_flip($headers); + } + foreach ($options as $option => $value) { + if (isset($headers[$value])) { + $columns[$option] = $headers[$value]; + } + } + + if (!isset($columns['idColumn']) || !isset($columns['contentColumn'])) { + $output->writeln([ + '', + sprintf( + ' [ERROR] File "%s" does not contain valid CSV. ', + stream_get_meta_data($file)['uri'] + ), + '' + ]); + return []; + } + return $columns; + } + + /** + * Get the PHP memory limit in bytes. + * + * @return int The memory limit in bytes or -1 if unlimited + */ + protected function getMemoryLimit(): int + { + $ini = trim(ini_get('memory_limit')); + $limit = (int) $ini; + $unit = strtolower($ini[strlen($ini)-1]); + switch($unit) { + case 'g': + $limit *= 1024; + case 'm': + $limit *= 1024; + case 'k': + $limit *= 1024; + } + if ($limit < 0) { + return -1; + } + return $limit; + } + + /** + * Validate input arguments. + * + * @param InputInterface $input The inputs + * @param OutputInterface $output The output interface + * + * @return bool Whether the inputs validate + */ + protected function validateInput(InputInterface $input, OutputInterface $output): bool + { + /** @var array */ + $arguments = $input->getArguments(); + + $formats = Database::getInstance()->getMetadataFormats()->getQueryResult(); + if (!in_array($arguments['format'], array_keys($formats), true)) { + $output->writeln([ + '', + sprintf( + ' [ERROR] Metadata format "%s" is not supported. ', + $arguments['format'] + ), + '' + ]); + return false; + } + if (!is_readable($arguments['file'])) { + $output->writeln([ + '', + sprintf( + ' [ERROR] File "%s" not found or not readable. ', + $arguments['file'] + ), + '' + ]); + return false; + } + return true; + } } diff --git a/src/Console/DeleteRecordCommand.php b/src/Console/DeleteRecordCommand.php index 01c3b34..5ce84f8 100644 --- a/src/Console/DeleteRecordCommand.php +++ b/src/Console/DeleteRecordCommand.php @@ -41,6 +41,14 @@ )] class DeleteRecordCommand extends Command { + /** + * Executes the current command. + * + * @param InputInterface $input The input + * @param OutputInterface $output The output + * + * @return int 0 if everything went fine, or an error code + */ protected function execute(InputInterface $input, OutputInterface $output): int { $policy = Configuration::getInstance()->deletedRecords; diff --git a/src/Console/PruneRecordsCommand.php b/src/Console/PruneRecordsCommand.php index 826b598..4db7ebb 100644 --- a/src/Console/PruneRecordsCommand.php +++ b/src/Console/PruneRecordsCommand.php @@ -42,17 +42,30 @@ )] class PruneRecordsCommand extends Command { + /** + * Configures the current command. + * + * @return void + */ protected function configure(): void { $this->addOption( 'force', - null, + 'f', InputOption::VALUE_NONE, 'Deletes records even under "transient" policy.' ); parent::configure(); } + /** + * Executes the current command. + * + * @param InputInterface $input The input + * @param OutputInterface $output The output + * + * @return int 0 if everything went fine, or an error code + */ protected function execute(InputInterface $input, OutputInterface $output): int { $policy = Configuration::getInstance()->deletedRecords; diff --git a/src/Console/PruneResumptionTokensCommand.php b/src/Console/PruneResumptionTokensCommand.php index 24a9c4e..12cbb7b 100644 --- a/src/Console/PruneResumptionTokensCommand.php +++ b/src/Console/PruneResumptionTokensCommand.php @@ -40,6 +40,14 @@ )] class PruneResumptionTokensCommand extends Command { + /** + * Executes the current command. + * + * @param InputInterface $input The input + * @param OutputInterface $output The output + * + * @return int 0 if everything went fine, or an error code + */ protected function execute(InputInterface $input, OutputInterface $output): int { $expired = Database::getInstance()->pruneResumptionTokens(); diff --git a/src/Console/UpdateFormatsCommand.php b/src/Console/UpdateFormatsCommand.php index a484102..9629e95 100644 --- a/src/Console/UpdateFormatsCommand.php +++ b/src/Console/UpdateFormatsCommand.php @@ -45,6 +45,14 @@ )] class UpdateFormatsCommand extends Command { + /** + * Executes the current command. + * + * @param InputInterface $input The input + * @param OutputInterface $output The output + * + * @return int 0 if everything went fine, or an error code + */ protected function execute(InputInterface $input, OutputInterface $output): int { $formats = Configuration::getInstance()->metadataPrefix; diff --git a/src/Database.php b/src/Database.php index e56a73f..7c961bd 100644 --- a/src/Database.php +++ b/src/Database.php @@ -106,7 +106,7 @@ public function addOrUpdateMetadataFormat(string $prefix, string $namespace, str * Add or update record. * * @param string $identifier The record identifier - * @param Format|string $format The metadata prefix + * @param Format $format The metadata prefix * @param ?string $data The record's content * @param ?DateTime $lastChanged The date of last change * @param ?array $sets The record's associated sets @@ -116,7 +116,7 @@ public function addOrUpdateMetadataFormat(string $prefix, string $namespace, str */ public function addOrUpdateRecord( string $identifier, - Format|string $format, + Format $format, ?string $data = null, ?DateTime $lastChanged = null, // TODO: Complete support for sets @@ -124,10 +124,6 @@ public function addOrUpdateRecord( bool $bulkMode = false ): void { - if (!$format instanceof Format) { - /** @var Format */ - $format = $this->entityManager->getReference(Format::class, $format); - } $record = $this->entityManager->find(Record::class, ['identifier' => $identifier, 'format' => $format]); if (!isset($data) && Configuration::getInstance()->deletedRecords === 'no') { if (isset($record)) { @@ -257,7 +253,7 @@ public function getRecord(string $identifier, string $metadataPrefix): ?Record * Get list of records. * * @param string $verb The currently requested verb ('ListIdentifiers' or 'ListRecords') - * @param string $metadataPrefix The metadata prefix + * @param Format $metadataPrefix The metadata format * @param int $counter Counter for split result sets * @param ?string $from The "from" datestamp * @param ?string $until The "until" datestamp @@ -267,7 +263,7 @@ public function getRecord(string $identifier, string $metadataPrefix): ?Record */ public function getRecords( string $verb, - string $metadataPrefix, + Format $metadataPrefix, int $counter = 0, ?string $from = null, ?string $until = null, @@ -305,7 +301,7 @@ public function getRecords( $token = new Token($verb, [ 'counter' => $counter + 1, 'completeListSize' => count($paginator), - 'metadataPrefix' => $metadataPrefix, + 'metadataPrefix' => $metadataPrefix->getPrefix(), 'from' => $from, 'until' => $until, 'set' => $set diff --git a/src/Database/Format.php b/src/Database/Format.php index d4d88a3..0f988e5 100644 --- a/src/Database/Format.php +++ b/src/Database/Format.php @@ -22,8 +22,6 @@ namespace OCC\OaiPmh2\Database; -use Doctrine\Common\Collections\ArrayCollection; -use Doctrine\Common\Collections\Collection; use Doctrine\ORM\Mapping as ORM; use Symfony\Component\Validator\Constraints as Assert; use Symfony\Component\Validator\Exception\ValidationFailedException; @@ -58,28 +56,6 @@ class Format #[ORM\Column(type: 'string')] private string $xmlSchema; - /** - * Collection of associated records. - * - * @var Collection - */ - #[ORM\OneToMany(targetEntity: Record::class, mappedBy: 'format', fetch: 'EXTRA_LAZY', cascade: ['persist'], orphanRemoval: true)] - private Collection $records; - - /** - * Update bi-directional association with records. - * - * @param Record $record The record to add to this format - * - * @return void - */ - public function addRecord(Record $record): void - { - if (!$this->records->contains($record)) { - $this->records->add($record); - } - } - /** * Get the format's namespace URI. * @@ -100,16 +76,6 @@ public function getPrefix(): string return $this->prefix; } - /** - * Get a collection of associated records. - * - * @return Collection The associated records - */ - public function getRecords(): Collection - { - return $this->records; - } - /** * Get the format's schema URL. * @@ -221,7 +187,6 @@ public function __construct(string $prefix, string $namespace, string $schema) $this->prefix = $this->validatePrefix($prefix); $this->setNamespace($namespace); $this->setSchema($schema); - $this->records = new ArrayCollection(); } catch (ValidationFailedException $exception) { throw $exception; } diff --git a/src/Database/Record.php b/src/Database/Record.php index e09ed20..2b5bdc2 100644 --- a/src/Database/Record.php +++ b/src/Database/Record.php @@ -51,7 +51,7 @@ class Record * The associated format. */ #[ORM\Id] - #[ORM\ManyToOne(targetEntity: Format::class, inversedBy: 'records', cascade: ['persist'])] + #[ORM\ManyToOne(targetEntity: Format::class, inversedBy: 'records')] #[ORM\JoinColumn(name: 'format', referencedColumnName: 'prefix')] private Format $format; @@ -206,7 +206,6 @@ public function setContent(?string $data = null, bool $validate = true): void protected function setFormat(Format $format): void { $this->format = $format; - $format->addRecord($this); } /** diff --git a/src/Middleware/ListIdentifiers.php b/src/Middleware/ListIdentifiers.php index 904f122..bc1af21 100644 --- a/src/Middleware/ListIdentifiers.php +++ b/src/Middleware/ListIdentifiers.php @@ -70,8 +70,8 @@ protected function prepareResponse(ServerRequestInterface $request): void } } } - $prefixes = Database::getInstance()->getMetadataFormats(); - if (!in_array($metadataPrefix, array_keys($prefixes->getQueryResult()), true)) { + $prefixes = Database::getInstance()->getMetadataFormats()->getQueryResult(); + if (!in_array($metadataPrefix, array_keys($prefixes), true)) { ErrorHandler::getInstance()->withError('cannotDisseminateFormat'); return; } @@ -83,7 +83,7 @@ protected function prepareResponse(ServerRequestInterface $request): void } } - $records = Database::getInstance()->getRecords($verb, $metadataPrefix, $counter, $from, $until, $set); + $records = Database::getInstance()->getRecords($verb, $prefixes[$metadataPrefix], $counter, $from, $until, $set); if (count($records) === 0) { ErrorHandler::getInstance()->withError('noRecordsMatch'); return;