Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chunking package updates into batches of 100 #512

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,12 @@ the internal handling of Composer v1.x.

The first database population may easily take hours. Be patient.

0. `bin/console doctrine:migrations:migrate`: Ensure the database schema is up to date with the code.
1. `bin/console refresh`: Query the WordPress.org SVN in order to find new and updated packages.
2. `bin/console update`: Update the version information for packages identified in `2`. Uses the WordPress.org API.
3. `bin/console build`: Rebuild all `PackageStore` data.
1. `bin/console doctrine:migrations:migrate`: Ensure the database schema is up to date with the code.
2. `bin/console refresh`: Query the WordPress.org SVN in order to find new and updated packages.
3. `bin/console update`: Update the version information for packages identified in `(2)`. Uses the WordPress.org API.
4. `bin/console build`: Rebuild all `PackageStore` data.

Each of these can be run with the `-vvv` verbosity flag to give useful progress updates.

## Running locally with Docker

Expand Down
18 changes: 11 additions & 7 deletions src/Entity/PackageRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public function updateProviderGroups(): void
* being fetched or that are inactive but have been updated in the past 90 days
* and haven't been fetched in the past 7 days.
*
* @return Package[]
* @return array a two-element array: [0] the number of packages due an update, [1] an iterable over those packages
*/
public function findDueUpdate(): array
{
Expand All @@ -63,14 +63,18 @@ public function findDueUpdate(): array
OR (p.lastCommitted - p.lastFetched) > :twoHours
OR (p.isActive = false AND p.lastCommitted > :threeMonthsAgo AND p.lastFetched < :oneWeekAgo)
EOT;
$countDql = str_replace('SELECT p', 'SELECT COUNT(1)', $dql);
$dateFormat = $this->getEntityManager()->getConnection()->getDatabasePlatform()->getDateTimeFormatString();
$query = $entityManager->createQuery($dql)
// This seems to be how Doctrine wants its 'interval' type bound – not a DateInterval
->setParameter('twoHours', '2 hour')
->setParameter('threeMonthsAgo', (new \DateTime())->sub(new \DateInterval('P3M'))->format($dateFormat))
->setParameter('oneWeekAgo', (new \DateTime())->sub(new \DateInterval('P1W'))->format($dateFormat));
// This seems to be how Doctrine wants its 'interval' type bound – not a DateInterval
$parameters = [
'twoHours' => '2 hour',
'threeMonthsAgo' => (new \DateTime())->sub(new \DateInterval('P3M'))->format($dateFormat),
'oneWeekAgo' => (new \DateTime())->sub(new \DateInterval('P1W'))->format($dateFormat)
];
$query = $entityManager->createQuery($dql)->setParameters($parameters);
$countQuery = $entityManager->createQuery($countDql)->setParameters($parameters);

return $query->getResult();
return [$countQuery->getSingleScalarResult(), $query->toIterable()];
}

/**
Expand Down
153 changes: 87 additions & 66 deletions src/Service/Update.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public function __construct(EntityManagerInterface $entityManager)
public function updateAll(LoggerInterface $logger): void
{
$packages = $this->repo->findDueUpdate();
$this->update($logger, $packages);
$this->update($logger, $packages[1], $packages[0]);
}

/**
Expand Down Expand Up @@ -62,7 +62,7 @@ public function updateOne(LoggerInterface $logger, string $name, int $allowMoreT

if ($package) {
try {
$this->update($logger, [$package]);
$this->update($logger, [$package], 1);
} catch (UniqueConstraintViolationException $exception) {
if ($allowMoreTries > 0) {
return $this->updateOne($logger, $name, $allowMoreTries - 1);
Expand All @@ -78,24 +78,28 @@ public function updateOne(LoggerInterface $logger, string $name, int $allowMoreT

/**
* @param LoggerInterface $logger
* @param Package[] $packages
* @param iterable|Package[] $packages
* @param int $count
*/
protected function update(LoggerInterface $logger, array $packages): void
protected function update(LoggerInterface $logger, mixed $packages, int $count): void
{
$count = count($packages);
$versionParser = new VersionParser();

$wporgClient = WporgClient::getClient();

$logger->info("Updating {$count} packages");

foreach ($packages as $index => $package) {
$percent = $index / $count * 100;
$batchSize = 100;

$i = 0;
foreach ($packages as $package) {
$percent = ++$i / $count * 100;

$name = $package->getName();

$info = null;
$fields = ['versions'];
$deactivateReason = null;
try {
if ($package instanceof Plugin) {
$info = $wporgClient->getPlugin($name, $fields);
Expand All @@ -104,82 +108,99 @@ protected function update(LoggerInterface $logger, array $packages): void
}

$logger->info(sprintf("<info>%04.1f%%</info> Fetched %s %s", $percent, $package->getType(), $name));
if (empty($info)) {
$deactivateReason = 'not active';
}
} catch (CommandClientException $exception) {
$res = $exception->getResponse();
$this->deactivate($package, $res->getStatusCode() . ': ' . $res->getReasonPhrase(), $logger);
continue;
$deactivateReason = $res->getStatusCode() . ': ' . $res->getReasonPhrase();
} catch (GuzzleException $exception) {
$logger->warning("Skipped {$package->getType()} '{$name}' due to error: '{$exception->getMessage()}'");
}

if (empty($info)) {
// Plugin is not active
$this->deactivate($package, 'not active', $logger);

continue;
}

//get versions as [version => url]
$versions = $info['versions'] ?: [];

//current version of plugin not present in tags so add it
if (empty($versions[$info['version']])) {
$logger->info('Adding trunk psuedo-version for ' . $name);

//add to front of array
$versions = array_reverse($versions, true);
$versions[$info['version']] = 'trunk';
$versions = array_reverse($versions, true);
}

//all plugins have a dev-trunk version
if ($package instanceof Plugin) {
unset($versions['trunk']);
$versions['dev-trunk'] = 'trunk';
}
if ($info && !$deactivateReason) {
$versions = $this->extractVersions($package, $info, $versionParser, $logger);

foreach ($versions as $version => $url) {
try {
//make sure versions are parseable by Composer
$versionParser->normalize($version);
if ($package instanceof Theme) {
//themes have different SVN folder structure
$versions[$version] = $version;
} elseif ($url !== 'trunk') {
//add ref to SVN tag
$versions[$version] = 'tags/' . $version;
} // else do nothing, for 'trunk'.
} catch (\UnexpectedValueException $e) {
// Version is invalid – we've seen this e.g. with 5 numeric parts.
$logger->info(sprintf(
'Skipping invalid version %s for %s %s',
$version,
$package->getType(),
$name
));
unset($versions[$version]);
if ($versions) {
$package->setLastFetched(new \DateTime());
$package->setVersions($versions);
$package->setIsActive(true);
$package->setDisplayName($info['name']);
$this->entityManager->persist($package);
} else {
$deactivateReason = 'no versions found';
}
}

if ($versions) {
if ($deactivateReason) {
$package->setLastFetched(new \DateTime());
$package->setVersions($versions);
$package->setIsActive(true);
$package->setDisplayName($info['name']);
$package->setIsActive(false);
$this->entityManager->persist($package);
} else {
// Package is not active
$this->deactivate($package, 'no versions found', $logger);
$logger->info(sprintf("<info>Deactivated %s %s because %s</info>", $package->getType(), $package->getName(), $deactivateReason));
}

if (($i % $batchSize) === 0) {
$logger->info('---Persisting updated packages---');
$this->entityManager->flush();
$this->entityManager->clear();
}
}
$this->entityManager->flush();
}

private function deactivate(Package $package, string $reason, LoggerInterface $logger): void
/**
* @param Package $package
* @param array $info
* @param VersionParser $versionParser
* @param LoggerInterface $logger
* @return array|mixed
*/
protected function extractVersions($package, $info, $versionParser, $logger)
{
$package->setLastFetched(new \DateTime());
$package->setIsActive(false);
$this->entityManager->persist($package);
$logger->info(sprintf("<info>Deactivated %s %s because %s</info>", $package->getType(), $package->getName(), $reason));
$name = $package->getName();

//get versions as [version => url]
$versions = $info['versions'] ?: [];

//current version of plugin not present in tags so add it
if (empty($versions[$info['version']])) {
$logger->info('Adding trunk pseudo-version for ' . $name);

//add to front of array
$versions = array_reverse($versions, true);
$versions[$info['version']] = 'trunk';
$versions = array_reverse($versions, true);
}

//all plugins have a dev-trunk version
if ($package instanceof Plugin) {
unset($versions['trunk']);
$versions['dev-trunk'] = 'trunk';
}

foreach ($versions as $version => $url) {
try {
//make sure versions are parseable by Composer
$versionParser->normalize($version);
if ($package instanceof Theme) {
//themes have different SVN folder structure
$versions[$version] = $version;
} elseif ($url !== 'trunk') {
//add ref to SVN tag
$versions[$version] = 'tags/' . $version;
} // else do nothing, for 'trunk'.
} catch (\UnexpectedValueException $e) {
// Version is invalid – we've seen this e.g. with 5 numeric parts.
$logger->info(sprintf(
'Skipping invalid version %s for %s %s',
$version,
$package->getType(),
$name
));
unset($versions[$version]);
}
}

return $versions;
}
}