Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for in-place restores. #1094

Open
wants to merge 15 commits into
base: 4.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.Date;
import java.util.List;
import javax.inject.Inject;

Expand Down Expand Up @@ -149,14 +148,6 @@ public void parsePartialPrefix(String remoteFilePath) {
token = remotePath.getName(3).toString();
}

@Override
public String remotePrefix(Date start, Date end, String location) {
return PATH_JOINER.join(
clusterPrefix(location),
instanceIdentity.getInstance().getToken(),
match(start, end));
}

@Override
public Path remoteV2Prefix(Path location, BackupFileType fileType) {
if (location.getNameCount() <= 1) {
Expand Down
40 changes: 0 additions & 40 deletions priam/src/main/java/com/netflix/priam/backup/AbstractBackup.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,7 @@
import com.netflix.priam.utils.SystemUtils;
import java.io.File;
import java.io.FileFilter;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import javax.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -95,40 +89,6 @@ protected String getKeyspace(File backupDir) {
*/
protected abstract void processColumnFamily(File backupDir) throws Exception;

/**
* Get all the backup directories for Cassandra.
*
* @param config to get the location of the data folder.
* @param monitoringFolder folder where cassandra backup's are configured.
* @return Set of the path(s) containing the backup folder for each columnfamily.
* @throws Exception incase of IOException.
*/
public static Set<Path> getBackupDirectories(IConfiguration config, String monitoringFolder)
throws Exception {
HashSet<Path> backupPaths = new HashSet<>();
if (config.getDataFileLocation() == null) return backupPaths;
Path dataPath = Paths.get(config.getDataFileLocation());
if (Files.exists(dataPath) && Files.isDirectory(dataPath))
try (DirectoryStream<Path> directoryStream =
Files.newDirectoryStream(dataPath, path -> Files.isDirectory(path))) {
for (Path keyspaceDirPath : directoryStream) {
try (DirectoryStream<Path> keyspaceStream =
Files.newDirectoryStream(
keyspaceDirPath, path -> Files.isDirectory(path))) {
for (Path columnfamilyDirPath : keyspaceStream) {
Path backupDirPath =
Paths.get(columnfamilyDirPath.toString(), monitoringFolder);
if (Files.exists(backupDirPath) && Files.isDirectory(backupDirPath)) {
logger.debug("Backup folder: {}", backupDirPath);
backupPaths.add(backupDirPath);
}
}
}
}
}
return backupPaths;
}

protected static File[] getSecondaryIndexDirectories(File backupDir) {
FileFilter filter = (file) -> file.getName().startsWith(".") && isAReadableDirectory(file);
return Optional.ofNullable(backupDir.listFiles(filter)).orElse(new File[] {});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ public abstract class AbstractBackupPath implements Comparable<AbstractBackupPat
ImmutableMap.of(BackupFolder.BACKUPS, 3, BackupFolder.SNAPSHOTS, 4);

public enum BackupFileType {
CL,
META_V2,
SECONDARY_INDEX_V2,
SNAPSHOT_VERIFIED,
Expand Down Expand Up @@ -147,15 +146,13 @@ protected String match(Date start, Date end) {
/** Local restore file */
public File newRestoreFile() {
File return_;
String dataDir = config.getDataFileLocation();
String dataDir = config.getRestoreDataLocation();
switch (type) {
case CL:
return_ = new File(PATH_JOINER.join(config.getBackupCommitLogLocation(), fileName));
break;
case SECONDARY_INDEX_V2:
String restoreFileName =
PATH_JOINER.join(dataDir, keyspace, columnFamily, indexDir, fileName);
return_ = new File(restoreFileName);
return_ =
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While we are in here, consider renaming this variable, its not idiomatic java

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

new File(
PATH_JOINER.join(
dataDir, keyspace, columnFamily, indexDir, fileName));
break;
case META_V2:
return_ = new File(PATH_JOINER.join(config.getDataFileLocation(), fileName));
Expand Down Expand Up @@ -188,11 +185,6 @@ public boolean equals(Object obj) {
/** Parses paths with just token prefixes */
public abstract void parsePartialPrefix(String remoteFilePath);

/**
* Provides a common prefix that matches all objects that fall between the start and end time
*/
public abstract String remotePrefix(Date start, Date end, String location);

public abstract Path remoteV2Prefix(Path location, BackupFileType fileType);

/** Provides the cluster prefix */
Expand Down
50 changes: 36 additions & 14 deletions priam/src/main/java/com/netflix/priam/backup/BackupRestoreUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@
package com.netflix.priam.backup;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.netflix.priam.backupv2.IMetaProxy;
import com.netflix.priam.utils.DateUtil;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -83,19 +87,6 @@ public static List<AbstractBackupPath> getMostRecentSnapshotPaths(
return snapshotPaths;
}

public static List<AbstractBackupPath> getIncrementalPaths(
AbstractBackupPath latestValidMetaFile,
DateUtil.DateRange dateRange,
IMetaProxy metaProxy) {
Instant snapshotTime;
snapshotTime = latestValidMetaFile.getLastModified();
DateUtil.DateRange incrementalDateRange =
new DateUtil.DateRange(snapshotTime, dateRange.getEndTime());
List<AbstractBackupPath> incrementalPaths = new ArrayList<>();
metaProxy.getIncrementals(incrementalDateRange).forEachRemaining(incrementalPaths::add);
return incrementalPaths;
}

public static Map<String, List<String>> getFilter(String inputFilter)
throws IllegalArgumentException {
if (StringUtils.isEmpty(inputFilter)) return null;
Expand Down Expand Up @@ -144,4 +135,35 @@ public final boolean isFiltered(String keyspace, String columnFamilyDir) {
|| includeFilter.get(keyspace).contains(columnFamilyName)));
return false;
}

/**
* Get all the backup directories for Cassandra.
*
* @param dataDirectory the location of the data folder.
* @param monitoringFolder folder where cassandra backup's are configured.
* @return Set of the path(s) containing the backup folder for each columnfamily.
* @throws IOException
*/
public static ImmutableSet<Path> getBackupDirectories(
String dataDirectory, String monitoringFolder) throws IOException {
ImmutableSet.Builder<Path> backupPaths = ImmutableSet.builder();
Path dataPath = Paths.get(dataDirectory);
if (Files.exists(dataPath) && Files.isDirectory(dataPath))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

try (DirectoryStream<Path> directoryStream =
Files.newDirectoryStream(dataPath, Files::isDirectory)) {
for (Path keyspaceDirPath : directoryStream) {
try (DirectoryStream<Path> keyspaceStream =
Files.newDirectoryStream(keyspaceDirPath, Files::isDirectory)) {
for (Path columnfamilyDirPath : keyspaceStream) {
Path backupDirPath =
Paths.get(columnfamilyDirPath.toString(), monitoringFolder);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe you can use columnFamilyDirPath.resolve(monitoringFolder) here instead of converting back and forth between string and path. Reference: https://docs.oracle.com/javase/8/docs/api/index.html?java/nio/file/Path.html

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

if (Files.exists(backupDirPath) && Files.isDirectory(backupDirPath)) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment here that the File.exists call is redundant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

backupPaths.add(backupDirPath);
}
}
}
}
}
return backupPaths.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -209,18 +209,7 @@ public List<BackupMetadata> getLatestBackupMetadata(DateUtil.DateRange dateRange
.stream()
.filter(Objects::nonNull)
.filter(backupMetadata -> backupMetadata.getStatus() == Status.FINISHED)
.filter(
backupMetadata ->
backupMetadata
.getStart()
.toInstant()
.compareTo(dateRange.getStartTime())
>= 0
&& backupMetadata
.getStart()
.toInstant()
.compareTo(dateRange.getEndTime())
<= 0)
.filter(backupMetadata -> dateRange.contains(backupMetadata.getStart().toInstant()))
.sorted(Comparator.comparing(BackupMetadata::getStart).reversed())
.collect(Collectors.toList());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ public static TaskTimer getTimer(

private static void cleanOldBackups(IConfiguration configuration) throws Exception {
Set<Path> backupPaths =
AbstractBackup.getBackupDirectories(configuration, INCREMENTAL_BACKUP_FOLDER);
BackupRestoreUtil.getBackupDirectories(
configuration.getDataFileLocation(), INCREMENTAL_BACKUP_FOLDER);
for (Path backupDirPath : backupPaths) {
FileUtils.cleanDirectory(backupDirPath.toFile());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,7 @@ public BackupTTLTask(

@Override
public void execute() throws Exception {
if (instanceState.getRestoreStatus() != null
&& instanceState.getRestoreStatus().getStatus() != null
&& instanceState.getRestoreStatus().getStatus() == Status.STARTED) {
if (instanceState.isRestoring()) {
logger.info("Not executing the TTL Task for backups as Priam is in restore mode.");
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,7 @@ public void execute() throws Exception {
return;
}

if (instanceState.getRestoreStatus() != null
&& instanceState.getRestoreStatus().getStatus() != null
&& instanceState.getRestoreStatus().getStatus() == Status.STARTED) {
if (instanceState.isRestoring()) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this cleanup 🙌

logger.info("Skipping backup verification. Priam is in restore mode.");
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

package com.netflix.priam.backupv2;

import com.google.common.collect.ImmutableList;
import com.netflix.priam.backup.AbstractBackupPath;
import com.netflix.priam.backup.BackupRestoreException;
import com.netflix.priam.backup.BackupVerificationResult;
import com.netflix.priam.utils.DateUtil;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.List;

/** Proxy to do management tasks for meta files. Created by aagrawal on 12/18/18. */
Expand Down Expand Up @@ -78,7 +78,7 @@ public interface IMetaProxy {
* @param dateRange the time period to scan in the remote file system for incremental files.
* @return iterator containing the list of path on the remote file system satisfying criteria.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update javadoc if you are going to keep ImmutableList

*/
Iterator<AbstractBackupPath> getIncrementals(DateUtil.DateRange dateRange);
ImmutableList<AbstractBackupPath> getIncrementals(DateUtil.DateRange dateRange);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mostly curious: why this change? the iterator gives us flexibility that the data could be "streamed" where as this locks us in to determining all the incrementals up front. Likely not an issue but something I noticed.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the implementation we have converted from something that would get called on each iteration to something that gets called all during the execution of this method. Not sure thats a problem or not but wanted to call it out.


/**
* Validate that all the files mentioned in the meta file actually exists on remote file system.
Expand Down
58 changes: 24 additions & 34 deletions priam/src/main/java/com/netflix/priam/backupv2/MetaV2Proxy.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package com.netflix.priam.backupv2;

import com.google.common.collect.ImmutableList;
import com.netflix.priam.backup.AbstractBackupPath;
import com.netflix.priam.backup.BackupRestoreException;
import com.netflix.priam.backup.BackupVerificationResult;
Expand All @@ -31,8 +32,6 @@
import java.util.*;
import javax.inject.Inject;
import javax.inject.Provider;
import org.apache.commons.collections4.iterators.FilterIterator;
import org.apache.commons.collections4.iterators.TransformIterator;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.IOFileFilter;
Expand Down Expand Up @@ -84,40 +83,34 @@ private String getMatch(
}

@Override
public Iterator<AbstractBackupPath> getIncrementals(DateUtil.DateRange dateRange) {
String incrementalPrefix = getMatch(dateRange, AbstractBackupPath.BackupFileType.SST_V2);
String marker =
getMatch(
new DateUtil.DateRange(dateRange.getStartTime(), null),
AbstractBackupPath.BackupFileType.SST_V2);
public ImmutableList<AbstractBackupPath> getIncrementals(DateUtil.DateRange dateRange) {
return new ImmutableList.Builder<AbstractBackupPath>()
.addAll(getIncrementals(dateRange, AbstractBackupPath.BackupFileType.SST_V2))
.addAll(
getIncrementals(
dateRange, AbstractBackupPath.BackupFileType.SECONDARY_INDEX_V2))
.build();
}

private ImmutableList<AbstractBackupPath> getIncrementals(
DateUtil.DateRange dateRange, AbstractBackupPath.BackupFileType type) {
String incrementalPrefix = getMatch(dateRange, type);
String marker = getMatch(new DateUtil.DateRange(dateRange.getStartTime(), null), type);
logger.info(
"Listing filesystem with prefix: {}, marker: {}, daterange: {}",
incrementalPrefix,
marker,
dateRange);
Iterator<String> iterator = fs.listFileSystem(incrementalPrefix, null, marker);
Iterator<AbstractBackupPath> transformIterator =
new TransformIterator<>(
iterator,
s -> {
AbstractBackupPath path = abstractBackupPathProvider.get();
path.parseRemote(s);
return path;
});

return new FilterIterator<>(
transformIterator,
abstractBackupPath ->
(abstractBackupPath.getLastModified().isAfter(dateRange.getStartTime())
&& abstractBackupPath
.getLastModified()
.isBefore(dateRange.getEndTime()))
|| abstractBackupPath
.getLastModified()
.equals(dateRange.getStartTime())
|| abstractBackupPath
.getLastModified()
.equals(dateRange.getEndTime()));
ImmutableList.Builder<AbstractBackupPath> results = ImmutableList.builder();
while (iterator.hasNext()) {
AbstractBackupPath path = abstractBackupPathProvider.get();
path.parseRemote(iterator.next());
if (dateRange.contains(path.getLastModified())) {
results.add(path);
}
}
return results.build();
}

@Override
Expand All @@ -136,10 +129,7 @@ public List<AbstractBackupPath> findMetaFiles(DateUtil.DateRange dateRange) {
AbstractBackupPath abstractBackupPath = abstractBackupPathProvider.get();
abstractBackupPath.parseRemote(iterator.next());
logger.debug("Meta file found: {}", abstractBackupPath);
if (abstractBackupPath.getLastModified().toEpochMilli()
>= dateRange.getStartTime().toEpochMilli()
&& abstractBackupPath.getLastModified().toEpochMilli()
<= dateRange.getEndTime().toEpochMilli()) {
if (dateRange.contains(abstractBackupPath.getLastModified())) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really like this sort of cleanup as well.

metas.add(abstractBackupPath);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ public static TaskTimer getTimer(IBackupRestoreConfig config) throws IllegalArgu

static void cleanOldBackups(IConfiguration config) throws Exception {
// Clean up all the backup directories, if any.
Set<Path> backupPaths = AbstractBackup.getBackupDirectories(config, SNAPSHOT_FOLDER);
Set<Path> backupPaths =
BackupRestoreUtil.getBackupDirectories(
config.getDataFileLocation(), SNAPSHOT_FOLDER);
for (Path backupDirPath : backupPaths)
try (DirectoryStream<Path> directoryStream =
Files.newDirectoryStream(backupDirPath, Files::isDirectory)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,6 @@ default int getBackupRetentionDays() {
return 0;
}

/** @return Get list of racs to backup. Backup all racs if empty */
default List<String> getBackupRacs() {
return Collections.EMPTY_LIST;
}

/**
* Backup location i.e. remote file system to upload backups. e.g. for S3 it will be s3 bucket
* name
Expand Down Expand Up @@ -403,6 +398,10 @@ default String getRestoreSnapshot() {
return StringUtils.EMPTY;
}

default String getRestoreDataLocation() {
return getCassandraBaseDirectory() + "/restore";
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To confirm my understanding on our systems this would be something like /mnt/data/cassandra/restore? If so, +1

}

/** @return Get the region to connect to SDB for instance identity */
default String getSDBInstanceIdentityRegion() {
return "us-east-1";
Expand Down
Loading
Loading