Shortcut recovery if we are on a shared FS — no need to compare files, etc.

This commit is contained in:
Simon Willnauer 2015-02-11 17:51:22 +01:00
parent 24d36c92dd
commit 2d42736fed
6 changed files with 33 additions and 19 deletions

View file

@ -158,6 +158,7 @@ public class IndexMetaData {
public static final String SETTING_NUMBER_OF_SHARDS = "index.number_of_shards";
public static final String SETTING_NUMBER_OF_REPLICAS = "index.number_of_replicas";
public static final String SETTING_SHADOW_REPLICAS = "index.shadow_replicas";
public static final String SETTING_SHARED_FILESYSTEM = "index.shared_filesystem";
public static final String SETTING_AUTO_EXPAND_REPLICAS = "index.auto_expand_replicas";
public static final String SETTING_READ_ONLY = "index.blocks.read_only";
public static final String SETTING_BLOCKS_READ = "index.blocks.read";
@ -785,4 +786,10 @@ public class IndexMetaData {
}
}
}
/**
 * Returns <code>true</code> iff the index described by the given settings resides on
 * a shared filesystem.
 * <p>
 * An index is considered to be on a shared filesystem if {@link #SETTING_SHARED_FILESYSTEM}
 * is explicitly set to <code>true</code>, or — when that setting is absent — if
 * {@link #SETTING_SHADOW_REPLICAS} is enabled, since shadow replicas require a shared
 * filesystem to operate.
 *
 * @param settings the index settings to inspect
 * @return <code>true</code> if the index uses a shared filesystem, <code>false</code> otherwise
 */
public static boolean usesSharedFilesystem(Settings settings) {
    // Fall back to the shadow-replicas flag when index.shared_filesystem is not set,
    // because enabling shadow replicas implies the data lives on a shared FS.
    return settings.getAsBoolean(SETTING_SHARED_FILESYSTEM, settings.getAsBoolean(SETTING_SHADOW_REPLICAS, false));
}
}

View file

@ -556,23 +556,18 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
metadataLock.writeLock().lock();
try {
final StoreDirectory dir = directory;
final boolean shadowReplicasInUse = indexSettings.getAsBoolean(IndexMetaData.SETTING_SHADOW_REPLICAS, false);
for (String existingFile : dir.listAll()) {
if (shadowReplicasInUse) {
logger.debug("skipping store cleanup of [{}] because shadow replicas are in use", existingFile);
continue;
}
// don't delete snapshot file, or the checksums file (note, this is extra protection since the Store won't delete checksum)
// we also don't want to delete IndexWriter's write.lock
// files, since it could be a shared filesystem
if (!sourceMetaData.contains(existingFile) && !Store.isChecksum(existingFile) && !Store.isEngineLock(existingFile)) {
try {
dir.deleteFile(reason, existingFile);
} catch (Exception e) {
// ignore, we don't really care, will get deleted later on
for (String existingFile : dir.listAll()) {
// don't delete snapshot file, or the checksums file (note, this is extra protection since the Store won't delete checksum)
// we also don't want to delete IndexWriter's write.lock
// files, since it could be a shared filesystem
if (!sourceMetaData.contains(existingFile) && !Store.isChecksum(existingFile) && !Store.isEngineLock(existingFile)) {
try {
dir.deleteFile(reason, existingFile);
} catch (Exception e) {
// ignore, we don't really care, will get deleted later on
}
}
}
}
final Store.MetadataSnapshot metadataOrEmpty = getMetadata();
verifyAfterCleanup(sourceMetaData, metadataOrEmpty);
} finally {

View file

@ -536,7 +536,7 @@ public class IndicesService extends AbstractLifecycleComponent<IndicesService> i
//NOCOMMIT document this
public boolean canDeleteIndexContents(Index index, Settings indexSettings) {
final Tuple<IndexService, Injector> indexServiceInjectorTuple = this.indices.get(index);
if (indexSettings.getAsBoolean(IndexMetaData.SETTING_SHADOW_REPLICAS, false) == false) {
if (IndexMetaData.usesSharedFilesystem(indexSettings) == false) {
if (indexServiceInjectorTuple == null && nodeEnv.hasNodeFile()) {
return true;
}
@ -569,8 +569,7 @@ public class IndicesService extends AbstractLifecycleComponent<IndicesService> i
private boolean canDeleteShardContent(ShardId shardId, @IndexSettings Settings indexSettings, boolean ownsShard) {
final Tuple<IndexService, Injector> indexServiceInjectorTuple = this.indices.get(shardId.getIndex());
// TODO add some protection here to prevent shard deletion if we are on a shared FS or have ShadowReplicas enabled.
if (indexSettings.getAsBoolean(IndexMetaData.SETTING_SHADOW_REPLICAS, ownsShard) == false) {
if (IndexMetaData.usesSharedFilesystem(indexSettings) == false || ownsShard) {
if (indexServiceInjectorTuple != null && nodeEnv.hasNodeFile()) {
final IndexService indexService = indexServiceInjectorTuple.v1();
return indexService.hasShard(shardId.id()) == false;

View file

@ -20,6 +20,7 @@
package org.elasticsearch.indices.recovery;
import org.elasticsearch.ElasticsearchTimeoutException;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.logging.ESLogger;

View file

@ -157,7 +157,13 @@ public class RecoveryTarget extends AbstractComponent {
private void doRecovery(final RecoveryStatus recoveryStatus) {
assert recoveryStatus.sourceNode() != null : "can't do a recovery without a source node";
if (IndexMetaData.usesSharedFilesystem(recoveryStatus.indexShard().indexSettings())) {
// NOCOMMIT - this is a super shortcut we need to check if statistics are all on etc.
recoveryStatus.indexShard().performRecoveryPrepareForTranslog();
recoveryStatus.indexShard().performRecoveryFinalization(false, recoveryStatus.state());
onGoingRecoveries.markRecoveryAsDone(recoveryStatus.recoveryId());
return;
}
logger.trace("collecting local files for {}", recoveryStatus);
final Map<String, StoreFileMetaData> existingFiles;
try {
@ -399,6 +405,7 @@ public class RecoveryTarget extends AbstractComponent {
Store.MetadataSnapshot sourceMetaData = request.sourceMetaSnapshot();
try {
IndexMetaData indexMeta = clusterService.state().getMetaData().index(request.shardId().getIndex());
assert IndexMetaData.usesSharedFilesystem(indexMeta.settings()) == false : "[" + indexMeta.getIndex() +"] index uses shared FS - can't recover / clean files";
store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetaData, indexMeta.settings());
} catch (Exception ex) {
throw new RecoveryFailedException(recoveryStatus.state(), "failed to clean after recovery", ex);

View file

@ -19,6 +19,7 @@
package org.elasticsearch.index;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
@ -32,6 +33,7 @@ import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.test.store.MockFSDirectoryService;
import org.junit.Ignore;
import org.junit.Test;
import java.nio.file.Path;
@ -53,6 +55,7 @@ public class IndexWithShadowReplicasTests extends ElasticsearchIntegrationTest {
@Test
@TestLogging("_root:DEBUG,env:TRACE")
@Repeat(iterations = 10) // NOCOMMIT
public void testIndexWithFewDocuments() throws Exception {
Settings nodeSettings = ImmutableSettings.builder()
.put("node.add_id_to_custom_path", false)
@ -71,6 +74,7 @@ public class IndexWithShadowReplicasTests extends ElasticsearchIntegrationTest {
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
.put(IndexMetaData.SETTING_DATA_PATH, dataPath.toAbsolutePath().toString())
.put(IndexMetaData.SETTING_SHADOW_REPLICAS, true)
.put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true)
.build();
prepareCreate(IDX).setSettings(idxSettings).get();
@ -113,6 +117,7 @@ public class IndexWithShadowReplicasTests extends ElasticsearchIntegrationTest {
@Test
@LuceneTestCase.Slow
@Ignore // NOCOMMIT for now
public void testChaosMonkeyWithShadowReplicas() throws Exception {
final int initialNodeCount = scaledRandomIntBetween(3, 8);