All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.gateway.local.state.shards.LocalGatewayShardsState Maven / Gradle / Ivy

There is a newer version: 8.14.1
Show newest version
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.gateway.local.state.shards;

import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterStateListener;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.*;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FileSystemUtils;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.io.stream.CachedStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.*;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.gateway.local.state.meta.LocalGatewayMetaState;
import org.elasticsearch.index.shard.ShardId;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 */
/**
 * Tracks and persists, on local disk, the state of shards that have been started on this
 * data node (the "local gateway" shard state). On each relevant cluster state change the
 * per-shard state files under each shard location's {@code _state} directory are rewritten
 * as needed, so that after a full cluster restart the node can report which shard copies
 * (and at which version) it holds.
 */
public class LocalGatewayShardsState extends AbstractComponent implements ClusterStateListener {

    private final NodeEnvironment nodeEnv;
    private final LocalGatewayMetaState metaState;

    // Snapshot of the last persisted shard states. Replaced wholesale (never mutated in
    // place) on each cluster state change; volatile for safe publication across threads.
    private volatile Map<ShardId, ShardStateInfo> currentState = Maps.newHashMap();

    /**
     * Loads the persisted started-shards state from disk (data nodes only), upgrading any
     * pre-0.19 state layout first. Registers this instance with the started-shards
     * transport action so other nodes can query it.
     *
     * @throws Exception if the on-disk state cannot be read; the node should not start
     *                   with unreadable local state, so the failure is propagated
     */
    @Inject
    public LocalGatewayShardsState(Settings settings, NodeEnvironment nodeEnv, TransportNodesListGatewayStartedShards listGatewayStartedShards, LocalGatewayMetaState metaState) throws Exception {
        super(settings);
        this.nodeEnv = nodeEnv;
        this.metaState = metaState;
        listGatewayStartedShards.initGateway(this);

        if (DiscoveryNode.dataNode(settings)) {
            try {
                pre019Upgrade();
                long start = System.currentTimeMillis();
                currentState = loadShardsStateInfo();
                logger.debug("took {} to load started shards state", TimeValue.timeValueMillis(System.currentTimeMillis() - start));
            } catch (Exception e) {
                logger.error("failed to read local state (started shards), exiting...", e);
                throw e;
            }
        }
    }

    /**
     * Returns the current in-memory view of shards started on this node.
     * The returned map is the live snapshot; callers must not mutate it.
     */
    public Map<ShardId, ShardStateInfo> currentStartedShards() {
        return this.currentState;
    }

    /**
     * Loads the persisted state for a single shard directly from disk,
     * returning {@code null} if no state file exists for it.
     */
    public ShardStateInfo loadShardInfo(ShardId shardId) throws Exception {
        return loadShardStateInfo(shardId);
    }

    @Override
    public void clusterChanged(ClusterChangedEvent event) {
        // Don't touch disk state when persistence is disabled (e.g. recovering from gateway).
        if (event.state().blocks().disableStatePersistence()) {
            return;
        }

        // Only data nodes hold shard data, so only they track started-shard state.
        if (!event.state().nodes().localNode().dataNode()) {
            return;
        }

        if (!event.routingTableChanged()) {
            return;
        }

        Map<ShardId, ShardStateInfo> newState = Maps.newHashMap();
        newState.putAll(this.currentState);


        // remove from the current state all the shards that are completely started somewhere, we won't need them anymore
        // and if they are still here, we will add them in the next phase
        // Also note, this works well when closing an index, since a closed index will have no routing shards entries
        // so they won't get removed (we want to keep the fact that those shards are allocated on this node if needed)
        for (IndexRoutingTable indexRoutingTable : event.state().routingTable()) {
            for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
                if (indexShardRoutingTable.countWithState(ShardRoutingState.STARTED) == indexShardRoutingTable.size()) {
                    newState.remove(indexShardRoutingTable.shardId());
                }
            }
        }
        // remove deleted indices from the started shards
        for (ShardId shardId : currentState.keySet()) {
            if (!event.state().metaData().hasIndex(shardId.index().name())) {
                newState.remove(shardId);
            }
        }
        // now, add all the ones that are active and on this node
        RoutingNode routingNode = event.state().readOnlyRoutingNodes().node(event.state().nodes().localNodeId());
        if (routingNode != null) {
            // our node is not in play yet...
            for (MutableShardRouting shardRouting : routingNode) {
                if (shardRouting.active()) {
                    newState.put(shardRouting.shardId(), new ShardStateInfo(shardRouting.version(), shardRouting.primary()));
                }
            }
        }

        // go over the new state and persist each entry that is either new or has a changed
        // version compared to what we last wrote; entries that fail to write are dropped so
        // they get retried on the next cluster state change
        for (Iterator<Map.Entry<ShardId, ShardStateInfo>> it = newState.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry<ShardId, ShardStateInfo> entry = it.next();
            ShardId shardId = entry.getKey();
            ShardStateInfo shardStateInfo = entry.getValue();

            String writeReason = null;
            ShardStateInfo currentShardStateInfo = currentState.get(shardId);
            if (currentShardStateInfo == null) {
                writeReason = "freshly started, version [" + shardStateInfo.version + "]";
            } else if (currentShardStateInfo.version != shardStateInfo.version) {
                writeReason = "version changed from [" + currentShardStateInfo.version + "] to [" + shardStateInfo.version + "]";
            }

            // we update the write reason if we really need to write a new one...
            if (writeReason == null) {
                continue;
            }

            try {
                writeShardState(writeReason, shardId, shardStateInfo, currentShardStateInfo);
            } catch (Exception e) {
                // we failed to write the shard state, remove it from our builder, we will try and write
                // it next time...
                it.remove();
            }
        }

        // REMOVED: don't delete shard state, rely on IndicesStore to delete the shard location
        //          only once all shards are allocated on another node
        // now, go over the current ones and delete ones that are not in the new one
//        for (Map.Entry<ShardId, ShardStateInfo> entry : currentState.entrySet()) {
//            ShardId shardId = entry.getKey();
//            if (!newState.containsKey(shardId)) {
//                if (!metaState.isDangling(shardId.index().name())) {
//                    deleteShardState(shardId);
//                }
//            }
//        }

        this.currentState = newState;
    }

    /**
     * Scans every shard directory under this node's data locations and loads the
     * persisted state for each shard that has one. Shards without readable state
     * are simply omitted from the result.
     */
    private Map<ShardId, ShardStateInfo> loadShardsStateInfo() throws Exception {
        Set<ShardId> shardIds = nodeEnv.findAllShardIds();
        Map<ShardId, ShardStateInfo> shardsState = Maps.newHashMap();
        for (ShardId shardId : shardIds) {
            ShardStateInfo shardStateInfo = loadShardStateInfo(shardId);
            if (shardStateInfo == null) {
                continue;
            }
            shardsState.put(shardId, shardStateInfo);
        }
        return shardsState;
    }

    /**
     * Loads the highest-versioned readable {@code state-<version>} file across all
     * locations of the given shard, or {@code null} if none exists. Unreadable or
     * empty files are logged and skipped rather than failing the whole load.
     */
    private ShardStateInfo loadShardStateInfo(ShardId shardId) {
        long highestShardVersion = -1;
        ShardStateInfo highestShardState = null;
        for (File shardLocation : nodeEnv.shardLocations(shardId)) {
            File shardStateDir = new File(shardLocation, "_state");
            if (!shardStateDir.exists() || !shardStateDir.isDirectory()) {
                continue;
            }
            // now, iterate over the current versions, and find latest one
            File[] stateFiles = shardStateDir.listFiles();
            if (stateFiles == null) {
                continue;
            }
            for (File stateFile : stateFiles) {
                if (!stateFile.getName().startsWith("state-")) {
                    continue;
                }
                try {
                    // the version is encoded in the file name: state-<version>
                    long version = Long.parseLong(stateFile.getName().substring("state-".length()));
                    if (version > highestShardVersion) {
                        byte[] data = Streams.copyToByteArray(new FileInputStream(stateFile));
                        if (data.length == 0) {
                            logger.debug("[{}][{}]: no data for [" + stateFile.getAbsolutePath() + "], ignoring...", shardId.index().name(), shardId.id());
                            continue;
                        }
                        ShardStateInfo readState = readShardState(data);
                        if (readState == null) {
                            logger.debug("[{}][{}]: no data for [" + stateFile.getAbsolutePath() + "], ignoring...", shardId.index().name(), shardId.id());
                            continue;
                        }
                        assert readState.version == version;
                        highestShardState = readState;
                        highestShardVersion = version;
                    }
                } catch (Exception e) {
                    logger.debug("[{}][{}]: failed to read [" + stateFile.getAbsolutePath() + "], ignoring...", e, shardId.index().name(), shardId.id());
                }
            }
        }
        return highestShardState;
    }

    /**
     * Parses a shard state file ({@code {"version": <long>, "primary": <bool>}}) into a
     * {@link ShardStateInfo}, returning {@code null} for content with no tokens.
     */
    @Nullable
    private ShardStateInfo readShardState(byte[] data) throws Exception {
        XContentParser parser = null;
        try {
            parser = XContentHelper.createParser(data, 0, data.length);
            XContentParser.Token token = parser.nextToken();
            if (token == null) {
                return null;
            }
            long version = -1;
            Boolean primary = null;
            String currentFieldName = null;
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (token.isValue()) {
                    if ("version".equals(currentFieldName)) {
                        version = parser.longValue();
                    } else if ("primary".equals(currentFieldName)) {
                        primary = parser.booleanValue();
                    }
                }
            }
            return new ShardStateInfo(version, primary);
        } finally {
            if (parser != null) {
                parser.close();
            }
        }
    }

    /**
     * Writes the shard state file to every location of the shard, fsync-ing each copy.
     * Succeeds if at least one location was written; only then are the files for the
     * previous version deleted, so at least one readable state file always remains.
     *
     * @throws IOException if no location could be written (last failure as cause)
     */
    private void writeShardState(String reason, ShardId shardId, ShardStateInfo shardStateInfo, @Nullable ShardStateInfo previousStateInfo) throws Exception {
        logger.trace("[{}][{}] writing shard state, reason [{}]", shardId.index().name(), shardId.id(), reason);
        CachedStreamOutput.Entry cachedEntry = CachedStreamOutput.popEntry();
        try {
            XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON, cachedEntry.bytes());
            builder.prettyPrint();
            builder.startObject();
            builder.field("version", shardStateInfo.version);
            if (shardStateInfo.primary != null) {
                builder.field("primary", shardStateInfo.primary);
            }
            builder.endObject();
            builder.flush();

            Exception lastFailure = null;
            boolean wroteAtLeastOnce = false;
            for (File shardLocation : nodeEnv.shardLocations(shardId)) {
                File shardStateDir = new File(shardLocation, "_state");
                FileSystemUtils.mkdirs(shardStateDir);
                File stateFile = new File(shardStateDir, "state-" + shardStateInfo.version);


                FileOutputStream fos = null;
                try {
                    fos = new FileOutputStream(stateFile);
                    BytesReference bytes = cachedEntry.bytes().bytes();
                    fos.write(bytes.array(), bytes.arrayOffset(), bytes.length());
                    // force to disk before we consider this location written
                    fos.getChannel().force(true);
                    wroteAtLeastOnce = true;
                } catch (Exception e) {
                    lastFailure = e;
                } finally {
                    Closeables.closeQuietly(fos);
                }
            }

            if (!wroteAtLeastOnce) {
                logger.warn("[{}][{}]: failed to write shard state", shardId.index().name(), shardId.id(), lastFailure);
                throw new IOException("failed to write shard state for " + shardId, lastFailure);
            }

            // delete the old files
            if (previousStateInfo != null && previousStateInfo.version != shardStateInfo.version) {
                for (File shardLocation : nodeEnv.shardLocations(shardId)) {
                    File stateFile = new File(new File(shardLocation, "_state"), "state-" + previousStateInfo.version);
                    stateFile.delete();
                }
            }
        } finally {
            CachedStreamOutput.pushEntry(cachedEntry);
        }
    }

    /**
     * Deletes the {@code _state} directory of every location of the given shard.
     * Currently unreferenced (the deletion pass in {@link #clusterChanged} is disabled);
     * kept for the commented-out cleanup logic above.
     */
    private void deleteShardState(ShardId shardId) {
        logger.trace("[{}][{}] delete shard state", shardId.index().name(), shardId.id());
        File[] shardLocations = nodeEnv.shardLocations(shardId);
        for (File shardLocation : shardLocations) {
            if (!shardLocation.exists()) {
                continue;
            }
            FileSystemUtils.deleteRecursively(new File(shardLocation, "_state"));
        }
    }

    /**
     * Upgrades the pre-0.19 state layout (a single {@code shards-<index>} file per data
     * location) to the per-shard layout: finds the latest readable legacy file, rewrites
     * each shard's state into its own {@code _state} directory, renames the legacy file
     * to a {@code backup-} prefixed name, and deletes the remaining legacy files.
     * A no-op when no legacy files exist.
     */
    private void pre019Upgrade() throws Exception {
        long index = -1;
        File latest = null;
        for (File dataLocation : nodeEnv.nodeDataLocations()) {
            File stateLocation = new File(dataLocation, "_state");
            if (!stateLocation.exists()) {
                continue;
            }
            File[] stateFiles = stateLocation.listFiles();
            if (stateFiles == null) {
                continue;
            }
            for (File stateFile : stateFiles) {
                if (logger.isTraceEnabled()) {
                    logger.trace("[find_latest_state]: processing [" + stateFile.getName() + "]");
                }
                String name = stateFile.getName();
                if (!name.startsWith("shards-")) {
                    continue;
                }
                long fileIndex = Long.parseLong(name.substring(name.indexOf('-') + 1));
                if (fileIndex >= index) {
                    // try and read the meta data
                    try {
                        byte[] data = Streams.copyToByteArray(new FileInputStream(stateFile));
                        if (data.length == 0) {
                            logger.debug("[upgrade]: no data for [" + name + "], ignoring...");
                            // actually skip the empty file, as the log message promises
                            continue;
                        }
                        pre019ReadState(data);
                        index = fileIndex;
                        latest = stateFile;
                    } catch (IOException e) {
                        logger.warn("[upgrade]: failed to read state from [" + name + "], ignoring...", e);
                    }
                }
            }
        }
        if (latest == null) {
            return;
        }

        logger.info("found old shards state, loading started shards from [{}] and converting to new shards state locations...", latest.getAbsolutePath());
        Map<ShardId, ShardStateInfo> shardsState = pre019ReadState(Streams.copyToByteArray(new FileInputStream(latest)));

        for (Map.Entry<ShardId, ShardStateInfo> entry : shardsState.entrySet()) {
            writeShardState("upgrade", entry.getKey(), entry.getValue(), null);
        }

        // rename shards state to backup state
        File backupFile = new File(latest.getParentFile(), "backup-" + latest.getName());
        if (!latest.renameTo(backupFile)) {
            throw new IOException("failed to rename old state to backup state [" + latest.getAbsolutePath() + "]");
        }

        // delete all other shards state files
        for (File dataLocation : nodeEnv.nodeDataLocations()) {
            File stateLocation = new File(dataLocation, "_state");
            if (!stateLocation.exists()) {
                continue;
            }
            File[] stateFiles = stateLocation.listFiles();
            if (stateFiles == null) {
                continue;
            }
            for (File stateFile : stateFiles) {
                String name = stateFile.getName();
                if (!name.startsWith("shards-")) {
                    continue;
                }
                stateFile.delete();
            }
        }

        logger.info("conversion to new shards state location and format done, backup create at [{}]", backupFile.getAbsolutePath());
    }

    /**
     * Parses a pre-0.19 {@code shards-<index>} file: an object containing a "shards"
     * array of {@code {"index": ..., "id": ..., "version": ...}} entries. The legacy
     * format carries no primary flag, so every entry gets a {@code null} primary.
     */
    private Map<ShardId, ShardStateInfo> pre019ReadState(byte[] data) throws IOException {
        XContentParser parser = null;
        try {
            Map<ShardId, ShardStateInfo> shardsState = Maps.newHashMap();

            parser = XContentHelper.createParser(data, 0, data.length);

            String currentFieldName = null;
            XContentParser.Token token = parser.nextToken();
            if (token == null) {
                // no data...
                return shardsState;
            }
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (token == XContentParser.Token.START_ARRAY) {
                    if ("shards".equals(currentFieldName)) {
                        while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                            if (token == XContentParser.Token.START_OBJECT) {
                                String shardIndex = null;
                                int shardId = -1;
                                long version = -1;
                                while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                                    if (token == XContentParser.Token.FIELD_NAME) {
                                        currentFieldName = parser.currentName();
                                    } else if (token.isValue()) {
                                        if ("index".equals(currentFieldName)) {
                                            shardIndex = parser.text();
                                        } else if ("id".equals(currentFieldName)) {
                                            shardId = parser.intValue();
                                        } else if ("version".equals(currentFieldName)) {
                                            version = parser.longValue();
                                        }
                                    }
                                }
                                shardsState.put(new ShardId(shardIndex, shardId), new ShardStateInfo(version, null));
                            }
                        }
                    }
                }
            }
            return shardsState;
        } finally {
            if (parser != null) {
                parser.close();
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy