All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.staros.worker.Worker Maven / Gradle / Ivy

There is a newer version: 3.4-rc2
Show newest version
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


package com.staros.worker;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.staros.proto.AddShardRequest;
import com.staros.proto.RemoveShardRequest;
import com.staros.proto.WorkerInfo;
import com.staros.proto.WorkerState;
import com.staros.starlet.StarletAgent;
import com.staros.starlet.StarletAgentFactory;
import com.staros.util.LockCloseable;
import com.staros.util.Text;
import com.staros.util.Writable;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;

public class Worker implements Writable {
    private static final Logger LOG = LogManager.getLogger(Worker.class);

    private final String serviceId;
    private final long groupId;
    private final long workerId;
    private final String ipPort;
    private long startTime;
    // last healthy heartbeat time, Usually set as System.currentTimeMillis() in ms
    private long lastSeenTime;
    private final AtomicReference state;
    // ImmutableMap, Copy-On-Write when doing updateInfo()
    private Map workerProperties;
    private final ReentrantLock lock;

    // Approximate number of shards reported by starlet client,
    // no need to persistent, will be updated by worker's heartbeat.
    // TODO: what if allows starlet to report shards in separate batches?
    private final AtomicLong numOfShards;
    private final StarletAgent starletAgent;

    public Worker(String serviceId, long groupId, long workerId, String ipPort) {
        this.serviceId = serviceId;
        this.groupId = groupId;
        this.workerId = workerId;
        this.ipPort = ipPort;
        this.startTime = 0;
        this.state = new AtomicReference<>(WorkerState.DOWN);
        this.workerProperties = ImmutableMap.of();
        this.lock = new ReentrantLock();

        this.numOfShards = new AtomicLong(0);
        this.starletAgent = StarletAgentFactory.newStarletAgent();
        this.starletAgent.connectWorker(this);
    }

    public String getServiceId() {
        return serviceId;
    }

    public long getGroupId() {
        return groupId;
    }

    public long getWorkerId() {
        return workerId;
    }

    public String getIpPort() {
        return ipPort;
    }

    public WorkerState getState() {
        return state.get();
    }

    public Map getProperties() {
        return workerProperties;
    }

    public long getNumOfShards() {
        long n = numOfShards.get();
        return n < 0 ? 0 : n;
    }

    public boolean setState(WorkerState state) {
        try (LockCloseable ignored = new LockCloseable(lock)) {
            if (this.state.get() == state) {
                return false;
            }
            this.state.set(state);
            LOG.info("worker {} state set to {}.", workerId, this.state);
            return true;
        }
    }

    public boolean isAlive() {
        return this.state.get() == WorkerState.ON;
    }

    public void updateLastSeenTime(long lastSeenTime) {
        try (LockCloseable ignored = new LockCloseable(lock)) {
            if (lastSeenTime > this.lastSeenTime) {
                this.lastSeenTime = lastSeenTime;
            }
        }
    }

    public long getLastSeenTime() {
        return lastSeenTime;
    }

    /**
     * Update worker internal state. Called by worker heartbeat response.
     *
     * @param sTime       worker start time
     * @param workerProps worker properties
     * @param numOfShards number of shards
     * @return Pair
     */
    public Pair updateInfo(long sTime, Map workerProps, long numOfShards) {
        try (LockCloseable ignored = new LockCloseable(lock)) {
            boolean needPersist = false;
            if (workerProps != null && !workerProperties.equals(workerProps)) {
                LOG.debug("worker {} properties changed, prev:{}, now:{}.", workerId, workerProperties, workerProps);
                workerProperties = ImmutableMap.copyOf(workerProps);
                needPersist = true;
            }

            boolean restarted = false;
            if (sTime == 0 || sTime < startTime) {
                LOG.info("Detect invalid start time of worker {}, reported startTime:{}, latest startTime:{}!",
                        workerId, sTime, startTime);
            } else { // startTime <= sTime
                if (sTime > startTime) {
                    // startTime == 0 means this is the first time to get the worker's heartbeat, don't treat it as restart.
                    if (startTime != 0L) {
                        restarted = true;
                        SimpleDateFormat fmt = new SimpleDateFormat("MM-dd HH:mm:ss.SSS");
                        LOG.info("Detect worker {} start at {}, previous start time: {}.",
                                workerId, fmt.format(new Date(sTime)), fmt.format(new Date(startTime)));
                    }
                    startTime = sTime;
                }
            }
            // update number of shards hosted on the worker
            this.numOfShards.set(numOfShards);
            needPersist = needPersist || restarted;
            return Pair.of(restarted, needPersist);
        }
    }

    /**
     * Called by Worker::fromProtobuf() to restore its internal state.
     *
     * @param startTime    start time
     * @param state        worker state
     * @param workerProps  worker properties
     * @param lastSeenTime worker last seen time
     */
    private void restoreState(long startTime, WorkerState state, Map workerProps, long lastSeenTime) {
        try (LockCloseable ignored = new LockCloseable(lock)) {
            this.startTime = startTime;
            this.state.set(state);
            this.workerProperties = workerProps;
            this.lastSeenTime = lastSeenTime;
        }
    }

    public WorkerInfo toProtobuf() {
        try (LockCloseable ignored = new LockCloseable(lock)) {
            return WorkerInfo.newBuilder()
                    .setServiceId(serviceId)
                    .setGroupId(groupId)
                    .setWorkerId(workerId)
                    .setIpPort(ipPort)
                    .setWorkerState(getState())
                    .putAllWorkerProperties(workerProperties)
                    .setStartTime(startTime)
                    .build();
        }
    }

    public static Worker fromProtobuf(WorkerInfo info) {
        String serviceId = info.getServiceId();
        long groupId = info.getGroupId();
        long workerId = info.getWorkerId();
        String ipPort = info.getIpPort();
        WorkerState state = info.getWorkerState();
        long startTime = info.getStartTime();
        Map workerProps = info.getWorkerPropertiesMap();

        Worker worker = new Worker(serviceId, groupId, workerId, ipPort);
        // use startTime as its initial lastSeenTime
        worker.restoreState(startTime, state, workerProps, startTime);
        return worker;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        byte[] bytes = toProtobuf().toByteArray();
        Text.writeBytes(out, bytes);
    }

    public static Worker read(DataInput in) throws IOException {
        byte[] bytes = Text.readBytes(in);
        WorkerInfo info = WorkerInfo.parseFrom(bytes);
        return Worker.fromProtobuf(info);
    }

    public boolean heartbeat() {
        boolean success = starletAgent.heartbeat();
        if (success) {
            updateLastSeenTime(System.currentTimeMillis());
        }
        return success;
    }

    public boolean match(Worker worker) {
        if (worker == null) {
            return false;
        }
        if (this.workerId == worker.workerId) {
            Preconditions.checkState(this.serviceId.equals(worker.serviceId));
            Preconditions.checkState(this.groupId == worker.groupId);
            return true;
        } else {
            return false;
        }
    }

    public boolean update(Worker worker) {
        if (this.workerId != worker.workerId) {
            return false;
        }
        Preconditions.checkState(this.groupId == worker.groupId);
        Preconditions.checkState(this.serviceId.equals(worker.serviceId));

        try (LockCloseable ignored = new LockCloseable(lock)) {
            this.startTime = worker.startTime;
            this.state.set(worker.state.get());
            this.workerProperties = ImmutableMap.copyOf(worker.workerProperties);
            return true;
        }
    }

    public boolean addShard(AddShardRequest request) {
        if (!isAlive()) {
            return false;
        }
        long before = numOfShards.get();
        boolean success = starletAgent.addShard(request);
        if (success) {
            // increase value only if numOfShards matches the number before RPC call, may be still inaccurate.
            numOfShards.compareAndSet(before, before + request.getShardInfoCount());
        }
        return success;
    }

    public boolean removeShard(RemoveShardRequest request) {
        if (!isAlive()) {
            return false;
        }
        long before = numOfShards.get();
        boolean success = starletAgent.removeShard(request);
        if (success) {
            // decrease value only if numOfShards matches the number before RPC call, may be still inaccurate.
            numOfShards.compareAndSet(before, before - request.getShardIdsCount());
        }
        return success;
    }

    public void decommission() {
        starletAgent.disconnectWorker();
    }

    @Override
    public String toString() {
        return String.format("[Worker] id:%d, group:%d, service:%s", workerId, groupId, serviceId);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy