All downloads are free. The search and download features use the official Maven repository.

org.apache.pulsar.io.alluxio.sink.AlluxioSink Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pulsar.io.alluxio.sink;

import alluxio.AlluxioURI;
import alluxio.client.WriteType;
import alluxio.client.file.FileOutStream;
import alluxio.client.file.FileSystem;
import alluxio.conf.InstancedConfiguration;
import alluxio.conf.PropertyKey;
import alluxio.exception.AlluxioException;
import alluxio.grpc.CreateFilePOptions;
import alluxio.grpc.WritePType;
import alluxio.util.FileSystemOptions;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;
import lombok.extern.slf4j.Slf4j;
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.client.api.schema.GenericObject;
import org.apache.pulsar.client.api.schema.GenericRecord;
import org.apache.pulsar.client.api.schema.KeyValueSchema;
import org.apache.pulsar.common.schema.SchemaType;
import org.apache.pulsar.functions.api.Record;
import org.apache.pulsar.io.core.KeyValue;
import org.apache.pulsar.io.core.Sink;
import org.apache.pulsar.io.core.SinkContext;
import org.apache.pulsar.io.core.annotations.Connector;
import org.apache.pulsar.io.core.annotations.IOType;

/**
 * Alluxio sink that treats incoming messages on the input topic as Strings
 * and write identical key/value pairs.
 */
@Connector(
        name = "alluxio",
        type = IOType.SINK,
        help = "The sink connector is used for moving records from Pulsar to Alluxio.",
        configClass = AlluxioSinkConfig.class)
@Slf4j
public class AlluxioSink implements Sink {

    private FileSystem fileSystem;
    private FileOutStream fileOutStream;

    private long recordsNum;
    private String tmpFilePath;
    private String fileDirPath;
    private String tmpFileDirPath;
    private long lastRotationTime;
    private long rotationRecordsNum;
    private long rotationInterval;
    private AlluxioSinkConfig alluxioSinkConfig;
    private AlluxioState alluxioState;

    private InstancedConfiguration configuration = InstancedConfiguration.defaults();

    private ObjectMapper objectMapper = new ObjectMapper();

    private List> recordsToAck;

    @Override
    public void open(Map config, SinkContext sinkContext) throws Exception {
        alluxioSinkConfig = AlluxioSinkConfig.load(config);
        alluxioSinkConfig.validate();

        // initialize FileSystem
        String alluxioMasterHost = alluxioSinkConfig.getAlluxioMasterHost();
        int alluxioMasterPort = alluxioSinkConfig.getAlluxioMasterPort();
        configuration.set(PropertyKey.MASTER_HOSTNAME, alluxioMasterHost);
        configuration.set(PropertyKey.MASTER_RPC_PORT, alluxioMasterPort);
        if (alluxioSinkConfig.getSecurityLoginUser() != null) {
            configuration.set(PropertyKey.SECURITY_LOGIN_USERNAME, alluxioSinkConfig.getSecurityLoginUser());
        }
        fileSystem = FileSystem.Factory.create(configuration);

        // initialize alluxio dirs
        String alluxioDir = alluxioSinkConfig.getAlluxioDir();
        fileDirPath = alluxioDir.startsWith("/") ? alluxioDir : "/" + alluxioDir;
        tmpFileDirPath = fileDirPath + "/tmp";

        AlluxioURI alluxioDirPath = new AlluxioURI(fileDirPath);
        if (!fileSystem.exists(alluxioDirPath)) {
            fileSystem.createDirectory(alluxioDirPath);
        }

        AlluxioURI tmpAlluxioDirPath = new AlluxioURI(tmpFileDirPath);
        if (!fileSystem.exists(tmpAlluxioDirPath)) {
            fileSystem.createDirectory(tmpAlluxioDirPath);
        }

        recordsNum = 0;
        recordsToAck = Lists.newArrayList();
        tmpFilePath = "";
        alluxioState = AlluxioState.WRITE_STARTED;

        lastRotationTime = System.currentTimeMillis();
        rotationRecordsNum = alluxioSinkConfig.getRotationRecords();
        rotationInterval =  alluxioSinkConfig.getRotationInterval();
    }

    @SuppressWarnings("checkstyle:fallthrough")
    @Override
    public void write(Record record) {
        long now = System.currentTimeMillis();

        switch (alluxioState) {
            case WRITE_STARTED:
                try {
                    writeToAlluxio(record);
                    if (!shouldRotate(now)) {
                        break;
                    }
                    alluxioState = AlluxioState.FILE_ROTATED;
                } catch (AlluxioException | IOException e) {
                    log.error("Unable to write record to alluxio.", e);
                    record.fail();
                    break;
                }
            case FILE_ROTATED:
                try {
                    closeAndCommitTmpFile();
                    alluxioState = AlluxioState.FILE_COMMITTED;
                    ackRecords();
                } catch (AlluxioException | IOException e) {
                    log.error("Unable to flush records to alluxio.", e);
                    failRecords();
                    try {
                        deleteTmpFile();
                    } catch (AlluxioException | IOException e1) {
                        log.error("Failed to delete tmp cache file.", e);
                    }
                    break;
                }
            case FILE_COMMITTED:
                alluxioState = AlluxioState.WRITE_STARTED;
                break;
            default:
                log.error("{} is not a valid state when writing record to alluxio temp dir {}.",
                    alluxioState, tmpFileDirPath);
                break;
        }
    }

    @Override
    public void close() throws Exception {
        // flush records in the tmpFile when closing sink
        try {
            closeAndCommitTmpFile();
            ackRecords();
        } catch (AlluxioException | IOException e) {
            log.error("Unable to flush records to alluxio.", e);
            failRecords();
        }
        deleteTmpFile();
    }

    private void ackRecords() {
        recordsToAck.forEach(Record::ack);
        recordsToAck.clear();
    }

    private void failRecords() {
        recordsToAck.forEach(Record::fail);
        recordsToAck.clear();
    }

    private void writeToAlluxio(Record record) throws AlluxioException, IOException {
        KeyValue keyValue = extractKeyValue(record);
        if (fileOutStream == null) {
            createTmpFile();
        }
        fileOutStream.write(toBytes(keyValue.getValue()));
        if (alluxioSinkConfig.getLineSeparator() != '\u0000') {
            fileOutStream.write(alluxioSinkConfig.getLineSeparator());
        }
        recordsNum++;
        recordsToAck.add(record);
    }

    private void createTmpFile() throws AlluxioException, IOException {
        CreateFilePOptions.Builder optionsBuilder =
                FileSystemOptions.createFileDefaults(configuration).toBuilder();
        UUID id = UUID.randomUUID();
        String fileExtension = alluxioSinkConfig.getFileExtension();
        tmpFilePath = tmpFileDirPath + "/" + id.toString() + "_tmp" + fileExtension;
        if (alluxioSinkConfig.getWriteType() != null) {
            WritePType writePType;
            try {
                writePType = WritePType.valueOf(alluxioSinkConfig.getWriteType().toUpperCase());
            } catch (IllegalArgumentException e) {
                throw new IllegalArgumentException("Illegal write type when creating Alluxio files, valid values are: "
                    + Arrays.asList(WriteType.values()));
            }
            optionsBuilder.setWriteType(writePType);
        }
        fileOutStream = fileSystem.createFile(new AlluxioURI(tmpFilePath), optionsBuilder.build());
    }

    private void closeAndCommitTmpFile() throws AlluxioException, IOException {
        // close the tmpFile
        if (fileOutStream != null) {
            fileOutStream.close();
        }
        // commit the tmpFile
        String filePrefix = alluxioSinkConfig.getFilePrefix();
        String fileExtension = alluxioSinkConfig.getFileExtension();
        String newFile = filePrefix + "-" + System.currentTimeMillis() + fileExtension;
        String newFilePath = fileDirPath + "/" + newFile;
        fileSystem.rename(new AlluxioURI(tmpFilePath), new AlluxioURI(newFilePath));
        fileOutStream = null;
        tmpFilePath = "";
        recordsNum = 0;
        lastRotationTime = System.currentTimeMillis();
    }

    private void deleteTmpFile() throws AlluxioException, IOException {
        if (!tmpFilePath.equals("")) {
            fileSystem.delete(new AlluxioURI(tmpFilePath));
        }
    }

    private boolean shouldRotate(long now) {
        boolean rotated = false;
        if (recordsNum >= rotationRecordsNum) {
            rotated = true;
        } else {
            if (rotationInterval != -1 && (now - lastRotationTime) >= rotationInterval) {
                rotated = true;
            }
        }
        return rotated;
    }

    private static byte[] toByteArray(Object obj) throws IOException {
        byte[] bytes = null;
        try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
             ObjectOutputStream oos = new ObjectOutputStream(baos)) {
            oos.writeObject(obj);
            oos.flush();
            bytes = baos.toByteArray();
        } catch (IOException e) {
            log.error("Failed to serialize the object.", e);
            throw e;
        }
        return bytes;
    }

    private static byte[] toBytes(Object obj) throws IOException {
        byte[] bytes;
        if (obj instanceof String) {
            String s = (String) obj;
            bytes = s.getBytes(StandardCharsets.UTF_8);
        } else if (obj instanceof byte[]) {
            bytes = (byte[]) obj;
        } else {
            bytes = toByteArray(obj);
        }
        return bytes;
    }

    public KeyValue extractKeyValue(Record record) throws JsonProcessingException {
        // just ignore the key
        if (alluxioSinkConfig.isSchemaEnable()) {
            GenericObject recordValue = null;
            Schema valueSchema = null;
            if (record.getSchema() != null && record.getSchema() instanceof KeyValueSchema) {
                KeyValueSchema keyValueSchema = (KeyValueSchema) record.getSchema();
                valueSchema = keyValueSchema.getValueSchema();
                org.apache.pulsar.common.schema.KeyValue keyValue =
                        (org.apache.pulsar.common.schema.KeyValue)
                                record.getValue().getNativeObject();
                recordValue = keyValue.getValue();
            } else {
                valueSchema = record.getSchema();
                recordValue = record.getValue();
            }

            String value = null;
            if (recordValue != null) {
                if (valueSchema != null) {
                    value = stringifyValue(valueSchema, recordValue);
                } else {
                    if (recordValue.getNativeObject() instanceof byte[]) {
                        value = new String((byte[]) recordValue.getNativeObject(), StandardCharsets.UTF_8);
                    } else {
                        value = recordValue.getNativeObject().toString();
                    }
                }
            }
            return new KeyValue<>(null, value);
        } else {
            return new KeyValue<>(null, new String(record.getMessage()
                    .orElseThrow(() -> new IllegalArgumentException("Record does not carry message information"))
                    .getData(), StandardCharsets.UTF_8));
        }
    }

    public String stringifyValue(Schema schema, Object val) throws JsonProcessingException {
        // just support json schema
        if (schema.getSchemaInfo().getType() == SchemaType.JSON) {
            JsonNode jsonNode = (JsonNode) ((GenericRecord) val).getNativeObject();
            return objectMapper.writeValueAsString(jsonNode);
        }
        throw new UnsupportedOperationException("Unsupported value schemaType=" + schema.getSchemaInfo().getType());
    }

    private enum AlluxioState {
        WRITE_STARTED,
        FILE_ROTATED,
        FILE_COMMITTED
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy