All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.kylin.parser.StreamingParser Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.parser;

import java.lang.reflect.Constructor;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.kylin.common.util.DateFormat;
import org.apache.kylin.guava30.shaded.common.collect.Lists;
import org.apache.kylin.guava30.shaded.common.collect.Maps;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.streaming.metadata.StreamingMessageRow;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for streaming message parsers.
 *
 * <p>By convention, every concrete parser must declare a public constructor taking
 * {@code (List<TblColRef> allColumns, Map<String, String> properties)}:
 * {@link #getStreamingParser(String, String, List)} looks that constructor up reflectively
 * with {@code getConstructor(List.class, Map.class)} and fails if it is missing.
 */
public abstract class StreamingParser {

    public static final String PROPERTY_TS_COLUMN_NAME = "tsColName";
    public static final String PROPERTY_TS_PARSER = "tsParser";
    public static final String PROPERTY_TS_PATTERN = "tsPattern";
    public static final String PROPERTY_EMBEDDED_SEPARATOR = "separator";
    public static final String PROPERTY_STRICT_CHECK = "strictCheck"; // whether need check each column strictly, default be false (fault tolerant).
    public static final String PROPERTY_TS_TIMEZONE = "tsTimezone";

    /** Default value for every known property; copied into each result of {@link #parseProperties(String)}. */
    protected static final Map<String, String> defaultProperties = Maps.newHashMap();
    private static final Logger logger = LoggerFactory.getLogger(StreamingParser.class);

    static {
        defaultProperties.put(PROPERTY_TS_COLUMN_NAME, "timestamp");
        defaultProperties.put(PROPERTY_TS_PARSER, "org.apache.kylin.parser.DefaultTimeParser");
        defaultProperties.put(PROPERTY_TS_PATTERN, DateFormat.DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS);
        defaultProperties.put(PROPERTY_EMBEDDED_SEPARATOR, "_");
        defaultProperties.put(PROPERTY_STRICT_CHECK, "false");
        defaultProperties.put(PROPERTY_TS_TIMEZONE, "GMT+0");
    }

    /** Columns handed to the constructor, minus computed columns. */
    protected List<TblColRef> allColumns = Lists.newArrayList();

    // NOTE(review): left as a raw type on purpose. The Javadoc of setColumnMapping describes the
    // mapping as column_name --> source_attribute, but no code in this file fixes the type
    // arguments; confirm against subclasses/callers before parameterizing.
    protected Map columnMapping = Maps.newHashMap();

    /**
     * Registers the non-computed columns of {@code allColumns}.
     *
     * <p>Subclasses must expose a constructor with this same parameter list so that
     * {@link #getStreamingParser(String, String, List)} can instantiate them.
     *
     * @param allColumns columns to register; {@code null} leaves the column list empty
     * @param properties parser properties; not read by this base constructor
     */
    public StreamingParser(List<TblColRef> allColumns, Map<String, String> properties) {
        if (allColumns != null) {
            initColumns(allColumns);
        }
    }

    /**
     * Reflectively creates the parser named by {@code parserName}.
     *
     * @param parserName fully qualified class name of a {@code StreamingParser} subclass
     * @param parserProperties property string in the format accepted by {@link #parseProperties(String)}
     * @param columns columns passed to the parser's constructor
     * @return a new parser instance
     * @throws IllegalStateException if {@code parserName} is null or empty
     * @throws ClassCastException if the named class is not a {@code StreamingParser} subclass
     * @throws ReflectiveOperationException if the class cannot be loaded, lacks a public
     *         {@code (List, Map)} constructor, or the constructor fails
     */
    public static StreamingParser getStreamingParser(String parserName, String parserProperties,
            List<TblColRef> columns) throws ReflectiveOperationException {
        if (StringUtils.isEmpty(parserName)) {
            throw new IllegalStateException("Invalid StreamingConfig, parserName " + parserName + ", parserProperties "
                    + parserProperties + ".");
        }

        logger.info("Construct StreamingParse {} with properties {}", parserName, parserProperties);
        // asSubclass rejects an unrelated class before its constructor is ever invoked,
        // instead of casting the already-built instance afterwards.
        Class<? extends StreamingParser> clazz = Class.forName(parserName).asSubclass(StreamingParser.class);
        Map<String, String> properties = parseProperties(parserProperties);
        Constructor<? extends StreamingParser> constructor = clazz.getConstructor(List.class, Map.class);
        return constructor.newInstance(columns, properties);
    }

    /**
     * Parses a {@code key1=value1;key2=value2} string on top of the default properties.
     *
     * <p>Entries are separated by {@code ';'}. An entry is accepted only when splitting it on
     * {@code '='} yields exactly two parts; anything else (no {@code '='}, an empty value such
     * as {@code "k="}, or a value that itself contains {@code '='}) is logged and ignored.
     *
     * @param propertiesStr the property string; null or empty yields just the defaults
     * @return a new mutable map holding the defaults overridden by the parsed entries
     */
    public static Map<String, String> parseProperties(String propertiesStr) {
        Map<String, String> result = Maps.newHashMap(defaultProperties);
        if (StringUtils.isEmpty(propertiesStr)) {
            return result;
        }

        for (String prop : propertiesStr.split(";")) {
            String[] parts = prop.split("=");
            if (parts.length == 2) {
                result.put(parts[0], parts[1]);
            } else {
                logger.warn("Ignored invalid property expression '{}'.", prop);
            }
        }
        return result;
    }

    /** Appends every non-computed column of {@code allColumns} to {@link #allColumns}. */
    private void initColumns(List<TblColRef> allColumns) {
        for (TblColRef col : allColumns) {
            if (!col.getColumnDesc().isComputedColumn()) {
                this.allColumns.add(col);
            }
        }
        logger.info("Streaming Parser columns: {}", this.allColumns);
    }

    /**
     * Flatten stream message: Extract all key value pairs to a map
     *    The key is source_attribute
     *    The value is the value string
     * @param message the raw message to flatten
     * @return a flat map must not be NULL
     */
    // NOTE(review): return type left raw; the value type cannot be established from this file.
    // Confirm against the concrete parsers before parameterizing.
    public abstract Map flattenMessage(ByteBuffer message);

    /**
     * Parses a raw message.
     *
     * @param message the raw message to parse
     * @return List must not be NULL
     */
    // NOTE(review): return type left raw; the element type is presumably StreamingMessageRow
    // (imported by this file but otherwise unused). Confirm before parameterizing.
    public abstract List parse(ByteBuffer message);

    /**
     * Set column mapping
     * The column mapping: column_name --> source_attribute
     *
     * <p>The given map is stored by reference, not copied.
     *
     * @param columnMapping the mapping to use from now on
     */
    public void setColumnMapping(Map columnMapping) {
        this.columnMapping = columnMapping;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy