All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com._4paradigm.openmldb.synctool.DataParser Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021 4Paradigm
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com._4paradigm.openmldb.synctool;

import com._4paradigm.openmldb.proto.Common.ColumnDesc;

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;

import com._4paradigm.openmldb.common.codec.RowView;

import io.netty.buffer.ByteBuf;
import org.apache.commons.lang3.StringEscapeUtils;
import com.google.common.base.Preconditions;
import lombok.extern.slf4j.Slf4j;

@Slf4j
public class DataParser {
    private final ByteBuf data;
    private final long count;
    private final RowView rowView;

    public DataParser(ByteBuf data, final long count, List schema) throws Exception {
        this.data = data;
        this.count = count;
        this.rowView = new RowView(schema);
    }

    // spark-readable format, escape string, null is null
    public static String rowToString(RowView rowView) throws Exception {
        StringBuilder sb = new StringBuilder();
        List schema = rowView.getSchema();
        for (int i = 0; i < schema.size(); i++) {
            if (i != 0) {
                sb.append(",");
            }
            ColumnDesc col = schema.get(i);
            Object value = rowView.getValue(i, col.getDataType());
            String appendStr = value == null ? "null" : value.toString();
            if (value instanceof String) {
                // escape for string
                appendStr = String.format("\"%s\"", StringEscapeUtils.escapeJson((String) value));
            }
            // bool, date and timestamp string can be read by spark
            // bool: true, date: 2023-04-11, timestamp: 2001-09-09T09:46:39.999+08:00
            sb.append(appendStr);
        }
        return sb.toString();
    }

    public void writeAll(BufferedWriter bufferedWriter) {
        try {
            long readCount = 0;
            while (readCount < count) {
                int start = data.readerIndex();
                data.readerIndex(start + 2); // skip 2 bytes
                int rowSize = data.readIntLE();
                log.debug("read row size: {}", rowSize);
                // netty is big endian, but we use little endian
                ByteBuffer row = data.nioBuffer(start, rowSize).order(ByteOrder.LITTLE_ENDIAN);
                Preconditions.checkState(rowView.reset(row, rowSize), "reset row view failed");
                data.readerIndex(start + rowSize);
                bufferedWriter.write(rowToString(rowView));
                bufferedWriter.write("\n");
                readCount++;
            }
            Preconditions.checkState(readCount == count,
                    String.format("read count not match, read: %d, expect: %d", readCount, count));
        } catch (IOException e) {
            throw new RuntimeException("data write failed", e);
        } catch (Exception e) {
            throw new RuntimeException("data parse failed", e);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy