All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.data.prepare.util.SharedMethods Maven / Gradle / Ivy

package com.datastax.data.prepare.util;

import org.apache.parquet.Strings;
import org.apache.spark.ml.linalg.VectorUDT;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

// TODO(andy): naming
public class SharedMethods {
    private static final Logger logger = LoggerFactory.getLogger(SharedMethods.class);

//    Filter attributes
    /**
     * Selects a subset of the schema fields of {@code data} according to {@code attributeSelector}.
     *
     * <ul>
     *   <li>{@code Consts.ATTRIBUTE_NAME}: {@code attribute} is split on {@code Consts.DELIMITER},
     *       blank entries are dropped, and the remaining names are handed to
     *       {@code getSelectedField} (mode {@code 1}).</li>
     *   <li>{@code Consts.REGULAR_EXPRESSION}: a field is kept when its name fully matches
     *       {@code regularExpression}, or when it does not match and {@code invertSelection} is set.</li>
     *   <li>{@code Consts.VALUE_TYPE}: {@code valueType} is mapped to a list of Spark data types which
     *       is handed to {@code getSelectedField} (mode {@code 2}). Any value other than
     *       NUMERIC / INTEGER / DATE / TIMESTAMP falls back to String, CalendarInterval and Binary.</li>
     * </ul>
     *
     * When the selector matches none of the three constants the full schema is returned, unless
     * {@code invertSelection} is set, in which case nothing is selected.
     *
     * @param data              dataset whose schema fields are filtered
     * @param attributeSelector which selection strategy to use (compared against the {@code Consts} values above)
     * @param invertSelection   whether the selection is inverted
     * @param attribute         delimiter-separated attribute names; only read for the name selector
     * @param regularExpression pattern matched against whole field names; only read for the regex selector
     * @param valueType         value-type keyword; only read for the value-type selector
     * @return the selected fields with trailing {@code null} slots removed, or {@code null} when
     *         nothing is selected (including an empty schema or a selector that matched no field)
     * @throws java.util.regex.PatternSyntaxException if {@code regularExpression} is not a valid pattern
     */
    public static  StructField[] attributeFilter(Dataset data, final String attributeSelector, final boolean invertSelection, final String attribute,
                                                    final String regularExpression, final String valueType) {
        StructField[] fields = data.schema().fields();
        // Stays true only while no selector branch below has produced a selection.
        boolean noSelectorApplied = true;

        if (Consts.ATTRIBUTE_NAME.equals(attributeSelector)) {
            if (attribute != null && attribute.length() != 0) {
                String[] names = handleColsWithEmpty(attribute.split(Consts.DELIMITER));
                if (names.length == 0) {
                    logger.info("Attribute Name 的属性为空");
                    return null;
                }
                fields = getSelectedField(fields, names, invertSelection, 1);
                noSelectorApplied = false;
            } else {
                // No attribute names supplied: nothing can be selected.
                fields = null;
            }
        }

        if (Consts.REGULAR_EXPRESSION.equals(attributeSelector)) {
            if (regularExpression != null && !"".equals(regularExpression)) {
                Pattern pattern = Pattern.compile(regularExpression);
                int total = fields != null ? fields.length : 0;
                StructField[] matched = new StructField[total];
                int count = 0;
                for (int i = 0; i < total; i++) {
                    // Keep the field when "matches" and "invert" disagree.
                    if (invertSelection != pattern.matcher(fields[i].name()).matches()) {
                        matched[count++] = fields[i];
                    }
                }
                // Selected fields are packed at the front; unused slots stay null.
                fields = matched;
            } else {
                fields = null;
            }
            noSelectorApplied = false;
        }

        if (Consts.VALUE_TYPE.equals(attributeSelector)) {
            DataType[] types = null;
            if (Consts.NUMERIC.equals(valueType)) {
                types = new DataType[]{DataTypes.IntegerType, DataTypes.ByteType, DataTypes.DoubleType,
                        DataTypes.FloatType, DataTypes.LongType, DataTypes.ShortType};
            }
            if (Consts.INTEGER.equals(valueType)) {
                types = new DataType[]{DataTypes.ShortType, DataTypes.IntegerType, DataTypes.ByteType, DataTypes.LongType};
            }
            if (Consts.DATE.equals(valueType)) {
                types = new DataType[]{DataTypes.DateType};
            }
            if (Consts.TIMESTAMP.equals(valueType)) {
                types = new DataType[]{DataTypes.TimestampType};
            }
            if (types == null) {
                // Unrecognised value type: fall back to the non-numeric, non-temporal types.
                types = new DataType[]{DataTypes.StringType, DataTypes.CalendarIntervalType, DataTypes.BinaryType};
            }
            fields = getSelectedField(fields, types, invertSelection, 2);
            noSelectorApplied = false;
        }

        // An empty or all-null selection must not reach dropSuffixEmpty, whose backward scan
        // would otherwise run past index 0.
        boolean anySelected = false;
        if (fields != null) {
            for (StructField field : fields) {
                if (field != null) {
                    anySelected = true;
                    break;
                }
            }
        }

        if ((noSelectorApplied && invertSelection) || !anySelected) {
            logger.info("None of Attributes is selected");
            return null;
        }
        return dropSuffixEmpty(fields);
    }

    /**
     * Returns a copy of {@code fields} with its trailing {@code null} entries removed.
     * Entries before the last non-null element are copied unchanged, including any nulls among them.
     *
     * @param fields array whose trailing null slots are to be dropped; must not be {@code null}
     * @return a new array ending at the last non-null element; an empty array when
     *         {@code fields} is empty or contains only nulls
     */
    private static StructField[] dropSuffixEmpty(StructField[] fields) {
        int length = fields.length;
        // Bound the scan at 0 so an empty or all-null input yields an empty result
        // instead of reading fields[-1].
        while (length > 0 && fields[length - 1] == null) {
            length--;
        }
        StructField[] trimmed = new StructField[length];
        System.arraycopy(fields, 0, trimmed, 0, length);
        return trimmed;
    }

    private static StructField[] getSelectedField(StructField[] fields, Object[] temp, boolean invertSelection, int type) {
        StructField[] result = new StructField[fields.length];
        int[] sign = new int[fields.length];
        boolean flag;
        for(int i=0; i fileList, String... suffixs) {
        if(!file.exists()) {
            logger.info("file does not exists");
            return ;
        }
        if(file.isDirectory()) {
            File[] files = file.listFiles();
            if(files == null) {
                return ;
            }
            for(File temp : files) {
                if(file.isDirectory()) {
                    filesFilter(temp, fileList, suffixs);
                }else {
                    if(accept(file.getName(), suffixs)) { fileList.add(file); }
                }
            }
        }else {
            if(accept(file.getName(), suffixs)) { fileList.add(file); }
        }
    }

    /**
     * Tells whether {@code fileName} ends with at least one of the given suffixes.
     * An empty suffix array accepts every name; null or empty suffix entries are ignored.
     *
     * @param fileName name to test
     * @param suffixs  candidate suffixes
     * @return {@code true} if no suffixes were given or one of them terminates {@code fileName}
     */
    private static boolean accept(String fileName, String[] suffixs) {
        if (suffixs.length == 0) {
            return true;
        }
        for (int idx = 0; idx < suffixs.length; idx++) {
            String candidate = suffixs[idx];
            if (Strings.isNullOrEmpty(candidate)) {
                continue;
            }
            if (fileName.endsWith(candidate)) {
                return true;
            }
        }
        return false;
    }

    // TODO(andy): unused
    /**
     * Checks that the first column of every row fully matches {@code regex}.
     * Rows whose first column is null or empty are skipped.
     *
     * @param rows  rows to inspect; column 0 is read as a string
     * @param regex regular expression each non-empty value must match in full
     * @return {@code false} as soon as one non-empty value fails to match, {@code true} otherwise
     */
    public static boolean checkColumnFormat(Row[] rows, String regex) {
        // Compiled on first use and then reused for every row; compiling lazily keeps the
        // original behaviour that the regex is never evaluated when no row has a value.
        Pattern pattern = null;
        for (Row row : rows) {
            String value = row.getString(0);
            if (Strings.isNullOrEmpty(value)) {
                continue;
            }
            if (pattern == null) {
                pattern = Pattern.compile(regex);
            }
            if (!pattern.matcher(value).matches()) {
                return false;
            }
        }
        return true;
    }


    /**
     * Trims every entry of {@code cols} and drops the ones that are empty after trimming.
     *
     * @param cols raw column names; may be {@code null} and may contain {@code null} entries
     * @return the trimmed, non-empty names in their original order; never {@code null}
     */
    public static String[] handleColsWithEmpty(String[] cols) {
        if (cols == null) {
            return new String[0];
        }
        List<String> kept = new ArrayList<>(cols.length);
        for (String col : cols) {
            if (col == null) {
                continue;
            }
            String trimmed = col.trim();
            if (!trimmed.isEmpty()) {
                kept.add(trimmed);
            }
        }
        return kept.toArray(new String[0]);
    }


    public static void recordSchema(StructField[] fields, Map map) {
        for(int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy