All downloads are free. Search and download functionality uses the official Maven repository.

com.qiniu.datasource.TextContainer Maven / Gradle / Ivy

There is a newer version: 8.4.8
Show newest version
package com.qiniu.datasource;

import com.google.gson.JsonObject;
import com.qiniu.common.QiniuException;
import com.qiniu.interfaces.*;
import com.qiniu.util.*;

import java.io.IOException;
import java.time.LocalDateTime;
import java.util.*;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public abstract class TextContainer extends DatasourceActor implements IDataSource {

    protected String path;
    protected String parse;
    protected String separator;
    protected String addKeyPrefix;
    protected String rmKeyPrefix;
    protected List antiPrefixes;
    protected boolean hasAntiPrefixes = false;
    protected Map> urisMap;
    protected List uris;
    protected ILineProcess processor; // 定义的资源处理器

    public TextContainer(String path, String parse, String separator, Map> urisMap,
                         List antiPrefixes, String addKeyPrefix, String rmKeyPrefix, Map indexMap,
                         List fields, int unitLen, int threads) throws IOException {
        super(unitLen, threads);
        this.path = path;
        this.parse = parse;
        this.separator = separator;
        this.addKeyPrefix = addKeyPrefix;
        this.rmKeyPrefix = rmKeyPrefix;
        setAntiPrefixes(antiPrefixes);
        setUrisAndMap(urisMap);
        setIndexMapWithDefault(indexMap);
        if (fields != null && fields.size() > 0) this.fields = fields;
        else this.fields = ConvertingUtils.getOrderedFields(this.indexMap, null);
        // default save parameters,默认全记录保存
        setSaveOptions(true, "result", "tab", "\t", null);
    }

    private void setAntiPrefixes(List antiPrefixes) {
        if (antiPrefixes != null && antiPrefixes.size() > 0) {
            hasAntiPrefixes = true;
            this.antiPrefixes = antiPrefixes.stream().sorted().collect(Collectors.toList());
            int size = this.antiPrefixes.size();
            Iterator iterator = this.antiPrefixes.iterator();
            String temp = iterator.next();
            while (iterator.hasNext() && size > 0) {
                size--;
                String prefix = iterator.next();
                if (prefix.startsWith(temp)) iterator.remove();
                else temp = prefix;
            }
        }
    }

    private void setUrisAndMap(Map> urisMap) throws IOException {
        if (urisMap == null || urisMap.size() <= 0) {
            this.urisMap = new HashMap<>();
        } else {
            if (urisMap.containsKey(null)) throw new IOException("uris map can not contains null.");
            this.urisMap = new HashMap<>(threads);
            this.urisMap.putAll(urisMap);
            int size = this.urisMap.size();
            uris = new ArrayList<>();
            Iterator iterator = this.urisMap.keySet().stream().sorted().collect(Collectors.toList()).iterator();
            while (iterator.hasNext() && size > 0) {
                size--;
                String uri = iterator.next();
                if (uri == null || uri.equals("")) {
                    throw new IOException("uris can not contain empty item.");
                } else {
                    uris.add(uri.split("-\\|\\|-")[0]);
                }
            }
        }
    }

    private void setIndexMapWithDefault(Map indexMap) throws IOException {
        if (indexMap == null || indexMap.size() == 0) {
            if (this.indexMap == null) this.indexMap = new HashMap<>();
            for (String fileInfoField : ConvertingUtils.defaultFileFields) {
                this.indexMap.put(fileInfoField, fileInfoField);
            }
        } else {
            for (String s : indexMap.keySet()) {
                if (s == null || "".equals(s)) throw new IOException("the index can not be empty in " + indexMap);
            }
            this.indexMap = indexMap;
        }
    }

    public void setProcessor(ILineProcess processor) {
        this.processor = processor;
    }

    protected abstract ITypeConvert getNewConverter() throws IOException;

    protected abstract ITypeConvert getNewStringConverter() throws IOException;

    boolean checkPrefix(String name) {
        for (String antiPrefix : antiPrefixes) {
            if (name.startsWith(antiPrefix)) return false;
        }
        return true;
    }

    void recordListerByUri(String prefix) {
        Map map = urisMap.get(prefix.split("-\\|\\|-")[0]);
        String record = map == null ? "{}" : JsonUtils.toJsonObject(map).toString();
        recordLister(prefix, record);
    }

    public void export(ITextReader reader, IResultOutput saver, ILineProcess processor) throws Exception {
        ITypeConvert converter = getNewConverter();
        ITypeConvert stringConverter = null;
        if (saveTotal) {
            stringConverter = getNewStringConverter();
            saver.preAddWriter("failed");
        }
        String lastLine = reader.currentEndLine();
        List srcList = null;
        List convertedList;
        List writeList;
        int retry;
        Map map = urisMap.get(reader.getName());
        JsonObject json = map != null ? JsonUtils.toJsonObject(map) : (lastLine != null ? new JsonObject() : null);
        while (lastLine != null) {
            if (stopped) break;
            if (LocalDateTime.now(DatetimeUtils.clock_Default).isAfter(pauseDateTime)) {
                synchronized (object) {
                    object.wait();
                }
            }
            retry = retryTimes + 1;
            while (retry > 0) {
                try {
                    srcList = reader.readLines();
                    retry = 0;
                } catch (IOException e) {
                    retry--;
                    if (retry == 0) throw e;
                }
            }
            convertedList = converter.convertToVList(srcList);
            if (converter.errorSize() > 0) saver.writeError(converter.errorLines(), false);
            if (stringConverter != null) {
                writeList = stringConverter.convertToVList(convertedList);
                if (writeList.size() > 0) saver.writeSuccess(String.join("\n", writeList), false);
                if (stringConverter.errorSize() > 0)
                    saver.writeToKey("failed", stringConverter.errorLines(), false);
            }
            // 如果抛出异常需要检测下异常是否是可继续的异常,如果是程序可继续的异常,忽略当前异常保持数据源读取过程继续进行
            if (processor != null) {
                try {
                    processor.processLine(convertedList);
                } catch (QiniuException e) {
                    // 这里其实逻辑上没有做重试次数的限制,因为返回的 retry 始终大于等于 -1,所以不是必须抛出的异常则会跳过,process 本身会
                    // 保存失败的记录,除非是 process 出现 599 状态码才会抛出异常
                    if (HttpRespUtils.checkException(e, 2) < -1) throw e;
                    if (e.response != null) e.response.close();
                }
            }
            statistics.addAndGet(srcList.size());
            if (stopped) break;
            lastLine = reader.currentEndLine();
            json.addProperty("start", lastLine);
            recordLister(reader.getName(), json.toString());
        }
    }

    protected abstract IResultOutput getNewResultSaver(String order) throws IOException;

    private void reading(ITextReader reader) {
        int order = UniOrderUtils.getOrder();
        String orderStr = String.valueOf(order);
        ILineProcess lineProcessor = null;
        IResultOutput saver = null;
        try {
            saver = getNewResultSaver(orderStr);
            saverMap.put(orderStr, saver);
            if (processor != null) {
                lineProcessor = processor.clone();
                lineProcessor.changeSaveOrder(orderStr);
                processorMap.put(orderStr, lineProcessor);
            }
            export(reader, saver, lineProcessor);
            procedureLogger.info("{}-|-", reader.getName());
            progressMap.remove(reader.getName()); // 只有 export 成功情况下才移除 record
        }  catch (QiniuException e) {
            try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
            errorLogger.error("{}: {}, {}", reader.getName(), progressMap.get(reader.getName()), e.error(), e);
            if (e.response != null) e.response.close();
        } catch (Throwable e) {
            try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
            errorLogger.error("{}: {}", reader.getName(), progressMap.get(reader.getName()), e);
        } finally {
            try { FileUtils.createIfNotExists(infoLogFile); } catch (IOException ignored) {}
            infoLogger.info("{}\t{}\t{}", orderStr, reader.getName(), reader.count());
            if (saver != null) {
                saver.closeWriters();
                saver = null; // let gc work
            }
            saverMap.remove(orderStr);
            if (lineProcessor != null) {
                lineProcessor.closeResource();
                lineProcessor = null;
            }
            UniOrderUtils.returnOrder(order);
            reader.close();
        }
    }

    protected abstract ITextReader generateReader(String name) throws IOException;

    protected abstract Stream getReaders(String path) throws IOException;

    public void export() throws Exception {
        String info = processor == null ? String.join(" ", "read lines from path:", path) :
                String.join(" ", "read lines from path:", path, "and", processor.getProcessName());
        rootLogger.info("{} running...", info);
        rootLogger.info("order\tpath\tquantity");
        showdownHook();
        Stream readerStream;
        if (uris == null || uris.size() == 0) {
            readerStream = getReaders(FileUtils.convertToRealPath(path));
        } else {
            if (hasAntiPrefixes) {
                uris = uris.parallelStream()
                        .filter(this::checkPrefix)
                        .peek(this::recordListerByUri)
                        .collect(Collectors.toList());
            } else {
                uris.parallelStream().forEach(this::recordListerByUri);
            }
            readerStream = uris.parallelStream().map(uri -> {
                try {
                    return generateReader(uri);
                } catch (IOException e) {
                    errorLogger.error("generate lister failed by {}\t{}", uri, urisMap.get(uri), e);
                    return null;
                }
            });
        }
        try {
            executorPool = Executors.newFixedThreadPool(threads);
            readerStream.filter(generated -> {
                if (generated == null) return false;
                else if (generated.currentEndLine() != null) return true;
                else {
                    progressMap.remove(generated.getName());
                    generated.close();
                    return false;
                }
            }).forEach(reader -> executorPool.execute(() -> reading(reader)));
            executorPool.shutdown();
            while (!executorPool.isTerminated()) {
                sleep(2000);
                if (countInterval-- <= 0) {
                    countInterval = 300;
                    refreshRecordAndStatistics();
                }
            }
            rootLogger.info("{} finished, results in {}.", info, savePath);
            endAction();
        } catch (Throwable e) {
            stopped = true;
            rootLogger.error("export failed", e);
            endAction();
            System.exit(-1);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy