All downloads are free. The search and download features use the official Maven repository.

com.qiniu.datasource.FileContainer Maven / Gradle / Ivy

There is a newer version: 8.4.8
Show newest version
package com.qiniu.datasource;

import com.google.gson.JsonObject;
import com.qiniu.common.QiniuException;
import com.qiniu.interfaces.*;
import com.qiniu.util.*;

import java.io.File;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
 * Abstract data source that lists files starting from a local path (or from a set of configured
 * directories), writes the listed items through a result saver and optionally hands them to a
 * line processor. Subclasses supply the listers, converters and savers.
 *
 * NOTE(review): the generic type arguments of this declaration (and of several members below)
 * appear to have been lost, see the stray {@code T>}; restore them from the original source.
 */
public abstract class FileContainer extends DatasourceActor implements IDataSource, IResultOutput, T> {

    // Path given to the constructor; an escaped "\~" is unescaped and a trailing separator is stripped.
    protected String path;
    // Stored from the constructor; not read within this class (presumably used by subclasses).
    protected boolean keepDir;
    // "~", ".." or "." when the path starts with the corresponding shortcut, otherwise null.
    protected String transferPath = null;
    // Length of the absolute prefix (user home / canonical ".." / canonical ".") that transferPath stands for.
    protected int leftTrimSize = 0;
    // Path actually used for file access (user home expanded, relative paths canonicalized).
    protected String realPath;
    // Number of segments obtained by splitting realPath on the path separator.
    protected int initPathSize;
    // Sorted path prefixes to exclude; entries already covered by a shorter kept prefix are dropped.
    protected List antiPrefixes;
    // True once a non-empty anti-prefix list has been configured.
    protected boolean hasAntiPrefixes = false;
    // Per-directory options keyed by directory name; the keys "start" and "end" are read in this class.
    protected Map> directoriesMap;
    // Directories still waiting to be listed.
    protected List directories;
    protected ILineProcess processor; // the configured resource processor
    // Listers currently handed to the executor pool, keyed by lister name.
    protected ConcurrentMap> listerMap = new ConcurrentHashMap<>(threads);
    // Whether the index map contains the "etag" key.
    protected boolean withEtag;
    // Whether the index map contains the "datetime" key.
    protected boolean withDatetime;
    // Whether the index map contains the "mime" key.
    protected boolean withMime;
    // Whether the index map contains the "parent" key.
    protected boolean withParent;

    /**
     * Creates a container rooted at {@code path}.
     * Initialization runs in this order: index map, anti-prefixes, real/transfer path, configured
     * directories, output fields, and finally the default save options.
     *
     * @param path           root path to list; must not contain "/." or "/.." segments
     * @param directoriesMap directory name to options map; may be null or empty
     * @param antiPrefixes   path prefixes to exclude; may be null or empty
     * @param keepDir        stored as-is; not read within this class
     * @param indexMap       index mapping; when null or empty, defaults from
     *                       {@code ConvertingUtils.localFileInfoFields} are used
     * @param fields         output fields; when null or empty they are derived from the index map
     * @param unitLen        forwarded to the superclass constructor
     * @param threads        forwarded to the superclass constructor
     * @throws IOException if the path is not straight, a configured directory is invalid,
     *                     or the index map contains an empty key
     */
    public FileContainer(String path, Map> directoriesMap, List antiPrefixes, boolean keepDir,
                         Map indexMap, List fields, int unitLen, int threads) throws IOException {
        super(unitLen, threads);
        this.path = path;
        this.keepDir = keepDir;
        setIndexMapWithDefault(indexMap);
        setAntiPrefixes(antiPrefixes);
        setTransferPathAndLeftTrimSize();
        setDirectoriesAndMap(directoriesMap);
        if (fields != null && fields.size() > 0) this.fields = fields;
        else this.fields = ConvertingUtils.getOrderedFields(this.indexMap, null);
        // default save parameters: save all records by default
        setSaveOptions(true, "result", "tab", "\t", null);
    }

    /**
     * Stores a sorted copy of the given anti-prefixes, dropping every entry that starts with the
     * most recently kept entry (such an entry is already covered by the shorter prefix).
     * Does nothing when the argument is null or empty.
     *
     * @param antiPrefixes path prefixes to exclude; may be null or empty
     */
    private void setAntiPrefixes(List antiPrefixes) {
        if (antiPrefixes == null || antiPrefixes.isEmpty()) {
            return;
        }
        hasAntiPrefixes = true;
        List<String> ordered = antiPrefixes.stream().sorted().collect(Collectors.toList());
        List<String> retained = new ArrayList<>(ordered.size());
        String base = null;
        for (String candidate : ordered) {
            // The first entry is always kept; later ones only when not covered by the current base.
            if (base == null || !candidate.startsWith(base)) {
                retained.add(candidate);
                base = candidate;
            }
        }
        this.antiPrefixes = retained;
    }

    /**
     * Normalizes {@code path} and derives {@code realPath}, {@code transferPath},
     * {@code leftTrimSize} and {@code initPathSize} from it.
     *
     * @throws IOException if the path contains a "/." or "/.." segment after its first character,
     *                     or if a relative path cannot be canonicalized
     */
    private void setTransferPathAndLeftTrimSize() throws IOException {
        boolean notStraight = path.indexOf(FileUtils.pathSeparator + FileUtils.currentPath) > 0
                || path.indexOf(FileUtils.pathSeparator + FileUtils.parentPath) > 0
                || path.endsWith(FileUtils.pathSeparator + ".")
                || path.endsWith(FileUtils.pathSeparator + "..");
        if (notStraight) {
            throw new IOException("please set straight path, can not contain \"/..\" or \"/.\".");
        }

        // Unescape "\~" and strip one trailing separator before classifying the path.
        if (path.contains("\\~")) {
            path = path.replace("\\~", "~");
        }
        if (path.endsWith(FileUtils.pathSeparator)) {
            path = path.substring(0, path.length() - 1);
        }

        if (path.startsWith(FileUtils.userHomeStartPath)) {
            // Home-relative path: expand the leading character to the user home directory.
            realPath = String.join("", FileUtils.userHome, path.substring(1));
            transferPath = "~";
            leftTrimSize = FileUtils.userHome.length();
        } else if (path.startsWith(FileUtils.parentPath) || "..".equals(path)) {
            realPath = new File(path).getCanonicalPath();
            transferPath = "..";
            leftTrimSize = new File("..").getCanonicalPath().length();
        } else if (path.startsWith(FileUtils.currentPath) || ".".equals(path)) {
            realPath = new File(path).getCanonicalPath();
            transferPath = ".";
            leftTrimSize = new File(".").getCanonicalPath().length();
        } else {
            // Any other path is used as given; transferPath and leftTrimSize keep their defaults.
            realPath = path;
        }
        initPathSize = realPath.split(FileUtils.pathSeparator).length;
    }

    /**
     * Validates the configured directories and fills {@code this.directoriesMap} and
     * {@code directories}.
     *
     * With a null or empty argument only an empty map is created and {@code directories} is left
     * unassigned. Otherwise the keys (with any "-||-" suffix cut off) are sorted and de-duplicated,
     * and walked in order while tracking a "base" directory: an entry located under the base is
     * dropped when the base has no "end" option, rejected when the base's "end" is not smaller than
     * the entry, and kept otherwise; an entry outside the base becomes the new base.
     *
     * @param directoriesMap directory name to options map; may be null or empty
     * @throws IOException if the map contains a null key, an entry is empty or is not a directory,
     *                     or a base directory's "end" conflicts with a nested entry
     */
    private void setDirectoriesAndMap(Map> directoriesMap) throws IOException {
        if (directoriesMap == null || directoriesMap.size() <= 0) {
            this.directoriesMap = new HashMap<>();
        } else {
            if (directoriesMap.containsKey(null)) throw new IOException("prefixes map can not contain null.");
            this.directoriesMap = new HashMap<>(threads);
            this.directoriesMap.putAll(directoriesMap);
            int size = this.directoriesMap.size();
            // Keys may carry a "-||-<n>" suffix; only the part before it names the directory.
            Iterator iterator = this.directoriesMap.keySet().parallelStream()
                    .map(directory -> directory = directory.split("-\\|\\|-")[0])
                    .sorted().distinct().collect(Collectors.toList()).iterator();
            // The first (smallest) entry is the initial base directory.
            String temp = iterator.next();
            // NOTE(review): looked up with the suffix-stripped name, so a key that carried a
            // "-||-" suffix will not be found here; confirm this is intended.
            Map value = directoriesMap.get(temp);
            String end = value == null ? null : value.get("end");
            File tempFile = new File(temp);
            // Fall back to resolving the name relative to realPath.
            if (!tempFile.exists()) tempFile = new File(realPath, temp);
            directories = new ArrayList<>(size);
            if (tempFile.isDirectory()) directories.add(tempFile);
            else throw new IOException(temp + " is not valid directory.");
            // Canonical base path plus separator, used to detect entries nested under the base.
            String forCheckPath = tempFile.getCanonicalPath() + FileUtils.pathSeparator;
            File file;
            while (iterator.hasNext() && size > 0) {
                size--;
                String directory = iterator.next();
                if (directory == null || directory.equals("")) {
                    throw new IOException("directories can not contain empty item.");
                } else {
                    file = new File(directory);
                    if (!file.exists()) file = new File(realPath, directory);
                    if (file.isDirectory()) {
                        if (file.getCanonicalPath().startsWith(forCheckPath)) {
                            if (end == null || "".equals(end)) {
                                // Base has no "end": the nested entry is dropped. The iterator belongs
                                // to the temporary sorted list, so only the map removal has a lasting effect.
                                iterator.remove();
                                this.directoriesMap.remove(directory);
                            } else if (end.compareTo(directory) >= 0) {
                                throw new IOException(temp + "'s end can not be larger than " + directory + " in " + directoriesMap);
                            } else {
                                directories.add(file);
                            }
                        } else {
                            // Not nested under the current base: keep it and make it the new base.
                            directories.add(file);
                            tempFile = file;
                            temp = directory;
                            value = directoriesMap.get(temp);
                            end = value == null ? null : value.get("end");
                            forCheckPath = tempFile.getCanonicalPath() + FileUtils.pathSeparator;
                        }
                    } else {
                        throw new IOException(directory + " is not valid directory.");
                    }
                }
            }
        }
    }

    /**
     * Installs the index map and derives the {@code withEtag}, {@code withDatetime},
     * {@code withMime} and {@code withParent} flags from its keys.
     * A null or empty argument selects the defaults: every field of
     * {@code ConvertingUtils.localFileInfoFields} mapped to itself.
     *
     * @param indexMap index mapping; may be null or empty
     * @throws IOException if a supplied map contains a null or empty key
     */
    private void setIndexMapWithDefault(Map indexMap) throws IOException {
        boolean customMapGiven = indexMap != null && indexMap.size() != 0;
        if (customMapGiven) {
            for (String index : indexMap.keySet()) {
                if (index == null || index.isEmpty()) {
                    throw new IOException("the index can not be empty in " + indexMap);
                }
            }
            this.indexMap = indexMap;
        } else {
            if (this.indexMap == null) {
                this.indexMap = new HashMap<>();
            }
            for (String defaultField : ConvertingUtils.localFileInfoFields) {
                this.indexMap.put(defaultField, defaultField);
            }
        }
        withEtag = this.indexMap.containsKey("etag");
        withDatetime = this.indexMap.containsKey("datetime");
        withMime = this.indexMap.containsKey("mime");
        withParent = this.indexMap.containsKey("parent");
    }

    /**
     * Sets the line processor; each listing task works on a clone of it.
     *
     * @param processor processor to apply to listed items, or null to only list
     */
    public void setProcessor(ILineProcess processor) {
        this.processor = processor;
    }

    /**
     * Creates the converter that turns listed items into the form passed to the line processor.
     *
     * @return a new converter instance
     * @throws IOException if the converter cannot be created
     */
    protected abstract ITypeConvert getNewConverter() throws IOException;

    /**
     * Creates the converter that turns listed items into text lines for the result saver;
     * only requested when total saving is enabled.
     *
     * @return a new converter instance
     * @throws IOException if the converter cannot be created
     */
    protected abstract ITypeConvert getNewStringConverter() throws IOException;

    /**
     * Tells whether a directory passes the anti-prefix filter.
     *
     * @param directory directory to test
     * @return false if the directory's path starts with any configured anti-prefix, true otherwise
     */
    private boolean checkPrefix(File directory) {
        return antiPrefixes.stream()
                .noneMatch(antiPrefix -> directory.getPath().startsWith(antiPrefix));
    }

    /**
     * Records progress for the given lister name, using the options configured for its directory
     * (the part of the name before "-||-") serialized as JSON, or "{}" when none are configured.
     *
     * @param name lister name, optionally carrying a "-||-" suffix
     */
    private void recordListerByDirectory(String name) {
        String directoryKey = name.split("-\\|\\|-")[0];
        Map options = directoriesMap.get(directoryKey);
        String record;
        if (options == null) {
            record = "{}";
        } else {
            record = JsonUtils.toJsonObject(options).toString();
        }
        recordLister(name, record);
    }

    /**
     * Creates a lister for one directory.
     *
     * @param directory directory to list
     * @param start     value of the directory's "start" option, or null
     * @param end       value of the directory's "end" option, or null
     * @param unitLen   unit length forwarded from this container
     * @return the lister for the directory
     * @throws IOException if the lister cannot be created
     */
    protected abstract IFileLister getLister(File directory, String start, String end, int unitLen) throws IOException;

    /**
     * Creates a lister over an already collected list of items; used to re-split the remaining
     * items of a truncated lister.
     *
     * @param name         name of the new lister
     * @param fileInfoList items the lister should serve
     * @param start        start marker, or null
     * @param end          end marker, or null
     * @param unitLen      unit length forwarded from this container
     * @return the lister over the given items
     * @throws IOException if the lister cannot be created
     */
    protected abstract IFileLister getLister(String name, List fileInfoList, String start,
                                                      String end, int unitLen) throws IOException;

    /**
     * Creates a lister for a path that is not a directory.
     *
     * @param singleFilePath path of the single file
     * @return the lister for that file
     * @throws IOException if the lister cannot be created
     */
    protected abstract IFileLister getLister(String singleFilePath) throws IOException;

    /**
     * Builds the lister for a directory, applying the "start" and "end" options configured
     * for the directory's path (both null when nothing is configured).
     *
     * @param directory directory to list
     * @return the lister created by {@code getLister(directory, start, end, unitLen)}
     * @throws IOException if the lister cannot be created
     */
    private IFileLister generateLister(File directory) throws IOException {
        Map options = directoriesMap.get(directory.getPath());
        String start = options == null ? null : options.get("start");
        String end = options == null ? null : options.get("end");
        return getLister(directory, start, end, unitLen);
    }

    /**
     * Drains a lister: each batch is optionally written through the saver, optionally handed to
     * the processor, counted in the statistics, and the listing position is recorded while more
     * pages remain.
     *
     * @param lister    lister to drain; its initial batch is consumed first
     * @param saver     result saver receiving successful and failed lines
     * @param processor processor applied to each converted batch, or null to skip processing
     * @throws Exception if conversion, saving or listing fails, or the processor raises a
     *                   QiniuException that is classified as not continuable
     */
    public void export(IFileLister lister, IResultOutput saver, ILineProcess processor) throws Exception {
        ITypeConvert converter = getNewConverter();
        ITypeConvert stringConverter = null;
        if (saveTotal) {
            stringConverter = getNewStringConverter();
            saver.preAddWriter("failed");
        }
        List convertedList;
        List writeList;
        List objects = lister.currents();
        boolean hasNext = lister.hasNext();
        Map map = directoriesMap.get(lister.getName());
        // Created lazily below when no options are configured, so that recording the position can
        // never hit a null object even if hasNext only becomes true after a forward listing.
        JsonObject json = map != null ? JsonUtils.toJsonObject(map) : null;
        // A freshly initialized lister already holds the first batch of results, which must be
        // consumed first; listing forward later replaces that result list.
        while (objects.size() > 0 || hasNext) {
            if (stopped) break;
            // Past the configured pause time: block until another thread notifies the monitor.
            if (LocalDateTime.now(DatetimeUtils.clock_Default).isAfter(pauseDateTime)) {
                synchronized (object) {
                    object.wait();
                }
            }
            if (stringConverter != null) {
                writeList = stringConverter.convertToVList(objects);
                if (writeList.size() > 0) saver.writeSuccess(String.join("\n", writeList), false);
                if (stringConverter.errorSize() > 0) saver.writeToKey("failed", stringConverter.errorLines(), false);
            }
            if (processor != null) {
                convertedList = converter.convertToVList(objects);
                if (converter.errorSize() > 0) saver.writeError(converter.errorLines(), false);
                // When the processor throws, check whether the program can continue; if so, log
                // and ignore the exception so that reading the data source keeps going.
                try {
                    processor.processLine(convertedList);
                } catch (QiniuException e) {
                    if (HttpRespUtils.checkException(e, 2) < -1) throw e;
                    errorLogger.error("process objects: {}", lister.getName(), e);
                    if (e.response != null) e.response.close();
                }
            }
            if (hasNext) {
                if (json == null) json = new JsonObject();
                json.addProperty("start", lister.currentEndFilepath());
                recordLister(lister.getName(), json.toString());
            }
            statistics.addAndGet(objects.size());
            if (stopped) break;
            // objects must not be cleared here: that would empty the list held inside the lister.
            lister.listForward();
            objects = lister.currents();
            hasNext = lister.hasNext();
        }
    }

    /**
     * Creates the result saver used by one listing task.
     *
     * @param order order identifier of the task, as a string
     * @return a new result saver
     * @throws IOException if the saver cannot be created
     */
    protected abstract IResultOutput getNewResultSaver(String order) throws IOException;

    /**
     * Runs one complete listing task for a lister: acquires an order number, creates the saver
     * and a processor clone for it, exports the lister, and always releases every resource
     * afterwards. Failures are logged instead of being propagated.
     *
     * @param lister lister to export and close
     */
    private void listing(IFileLister lister) {
        final int order = UniOrderUtils.getOrder();
        final String orderKey = String.valueOf(order);
        IResultOutput resultSaver = null;
        ILineProcess taskProcessor = null;
        try {
            resultSaver = getNewResultSaver(orderKey);
            saverMap.put(orderKey, resultSaver);
            if (processor != null) {
                taskProcessor = processor.clone();
                taskProcessor.changeSaveOrder(orderKey);
                processorMap.put(orderKey, taskProcessor);
            }
            export(lister, resultSaver, taskProcessor);
            procedureLogger.info("{}-|-", lister.getName());
            // The progress record is removed only when the export succeeded.
            progressMap.remove(lister.getName());
        } catch (QiniuException e) {
            try {
                FileUtils.createIfNotExists(errorLogFile);
            } catch (IOException ignored) {
            }
            errorLogger.error("{}: {}, {}", lister.getName(), e.error(), e);
            if (e.response != null) {
                e.response.close();
            }
        } catch (Throwable e) {
            try {
                FileUtils.createIfNotExists(errorLogFile);
            } catch (IOException ignored) {
            }
            errorLogger.error("{}: {}", lister.getName(), e);
        } finally {
            try {
                FileUtils.createIfNotExists(infoLogFile);
            } catch (IOException ignored) {
            }
            infoLogger.info("{}\t{}\t{}", orderKey, lister.getName(), lister.count());
            if (resultSaver != null) {
                resultSaver.closeWriters();
            }
            saverMap.remove(orderKey);
            processorMap.remove(orderKey);
            if (taskProcessor != null) {
                taskProcessor.closeResource();
            }
            UniOrderUtils.returnOrder(order);
            lister.close();
            listerMap.remove(lister.getName());
        }
    }

    /**
     * Dispatches a freshly created lister: one with further pages is registered and listed on the
     * executor pool (counted in {@code integer} while it runs); one without is closed at once and
     * its progress record removed.
     *
     * @param lister lister to dispatch
     */
    private void processNodeLister(IFileLister lister) {
        if (!lister.hasNext()) {
            progressMap.remove(lister.getName());
            lister.close();
            return;
        }
        listerMap.put(lister.getName(), lister);
        integer.incrementAndGet();
        executorPool.execute(() -> {
            listing(lister);
            integer.decrementAndGet();
        });
    }

    /**
     * Makes one pass over the pending lister futures. Every finished future is removed from the
     * list and the running counter is decremented; when it produced a lister, that lister's
     * sub-directories (filtered by the anti-prefixes if configured) are collected and the lister
     * is dispatched through {@code processNodeLister}. Unfinished futures are left in place.
     *
     * @param futures pending futures; finished entries are removed in place
     * @return the sub-directories collected from the listers that finished during this pass
     * @throws Exception if retrieving a finished future's result fails
     */
    private List loopForFutures(List>> futures) throws Exception {
        Iterator>> iterator;
        Future> future;
        IFileLister tempLister;
        List nextDirectories = new ArrayList<>();
//            iterator = futures.iterator();
//            while (iterator.hasNext()) {
//                future = iterator.next();
//                if (future.isDone()) {
//                    tempLister = future.get();
//                    if (tempLister != null) {
//                        processNodeLister(tempLister);
//                        if (tempLister.getDirectories() != null && tempLister.getDirectories().size() > 0) {
//                            listForNextIteratively(tempLister.getDirectories());
//                        }
//                    }
//                    iterator.remove();
//                }
//            }
//        List nextDirectories = new ArrayList<>();
        iterator = futures.iterator();
        while (iterator.hasNext()) {
            future = iterator.next();
            if (future.isDone()) {
                // A null result means lister generation failed and was already logged by the task.
                tempLister = future.get();
                if (tempLister != null) {
                    if (tempLister.getDirectories() != null && tempLister.getDirectories().size() > 0) {
                        if (hasAntiPrefixes) {
                            nextDirectories.addAll(tempLister.getDirectories().parallelStream()
                                    .filter(this::checkPrefix)
                                    .collect(Collectors.toList()));
                        } else {
                            nextDirectories.addAll(tempLister.getDirectories());
                        }
                        // The directories were copied above; clear the lister's own list.
                        tempLister.getDirectories().clear();
                    }
                    processNodeLister(tempLister);
                }
                // Balances the increment made when the future was queued.
                integer.decrementAndGet();
                iterator.remove();
            }
        }
        iterator = null;
        future = null;
        tempLister = null;
        return nextDirectories;
    }

    // Counts queued lister-generation futures plus listing tasks currently running in the pool;
    // compared against `threads` to decide whether more work may be submitted.
    private AtomicInteger integer = new AtomicInteger(0);

    /**
     * Lists one level of directories and returns the directories of the next level.
     *
     * For each directory a lister is generated, on the pool while the running counter is below
     * {@code threads} and on the calling thread otherwise. Sub-directories of each lister are
     * collected (filtered by the anti-prefixes if configured) and the lister is dispatched through
     * {@code processNodeLister}. Lister-generation failures are logged and skipped. The input list
     * is cleared before returning, and a progress record is written for every returned directory.
     *
     * @param directories directories of the current level; cleared by this method
     * @return directories found one level below
     * @throws Exception if retrieving the result of a pending future fails
     */
    private List listForNextIteratively(List directories) throws Exception {
        List>> futures = new ArrayList<>(directories.size() * 2 / 3 + 1);
        List nextDirectories = new ArrayList<>();
        Future> future;
        List tempDirectories;
        for (File directory : directories) {
            if (integer.get() < threads) {
                // Capacity available: generate the lister on the pool; a failure yields null.
                future = executorPool.submit(() -> {
                    try {
                        return generateLister(directory);
                    } catch (IOException e) {
                        try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
                        errorLogger.error("generate lister failed by {}\t{}", directory.getPath(), directoriesMap.get(directory.getPath()), e);
                        return null;
                    }
                });
                if (future.isDone()) {
                    // Already finished right after submission: handle it here without queueing.
                    try {
                        IFileLister futureLister = future.get();
                        if (futureLister != null) {
                            if (futureLister.getDirectories() != null && futureLister.getDirectories().size() > 0) {
                                if (hasAntiPrefixes) {
                                    nextDirectories.addAll(futureLister.getDirectories().parallelStream()
                                            .filter(this::checkPrefix)
                                            .collect(Collectors.toList()));
                                } else {
                                    nextDirectories.addAll(futureLister.getDirectories());
                                }
                                futureLister.getDirectories().clear();
                            }
                            processNodeLister(futureLister);
                        }
                    } catch (Exception e) {
                        try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
                        errorLogger.error("execute lister failed", e);
                    }
                } else {
                    // Still running: count it and let loopForFutures pick it up later.
                    integer.incrementAndGet();
                    futures.add(future);
                }
            } else {
                // Counter at or above `threads`: generate the lister on the calling thread instead.
                try {
                    IFileLister futureLister = generateLister(directory);
                    if (futureLister.getDirectories() != null && futureLister.getDirectories().size() > 0) {
                        if (hasAntiPrefixes) {
                            nextDirectories.addAll(futureLister.getDirectories().parallelStream()
                                    .filter(this::checkPrefix)
                                    .collect(Collectors.toList()));
                        } else {
                            nextDirectories.addAll(futureLister.getDirectories());
                        }
                        futureLister.getDirectories().clear();
                    }
                    processNodeLister(futureLister);
                } catch (Exception e) {
                    try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
                    errorLogger.error("generate lister failed by {}\t{}", directory.getPath(), directoriesMap.get(directory.getPath()), e);
                }
            }
            // After every directory, harvest whichever queued futures have finished meanwhile.
            tempDirectories = loopForFutures(futures);
            nextDirectories.addAll(tempDirectories);
            tempDirectories.clear();
        }
        // Drain the remaining futures.
        // NOTE(review): this polls without sleeping until every future is done; confirm the busy wait is acceptable.
        while (futures.size() > 0) {
            tempDirectories = loopForFutures(futures);
            nextDirectories.addAll(tempDirectories);
            tempDirectories.clear();
        }
        futures = null;
        future = null;
        tempDirectories = null;
        directories.clear();
        directories = null;
        nextDirectories.parallelStream().forEach(dir -> recordListerByDirectory(dir.getPath()));
        return nextDirectories;
    }

    /**
     * Waits for the executor pool to terminate while watching for a small set of long-running
     * listers. A snapshot of {@code listerMap} is taken on entry. Once the tick counter reaches
     * 1200 (one tick per {@code sleep(1000)} call), listers without further pages are removed from
     * the snapshot; if between 1 and {@code tiny} listers remain, each is truncated and its
     * returned position is stored under "start".
     *
     * @param cValue   snapshot size at or below which the threshold is relaxed and the check
     *                 repeated sooner
     * @param initTiny initial threshold for the number of remaining listers that triggers truncation
     * @return an empty list if the check never ran before the pool terminated, otherwise the snapshot
     */
    private List> checkListerInPool(int cValue, int initTiny) {
        int count = 0;
        IFileLister iLister;
        boolean notCheck = true;
        // Snapshot taken once; listers registered later are not part of it.
        List> list = new ArrayList<>(listerMap.values());
        // NOTE(review): the iterator is created once here, so the hasNext() filter below only walks
        // the snapshot during the first check; later checks see an exhausted iterator. The caller
        // reads getRemainedItems() from the returned listers, so confirm before changing this.
        Iterator> iterator = list.iterator();
        String directory;
        String start;
        Map endMap;
        int tiny = initTiny;
        int accUnit = initTiny / 2;
        while (!executorPool.isTerminated()) {
            if (count >= 1200) {
                notCheck = false;
                while (iterator.hasNext()) {
                    iLister = iterator.next();
                    if(!iLister.hasNext()) iterator.remove();
                }
                if (list.size() > 0 && list.size() <= tiny) {
                    tiny = initTiny;
                    rootLogger.info("unfinished: {}, cValue: {}, to re-split lister list...", list.size(), cValue);
                    for (IFileLister lister : list) {
                        // the lister's prefix is a final object and must not be changed by the
                        // truncate operation, so the name is read before truncating
                        directory = lister.getName();
                        start = lister.truncate();
                        endMap = directoriesMap.get(directory);
                        // NOTE(review): a newly created map is not put back into directoriesMap,
                        // so in that case the "start" value is only visible in the log line below.
                        if (endMap == null) endMap = new HashMap<>();
                        endMap.put("start", start);
                        rootLogger.info("directory: {}, nextFilepath: {}, endMap: {}", directory, start, endMap);
                    }
                    // count is not reset in this branch, so the check runs again on the next tick.
                } else if (list.size() <= cValue) {
                    // Few listers left but more than `tiny`: relax the threshold and re-check after 300 ticks.
                    tiny += accUnit;
                    count = 900;
                } else {
                    count = 0;
                }
                refreshRecordAndStatistics();
            }
            sleep(1000);
            count++;
        }
        if (notCheck) return new ArrayList<>();
        else return list;
    }

    /**
     * Lists all directories level by level, then waits for the listing tasks to finish.
     *
     * With more than one thread, listers reported by {@code checkListerInPool} have their
     * remaining items split into sub-listers (named "name-||-index") that are listed on a fresh
     * pool; this repeats until no such listers are reported.
     *
     * Fixes relative to the previous version:
     * - each sub-lister is now the one actually listed (previously the already truncated parent
     *   lister was executed again and the registered sub-lister was never run);
     * - chunk bounds are clamped to the number of remaining items, so an uneven split (for example
     *   5 items into 4 chunks) no longer fails in subList;
     * - the split factor is at least 1, avoiding a division by zero when more listers than
     *   threads are reported.
     *
     * @throws Exception if listing a directory level fails
     */
    private void directoriesListing() throws Exception {
        while (directories.size() > 0) {
            directoriesMap.clear();
            directories = listForNextIteratively(directories);
            refreshRecordAndStatistics();
        }
        executorPool.shutdown();
        if (threads > 1) {
            int cValue = threads >= 10 ? threads / 2 : 3;
            int tiny = threads >= 30 ? threads / 10 : threads >= 10 ? 3 : 1;
            // Wildcard type: the listers' own type arguments are not needed here.
            List<? extends IFileLister> list = checkListerInPool(cValue, tiny);
            while (list.size() > 0) {
                list.parallelStream().forEach(lister -> recordListerByDirectory(lister.getName() + "-||-0"));
                // Number of sub-listers per truncated lister; never below 1.
                int multiple = Math.max(1, threads / list.size());
                executorPool = Executors.newFixedThreadPool(threads);
                listerMap.clear();
                list.parallelStream().forEach(lister -> {
                    List remainedItems = lister.getRemainedItems();
                    if (remainedItems == null) return;
                    int remainedSize = remainedItems.size();
                    if (remainedSize < multiple) {
                        // Too few items to split: list them all with a single sub-lister.
                        if (remainedSize > 0) {
                            try {
                                IFileLister sLister = getLister(lister.getName() + "-||-0",
                                        remainedItems, null, null, unitLen);
                                listerMap.put(sLister.getName(), sLister);
                                executorPool.execute(() -> listing(sLister));
                            } catch (IOException e) {
                                try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
                                errorLogger.error("generate lister failed by {}\t{}", lister.getName(),
                                        directoriesMap.get(lister.getName()), e);
                            }
                        }
                        return;
                    }
                    // Chunk size rounded up, so the last chunks may be short or empty.
                    int size = remainedSize % multiple == 0 ? remainedSize / multiple : remainedSize / multiple + 1;
                    for (int i = 0; i < multiple; i++) {
                        int from = size * i;
                        if (from >= remainedSize) break; // nothing left for this and later chunks
                        int to = Math.min(from + size, remainedSize);
                        String subName = String.join("-||-", lister.getName(), String.valueOf(i));
                        try {
                            IFileLister sLister = getLister(subName, remainedItems.subList(from, to),
                                    null, null, unitLen);
                            listerMap.put(sLister.getName(), sLister);
                            executorPool.execute(() -> listing(sLister));
                        } catch (IOException e) {
                            try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
                            errorLogger.error("generate lister failed by {}\t{}",
                                    subName,
                                    directoriesMap.get(lister.getName()), e);
                        }
                    }
                });
                executorPool.shutdown();
                list = checkListerInPool(cValue, tiny);
            }
        }
        // Wait for the pool to finish, refreshing records and statistics every 300 polls.
        while (!executorPool.isTerminated()) {
            sleep(2000);
            if (countInterval-- <= 0) {
                countInterval = 300;
                refreshRecordAndStatistics();
            }
        }
    }

    /**
     * Entry point of the data source. Without configured directories the root path itself is
     * listed first and its sub-directories become the work list; the directories are then listed
     * on a thread pool. Any failure during listing is logged, the end action is run and the JVM
     * exits with status -1.
     *
     * @throws Exception if the initial lister for the root path cannot be created
     */
    @Override
    public void export() throws Exception {
        String info;
        if (processor == null) {
            info = String.join(" ", "list files from path:", path);
        } else {
            info = String.join(" ", "read files from path:", path, "and", processor.getProcessName());
        }
        rootLogger.info("{} running...", info);
        rootLogger.info("order\tprefix\tquantity");
        showdownHook();

        IFileLister startFileLister = null;
        boolean noConfiguredDirectories = directories == null || directories.size() == 0;
        if (noConfiguredDirectories) {
            // Start from the root path itself; it may be a directory or a single file.
            File originFile = new File(realPath);
            recordListerByDirectory(realPath);
            startFileLister = originFile.isDirectory() ? generateLister(originFile) : getLister(realPath);
            directories = startFileLister.getDirectories();
        }
        try {
            if (directories != null && directories.size() > 0) {
                if (hasAntiPrefixes) {
                    directories = directories.parallelStream().filter(this::checkPrefix)
                            .peek(directory -> recordListerByDirectory(directory.getPath())).collect(Collectors.toList());
                } else {
                    directories.parallelStream().forEach(directory -> recordListerByDirectory(directory.getPath()));
                }
                executorPool = Executors.newFixedThreadPool(threads);
                if (startFileLister != null) {
                    processNodeLister(startFileLister);
                }
                directoriesListing();
            } else {
                // Nothing below the root: list the root lister on the calling thread.
                if (hasAntiPrefixes) {
                    rootLogger.info("there are no directories to check anti-prefixes.");
                }
                if (startFileLister.hasNext()) {
                    listing(startFileLister);
                } else {
                    startFileLister.close();
                }
            }
            rootLogger.info("{} finished, results in {}.", info, savePath);
            endAction();
        } catch (Throwable e) {
            stopped = true;
            rootLogger.error("export failed", e);
            endAction();
            System.exit(-1);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy