com.qiniu.datasource.CloudStorageContainer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of qsuits Show documentation
Show all versions of qsuits Show documentation
qiniu-suits is an efficient tool for the Qiniu API, implemented in Java 8.
package com.qiniu.datasource;
import com.google.gson.JsonObject;
import com.qiniu.common.JsonRecorder;
import com.qiniu.common.QiniuException;
import com.qiniu.common.SuitsException;
import com.qiniu.interfaces.IDataSource;
import com.qiniu.interfaces.ILineProcess;
import com.qiniu.interfaces.ILister;
import com.qiniu.interfaces.ITypeConvert;
import com.qiniu.persistence.FileSaveMapper;
import com.qiniu.interfaces.IResultOutput;
import com.qiniu.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.misc.Signal;
import sun.misc.SignalHandler;
import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.qiniu.entry.CommonParams.lineFormats;
// Abstract base for cloud-storage data sources: keeps the listing configuration, the result-saving options
// and the per-task bookkeeping used by the listing/export methods of this class.
// NOTE(review): generic type arguments look stripped on this declaration and on several members below
// (e.g. "Map> prefixesMap") — confirm against the original source.
public abstract class CloudStorageContainer implements IDataSource, IResultOutput, T> {
// Root logger used for progress messages.
static final Logger rootLogger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME);
// Error / info / procedure loggers, each paired with a file that the methods below create (if absent) before logging.
static final Logger errorLogger = LoggerFactory.getLogger("error");
static final File errorLogFile = new File("qsuits.error");
private static final Logger infoLogger = LoggerFactory.getLogger("info");
private static final File infoLogFile = new File("qsuits.info");
private static final Logger procedureLogger = LoggerFactory.getLogger("procedure");
private static final File procedureLogFile = new File("procedure.log");
// Bucket to list; it is reported in the start-up log message of export().
protected String bucket;
// Prefixes to exclude: checkPrefix() rejects any prefix that starts with one of them.
protected List antiPrefixes;
// Set to true by the constructor when antiPrefixes is non-null and non-empty.
protected boolean hasAntiPrefixes = false;
// Per-prefix listing options; the option maps are read with the keys "marker", "start" and "end".
protected Map> prefixesMap;
// Prefixes to list (built as a sorted list in setPrefixesAndMap).
protected List prefixes;
// When true, export() also lists the keys that sort before the first prefix.
protected boolean prefixLeft;
// NOTE(review): presumably the counterpart of prefixLeft for keys after the last prefix — confirm.
protected boolean prefixRight;
// NOTE(review): not read in the visible code; presumably the page size of one list request — confirm.
protected int unitLen;
// Size of the thread pool used for listing.
protected int threads;
// Retry budget handed to HttpRespUtils.listExceptionWithRetry for failed list requests.
protected int retryTimes = 5;
// When true, export(lister, saver, processor) writes the listed records through the string converter.
protected boolean saveTotal;
// Result-saving options; defaults are assigned in the constructor and can be replaced via setSaveOptions.
protected String savePath;
protected String saveFormat;
protected String saveSeparator;
protected List rmFields;
protected Map indexMap;
protected List fields;
protected ExecutorService executorPool; // thread pool
protected ILineProcess processor; // the configured resource processor
// Ordered suffix points that moreValidPrefixes appends to a prefix to derive the next-level prefixes.
protected List originPrefixList = new ArrayList<>();
// First and last entries of originPrefixList (assigned in the constructor); firstPoint is static and public.
public static String firstPoint;
private String lastPoint;
// Option maps of the listers that were last (greatest prefix) in their batch; see filteredListerByPrefixes.
private ConcurrentMap> prefixAndEndedMap = new ConcurrentHashMap<>();
// Savers and cloned processors of the listing tasks, keyed by the task's order string.
private ConcurrentMap> saverMap = new ConcurrentHashMap<>();
private ConcurrentMap> processorMap = new ConcurrentHashMap<>();
// Creates a container for the given bucket and assigns the default save options
// (save everything, path "result", format "tab", separator "\t").
// NOTE(review): in this view indexMap and prefixesMap are never stored, originPrefixList is never filled and the
// closing braces are absent, yet this.indexMap and originPrefixList are read below — the corresponding statements
// (presumably calls to setIndexMapWithDefault / setPrefixesAndMap) look lost; confirm against the original source.
public CloudStorageContainer(String bucket, Map> prefixesMap, List antiPrefixes,
boolean prefixLeft, boolean prefixRight, Map indexMap, List fields,
int unitLen, int threads) throws IOException {
this.bucket = bucket;
this.prefixLeft = prefixLeft;
this.prefixRight = prefixRight;
// Set antiPrefixes before prefixes, because elements contained in antiPrefixes may have to be removed from prefixes.
this.antiPrefixes = antiPrefixes;
if (antiPrefixes != null && antiPrefixes.size() > 0) hasAntiPrefixes = true;
this.unitLen = unitLen;
this.threads = threads;
// default save parameters
this.saveTotal = true; // by default every record is saved
this.savePath = "result";
this.saveFormat = "tab";
this.saveSeparator = "\t";
// Without explicit fields, derive the ordered field list from the index map values.
if (fields == null || fields.size() == 0) {
this.fields = ConvertingUtils.getOrderedFields(new ArrayList<>(this.indexMap.values()), rmFields);
else this.fields = fields;
// Listing with a prefix that contains the "|" character currently times out, so that character, the "{" that
// precedes it in ASCII order and the characters after it ("|}~") are all dropped up front. This mitigates the
// listing timeouts, simplifies prefix configuration and avoids changing the algorithm just for that character.
firstPoint = originPrefixList.get(0);
lastPoint = originPrefixList.get(originPrefixList.size() - 1);
// 不调用则各参数使用默认值
public void setSaveOptions(boolean saveTotal, String savePath, String format, String separator, List rmFields)
throws IOException {
this.saveTotal = saveTotal;
this.savePath = savePath;
this.saveFormat = format;
if (!lineFormats.contains(saveFormat)) throw new IOException("please check your format for map to string.");
this.saveSeparator = separator;
this.rmFields = rmFields;
if (rmFields != null && rmFields.size() > 0) {
this.fields = ConvertingUtils.getFields(new ArrayList<>(fields), rmFields);
public void setRetryTimes(int retryTimes) {
this.retryTimes = retryTimes < 1 ? 5 : retryTimes;
private void setIndexMapWithDefault(Map indexMap) {
if (indexMap == null || indexMap.size() == 0) {
if (this.indexMap == null) this.indexMap = new HashMap<>();
for (String fileInfoField : ConvertingUtils.defaultFileFields) {
this.indexMap.put(fileInfoField, fileInfoField);
} else {
this.indexMap = indexMap;
public void setProcessor(ILineProcess processor) {
this.processor = processor;
// Initializes this.prefixesMap and the sorted prefixes list from the given map and validates the configured ranges.
// Throws IOException when the map contains a null key, when "" is the only prefix and has no start/end/marker,
// when the "end" of a prefix is not smaller than a later prefix that starts with it, or when prefixRight is set
// and the largest anti-prefix is not smaller than the largest prefix.
// NOTE(review): closing braces and possibly some statements (e.g. the body of the empty `if` below) appear to be
// missing from this view — confirm against the original source before relying on the exact nesting.
private void setPrefixesAndMap(Map> prefixesMap) throws IOException {
if (prefixesMap == null || prefixesMap.size() <= 0) {
// No prefixes configured: start with an empty map and set both prefixLeft and prefixRight.
this.prefixesMap = new HashMap<>();
prefixLeft = true;
prefixRight = true;
if (hasAntiPrefixes) prefixes = originPrefixList.stream().sorted().collect(Collectors.toList());
} else {
if (prefixesMap.containsKey(null)) throw new IOException("");
// Work on a copy of the caller's map; the prefixes are its keys in sorted order.
this.prefixesMap = new HashMap<>(prefixesMap);
prefixes = prefixesMap.keySet().stream().sorted().collect(Collectors.toList());
int size = prefixes.size();
Iterator iterator = prefixes.iterator();
String temp = iterator.next();
Map value = prefixesMap.get(temp);
String start = null;
String end = null;
String marker = null;
// A single empty-string prefix is only accepted together with a non-empty start, end or marker.
if (temp.equals("") && !iterator.hasNext()) {
if (value != null && value.size() > 0) {
start = "".equals(value.get("start")) ? null : value.get("start");
end = "".equals(value.get("end")) ? null : value.get("end");
marker = "".equals(value.get("marker")) ? null : value.get("marker");
if (start == null && end == null && marker == null) throw new IOException("prefixes can not only be empty string(\"\")");
// Compare each prefix with the current reference prefix (temp) and that prefix's configured "end".
while (iterator.hasNext() && size > 0) {
String prefix = iterator.next();
if (prefix.startsWith(temp)) {
end = value == null ? null : value.get("end");
// NOTE(review): this branch has no visible body — its statements look lost.
if (end == null || "".equals(end)) {
} else if (end.compareTo(prefix) >= 0) {
throw new IOException(temp + "'s end can not be more larger than " + prefix + " in " + prefixesMap);
} else {
temp = prefix;
value = prefixesMap.get(temp);
// With anti-prefixes and prefixRight set, the greatest anti-prefix must sort before the greatest prefix.
if (hasAntiPrefixes && prefixes != null && prefixes.size() > 0) {
String lastAntiPrefix = antiPrefixes.stream().max(Comparator.naturalOrder()).orElse(null);
if (prefixRight && lastAntiPrefix != null && lastAntiPrefix.compareTo(prefixes.get(prefixes.size() - 1)) <= 0) {
throw new IOException("max anti-prefix can not be same as or more larger than max prefix.");
private synchronized void insertIntoPrefixesMap(String prefix, Map markerAndEnd) {
prefixesMap.put(prefix, markerAndEnd);
* 检验 prefix 是否有效,在 antiPrefixes 前缀列表中或者为空均无效
* @param prefix 待检验的 prefix
* @return 检验结果,true 表示 prefix 有效不需要剔除
boolean checkPrefix(String prefix) {
if (prefix == null) return false;
if (hasAntiPrefixes) {
for (String antiPrefix : antiPrefixes) {
if (prefix.startsWith(antiPrefix)) return false;
return true;
} else {
return true;
// Supplies the converter whose output list is prepared for the line processor in export(lister, saver, processor).
protected abstract ITypeConvert getNewConverter();
// Supplies the converter whose output lines are joined and written by the saver in export(lister, saver, processor).
protected abstract ITypeConvert getNewStringConverter() throws IOException;
// Collects per-prefix JSON progress records; they are logged to the procedure log and saved by endAction().
private JsonRecorder recorder = new JsonRecorder();
void recordListerByPrefix(String prefix) {
JsonObject json = prefixesMap.get(prefix) == null ? null : JsonUtils.toJsonObject(prefixesMap.get(prefix));
try { FileUtils.createIfNotExists(procedureLogFile); } catch (IOException ignored) {}
procedureLogger.info(recorder.put(prefix, json));
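/**
 * Runs the listing until the given lister is exhausted and applies the processor to the listed results.
 * @param lister an already initialized lister
 * @param saver the output used to persist the listed results
 * @param processor the processor applied to the listed resources
 * @throws IOException if listing or persisting the results fails
 */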
// Drains one lister page by page: optionally writes every page through the string converter, optionally converts
// it for the processor, records the lister's progress and then lists forward with retries.
// NOTE(review): closing braces and at least one statement (the body of the `try` before `catch (QiniuException e)`,
// presumably the call that hands convertedList to the processor) are missing from this view — confirm against the
// original source.
public void export(ILister lister, IResultOutput saver, ILineProcess processor) throws IOException {
ITypeConvert converter = getNewConverter();
ITypeConvert stringConverter = null;
// The string converter is only needed when the listed records themselves are saved.
if (saveTotal) {
stringConverter = getNewStringConverter();
List convertedList;
List writeList;
List objects = lister.currents();
boolean hasNext = lister.hasNext();
int retry;
// Option map registered for this lister's prefix in prefixAndEndedMap; null when there is none.
Map map = prefixAndEndedMap.get(lister.getPrefix());
// The initialized lister already holds the first page of results, which has to be consumed first; listing
// forward afterwards replaces that result list.
while (objects.size() > 0 || hasNext) {
if (stringConverter != null) {
writeList = stringConverter.convertToVList(objects);
if (writeList.size() > 0) saver.writeSuccess(String.join("\n", writeList), false);
if (stringConverter.errorSize() > 0) saver.writeToKey("failed", stringConverter.errorLines(), false);
if (processor != null) {
convertedList = converter.convertToVList(objects);
if (converter.errorSize() > 0) saver.writeError(converter.errorLines(), false);
// If an exception is thrown, check whether the program can continue after it; if so, ignore it so that reading
// from the data source keeps going.
try {
} catch (QiniuException e) {
if (HttpRespUtils.checkException(e, 2) < -1) throw e;
if (e.response != null) e.response.close();
if (hasNext) {
// Record the current marker and end prefix for this prefix and log the record to the procedure log.
JsonObject json = recorder.getOrDefault(lister.getPrefix(), new JsonObject());
json.addProperty("marker", lister.getMarker());
json.addProperty("end", lister.getEndPrefix());
try { FileUtils.createIfNotExists(procedureLogFile); } catch (IOException ignored) {}
procedureLogger.info(recorder.put(lister.getPrefix(), json));
// Remember the last key of the current page as the new "start" of this prefix.
if (map != null) map.put("start", lister.currentEndKey());
retry = retryTimes;
// Retry loop around listForward; listExceptionWithRetry returns the updated retry counter.
while (true) {
try {
lister.listForward(); // listForward implementations must check hasNext first and clear the results if there is no next page
objects = lister.currents();
} catch (SuitsException e) {
retry = HttpRespUtils.listExceptionWithRetry(e, retry);
try {FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
errorLogger.error("list objects by prefix:{} retrying...", lister.getPrefix(), e);
hasNext = lister.hasNext();
// Supplies the result output for one listing task; listing() passes the task's order number as a string.
protected abstract IResultOutput getNewResultSaver(String order) throws IOException;
* 将 lister 对象放入线程池进行执行列举,如果 processor 不为空则同时执行 process 过程
* @param lister 列举对象
void listing(ILister lister) {
// 持久化结果标识信息
int order = UniOrderUtils.getOrder();
String orderStr = String.valueOf(order);
IResultOutput saver = null;
ILineProcess lineProcessor = null;
try {
// 多线程情况下不要直接使用传入的 processor,因为对其关闭会造成 clone 的对象无法进行结果持久化的写入
if (processor != null) {
lineProcessor = processor.clone();
processorMap.put(orderStr, lineProcessor);
saver = getNewResultSaver(orderStr);
saverMap.put(orderStr, saver);
export(lister, saver, lineProcessor);
} catch (QiniuException e) {
try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
errorLogger.error("{}: {}, {}", lister.getPrefix(), recorder.getJson(lister.getPrefix()), e.error(), e);
if (e.response != null) e.response.close();
} catch (Throwable e) {
try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
errorLogger.error("{}: {}", lister.getPrefix(), recorder.getJson(lister.getPrefix()), e);
} finally {
try { FileUtils.createIfNotExists(infoLogFile); } catch (IOException ignored) {}
infoLogger.info("{}\t{}\t{}", orderStr, lister.getPrefix(), lister.count());
if (saver != null) saver.closeWriters();
if (lineProcessor != null) lineProcessor.closeResource();
UniOrderUtils.returnOrder(order); // 最好执行完 close 再归还 order,避免上个文件描述符没有被使用,order 又被使用
// Creates a lister for the prefix with the given marker, start and end; generateLister() retries this call on SuitsException.
protected abstract ILister getLister(String prefix, String marker, String start, String end) throws SuitsException;
ILister generateLister(String prefix) throws SuitsException {
int retry = retryTimes;
Map map = prefixesMap.get(prefix);
String marker;
String start;
String end;
if (map == null) {
marker = start = end = null;
} else {
marker = map.get("marker");
start = map.get("start");
end = map.get("end");
while (true) {
try {
return getLister(prefix, marker, start, end);
} catch (SuitsException e) {
retry = HttpRespUtils.listExceptionWithRetry(e, retry);
try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
errorLogger.error("generate lister by prefix:{} retrying...", prefix, e);
// Derives the next-level prefixes below the lister's prefix by appending the predefined points of originPrefixList,
// starting at the point computed from the lister's current end key. Returns null when no point was determined.
// NOTE(review): closing braces are missing, the stream chain at the end has no terminal operation and the nesting
// of the `else` branches is ambiguous in this view — confirm against the original source.
private List moreValidPrefixes(ILister lister, boolean doFutureCheck) {
boolean next;
// Fall back to the plain hasNext() check when the future check fails.
try {
next = doFutureCheck ? lister.hasFutureNext() : lister.hasNext();
} catch (SuitsException e) {
next = lister.hasNext();
String startPrefix = lister.getPrefix();
String point = null;
if (next) {
// If there is a next page and the last listed key is not null, the following prefix character can be computed
// from that key.
String endKey = lister.currentEndKey();
int prefixLen = startPrefix.length();
if (endKey != null) {
if (endKey.length() > prefixLen) {
// If the last key is longer than prefixLen, take the character right after the current prefix as point; it is
// compared with the predefined prefix list to determine the lister's endPrefix.
point = endKey.substring(prefixLen, prefixLen + 1);
// If that character is greater than the last predefined prefix (e.g. keys with Chinese characters), searching
// further by predefined prefixes is pointless, so return directly.
if (point.compareTo(lastPoint) > 0) {
point = null;
// If point is smaller than the first predefined prefix, end the lister at the first predefined prefix.
} else if (point.compareTo(firstPoint) < 0) {
point = firstPoint;
lister.setEndPrefix(startPrefix + firstPoint);
} else {
// Register the prefix extended by point, carrying the lister's current marker.
insertIntoPrefixesMap(startPrefix + point, new HashMap(){{
put("marker", lister.getMarker());
} else {
point = firstPoint;
// Without a next page, end the lister directly at the first predefined prefix.
lister.setEndPrefix(startPrefix + firstPoint);
} else {
return moreValidPrefixes(lister, true);
if (point != null) {
String finalPoint = point;
// Keep the predefined points not smaller than point, prepend the lister's prefix and drop invalid prefixes.
return originPrefixList.stream().filter(prefix -> prefix.compareTo(finalPoint) >= 0)
.map(prefix -> lister.getPrefix() + prefix).filter(this::checkPrefix)
} else {
return null;
// Builds a lister for every prefix of the stream, drops the ones that could not be built or that have neither
// current results nor a next page, registers the lister with the greatest prefix in prefixAndEndedMap and submits
// listers to the pool.
// NOTE(review): generic types and closing braces are stripped, and statements (the terminal stream operation, the
// rest of the final `else`, possibly the end of the loop body) look missing — confirm against the original source.
private List> filteredListerByPrefixes(Stream prefixesStream) {
List> prefixesLister = prefixesStream.map(prefix -> {
try {
return generateLister(prefix);
} catch (SuitsException e) {
// A lister that cannot be generated is logged and mapped to null so that the filter below discards it.
try { FileUtils.createIfNotExists(errorLogFile); } catch (IOException ignored) {}
errorLogger.error("generate lister failed by {}\t{}", prefix, prefixesMap.get(prefix), e);
return null;
}).filter(generated -> {
if (generated == null) return false;
else if (generated.currents().size() > 0 || generated.hasNext()) return true;
else {
return false;
if (prefixesLister.size() > 0) {
// Remember the options of the lister with the greatest prefix, unless they carry the "remove" key.
ILister lastLister = prefixesLister.stream().max(Comparator.comparing(ILister::getPrefix)).get();
Map map = prefixesMap.get(lastLister.getPrefix());
if (map == null) {
prefixAndEndedMap.put(lastLister.getPrefix(), new HashMap<>());
} else if (!map.containsKey("remove")) {
prefixAndEndedMap.put(lastLister.getPrefix(), map);
Iterator> it = prefixesLister.iterator();
while (it.hasNext()) {
ILister nLister = it.next();
// Listers without a next page, or with a non-empty end prefix, are submitted for listing right away.
if(!nLister.hasNext() || (nLister.getEndPrefix() != null && !"".equals(nLister.getEndPrefix()))) {
executorPool.execute(() -> listing(nLister));
return prefixesLister;
// Submits the lister to the pool when it has current results or a next page.
// NOTE(review): the body of the `else` branch and the closing braces are missing from this view.
private void processNodeLister(ILister lister) {
if (lister.currents().size() > 0 || lister.hasNext()) {
executorPool.execute(() -> listing(lister));
} else {
// Splits every lister into the listers of its next-level prefixes (in parallel) and concatenates the results;
// returns null when no lister produced further prefixes.
// NOTE(review): generic types and the closing braces of the lambda's if/else and of the method are stripped.
private List> computeToNextLevel(List> listerList) {
return listerList.parallelStream().map(lister -> {
List nextPrefixes = moreValidPrefixes(lister, true);
if (nextPrefixes != null) {
return filteredListerByPrefixes(nextPrefixes.stream());
} else {
return null;
// The reduce step appends each list to the first one, so the first list is mutated and returned.
}).filter(Objects::nonNull).reduce((list1, list2) -> { list1.addAll(list2); return list1; }).orElse(null);
// Polls until the executor pool has terminated. Once the counter reaches 1800 it drops the listers that have no
// next page; if at most `tiny` listers remain, it truncates them and registers their next markers so that the
// remaining ranges can be split again. Returns extremePrefixes, which stays null unless such a re-split was prepared.
// NOTE(review): generic types and closing braces are stripped and statements look missing (the body of the `try`
// before `catch (InterruptedException ignored)`, the counter increment, and whatever adds prefixes to
// extremePrefixes) — confirm against the original source.
private List checkListerInPool(List> listerList, int cValue, int tiny) {
List extremePrefixes = null;
int count = 0;
ILister iLister;
Iterator> iterator;
String prefix;
String nextMarker;
String start;
Map endMap;
Map prefixMap;
while (!executorPool.isTerminated()) {
if (count >= 1800) {
// Drop the listers that have nothing left to list.
iterator = listerList.iterator();
while (iterator.hasNext()) {
iLister = iterator.next();
if(!iLister.hasNext()) iterator.remove();
if (listerList.size() > 0 && listerList.size() <= tiny) {
rootLogger.info("unfinished: {}, cValue: {}, to re-split prefixes...\n", listerList.size(), cValue);
for (ILister lister : listerList) {
// The lister's prefix is a final object and must not be changed by the truncate operation.
prefix = lister.getPrefix();
nextMarker = lister.truncate();
// Check again here in case the original thread's update of prefixAndEndedMap got lost during truncate.
endMap = prefixAndEndedMap.get(prefix);
prefixMap = new HashMap<>();
if (endMap == null) {
prefixMap.put("remove", "remove");
} else {
start = lister.currentEndKey();
if (start != null) endMap.put("start", start);
rootLogger.info("prefix: {}, nextMarker: {}, endMap: {}\n", prefix, nextMarker, endMap);
// If nextMarker is already empty at truncate time, the listing has finished.
if (nextMarker == null || nextMarker.isEmpty()) continue;
if (extremePrefixes == null) extremePrefixes = new ArrayList<>();
prefixMap.put("marker", nextMarker);
insertIntoPrefixesMap(prefix, prefixMap);
} else if (listerList.size() <= cValue) {
count = 1200;
} else {
count = 0;
try {
} catch (InterruptedException ignored) {
// Interrupted while waiting: spin through a short counting loop instead.
int i = 0;
while (i < 1000) i++;
return extremePrefixes;
// Post-processes prefixAndEndedMap: entries are re-registered under "" (for configured prefixes, when prefixRight
// is set) or under the prefix shortened by its last character; then every remaining key is recorded via
// recordListerByPrefix and the sorted key list is returned.
// NOTE(review): closing braces are stripped and statements look missing (the body of the null/empty check and of
// the `start` check) — as shown, prefixMap.get("start") would follow a null check without any guard; confirm
// against the original source.
private List lastEndedPrefixes() {
List phraseLastPrefixes = new ArrayList<>(prefixAndEndedMap.keySet());
String previousPrefix;
Map prefixMap;
String start;
// Prefixes that were configured explicitly (empty when none were).
Set startPrefixes = prefixes == null ? new HashSet<>() : new HashSet<>(prefixes);
for (String prefix : phraseLastPrefixes) {
prefixMap = prefixAndEndedMap.get(prefix);
rootLogger.info("prefix: {}, endMap: {}", prefix, prefixMap);
if (prefixMap == null || prefixMap.size() == 0) {
// recorder.remove(prefix);
start = prefixMap.get("start");
// Because the marker takes precedence, any marker is removed so that start takes effect.
if (start != null && !"".equals(start)) {
} else {
if (startPrefixes.contains(prefix)) {
if (prefixRight) prefixAndEndedMap.put("", prefixMap);
} else {
// Move the entry up one level: the prefix without its last character.
previousPrefix = prefix.substring(0, prefix.length() - 1);
prefixAndEndedMap.put(previousPrefix, prefixMap);
phraseLastPrefixes = prefixAndEndedMap.keySet().stream().sorted().collect(Collectors.toList());
for (String phraseLastPrefix : phraseLastPrefixes) recordListerByPrefix(phraseLastPrefix);
return phraseLastPrefixes;
// Waits for the running listers, re-splits the ones reported by checkListerInPool in fresh thread pools until none
// are left, and finally lists the prefixes returned by lastEndedPrefixes().
// NOTE(review): generic types and closing braces are stripped and statements look missing (e.g. pool shutdown
// calls and the body of the `try` before `catch (InterruptedException ignored)`) — confirm against the original.
private void waitAndTailListing(List> listerList) {
// Thresholds derived from the thread count and passed to checkListerInPool.
int cValue = threads < 10 ? 3 : threads / 2;
int tiny = threads >= 300 ? 30 : threads >= 200 ? 20 : threads >= 100 ? 10 : threads >= 50 ? threads / 10 :
threads >= 10 ? 3 : 1;
List extremePrefixes = checkListerInPool(listerList, cValue, tiny);
while (extremePrefixes != null && extremePrefixes.size() > 0) {
for (String prefix : extremePrefixes) recordListerByPrefix(prefix);
// A new pool is created for every re-split round.
executorPool = Executors.newFixedThreadPool(threads);
listerList = filteredListerByPrefixes(extremePrefixes.parallelStream());
// Keep splitting to the next prefix level while the listers do not exceed the thread count.
while (listerList != null && listerList.size() > 0 && listerList.size() <= threads) {
listerList = computeToNextLevel(listerList);
if (listerList != null && listerList.size() > 0) {
listerList.parallelStream().forEach(lister -> executorPool.execute(() -> listing(lister)));
extremePrefixes = checkListerInPool(listerList, cValue, tiny);
// Tail phase: list the prefixes collected by lastEndedPrefixes() with one thread per prefix.
List phraseLastPrefixes = lastEndedPrefixes();
if (phraseLastPrefixes.size() > 0) {
executorPool = Executors.newFixedThreadPool(phraseLastPrefixes.size());
listerList = filteredListerByPrefixes(phraseLastPrefixes.parallelStream());
listerList.parallelStream().forEach(lister -> executorPool.execute(() -> listing(lister)));
while (!executorPool.isTerminated()) {
try {
} catch (InterruptedException ignored) {
// Interrupted while waiting: spin through a short counting loop instead.
int i = 0;
while (i < 1000) i++;
// Final clean-up: closes the processor registered under each saver key and, when the recorder still holds
// entries, writes them to "<last path element of savePath>-prefixes" with the ".json" extension in the parent
// directory of savePath.
// NOTE(review): the Map.Entry generic type and the closing braces are stripped; statements inside the loop
// (presumably closing the saver itself) may be missing — confirm against the original source.
void endAction() throws IOException {
ILineProcess processor;
for (Map.Entry> saverEntry : saverMap.entrySet()) {
processor = processorMap.get(saverEntry.getKey());
if (processor != null) processor.closeResource();
String record = recorder.toString();
if (recorder.size() > 0) {
// Note: this assigns the static FileSaveMapper.ext field before the mapper is created.
FileSaveMapper.ext = ".json";
String path = new File(savePath).getCanonicalPath();
FileSaveMapper saveMapper = new FileSaveMapper(new File(path).getParent());
// if (path.endsWith("/")) path = path.substring(0, path.length() - 1);
String fileName = path.substring(path.lastIndexOf(FileUtils.pathSeparator) + 1) + "-prefixes";
saveMapper.writeToKey(fileName, record, true);
rootLogger.info("please check the prefixes breakpoint in {}{}, it can be used for one more time listing remained objects.",
fileName, FileSaveMapper.ext);
// Installs a handler for the INT signal.
// NOTE(review): the handler's `try` body, the `catch` body and the closing braces are missing from this view;
// presumably the handler runs endAction() — confirm against the original source.
void showdownHook() {
SignalHandler handler = signal -> {
try {
} catch (IOException e) {
// Hand the INT signal (Ctrl+C interrupt) over to the given handler, overriding the system's default behavior.
Signal.handle(new Signal("INT"), handler);
/**
 * Creates the worker threads according to the current parameter values and runs the data-source export.
 */
// Entry point of the export: logs the start message, determines the prefixes to list (splitting the root listing
// when none are configured and more than one thread is available), creates the thread pool and submits the listers.
// NOTE(review): closing braces are stripped and statements look missing (the single-thread listing call, the
// terminal operation of the prefixes stream, the waiting/shutdown logic and the end of the catch block) — confirm
// against the original source.
public void export() throws Exception {
String info = "list objects from " + getSourceName() + " bucket: " + bucket + (processor == null ? "" : " and "
+ processor.getProcessName());
rootLogger.info("{} running...", info);
FileSaveMapper.append = false; // by default results are not appended when persisted (previously existing files are cleared)
ILister startLister = null;
// Without configured prefixes, start from the root prefix "" and derive the first-level prefixes from it.
if (prefixes == null || prefixes.size() == 0) {
startLister = generateLister("");
if (threads > 1) {
prefixes = moreValidPrefixes(startLister, false);
// No further prefixes could be derived: fall back to a single thread.
if (prefixes == null) threads = 1;
if (threads <= 1) {
rootLogger.info("{} finished.", info);
} else {
// With prefixLeft set, also list the keys before the first prefix by ending the "" prefix at that prefix.
if (prefixLeft && prefixes.get(0).compareTo("") > 0) {
insertIntoPrefixesMap("", new HashMap(){{ put("end", prefixes.get(0)); }});
startLister = generateLister("");
prefixes = prefixes.parallelStream().filter(this::checkPrefix).peek(this::recordListerByPrefix)
executorPool = Executors.newFixedThreadPool(threads);
try {
if (startLister != null) processNodeLister(startLister);
List> listerList = filteredListerByPrefixes(prefixes.parallelStream());
// Keep splitting to the next prefix level while there are fewer listers than threads.
while (listerList != null && listerList.size() > 0 && listerList.size() < threads) {
listerList = computeToNextLevel(listerList);
if (listerList != null && listerList.size() > 0) {
listerList.parallelStream().forEach(lister -> executorPool.execute(() -> listing(lister)));
rootLogger.info("{} finished.", info);
} catch (Throwable e) {
rootLogger.error("export failed", e);
© 2015 - 2025 Weber Informatics LLC | Privacy Policy