org.vertexium.accumulo.util.HdfsLargeDataStore Maven / Gradle / Ivy
package org.vertexium.accumulo.util;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.vertexium.Property;
import java.io.IOException;
import java.io.OutputStream;
public class HdfsLargeDataStore extends LimitOutputStream.LargeDataStore {
private final FileSystem fs;
private final String dataDir;
private final String rowKey;
private final Property property;
private Path hdfsPath;
private String relativeFileName;
public HdfsLargeDataStore(FileSystem fs, String dataDir, String rowKey, Property property) {
this.fs = fs;
this.dataDir = dataDir;
this.rowKey = rowKey;
this.property = property;
}
@Override
public OutputStream createOutputStream() throws IOException {
this.hdfsPath = createFileName();
return this.fs.create(this.hdfsPath);
}
protected Path createFileName() throws IOException {
this.relativeFileName = createHdfsFileName(rowKey, property);
Path path = new Path(dataDir, this.relativeFileName);
if (!this.fs.mkdirs(path.getParent())) {
throw new IOException("Could not create directory " + path.getParent());
}
if (this.fs.exists(path)) {
this.fs.delete(path, true);
}
return path;
}
public Path getFullHdfsPath() {
return hdfsPath;
}
public String getRelativeFileName() {
return relativeFileName;
}
private String createHdfsFileName(String rowKey, Property property) throws IOException {
String fileName = HdfsLargeDataStore.encodeFileName(property.getName() + "_" + property.getKey() + "_" + property.getTimestamp());
return rowKey + "/" + fileName;
}
private static String encodeFileName(String fileName) {
StringBuilder result = new StringBuilder();
for (char ch : fileName.toCharArray()) {
if ((ch >= '0' && ch <= '9')
|| (ch >= 'A' && ch <= 'Z')
|| (ch >= 'a' && ch <= 'z')) {
result.append(ch);
} else if (ch == ' ') {
result.append('_');
} else {
String hex = "0000" + Integer.toHexString((int) ch);
result.append(hex.substring(hex.length() - 4));
}
}
return result.toString();
}
}