// zhao.algorithmMagic.io.OutputHDFS Maven / Gradle / Ivy
package zhao.algorithmMagic.io;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import zhao.algorithmMagic.exception.OperatorOperationException;
import zhao.algorithmMagic.integrator.ImageRenderingIntegrator;
import zhao.algorithmMagic.operands.matrix.ColorMatrix;
import zhao.algorithmMagic.operands.matrix.ColumnDoubleMatrix;
import zhao.algorithmMagic.operands.matrix.ColumnIntegerMatrix;
import zhao.algorithmMagic.operands.table.Cell;
import zhao.algorithmMagic.operands.table.DataFrame;
import zhao.algorithmMagic.operands.table.Series;
import zhao.algorithmMagic.utils.ASIO;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
/**
* HDFS 平台数据输出设备类,在该类示例化出来的对象中能够直接将数据输出到HDFS文件系统中。
*
* HDFS platform data output device class, which can directly output data to the HDFS file system in the instantiated objects of this class.
*
* @author 赵凌宇
* 2023/4/6 20:01
*/
public final class OutputHDFS implements OutputComponent {
private final static Logger LOGGER = LoggerFactory.getLogger("OutputHDFS");
private final FileSystem fileSystem;
private final Path outputPath;
private final String format;
private final char sep;
private boolean isOpen = false;
private FSDataOutputStream fsDataOutputStream;
private BufferedWriter bufferedWriter;
/**
* @param fileSystem HDFS 文件系统对象。
*
* HDFS File System Objects.
* @param outputPath HDFS 文件数据输出目录。
*
* HDFS file data output directory.
* @param format HDFS 文件数据输出格式。
*
* HDFS file data output format.
* @param sep 文件数据输出时需要使用的分隔符。
*
* Delimiters required for file data output.
*/
public OutputHDFS(FileSystem fileSystem, Path outputPath, String format, char sep) {
if (fileSystem == null || outputPath == null || format == null) {
throw new OperatorOperationException("The parameter in [FileSystem fileSystem, Path outputPath, String format] cannot be null!!!!");
}
this.fileSystem = fileSystem;
this.outputPath = outputPath;
this.format = format;
this.sep = sep;
}
public static OutputBuilder builder() {
return new OutputHDFSBuilder();
}
/**
* 启动数据输出组件.
*
* Start data output component.
*
* @return 如果启动成功返回true
*/
@Override
public boolean open() {
try {
LOGGER.info("OutputHDFS.open()");
fsDataOutputStream = fileSystem.create(outputPath);
bufferedWriter = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream));
this.isOpen = true;
return true;
} catch (IOException e) {
LOGGER.error("OutputHDFS.open() error!!!", e);
return false;
}
}
/**
* @return 如果组件已经启动了,在这里返回true.
*
* If the component has already started, return true here
*/
@Override
public boolean isOpen() {
LOGGER.info("OutputHDFS.isOpen()");
return this.isOpen;
}
/**
* 将一份二进制数据输出。
*
* Output a binary data.
*
* @param data 需要被输出的二进制数据包。
*
* The binary data package that needs to be output.
*/
@Override
public void writeByteArray(byte[] data) {
LOGGER.info("OutputHDFS.writeByteArray(byte[] data)");
try {
fsDataOutputStream.write(data);
} catch (IOException e) {
throw new OperatorOperationException(e);
}
}
/**
* 输出一个 整形 矩阵对象
*
* @param matrix 需要被输出的矩阵
*/
@Override
public void writeMat(ColumnIntegerMatrix matrix) {
int rowCount = -1;
try {
// 输出列
bufferedWriter.write("colName");
String[] rowFieldNames = matrix.getRowFieldNames();
for (String colName : matrix.getColFieldNames()) {
bufferedWriter.write(sep);
bufferedWriter.write(colName);
}
bufferedWriter.newLine();
for (int[] ints : matrix.toArrays()) {
bufferedWriter.write(rowFieldNames[++rowCount]);
for (int aInt : ints) {
bufferedWriter.write(sep);
bufferedWriter.write(String.valueOf(aInt));
}
}
} catch (IOException e) {
throw new OperatorOperationException("Write data exception!", e);
}
}
/**
* 输出一个 double类型的 矩阵对象
*
* @param matrix 需要被输出的矩阵
*/
@Override
public void writeMat(ColumnDoubleMatrix matrix) {
int rowCount = -1;
try {
// 输出列
bufferedWriter.write("colName");
String[] rowFieldNames = matrix.getRowFieldNames();
for (String colName : matrix.getColFieldNames()) {
bufferedWriter.write(sep);
bufferedWriter.write(colName);
}
bufferedWriter.newLine();
for (double[] ints : matrix.toArrays()) {
bufferedWriter.write(rowFieldNames[++rowCount]);
for (double aInt : ints) {
bufferedWriter.write(sep);
bufferedWriter.write(String.valueOf(aInt));
}
}
} catch (IOException e) {
throw new OperatorOperationException("Write data exception!", e);
}
}
/**
* 将图像矩阵所包含的图像直接输出到目标。
*
* Directly output the images contained in the image matrix to the target.
*
* @param colorMatrix 需要被输出的图像矩阵对象。
*
* The image matrix object that needs to be output.
*/
@Override
public void writeImage(ColorMatrix colorMatrix) {
LOGGER.info("OutputHDFS.writeImage(ColorMatrix colorMatrix)");
BufferedImage bufferedImage = ImageRenderingIntegrator.drawBufferedImage(
colorMatrix.toArrays(), colorMatrix.getColCount(), colorMatrix.getRowCount(), 1, false
);
try {
ImageIO.write(bufferedImage, format, fsDataOutputStream);
} catch (IOException e) {
throw new OperatorOperationException(e);
}
}
/**
* 将一个 DataFrame 中的数据按照数据输出组件进行输出.
*
* Output the data in a DataFrame according to the data output component.
*
* @param dataFrame 需要被输出的数据对象
*/
@Override
public void writeDataFrame(DataFrame dataFrame) {
LOGGER.info("OutputHDFS.writeDataFrame(DataFrame dataFrame)");
try {
bufferedWriter.write("rowNumber");
for (Cell> cell : dataFrame.getFields()) {
bufferedWriter.write(sep);
bufferedWriter.write(cell.getStringValue());
}
bufferedWriter.newLine();
int count = 0;
for (Series cells : dataFrame) {
bufferedWriter.write(String.valueOf(++count));
for (Cell> cell : cells) {
bufferedWriter.write(sep);
bufferedWriter.write(cell.toString());
}
bufferedWriter.newLine();
}
} catch (IOException e) {
throw new OperatorOperationException(e);
}
}
/**
* Closes this stream and releases any system resources associated
* with it. If the stream is already closed then invoking this
* method has no effect.
*
*
As noted in {@link AutoCloseable#close()}, cases where the
* close may fail require careful attention. It is strongly advised
* to relinquish the underlying resources and to internally
* mark the {@code Closeable} as closed, prior to throwing
* the {@code IOException}.
*/
@Override
public void close() {
LOGGER.info("OutputHDFS.close()");
ASIO.close(this.bufferedWriter);
ASIO.close(this.fsDataOutputStream);
}
}