All downloads are FREE. The search and download functionality uses the official Maven repository.

zhao.algorithmMagic.io.InputHDFS Maven / Gradle / Ivy

There is a newer version: 1.42
Show newest version
package zhao.algorithmMagic.io;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import zhao.algorithmMagic.exception.OperatorOperationException;
import zhao.algorithmMagic.operands.table.*;
import zhao.algorithmMagic.utils.ASIO;
import zhao.algorithmMagic.utils.ASStr;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;

/**
 * HDFS数据输入组件对象,能够从HDFS平台中获取到文件数据。
 * 

* HDFS data input component object, which can obtain file data from the HDFS platform. * * @author 赵凌宇 * 2023/4/6 8:40 */ public final class InputHDFS implements InputComponent { private final static Logger LOGGER = LoggerFactory.getLogger("InputHDFS"); private final FileSystem fileSystem; private final Path inputPath; private final char sep; private final String charset; private final Series field; private final int pk; private final int rowCount; private FSDataInputStream fsDataInputStream; private BufferedReader bufferedReader; private boolean isOpen; /** * @param fileSystem HDFS 文件系统对象 * @param inputPath 需要被读取的文件路径 * @param sep 读取时需要使用的分隔符 * @param charset 读取时需要使用字符集 * @param field 读取时的表字段名称行 * @param pk 读取时的表主键对象 * @param rowCount 读取矩阵的时候的目标行数 */ InputHDFS(FileSystem fileSystem, Path inputPath, char sep, String charset, Cell field, int pk, int rowCount) { this.rowCount = rowCount; if (fileSystem == null || inputPath == null || field == null) { throw new OperatorOperationException("The parameter in [FileSystem fileSystem, Path inputPath, String[] field] cannot be null!!!!"); } this.fileSystem = fileSystem; this.inputPath = inputPath; this.sep = sep; this.charset = charset; this.field = FieldCell.parse(field.getValue()); this.pk = pk; this.isOpen = false; } /** * @return 开始构建本数据组件对象。 *

* Start building this data component object. */ public static InputBuilder builder() { return new InputHDFSBuilder(); } /** * 启动数据输入组件 * * @return 如果启动成功返回true */ @Override public boolean open() { try { LOGGER.info("InputHDFS.open()"); this.fsDataInputStream = fileSystem.open(this.inputPath); this.bufferedReader = new BufferedReader(new InputStreamReader(fsDataInputStream)); return true; } catch (IOException e) { LOGGER.error("InputHDFS.open() error!!!", e); return false; } } /** * @return 如果组件已经启动了,在这里返回true */ @Override public boolean isOpen() { LOGGER.info("InputHDFS.isOpen()"); return this.isOpen; } /** * 从数据输入组件中提取出 byte 数组的数据,一般情况下,这里返回的都是一些二进制的数据。 *

* Extract the data of the byte array from the data input component. Generally, the returned data here is some binary data. * * @return byte[] 的数据对象 */ @Override public byte[] getByteArray() { LOGGER.info("getByteArray()"); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); try { while (bufferedReader.ready()) { byteArrayOutputStream.write(bufferedReader.readLine().getBytes(charset)); } return byteArrayOutputStream.toByteArray(); } catch (IOException e) { throw new OperatorOperationException(e); } finally { ASIO.close(byteArrayOutputStream); } } /** * 从数据输入组件中提取出 int 矩阵数据,一般情况下,这里返回的是一些矩阵元素数据。 *

* From the data input component, int matrix data is increasingly generated. Generally, some matrix element data is returned here. * * @return int[][] */ @Override public int[][] getInt2Array() { LOGGER.info("getInt2Array()"); if (this.rowCount > 0) { return ASStr.strToIntMat(this.bufferedReader, this.rowCount, this.sep); } else { return ASStr.strToIntMat(this.bufferedReader, this.sep); } } /** * 从数据输入组件中提取出 int 矩阵数据,一般情况下,这里返回的是一些矩阵元素数据。 *

* From the data input component, double matrix data is increasingly generated. Generally, some matrix element data is returned here. * * @return double[][] */ @Override public double[][] getDouble2Array() { LOGGER.info("getDouble2Array()"); if (this.rowCount > 0) { return ASStr.strToDoubleMat(this.bufferedReader, this.rowCount, this.sep); } else { return ASStr.strToDoubleMat(this.bufferedReader, this.sep); } } /** * 从数据输入组件获取到 DataFrame 对象,该函数有些数据输入组件可能不支持。 *

* Retrieve the DataFrame object from the data input component, which may not be supported by some data input components. * * @return 从数据输入组件中获取到的DataFrame数据封装对象。 *

* The DataFrame data encapsulation object obtained from the data input component. */ @Override public DataFrame getDataFrame() { LOGGER.info("getDataFrame()"); DataFrame select = FDataFrame.select(this.field, pk); try { while (bufferedReader.ready()) { select.insert(FinalSeries.parse(ASStr.splitByChar(bufferedReader.readLine(), sep))); } return select; } catch (IOException e) { throw new OperatorOperationException(e); } } /** * 从数据输入组件获取到 DataFrame 对象,该函数有些数据输入组件可能不支持。 *

* Retrieve the DataFrame object from the data input component, which may not be supported by some data input components. * * @return 从数据输入组件中获取到的DataFrame数据封装对象。 *

* The DataFrame data encapsulation object obtained from the data input component. */ @Override public DataFrame getSFDataFrame() { LOGGER.info("getSFDataFrame()"); DataFrame select = FDataFrame.select(this.field, pk); try { while (bufferedReader.ready()) { select.insert(SingletonSeries.parse(ASStr.splitByChar(bufferedReader.readLine(), sep))); } return select; } catch (IOException e) { throw new OperatorOperationException(e); } } /** * 从数据输入组件中提取出 数据流 对象。 *

* Extract the data flow object from the data input component. * * @return 数据输入流对象 */ @Override public InputStream getInputStream() { LOGGER.info("getInputStream()"); return this.fsDataInputStream; } /** * 从数据输入组件中提取出 图像缓存 对象,需要注意的是,该操作在有些情况下可能不被支持。 *

* Extracting image cache objects from the data input component, it should be noted that this operation may not be supported in some cases. * * @return 图像缓存对象。 */ @Override public BufferedImage getBufferedImage() { LOGGER.info("getBufferedImage()"); try { return ImageIO.read(this.fsDataInputStream); } catch (IOException e) { throw new OperatorOperationException(e); } } /** * Closes this stream and releases any system resources associated * with it. If the stream is already closed then invoking this * method has no effect. * *

As noted in {@link AutoCloseable#close()}, cases where the * close may fail require careful attention. It is strongly advised * to relinquish the underlying resources and to internally * mark the {@code Closeable} as closed, prior to throwing * the {@code IOException}. */ @Override public void close() { LOGGER.info("close()"); ASIO.close(this.bufferedReader); isOpen = false; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy