All downloads are free. Search and download functionality uses the official Maven repository.

com.github.azbh111.utils.java.string.model.InpuStreamFromString Maven / Gradle / Ivy

The newest version!
package com.github.azbh111.utils.java.string.model;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;

/**
 * 把 CharSequence 包装成 InputStream, 流式读取
 * 避免String.getBytes产生数据拷贝, 适合处理大字符串
 *
 * @author: zyp
 * @since: 2021/12/14 下午1:33
 */
/**
 * Wraps a {@link CharSequence} as an {@link InputStream} and encodes it lazily,
 * one code point at a time, as the stream is read.
 *
 * <p>This avoids the full copy made by {@code String.getBytes}, which makes it
 * suitable for very large strings. Characters that are malformed (for example a
 * lone surrogate) or cannot be represented in the target charset are replaced
 * with the encoder's replacement bytes, the same way {@code String.getBytes}
 * handles them, instead of being silently dropped.
 *
 * <p>Instances hold mutable, unsynchronized state (read position, encoder and
 * buffers).
 *
 * @author: zyp
 * @since: 2021/12/14 下午1:33
 */
public class InpuStreamFromString extends InputStream {
    private final CharSequence charSequence;
    private final Charset charset;
    private final int maxBytesPerChar;
    private final CharsetEncoder charsetEncoder;
    /** Holds the one or two chars (a surrogate pair) that make up the code point being encoded. */
    private final CharBuffer charBuffer;
    /** Holds the encoded bytes of the current code point; kept in "read mode" between calls. */
    private final ByteBuffer byteBuffer;
    /** Index of the next char of {@link #charSequence} to encode. */
    private int pos;
    /** Set once the encoder has been flushed, i.e. no further bytes will ever be produced. */
    private boolean flush;

    /**
     * Creates a stream that yields {@code charSequence} encoded with {@code charset}.
     *
     * @param charSequence the text to expose as bytes
     * @param charset      the charset used to encode the text
     * @throws NullPointerException if either argument is {@code null}
     */
    public InpuStreamFromString(CharSequence charSequence, Charset charset) {
        if (charSequence == null) {
            throw new NullPointerException("charSequence");
        }
        if (charset == null) {
            throw new NullPointerException("charset");
        }
        this.charSequence = charSequence;
        this.charset = charset;
        // REPLACE mirrors String.getBytes: bad input becomes the replacement bytes
        // rather than vanishing from the output.
        this.charsetEncoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
        // Char buffer for the code point currently being encoded (at most a surrogate pair).
        this.charBuffer = CharBuffer.allocate(2);
        this.maxBytesPerChar = (int) Math.ceil(this.charsetEncoder.maxBytesPerChar());
        // Byte buffer receiving the encoded code point. A code point outside the BMP
        // (e.g. an emoji) occupies two chars, so reserve twice the per-char maximum.
        this.byteBuffer = ByteBuffer.allocate(maxBytesPerChar << 1);
        // Start in read mode with nothing available.
        this.byteBuffer.flip();
    }

    /**
     * Makes sure {@link #byteBuffer} has at least one unread byte, encoding further
     * input as needed.
     *
     * @return {@code true} if a byte is available, {@code false} once the input is
     *         exhausted and the encoder has been flushed
     * @throws IOException if the encoder produced more bytes than the buffer can hold
     */
    private boolean fillBuffer() throws IOException {
        // Loop because a single step may legitimately produce zero bytes
        // (e.g. the final flush of a stateless charset such as UTF-8).
        while (!byteBuffer.hasRemaining()) {
            if (flush) {
                return false;
            }
            byteBuffer.clear();
            if (pos < charSequence.length()) {
                encodeNextCodePoint();
            } else {
                finishEncoding();
            }
            byteBuffer.flip();
        }
        return true;
    }

    /**
     * Encodes the code point starting at {@link #pos} into {@link #byteBuffer}
     * (which must be in write mode) and advances {@link #pos} past it.
     */
    private void encodeNextCodePoint() throws IOException {
        char first = charSequence.charAt(pos++);
        charBuffer.clear();
        charBuffer.put(first);
        // Only look for the low surrogate if there is a next char at all.
        if (Character.isHighSurrogate(first) && pos < charSequence.length()) {
            char second = charSequence.charAt(pos);
            if (Character.isLowSurrogate(second)) {
                // Supplementary code point (e.g. emoji): encode both halves together.
                charBuffer.put(second);
                pos++;
            }
        }
        charBuffer.flip();
        // endOfInput=true so that an unpaired surrogate is treated as malformed
        // (and replaced) instead of being left pending in the char buffer.
        if (charsetEncoder.encode(charBuffer, byteBuffer, true).isOverflow()) {
            throw new IOException("encoded character does not fit into the byte buffer");
        }
    }

    /**
     * Signals end of input to the encoder and flushes it into {@link #byteBuffer}
     * (which must be in write mode). Some charsets emit trailing bytes on flush.
     */
    private void finishEncoding() throws IOException {
        // The encoder may only be flushed after an encode call with endOfInput=true;
        // for an empty char sequence no such call has happened yet, so issue one
        // with empty input first.
        charBuffer.clear();
        charBuffer.flip();
        charsetEncoder.encode(charBuffer, byteBuffer, true);
        if (charsetEncoder.flush(byteBuffer).isOverflow()) {
            throw new IOException("encoder flush output does not fit into the byte buffer");
        }
        flush = true;
    }

    /**
     * Reads up to {@code b.length} bytes; equivalent to {@code read(b, 0, b.length)}.
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} encoded bytes into {@code b} starting at {@code off}.
     *
     * @return the number of bytes stored, {@code 0} if {@code len} is zero, or
     *         {@code -1} if the end of the stream has been reached
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative or
     *                                   {@code len} exceeds {@code b.length - off}
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        // Validate up front so a bad range cannot fail half-way after consuming data.
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException("off=" + off + ", len=" + len + ", b.length=" + b.length);
        }
        if (len == 0) {
            return 0;
        }
        int total = 0;
        while (total < len && fillBuffer()) {
            int chunk = Math.min(byteBuffer.remaining(), len - total);
            byteBuffer.get(b, off + total, chunk);
            total += chunk;
        }
        return total == 0 ? -1 : total;
    }

    /**
     * Skipping is not supported by this stream.
     *
     * @throws IOException always
     */
    @Override
    public long skip(long n) throws IOException {
        throw new IOException("skip not supported");
    }

    /**
     * Reads the next encoded byte.
     *
     * @return the byte as an unsigned value in {@code 0..255}, or {@code -1} at end of stream
     */
    @Override
    public int read() throws IOException {
        if (!fillBuffer()) {
            return -1;
        }
        // Mask to an unsigned value: a raw (signed) byte >= 0x80 would be negative,
        // and 0xFF would be indistinguishable from end of stream.
        return byteBuffer.get() & 0xFF;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy