/*
 * com.mayabot.nlp.common.BufferedReaderLFCR Maven / Gradle / Ivy
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of mynlp Show documentation
 * Show all versions of mynlp Show documentation
 * Maya Nlp subproject :mynlp
 */
package com.mayabot.nlp.common;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
/**
 * Buffered character reader that returns segments of text, but including the
 * boundary character that ended each segment.
 *
 * <p>A segment ends at the first line feed ({@code '\n'}), carriage return
 * ({@code '\r'}) or ideographic full stop ({@code '。'}, U+3002). The boundary
 * character is kept as the last character of the returned string. Each
 * boundary character terminates a segment on its own, so a carriage return
 * followed by a line feed produces two segments: one ending in the carriage
 * return and one consisting of the line feed alone.
 *
 * <p>This class performs no synchronization of its own.
 */
public class BufferedReaderLFCR implements Closeable, ParagraphReader {

    /** Buffer size used by {@link #BufferedReaderLFCR(Reader)}. */
    private static final int DEFAULT_CHAR_BUFFER_SIZE = 8192;

    /** Initial capacity of the accumulator used when a segment spans buffer refills. */
    private static final int DEFAULT_EXPECTED_LINE_LENGTH = 80;

    /** Underlying reader; set to {@code null} once {@link #close()} has run. */
    private Reader in;

    /** Character buffer; released (set to {@code null}) on close. */
    private char[] cb;

    /** Number of valid characters currently held in {@link #cb}. */
    private int nChars;

    /** Index in {@link #cb} of the next character to hand out. */
    private int nextChar;

    /**
     * Creates a buffering character-input stream that uses an input buffer of
     * the specified size.
     *
     * @param in A Reader
     * @param sz Input-buffer size
     * @throws IllegalArgumentException If {@code sz <= 0}
     */
    public BufferedReaderLFCR(Reader in, int sz) {
        if (sz <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        this.in = in;
        this.cb = new char[sz];
        this.nChars = 0;
        this.nextChar = 0;
    }

    /**
     * Creates a buffering character-input stream that uses a default-sized
     * input buffer.
     *
     * @param in A Reader
     */
    public BufferedReaderLFCR(Reader in) {
        this(in, DEFAULT_CHAR_BUFFER_SIZE);
    }

    /**
     * Checks to make sure that the stream has not been closed.
     *
     * @throws IOException if the underlying reader reference is {@code null}
     */
    private void ensureOpen() throws IOException {
        if (in == null) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Refills the input buffer from the underlying reader, starting at index 0.
     *
     * <p>Only called once the buffer has been fully consumed, so nothing needs
     * to be preserved. When the reader reports end of stream the buffer
     * indices are left untouched, which the caller detects as
     * {@code nextChar >= nChars}.
     *
     * @throws IOException if the underlying read fails
     */
    private void fill() throws IOException {
        int n;
        do {
            // A reader may legitimately return 0 characters; retry until it
            // delivers data or signals end of stream with a negative count.
            n = in.read(cb, 0, cb.length);
        } while (n == 0);
        if (n > 0) {
            nChars = n;
            nextChar = 0;
        }
    }

    /**
     * Tells whether {@code c} terminates a segment.
     *
     * @param c the character to test
     * @return {@code true} for line feed, carriage return and the ideographic
     *         full stop (U+3002)
     */
    private static boolean isBoundary(char c) {
        return c == '\n' || c == '\r' || c == '。';
    }

    /**
     * Reads the next segment of text, including its boundary character.
     *
     * <p>A segment is terminated by a line feed ({@code '\n'}), a carriage
     * return ({@code '\r'}) or an ideographic full stop ({@code '。'}). The
     * terminating character is included at the end of the returned string.
     * Trailing text at end of stream that has no terminator is returned as-is.
     *
     * @param ignoreLF currently ignored; retained so existing callers keep
     *                 compiling
     * @return the next segment including its boundary character, or
     *         {@code null} if the end of the stream has been reached
     * @throws IOException If an I/O error occurs or the reader has been closed
     */
    public String readLine(boolean ignoreLF) throws IOException {
        ensureOpen();
        // Holds the part of a segment already consumed when the segment
        // straddles one or more buffer refills; null while not needed.
        StringBuilder pending = null;
        for (;;) {
            if (nextChar >= nChars) {
                fill();
            }
            if (nextChar >= nChars) {
                // End of stream: hand back whatever unterminated text was
                // gathered, otherwise report exhaustion.
                if (pending != null && pending.length() > 0) {
                    return pending.toString();
                }
                return null;
            }

            int start = nextChar;
            int i = start;
            while (i < nChars && !isBoundary(cb[i])) {
                i++;
            }

            if (i < nChars) {
                // Boundary found at index i: include it (+1) and step past it.
                int length = i - start + 1;
                nextChar = i + 1;
                if (pending == null) {
                    return new String(cb, start, length);
                }
                return pending.append(cb, start, length).toString();
            }

            // Buffer exhausted without a boundary: stash it and refill.
            if (pending == null) {
                pending = new StringBuilder(DEFAULT_EXPECTED_LINE_LENGTH);
            }
            pending.append(cb, start, i - start);
            nextChar = i;
        }
    }

    /**
     * Reads the next segment of text, including its boundary character.
     *
     * <p>Equivalent to {@code readLine(false)}.
     *
     * @return the next segment including its boundary character, or
     *         {@code null} if the end of the stream has been reached
     * @throws IOException If an I/O error occurs or the reader has been closed
     */
    public String readLine() throws IOException {
        return readLine(false);
    }

    /**
     * Closes the underlying reader and releases the buffer. Calling this
     * method again after the first call has no effect.
     *
     * @throws IOException if closing the underlying reader fails; the
     *                     references are still cleared in that case
     */
    @Override
    public void close() throws IOException {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } finally {
            in = null;
            cb = null;
        }
    }

    /**
     * Returns a sequential stream whose elements are the segments produced by
     * successive calls to {@link #readLine()}.
     *
     * <p>Segments are read on demand while the stream is consumed. An
     * {@link IOException} raised while reading is rethrown wrapped in an
     * {@link UncheckedIOException}.
     *
     * @return a stream of segments, each including its boundary character
     */
    public Stream<String> lines() {
        Iterator<String> iter = new Iterator<String>() {
            /** Segment fetched by hasNext() and not yet handed out by next(). */
            private String nextLine = null;

            @Override
            public boolean hasNext() {
                if (nextLine != null) {
                    return true;
                }
                try {
                    nextLine = readLine();
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
                return nextLine != null;
            }

            @Override
            public String next() {
                if (nextLine == null && !hasNext()) {
                    throw new NoSuchElementException();
                }
                String line = nextLine;
                nextLine = null;
                return line;
            }
        };
        return StreamSupport.stream(
                Spliterators.spliteratorUnknownSize(
                        iter, Spliterator.ORDERED | Spliterator.NONNULL),
                false);
    }

    /**
     * Returns the next segment by delegating to {@link #readLine()}.
     *
     * @return the next segment including its boundary character, or
     *         {@code null} if the end of the stream has been reached
     * @throws IOException If an I/O error occurs or the reader has been closed
     */
    @Override
    public String next() throws IOException {
        return readLine();
    }
}