
eu.interedition.text.QueryResultTextStream Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of text-core Show documentation
Show all versions of text-core Show documentation
Stand-off Markup/Annotation Text Model
The newest version!
package eu.interedition.text;
import com.google.common.base.Predicate;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import eu.interedition.text.simple.SimpleTextRepository;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
import javax.annotation.Nullable;
import static eu.interedition.text.Query.and;
import static eu.interedition.text.Query.any;
import static eu.interedition.text.Query.rangeOverlap;
import static eu.interedition.text.Query.text;
/**
* @author Gregor Middell
*/
public class QueryResultTextStream implements TextStream {
private final TextRepository repository;
private final Layer text;
private final Query query;
private final long pageSize;
private QueryResultTextStream(TextRepository repository, Layer text, Query query, long pageSize) {
this.repository = repository;
this.text = text;
this.query = query;
this.pageSize = pageSize;
}
public QueryResultTextStream(TextRepository repository, Layer text, Query query) {
this(repository, text, query, Long.MAX_VALUE);
}
public QueryResultTextStream(TextRepository repository, Layer text) {
this(repository, text, any());
}
@Override
public void stream(final Listener listener) throws IOException {
final long contentLength = text.length();
text.stream(new Text.Consumer() {
@Override
public void consume(Reader textReader) throws IOException {
final SortedMap>> starts = Maps.newTreeMap();
final SortedMap>> ends = Maps.newTreeMap();
long offset = 0;
long next = 0;
long pageEnd = 0;
listener.start(contentLength);
final Set> annotationData = Sets.newHashSet();
while (true) {
if ((offset % pageSize) == 0) {
pageEnd = Math.min(offset + pageSize, contentLength);
final TextRange pageRange = new TextRange(offset, pageEnd);
final QueryResult page = repository.query(and(query, text(text), rangeOverlap(pageRange)));
try {
for (Layer a : page) {
for (Anchor anchor : a.getAnchors()) {
if (!text.equals(anchor.getText())) {
continue;
}
final TextRange range = anchor.getRange();
final long start = range.getStart();
final long end = range.getEnd();
if (start >= offset) {
Set> starting = starts.get(start);
if (starting == null) {
starts.put(start, starting = Sets.newHashSet());
}
starting.add(a);
annotationData.add(a);
}
if (end <= pageEnd) {
Set> ending = ends.get(end);
if (ending == null) {
ends.put(end, ending = Sets.newHashSet());
}
ending.add(a);
annotationData.add(a);
}
}
}
} finally {
Closeables.closeQuietly(page);
}
next = Math.min(starts.isEmpty() ? contentLength : starts.firstKey(), ends.isEmpty() ? contentLength : ends.firstKey());
}
if (offset == next) {
final Set> startEvents = (!starts.isEmpty() && offset == starts.firstKey() ? starts.remove(starts.firstKey()) : Sets.>newHashSet());
final Set> endEvents = (!ends.isEmpty() && offset == ends.firstKey() ? ends.remove(ends.firstKey()) : Sets.>newHashSet());
final Set> emptyEvents = Sets.newHashSet(Sets.filter(endEvents, emptyIn(text)));
endEvents.removeAll(emptyEvents);
if (!endEvents.isEmpty()) listener.end(offset, substract(annotationData, endEvents, true));
if (!startEvents.isEmpty())
listener.start(offset, substract(annotationData, startEvents, false));
if (!emptyEvents.isEmpty()) listener.end(offset, substract(annotationData, emptyEvents, true));
next = Math.min(starts.isEmpty() ? contentLength : starts.firstKey(), ends.isEmpty() ? contentLength : ends.firstKey());
}
if (offset == contentLength) {
break;
}
final long readTo = Math.min(pageEnd, next);
if (offset < readTo) {
final char[] currentText = new char[(int) (readTo - offset)];
int read = textReader.read(currentText);
if (read > 0) {
listener.text(new TextRange(offset, offset + read), new String(currentText, 0, read));
offset += read;
}
}
}
listener.end();
}
});
}
private Predicate> emptyIn(final Layer text) {
return new SimpleTextRepository.AnyAnchorPredicate(new Predicate>() {
@Override
public boolean apply(@Nullable Anchor> input) {
return input.getRange().length() == 0 && text.equals(input.getText());
}
});
}
private static Iterable> substract(Iterable> from, Set> selector, boolean remove) {
final List> filtered = Lists.newArrayList();
for (Iterator> it = from.iterator(); it.hasNext(); ) {
final Layer annotation = it.next();
if (selector.contains(annotation)) {
filtered.add(annotation);
if (remove) {
it.remove();
}
}
}
return filtered;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy