All downloads are free. Search and download functionality uses the official Maven repository.

eu.interedition.text.QueryResultTextStream Maven / Gradle / Ivy

The newest version!
package eu.interedition.text;

import com.google.common.base.Predicate;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import eu.interedition.text.simple.SimpleTextRepository;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
import javax.annotation.Nullable;

import static eu.interedition.text.Query.and;
import static eu.interedition.text.Query.any;
import static eu.interedition.text.Query.rangeOverlap;
import static eu.interedition.text.Query.text;

/**
 * @author Gregor Middell
 */
public class QueryResultTextStream implements TextStream {

    private final TextRepository repository;
    private final Layer text;
    private final Query query;
    private final long pageSize;

    private QueryResultTextStream(TextRepository repository, Layer text, Query query, long pageSize) {
        this.repository = repository;
        this.text = text;
        this.query = query;
        this.pageSize = pageSize;
    }

    public QueryResultTextStream(TextRepository repository, Layer text, Query query) {
        this(repository, text, query, Long.MAX_VALUE);
    }

    public QueryResultTextStream(TextRepository repository, Layer text) {
        this(repository, text, any());
    }

    @Override
    public void stream(final Listener listener) throws IOException {
        final long contentLength = text.length();
        text.stream(new Text.Consumer() {
            @Override
            public void consume(Reader textReader) throws IOException {
                final SortedMap>> starts = Maps.newTreeMap();
                final SortedMap>> ends = Maps.newTreeMap();

                long offset = 0;
                long next = 0;
                long pageEnd = 0;

                listener.start(contentLength);

                final Set> annotationData = Sets.newHashSet();
                while (true) {
                    if ((offset % pageSize) == 0) {
                        pageEnd = Math.min(offset + pageSize, contentLength);
                        final TextRange pageRange = new TextRange(offset, pageEnd);
                        final QueryResult page = repository.query(and(query, text(text), rangeOverlap(pageRange)));
                        try {
                            for (Layer a : page) {
                                for (Anchor anchor : a.getAnchors()) {
                                    if (!text.equals(anchor.getText())) {
                                        continue;
                                    }
                                    final TextRange range = anchor.getRange();
                                    final long start = range.getStart();
                                    final long end = range.getEnd();
                                    if (start >= offset) {
                                        Set> starting = starts.get(start);
                                        if (starting == null) {
                                            starts.put(start, starting = Sets.newHashSet());
                                        }
                                        starting.add(a);
                                        annotationData.add(a);
                                    }
                                    if (end <= pageEnd) {
                                        Set> ending = ends.get(end);
                                        if (ending == null) {
                                            ends.put(end, ending = Sets.newHashSet());
                                        }
                                        ending.add(a);
                                        annotationData.add(a);
                                    }
                                }
                            }
                        } finally {
                            Closeables.closeQuietly(page);
                        }

                        next = Math.min(starts.isEmpty() ? contentLength : starts.firstKey(), ends.isEmpty() ? contentLength : ends.firstKey());
                    }

                    if (offset == next) {
                        final Set> startEvents = (!starts.isEmpty() && offset == starts.firstKey() ? starts.remove(starts.firstKey()) : Sets.>newHashSet());
                        final Set> endEvents = (!ends.isEmpty() && offset == ends.firstKey() ? ends.remove(ends.firstKey()) : Sets.>newHashSet());

                        final Set> emptyEvents = Sets.newHashSet(Sets.filter(endEvents, emptyIn(text)));
                        endEvents.removeAll(emptyEvents);

                        if (!endEvents.isEmpty()) listener.end(offset, substract(annotationData, endEvents, true));
                        if (!startEvents.isEmpty())
                            listener.start(offset, substract(annotationData, startEvents, false));
                        if (!emptyEvents.isEmpty()) listener.end(offset, substract(annotationData, emptyEvents, true));

                        next = Math.min(starts.isEmpty() ? contentLength : starts.firstKey(), ends.isEmpty() ? contentLength : ends.firstKey());
                    }

                    if (offset == contentLength) {
                        break;
                    }

                    final long readTo = Math.min(pageEnd, next);
                    if (offset < readTo) {
                        final char[] currentText = new char[(int) (readTo - offset)];
                        int read = textReader.read(currentText);
                        if (read > 0) {
                            listener.text(new TextRange(offset, offset + read), new String(currentText, 0, read));
                            offset += read;
                        }
                    }
                }

                listener.end();
            }
        });
    }


    private Predicate> emptyIn(final Layer text) {
        return new SimpleTextRepository.AnyAnchorPredicate(new Predicate>() {
            @Override
            public boolean apply(@Nullable Anchor input) {
                return input.getRange().length() == 0 && text.equals(input.getText());
            }
        });
    }

    private static  Iterable> substract(Iterable> from, Set> selector, boolean remove) {
        final List> filtered = Lists.newArrayList();
        for (Iterator> it = from.iterator(); it.hasNext(); ) {
            final Layer annotation = it.next();
            if (selector.contains(annotation)) {
                filtered.add(annotation);
                if (remove) {
                    it.remove();
                }
            }
        }
        return filtered;
    }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy