org.geneweaver.io.reader.ArchiveReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gweaver-stream-io Show documentation
Show all versions of gweaver-stream-io Show documentation
The IO bundle for Geneweaver.
/*-
*
* Copyright 2018, 2020 The Jackson Laboratory Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @author Matthew Gerring
*/
package org.geneweaver.io.reader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Function;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.geneweaver.domain.Entity;
/**
 * Reads the entries of a {@code .zip} or {@code .tar} archive one after the
 * other and exposes their contents as a single stream. Each entry is handed
 * to the reader that {@link ReaderFactory} returns for the entry name.
 *
 * <p>NOTE(review): the generic type arguments in this listing look as if they
 * were stripped during extraction (for example {@code Function>} and the bare
 * {@code T} used by the inner iterator). They are left exactly as found and
 * need to be restored from the real source - TODO confirm.
 */
public class ArchiveReader extends AbstractStreamReader {

    /** Iterator over the archive entries; created by {@link #stream()} or lazily by {@link #wind()}. */
    private Iterator iterator;

    /** Sum of the line counts reported by the entry readers that have been exhausted so far. */
    private int linesProcessed;

    /**
     * Opens the archive and returns a sequential, ordered stream over the
     * items of all matching entries.
     *
     * <p>Every call resets the processed-line counter and builds a fresh entry
     * iterator; an iterator created by an earlier call is replaced without
     * being closed.
     *
     * @return stream over the contents of the archive entries
     * @throws ReaderException if the archive cannot be opened or read
     * @throws IllegalArgumentException if the request name is neither a zip nor a tar
     */
    @Override
    public Stream stream() throws ReaderException {
        try {
            this.linesProcessed = 0;
            this.iterator = createIterator(false);
            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false);
        } catch (IOException e) {
            throw new ReaderException(e);
        }
    }

    /**
     * Delegates to {@code wind()} of the reader for the entry that is
     * currently active, opening the archive first if that has not happened yet.
     *
     * @return whatever the active entry reader returns from its own {@code wind()}
     * @throws ReaderException if the archive cannot be read, or if it holds no
     *         entry that could be opened (previously a {@code NullPointerException})
     */
    public List wind() throws ReaderException {
        try {
            if (iterator == null) {
                this.iterator = createIterator(false);
            }
            StreamReader reader = ((StreamIterator) iterator).getActiveReader();
            if (reader == null) {
                // No entry was ever opened (empty archive or nothing matched the filter).
                // Report it through the declared exception type instead of failing with an NPE.
                throw new ReaderException(new IOException("No readable entry found in archive "+request.name()));
            }
            return reader.wind();
        } catch (IOException e) {
            throw new ReaderException(e);
        }
    }

    /**
     * Reports whether the active entry reader is empty.
     *
     * @return {@code false} while the archive has not been opened yet;
     *         {@code true} if no entry reader exists, if the active reader
     *         reports empty, or if the archive could not be read
     */
    @Override
    public boolean isEmpty() {
        if (iterator == null) {
            return false;
        }
        try {
            StreamReader reader = ((StreamIterator) iterator).getActiveReader();
            // A missing reader means no entry could be opened, so there is nothing to read.
            return reader == null || reader.isEmpty();
        } catch (IOException | ReaderException ignored) {
            // This method cannot throw checked exceptions; an unreadable
            // archive is deliberately reported as empty (best effort).
            return true;
        }
    }

    /**
     * Chooses the entry iterator from the extension of the request name.
     *
     * @param closeStream currently ignored by this method
     * @return a {@link ZipIterator} for {@code .zip} names, a {@link TarIterator} for {@code .tar} names
     * @throws IOException if the archive stream cannot be opened or read
     * @throws ReaderException if the reader for the first entry cannot be created
     * @throws IllegalArgumentException if the name is missing or has another extension
     */
    private Iterator createIterator(boolean closeStream) throws IOException, ReaderException {
        String name = request.name();
        if (name == null) {
            throw new IllegalArgumentException("Cannot find archive reader for "+name);
        }
        // Locale.ROOT keeps the match independent of the default locale
        // (e.g. ".ZIP" lower-cases to ".z\u0131p" under a Turkish locale).
        String lowerName = name.toLowerCase(Locale.ROOT);
        if (lowerName.endsWith(".zip")) {
            return new ZipIterator(request.stream());
        } else if (lowerName.endsWith(".tar")) {
            return new TarIterator(request.stream());
        } else {
            throw new IllegalArgumentException("Cannot find archive reader for "+name);
        }
    }

    /**
     * Not supported for archives.
     *
     * @throws IllegalArgumentException always
     */
    @Override
    public Function> getDefaultConnector() {
        throw new IllegalArgumentException("An archive may contain different files of different types!");
    }

    /**
     * @return the accumulated line count of the entry readers exhausted so far
     */
    @Override
    public int linesProcessed() {
        return linesProcessed;
    }

    /**
     * @return the value of {@code request.isFileRequest()}
     */
    @Override
    public boolean isDataSource() {
        return request.isFileRequest();
    }

    /**
     * Closes the entry iterator, and with it the archive stream, if one was created.
     *
     * @throws IOException if closing the archive stream fails
     */
    @Override
    public void close() throws IOException {
        // instanceof is false for null, so no separate null check is needed.
        if (iterator instanceof Closeable) {
            ((Closeable) iterator).close();
        }
    }

    /**
     * Iterator that chains the per-entry iterators of an archive into one.
     * Subclasses supply {@link #nextIterator()} for the concrete archive format.
     * {@code remove()} is not implemented.
     */
    private abstract class StreamIterator implements Iterator, Closeable {

        /** The archive stream all entries are read from. */
        protected InputStream parent;

        /** Iterator over the entry being read, or {@code null} once no entry is left. */
        protected Iterator currentIterator;

        /** Reader of the most recently opened entry, or {@code null} if none was opened. */
        protected AbstractStreamReader reader;

        public StreamIterator(InputStream in) throws IOException {
            this.parent = in;
        }

        /**
         * Returns the current entry reader if it exists and does not report
         * empty. Otherwise the next entry is requested and whatever reader is
         * current afterwards is returned; that may still be the previous,
         * exhausted reader, or {@code null} if no entry was ever opened.
         *
         * <p>The iterator returned by {@link #nextIterator()} is not stored in
         * {@link #currentIterator} here; only the reader is of interest.
         */
        public StreamReader getActiveReader() throws IOException, ReaderException {
            if (reader != null && !reader.isEmpty()) {
                return reader;
            }
            nextIterator(); // If stream exhausted, see if there is another file.
            return reader;
        }

        /**
         * @return {@code true} if any remaining entry still has an item.
         *         Entries without items are skipped instead of ending the iteration.
         * @throws RuntimeException wrapping the failure if the next entry cannot be opened
         */
        @Override
        public boolean hasNext() {
            skipExhaustedEntries();
            return currentIterator != null;
        }

        /**
         * @return the next item of the archive
         * @throws NoSuchElementException if the archive has no further items
         * @throws RuntimeException wrapping the failure if the next entry cannot be opened
         */
        @Override
        public T next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            T next = currentIterator.next();
            // Move on eagerly so the line count of a finished entry is
            // recorded as soon as its last item has been handed out.
            skipExhaustedEntries();
            return next;
        }

        /**
         * Advances to the next entry for as long as the current one has no
         * items left. The line count of each finished reader is added exactly once.
         */
        private void skipExhaustedEntries() {
            while (currentIterator != null && !currentIterator.hasNext()) {
                linesProcessed += reader.linesProcessed();
                try {
                    currentIterator = nextIterator();
                } catch (IOException | ReaderException e) {
                    throw new RuntimeException(e);
                }
            }
        }

        /**
         * Opens the next usable entry, stores its reader in {@link #reader}
         * and returns an iterator over its items.
         *
         * @return the iterator for the next entry, or {@code null} if the archive has no further entry
         */
        protected abstract Iterator nextIterator() throws IOException, ReaderException;

        /**
         * @param name entry name, may be {@code null}
         * @return {@code false} for a {@code null} name or a name that does
         *         not match the request's file filter; {@code true} otherwise,
         *         including when no filter is set
         */
        protected boolean isEntryValid(String name) {
            if (name == null) {
                return false;
            }
            String filter = request.getFileFilter();
            return filter == null || name.matches(filter);
        }

        /** Closes the archive stream. */
        @Override
        public void close() throws IOException {
            parent.close();
        }
    }

    /**
     * Kind of an iterator but remove() does not work.
     *
     * @author gerrim
     *
     */
    private class TarIterator extends StreamIterator {

        private TarArchiveInputStream tstream;

        public TarIterator(InputStream in) throws IOException, ReaderException {
            super(new TarArchiveInputStream(in));
            this.tstream = (TarArchiveInputStream) parent;
            this.currentIterator = nextIterator();
        }

        @Override
        protected Iterator nextIterator() throws IOException, ReaderException {
            TarArchiveEntry entry;
            do {
                entry = tstream.getNextTarEntry();
                if (entry == null) {
                    return null;
                }
                // Directory entries hold no data to read, so they are skipped
                // together with names rejected by the file filter.
            } while (entry.isDirectory() || !isEntryValid(entry.getName()));

            this.reader = ReaderFactory.getReader(new ReaderRequest(tstream, entry.getName(), false));
            reader.setChunkSize(getChunkSize());
            reader.setEntryName(entry.getName());
            return reader.stream().iterator();
        }
    }

    /**
     * Kind of an iterator but remove() does not work.
     *
     * @author gerrim
     *
     */
    private class ZipIterator extends StreamIterator {

        private ZipInputStream zstream;

        public ZipIterator(InputStream in) throws IOException, ReaderException {
            super(new ZipInputStream(in));
            this.zstream = (ZipInputStream) parent;
            this.currentIterator = nextIterator();
        }

        @Override
        protected Iterator nextIterator() throws IOException, ReaderException {
            ZipEntry entry;
            do {
                entry = zstream.getNextEntry();
                if (entry == null) {
                    return null;
                }
                // Directory entries hold no data to read, so they are skipped
                // together with names rejected by the file filter.
            } while (entry.isDirectory() || !isEntryValid(entry.getName()));

            this.reader = ReaderFactory.getReader(new ReaderRequest(zstream, entry.getName(), false));
            reader.setChunkSize(getChunkSize());
            reader.setEntryName(entry.getName());
            return reader.stream().iterator();
        }
    }
}