All downloads are free. Search and download functionality uses the official Maven repository.

org.geneweaver.io.reader.ArchiveReader Maven / Gradle / Ivy

/*-
 * 
 * Copyright 2018, 2020  The Jackson Laboratory Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * @author Matthew Gerring
 */
package org.geneweaver.io.reader;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Function;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.geneweaver.domain.Entity;

public class ArchiveReader extends AbstractStreamReader {

	private Iterator iterator;
	private int linesProcessed;

	@Override
	public Stream stream() throws ReaderException {
		try {
			this.linesProcessed = 0;
			this.iterator = createIterator(false);
			return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false);
			
		} catch (IOException e) {
			throw new ReaderException(e);
		}
	}
	
	public List wind() throws ReaderException {

		try {
			if (iterator==null) this.iterator = createIterator(false);
			
			StreamReader reader = ((StreamIterator)iterator).getActiveReader();
			return reader.wind();
			
		} catch (IOException e) {
			throw new ReaderException(e);
		}
	}

	@Override
	public boolean isEmpty() {
		
		if (iterator==null) return false;
		try {
			StreamReader reader = ((StreamIterator)iterator).getActiveReader();
			return reader.isEmpty();
		} catch (IOException | ReaderException e) {
			return true;
		}
	}

	private Iterator createIterator(boolean closeStream) throws IOException, ReaderException {
		
		String name = request.name();
		if (name.toLowerCase().endsWith(".zip")) {
			return new ZipIterator(request.stream());
			
		} else if (name.toLowerCase().endsWith(".tar")) {
			return new TarIterator(request.stream());
		} else {
			throw new IllegalArgumentException("Cannot find archive reader for "+name);
		}
	}


	@Override
	public  Function> getDefaultConnector() {
		throw new IllegalArgumentException("An archive may contain different files of different types!");
	}

	@Override
	public int linesProcessed() {
		return linesProcessed;
	}

	@Override
	public boolean isDataSource() {
		return request.isFileRequest();
	}

	@Override
	public void close() throws IOException {
		if (iterator!=null && iterator instanceof Closeable) {
			((Closeable)iterator).close();
		}
	}

	private abstract class StreamIterator implements Iterator, Closeable {

		protected InputStream parent;
		protected Iterator currentIterator;
		protected AbstractStreamReader reader;

		public StreamIterator(InputStream in) throws IOException {
			this.parent = in;
		}
		
		public StreamReader getActiveReader() throws IOException, ReaderException {
			if (reader!=null && !reader.isEmpty()) return reader;
			nextIterator(); // If stream exhausted, see if there is another file.
			return reader;
		}

		@Override
		public boolean hasNext() {
			if (currentIterator==null) return false;
			boolean more = currentIterator.hasNext();
			if (!more) {
				linesProcessed+=reader.linesProcessed();
			}
			return more;
		}

		@Override
		public T next() {
			if (currentIterator==null) return null;
			
			T next = currentIterator.next();
			
			if (!currentIterator.hasNext()) {
				linesProcessed+=reader.linesProcessed();
				try {
					currentIterator = nextIterator();
				} catch (IOException | ReaderException e) {
					throw new RuntimeException(e);
				}
			}
			
			return next;
		}

		protected abstract Iterator nextIterator() throws IOException, ReaderException;
		
		protected boolean isEntryValid(String name) {
			if (name==null) return false;
			if (request.getFileFilter()!=null) {
				if (!name.matches(request.getFileFilter())) {
					return false;
				}
			}
			return true;
		}

		public void close() throws IOException {
			parent.close();
		}
	}

	/**
	 * Kind of an interator but remove() does not work.
	 * 
	 * @author gerrim
	 *
	 */
	private class TarIterator extends StreamIterator {

		private TarArchiveInputStream tstream;
		
		public TarIterator(InputStream in) throws IOException, ReaderException {
			super(new TarArchiveInputStream(in));
			this.tstream = (TarArchiveInputStream)parent;
			this.currentIterator = nextIterator();
		}
		
		@Override
		protected Iterator nextIterator() throws IOException, ReaderException {
			TarArchiveEntry entry = tstream.getNextTarEntry();
			if (entry==null) return null;
			while(!isEntryValid(entry.getName())) {
				entry = tstream.getNextTarEntry();
				if (entry==null) return null;
			}
			this.reader = ReaderFactory.getReader(new ReaderRequest(tstream, entry.getName(), false));
			reader.setChunkSize(getChunkSize());
			reader.setEntryName(entry.getName());
			return reader.stream().iterator();
		}
	}

	/**
	 * Kind of an iterator but remove() does not work.
	 * 
	 * @author gerrim
	 *
	 */
	private class ZipIterator extends StreamIterator {

		private ZipInputStream zstream;
		public ZipIterator(InputStream in) throws IOException, ReaderException {
			super(new ZipInputStream(in));
			this.zstream = (ZipInputStream)parent;
			this.currentIterator = nextIterator();
		}
		
		@Override
		protected Iterator nextIterator() throws IOException, ReaderException {
			ZipEntry entry = zstream.getNextEntry();
			if (entry==null) return null;
			while(!isEntryValid(entry.getName())) {
				entry = zstream.getNextEntry();
				if (entry==null) return null;
			}
			this.reader = ReaderFactory.getReader(new ReaderRequest(zstream, entry.getName(), false));
			reader.setChunkSize(getChunkSize());
			reader.setEntryName(entry.getName());
			return reader.stream().iterator();
		}
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy