All downloads are free. The search and download features use the official Maven repository.

org.archive.format.cdx.MultiCDXInputSource Maven / Gradle / Ivy

There is a newer version: 1.1.9
Show newest version
package org.archive.format.cdx;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.logging.Logger;

import org.archive.format.gzip.zipnum.ZipNumIndex;
import org.archive.format.gzip.zipnum.ZipNumParams;
import org.archive.util.iterator.CloseableCompositeIterator;
import org.archive.util.iterator.CloseableIterator;
import org.archive.util.iterator.SortedCompositeIterator;

/**
 * A {@link CDXInputSource} that fans a query out to a list of underlying
 * {@link CDXInputSource}s and combines their results into a single iterator.
 *
 * <p>By default the per-source iterators are combined through a
 * {@link SortedCompositeIterator} using {@link #comparator} (or
 * {@link #reverseComparator} when {@code params.isReverse()} is true). When
 * {@code params.isSequential()} is true, the range query instead chains the
 * sources one after another through a {@link CloseableCompositeIterator}.
 *
 * <p>A source that throws {@link IOException} while opening its iterator is
 * logged at warning level and left out of the result; the query itself still
 * succeeds with the remaining sources.
 */
public class MultiCDXInputSource implements CDXInputSource {

	private final static Logger LOGGER = Logger.getLogger(MultiCDXInputSource.class.getName());

	/** The underlying sources, in the order they were configured. */
	protected List<CDXInputSource> cdx;

	/**
	 * @return the list of underlying sources (the live list, not a copy)
	 */
	public List<CDXInputSource> getCdx() {
		return cdx;
	}

	/**
	 * @param cdx the list of underlying sources to query; stored as-is
	 */
	public void setCdx(List<CDXInputSource> cdx) {
		this.cdx = cdx;
	}

	/**
	 * Replaces the current source list with sources built from the given URIs.
	 *
	 * <ul>
	 * <li>URIs ending in {@code .cdx} or {@code .cdx.gz} become a {@link CDXFile}.</li>
	 * <li>URIs ending in {@code ALL.summary} that also contain a {@code /} are
	 *     opened through {@link ZipNumIndex#createIndexWithSummaryPath}.</li>
	 * <li>Anything else is skipped with a warning in the log.</li>
	 * </ul>
	 *
	 * @param cdxUris the URIs to turn into sources
	 * @throws IOException if one of the sources cannot be created
	 */
	public void setCdxUris(List<String> cdxUris) throws IOException {
		cdx = new ArrayList<CDXInputSource>();

		for (String uri : cdxUris) {
			if (uri.endsWith(".cdx") || uri.endsWith(".cdx.gz")) {
				cdx.add(new CDXFile(uri));
			} else if (uri.endsWith("ALL.summary") && uri.contains("/")) {
				cdx.add(ZipNumIndex.createIndexWithSummaryPath(uri));
			} else {
				// Unrecognised URIs are still skipped, but no longer silently.
				LOGGER.warning("Skipping unrecognized cdx uri: " + uri);
			}
		}
	}


	/** Orders lines by {@link String#compareTo(String)}. */
	public final static Comparator<String> defaultComparator = new Comparator<String>() {
		public int compare(String s1, String s2) {
			return s1.compareTo(s2);
		}
	};

	/** Orders lines by the reverse of {@link String#compareTo(String)}. */
	public final static Comparator<String> defaultReverseComparator = new Comparator<String>() {
		public int compare(String s1, String s2) {
			// Swapping the operands reverses the order without negating a compare result.
			return s2.compareTo(s1);
		}
	};

	/** Comparator used to merge results when {@code params.isReverse()} is false. */
	protected Comparator<String> comparator = defaultComparator;

	/** Comparator used to merge results when {@code params.isReverse()} is true. */
	protected Comparator<String> reverseComparator = defaultReverseComparator;


	/**
	 * Queries every underlying source with the given key/prefix and returns
	 * the results combined through a {@link SortedCompositeIterator}.
	 *
	 * @param key    passed through unchanged to each source
	 * @param prefix passed through unchanged to each source
	 * @param exact  passed through unchanged to each source
	 * @param params passed through to each source; {@code isReverse()} also
	 *               selects which comparator the merge uses
	 * @return the combined iterator over all sources that opened successfully
	 * @throws IOException declared by the signature; failures of individual
	 *                     sources are caught and logged here
	 */
	public CloseableIterator<String> getCDXIterator(String key, String prefix, boolean exact, ZipNumParams params) throws IOException {

		SortedCompositeIterator<String> scitr = new SortedCompositeIterator<String>(cdx.size(), params.isReverse() ? reverseComparator : comparator);

		for (CDXInputSource cdxReader : cdx) {
			try {
				CloseableIterator<String> iter = cdxReader.getCDXIterator(key, prefix, exact, params);
				scitr.addIterator(iter);
			} catch (IOException io) {
				// Best effort: one unreadable source must not fail the whole query.
				LOGGER.warning(io.toString());
			}
		}

		return scitr;
	}

	/**
	 * An iterator that defers opening the underlying source until the first
	 * call to {@link #hasNext()} or {@link #next()}.
	 *
	 * <p>If opening the source fails with an {@link IOException}, the failure
	 * is logged once and the iterator then behaves as empty; the open is not
	 * attempted again.
	 */
	protected static class LazyInitIterator implements CloseableIterator<String>
	{
		CDXInputSource source;
		CloseableIterator<String> iter;
		// Set once opening the source has failed, so that it is not retried.
		boolean failed = false;

		String key, start, end;
		ZipNumParams params;

		/**
		 * @param source the source to open on first use
		 * @param key    passed through to {@code source.getCDXIterator}
		 * @param start  passed through to {@code source.getCDXIterator}
		 * @param end    passed through to {@code source.getCDXIterator}
		 * @param params passed through to {@code source.getCDXIterator}
		 */
		protected LazyInitIterator(CDXInputSource source, String key, String start, String end, ZipNumParams params)
		{
			this.key = key;
			this.start = start;
			this.end = end;

			this.params = params;

			this.source = source;
		}

		/**
		 * Opens the underlying iterator if that has not been attempted yet.
		 * After this returns, {@code iter} is either the opened iterator or
		 * {@code null} (with {@code failed} set) if opening failed.
		 */
		protected void initIter()
		{
			if (iter != null || failed) {
				return;
			}

			try {
				iter = source.getCDXIterator(key, start, end, params);
			} catch (IOException io) {
				LOGGER.warning(io.toString());
				iter = null;
				// Remember the failure: without this every hasNext()/next()
				// call would re-open the source and log the same error again.
				failed = true;
			}
		}

		/**
		 * @return {@code false} if the source could not be opened, otherwise
		 *         whatever the underlying iterator reports
		 */
		@Override
		public boolean hasNext() {
			initIter();

			if (iter == null) {
				return false;
			}

			return iter.hasNext();
		}

		/**
		 * @return the next line from the underlying iterator, or {@code null}
		 *         if the source could not be opened
		 */
		@Override
		public String next() {
			initIter();

			if (iter == null) {
				return null;
			}

			return iter.next();
		}

		/** Does nothing; removal is not forwarded to the underlying iterator. */
		@Override
		public void remove() {

		}

		/**
		 * Closes the underlying iterator if it was ever opened.
		 *
		 * @throws IOException if closing the underlying iterator fails
		 */
		@Override
		public void close() throws IOException {
			if (iter != null) {
				iter.close();
			}
		}
	}

	/**
	 * Builds an iterator that reads the sources one after another instead of
	 * merging them.
	 *
	 * <p>The last source in {@link #cdx} is opened immediately; every other
	 * source is wrapped in a {@link LazyInitIterator} and opened on first use.
	 * Iterators are appended with {@code addLast} in list order, or with
	 * {@code addFirst} when {@code params.isReverse()} is true.
	 *
	 * @param key    passed through unchanged to each source
	 * @param start  passed through unchanged to each source
	 * @param end    passed through unchanged to each source
	 * @param params passed through to each source; {@code isReverse()} also
	 *               selects the order in which iterators are chained
	 * @return the chained iterator
	 */
	public CloseableIterator<String> createSeqIterator(String key, String start, String end, ZipNumParams params)
	{
		CloseableCompositeIterator<String> composite = new CloseableCompositeIterator<String>();
		int lastIndex = cdx.size() - 1;

		for (int i = 0; i < cdx.size(); i++) {
			try {
				CDXInputSource cdxReader = cdx.get(i);
				CloseableIterator<String> iter;

				if (i == lastIndex) {
					iter = cdxReader.getCDXIterator(key, start, end, params);
				} else {
					iter = new LazyInitIterator(cdxReader, key, start, end, params);
				}

				if (!params.isReverse()) {
					composite.addLast(iter);
				} else {
					composite.addFirst(iter);
				}

			} catch (IOException io) {
				// Only the eagerly opened source can throw here; it is skipped.
				LOGGER.warning(io.toString());
			}
		}

		return composite;
	}


	/**
	 * Queries every underlying source for the given range.
	 *
	 * <p>When {@code params.isSequential()} is true this delegates to
	 * {@link #createSeqIterator}. Otherwise the per-source iterators are
	 * combined through a {@link SortedCompositeIterator}.
	 *
	 * @param key    passed through unchanged to each source
	 * @param start  passed through unchanged to each source
	 * @param end    passed through unchanged to each source
	 * @param params passed through to each source; also selects sequential
	 *               vs. sorted combination and the merge comparator
	 * @return the combined iterator over all sources that opened successfully
	 * @throws IOException declared by the signature; failures of individual
	 *                     sources are caught and logged here
	 */
	public CloseableIterator<String> getCDXIterator(String key, String start, String end, ZipNumParams params) throws IOException {

		if (params.isSequential()) {
			return this.createSeqIterator(key, start, end, params);
		}

		SortedCompositeIterator<String> scitr = new SortedCompositeIterator<String>(cdx.size(), params.isReverse() ? reverseComparator : comparator);

		for (CDXInputSource cdxReader : cdx) {
			try {
				CloseableIterator<String> iter = cdxReader.getCDXIterator(key, start, end, params);
				scitr.addIterator(iter);
			} catch (IOException io) {
				// Best effort: one unreadable source must not fail the whole query.
				LOGGER.warning(io.toString());
			}
		}

		return scitr;
	}

	/**
	 * @return the sum of {@code getTotalLines()} over all underlying sources
	 */
	@Override
	public long getTotalLines() {
		long sum = 0;

		for (CDXInputSource cdxReader : cdx) {
			sum += cdxReader.getTotalLines();
		}

		return sum;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy