All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dell.doradus.utilities.BigSet Maven / Gradle / Ivy

/*
 * Copyright (C) 2014 Dell, Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dell.doradus.utilities;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import com.dell.doradus.olap.io.BSTR;
import com.dell.doradus.search.util.HeapSort;

/**
 * Implements a set of byte sequences stored in temporary files.
 * The set may be in READ or WRITE state. In WRITE state it is possible to add
 * new values; when switched to READ state it is not possible to add data anymore.
 * Iteration of data in READ state is in increasing order.
 * Data are deleted completely after the last value is read or the "add" call
 * switches to WRITE state. 
 */
public class BigSet implements Iterable {
	private enum Status {
		READ,	// READ state
		WRITE,	// WRITE state
		NONE	// Initial state (no data)
	}
	
	private Status m_status = Status.NONE;	// Current state
	private final int m_cacheSize;			// Number of records to store in memory
	private final String m_fileName;		// Data files name prefix
	private int m_nextNo = 1;				// Next file to write data into number
	private List m_list;				// Memory buffer
	
	/**
	 * Constructor creates a BigSet with a given file prefix and memory buffer size
	 * (in records). Actual files will get a name <fileName>_number.
	 * 
	 * @param fileName	File name prefix
	 * @param cacheSize	Memory buffer size (in records)
	 */
	public BigSet(String fileName, int cacheSize) {
		m_fileName = fileName;
		m_cacheSize = cacheSize;
	}

	@Override
	public Iterator iterator() {
		openRead();
		return new BigSetIterator();
	}
	
	/**
	 * Adding a new byte sequence to the set.
	 * 
	 * @param value
	 */
	public void add(byte[] value) {
		add(new BSTR(value));
	}

	/**
	 * Adding a string to the set.
	 * @param value
	 */
	public void add(String value) {
		add(new BSTR(value));
	}
	
	/**
	 * Adding a wrapped byte sequence in the set.
	 * @param value
	 */
	public void add(BSTR value) {
		openWrite();
		if(m_list.size() >= m_cacheSize) flushPartial();
		m_list.add(value);
	}
	
	/**
	 * Flushing and closing the files.
	 */
	public void close() {
		if(m_list.size() > 0) {
			flushPartial();
		}
	}
	
	/**
	 * Deleting data files.
	 */
	public void delete() {
		for(int i = 1; i < m_nextNo; i++) {
			new File(m_fileName + "_" + i).delete();
		}
		m_nextNo = 1;
	}
	
	//--------------------- PRIVATE ------------------------
	
	/**
	 * File of BSTRs as Iterable
	 */
	public static class BSTRFileIterable implements Iterable {
		final String m_fileName;
		
		public BSTRFileIterable(String fileName) {
			m_fileName = fileName;
		}
		
		/**
		 * Sequential read iterator implementation
		 */
		public static class BSTRFileIterator implements Iterator {
			private FInputStream m_input;		// File to read from
			private BSTR m_bstr = new BSTR();	// Next BSTR (just read)
			
			public BSTRFileIterator(String fileName) {
				m_input = new FInputStream(fileName);
			}
			
			public void close() { m_input.close(); }

			@Override
			public boolean hasNext() {
				if(m_input == null) return false;
				else if(m_input.end()) {
					m_input.close();
					m_input = null;
					return false;
				} else return true;
			}

			@Override
			public BSTR next() { 
				m_input.readVString(m_bstr);
				return new BSTR(m_bstr);
			}

			@Override
			public void remove() {
				throw new UnsupportedOperationException("remove not supported");
			}
		}
		
		@Override
		public Iterator iterator() {
			return new BSTRFileIterator(m_fileName);
		}
		
	}
	
	private class BigSetIterator implements Iterator {
		BSTR m_last = null;
		Iterator m_iterator;
		
		public BigSetIterator() {
			HeapSort heapSort = new HeapSort(); 
			for(int i = 1; i < m_nextNo; i++) {
				heapSort.add(new BSTRFileIterable(m_fileName + "_" + i));
			}
			
			m_iterator = heapSort.iterator();
			while (m_iterator.hasNext()) {
				BSTR next = m_iterator.next();
				if (!next.equals(m_last)) {
					m_last = next;
					break;
				}
			}
			
		}

		@Override
		public boolean hasNext() {
			return m_last != null;
		}

		@Override
		public BSTR next() {
			BSTR result = m_last;
			while (m_iterator.hasNext()) {
				BSTR next = m_iterator.next();
				if (!next.equals(result)) {
					m_last = next;
					return result;
				}
			}
			m_last = null;
			return result;
		}

		@Override
		public void remove() {
			throw new RuntimeException("remove not supported");
		}
		
	}
	

	private void flushPartial() {
		FOutputStream output = new FOutputStream(m_fileName + "_" + m_nextNo);
		Collections.sort(m_list);
		for(BSTR val : m_list) output.writeVString(val);
		output.close();
		m_nextNo++;
		m_list.clear();
	}

	private void openWrite() {
		if (m_status != Status.WRITE) {
			if (m_status == Status.READ) {
				delete();
			}
			m_list = new ArrayList(m_cacheSize);
			m_status = Status.WRITE;
		}
	}
	
	private void openRead() {
		if (m_status == Status.WRITE) {
			close();
		}
		m_status = Status.READ;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy