All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.terrier.structures.FSOMapFileLexiconOutputStreamGeneric Maven / Gradle / Ivy

The newest version!
/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org 
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is FSOMapFileLexiconOutputStreamGeneric.java.
 *
 * The Original Code is Copyright (C) 2017-2020 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *  Craig Macdonald
 */
package org.terrier.structures;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.WritableComparable;
import org.terrier.structures.collections.FSOrderedMapFile;
import org.terrier.structures.seralization.FixedSizeWriteableFactory;

@SuppressWarnings("unchecked")
public abstract class FSOMapFileLexiconOutputStreamGeneric>
	extends LexiconOutputStream
{

	protected FixedSizeWriteableFactory keyFactory;
	protected final T2 tempKey;
	protected final FSOrderedMapFile.MapFileWriter mapFileWriter;
	protected IndexOnDisk index = null;
	protected String leValueClassname = null;
	protected final String structureName;

	public FSOMapFileLexiconOutputStreamGeneric(String path, String prefix,
			String _structureName, FixedSizeWriteableFactory _keyFactory) throws IOException {
		super();
		this.structureName = _structureName;
		mapFileWriter = FSOrderedMapFile.mapFileWrite(FSOMapFileLexicon.constructFilename(structureName, path, prefix, FSOMapFileLexicon.MAPFILE_EXT));
		keyFactory = _keyFactory;
		tempKey = keyFactory.newInstance();
		
	}

	public FSOMapFileLexiconOutputStreamGeneric(
			IndexOnDisk _index,
			String _structureName,
			Class> valueFactoryClass) throws IOException {
		
		this(_index.getPath(), _index.getPrefix(), _structureName, (FixedSizeWriteableFactory) getKeyFactory(_index, _structureName));
		this.index = _index;
		leValueClassname = valueFactoryClass.getName();
	}

	public FSOMapFileLexiconOutputStreamGeneric(
			IndexOnDisk _index,
			String _structureName,
			FixedSizeWriteableFactory _keyFactory,
			Class> valueFactoryClass) throws IOException {
		this(_index.getPath(), _index.getPrefix(), _structureName, _keyFactory);
		this.index = _index;
		leValueClassname = valueFactoryClass.getName();
	}

	public FSOMapFileLexiconOutputStreamGeneric(IndexOnDisk _index,
			String _structureName, FixedSizeWriteableFactory _keyFactory,
			String valueFactoryClassName) throws IOException {
		this(_index.getPath(), _index.getPrefix(), _structureName, _keyFactory);
		this.index = _index;
		leValueClassname = valueFactoryClassName;
	}

	public FSOMapFileLexiconOutputStreamGeneric(String filename,
			FixedSizeWriteableFactory _keyFactory) throws IOException {
		
		mapFileWriter = FSOrderedMapFile.mapFileWrite(filename);
		structureName = null;
		leValueClassname = null;
		index = null;
		keyFactory = _keyFactory;
		tempKey = keyFactory.newInstance();
	}

	@Override
	public int writeNextEntry(T1 _key, LexiconEntry _value) throws IOException {
		setKey(_key);
		//System.err.println(_key.toString() + " => " + _value.toString());
		mapFileWriter.write(tempKey, _value);
		super.incrementCounters(_value);
		return keyFactory.getSize() /* + TODO */;
	}
	
	protected abstract void setKey(T1 k);
	
	@Override
	public void close() {
		super.close();
		try{
			mapFileWriter.close();
		} catch (Exception ioe) {}
	}
	
	static FixedSizeWriteableFactory getKeyFactory(PropertiesIndex _index, String _structureName) throws IOException
	{
		_index.addIndexStructure(_structureName+"-keyfactory", 
				org.terrier.structures.seralization.FixedSizeTextFactory.class.getName(),
				"java.lang.String", "${max.term.length}");
		_index.flush();
		return (FixedSizeWriteableFactory)_index.getIndexStructure(_structureName+"-keyfactory");
	}
	
	/**
	 * Adds Lexicon to index
	 * @param index
	 * @param structureName
	 * @param leValueClassname
	 */
	public static void addLexiconToIndex(IndexOnDisk index, 
			String structureName,
			Class> lexClass,
			Class> lexInputStreamClass,
			Class> lexEntryInputStreamClass,
			String leValueClassname)
	{
		index.addIndexStructure(
				structureName+"-valuefactory",
				leValueClassname,
				"", "");
		index.addIndexStructure(
				structureName, 
				lexClass.getName(),
				"java.lang.String,org.terrier.structures.IndexOnDisk",
				"structureName,index");
		
		if (lexInputStreamClass != null)
			index.addIndexStructureInputStream(
					structureName, 
					lexInputStreamClass.getName(),
					"java.lang.String,org.terrier.structures.IndexOnDisk",
					"structureName,index");
		if (lexEntryInputStreamClass != null)
			index.addIndexStructureInputStream(
					structureName+"-entry", 
					lexEntryInputStreamClass.getName(),
					"java.lang.String,org.terrier.structures.IndexOnDisk",
					"structureName,index");
	}
	
	/**
	 * Adds Lexicon to index
	 * @param index
	 * @param structureName
	 * @param leValueClassname
	 */
//	public static void addLexiconToIndex(IndexOnDisk index, 
//			String structureName, String leValueClassname)
//	{
//		index.addIndexStructure(
//				structureName+"-valuefactory",
//				leValueClassname,
//				"", "");
//		index.addIndexStructure(
//				structureName, 
//				"org.terrier.structures.FSOMapFileLexicon",
//				"java.lang.String,org.terrier.structures.IndexOnDisk",
//				"structureName,index");
//		index.addIndexStructureInputStream(
//				structureName, 
//				"org.terrier.structures.FSOMapFileLexicon$MapFileLexiconIterator",
//				"java.lang.String,org.terrier.structures.IndexOnDisk",
//				"structureName,index");
//		index.addIndexStructureInputStream(
//				structureName+"-entry", 
//				"org.terrier.structures.FSOMapFileLexiconGeneric$MapFileLexiconEntryIterator",
//				"java.lang.String,org.terrier.structures.IndexOnDisk",
//				"structureName,index");
//	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy