All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.terrier.structures.FSOMapFileLexiconGeneric Maven / Gradle / Ivy

The newest version!
/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org 
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is FSOMapFileLexiconGeneric.java.
 *
 * The Original Code is Copyright (C) 2017-2020 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *  Craig Macdonald
 */
package org.terrier.structures;

import java.io.Closeable;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.io.WritableComparable;
import org.terrier.structures.collections.FSOrderedMapFile;
import org.terrier.structures.seralization.FixedSizeWriteableFactory;
import org.terrier.utility.ApplicationSetup;
import org.terrier.utility.Files;
import org.terrier.utility.io.RandomDataInput;
import org.terrier.utility.io.RandomDataInputMemory;
import org.terrier.utility.io.WrappedIOException;

@SuppressWarnings("rawtypes")
public abstract class FSOMapFileLexiconGeneric extends MapLexicon {

	public static final String ID_EXT = ".fsomapid";
	public static final String MAPFILE_EXT = FSOrderedMapFile.USUAL_EXTENSION;

    /**
     * Id-to-index lookup that reads from the id file on every call rather
     * than holding its contents in memory. The id file is treated as a flat
     * sequence of 4-byte ints: the value for a given termid is read at byte
     * offset <code>termid * 4</code>.
     */
    static class OnDiskLookup implements Id2EntryIndexLookup, Closeable
    {
        /** Number of bytes occupied by each int record in the id file. */
        protected static final long SIZE_OF_INT = 4;

        /** Random-access handle on the id file; released by {@link #close()}. */
        final RandomDataInput lexIdFile;

        /**
         * Opens the id file for random access. The filename is built by
         * constructFilename() from the structure name, path and prefix, using
         * the ID_EXT extension.
         * @param path path component passed to constructFilename()
         * @param prefix prefix component passed to constructFilename()
         * @param structureName structure name passed to constructFilename()
         * @throws IOException if the id file cannot be opened
         */
        public OnDiskLookup(String path, String prefix, String structureName) throws IOException
        {
            final String idFilename = constructFilename(structureName, path, prefix, ID_EXT);
            lexIdFile = Files.openFileRandom(idFilename);
        }

        /**
         * Reads the int stored for the given termid.
         * @param termid position of the record to read (record number, not byte offset)
         * @return the int read from the id file at that record
         * @throws IOException if seeking or reading fails
         */
        public int getIndex(int termid) throws IOException
        {
            // widen to long before multiplying so the byte offset cannot overflow an int
            final long offset = SIZE_OF_INT * (long) termid;
            lexIdFile.seek(offset);
            return lexIdFile.readInt();
        }

        /**
         * Closes the underlying id file.
         * @throws IOException if the close fails
         */
        @Override
        public void close() throws IOException
        {
            lexIdFile.close();
        }
    }
    
    static class InMemoryLookup implements Id2EntryIndexLookup
    {
        protected final int[] id2index;
        public InMemoryLookup(String path, String prefix, String structureName, int size) 
            throws IOException
        {
        	String filename = constructFilename(structureName, path, prefix, ID_EXT);
            size = (int)(Files.length(filename) / (long)4);
        	DataInputStream lexIdFile = new DataInputStream(Files.openFileStream(filename));
            id2index = new int[size];
            for(int i=0;i implementation,
     * depending on the value of dataSource.
     * <ol>
     * <li>fileinmem - use a RandomDataInputMemory instance over the file</li>
     * <li>file - use file on disk, as normal.</li>
     * <li>anything else: assume to be a class name, and instantiate using the
     * expected constructor.</li>
     * </ol>
* @param * @param filename - filename of mapfile * @param keyFactory - factory to create keys * @param valueFactory - factory to create values * @param dataSource - what type of object to instantiate * @return - the created FSOrderedMapFile * @throws IOException - if any problems occur */ @SuppressWarnings("unchecked") static > FSOrderedMapFile loadMapFile(String filename, FixedSizeWriteableFactory keyFactory, FixedSizeWriteableFactory valueFactory, String dataSource) throws IOException { if (dataSource.equals("fileinmem")) return new FSOrderedMapFile( new RandomDataInputMemory(filename), filename, keyFactory, valueFactory); if (dataSource.equals("file")) return new FSOrderedMapFile( filename, false, keyFactory, valueFactory); //else, we've been given a class name to instantiate FSOrderedMapFilertr = null; try { Class mapClass = ApplicationSetup.getClass(dataSource).asSubclass(FSOrderedMapFile.class); rtr = (FSOrderedMapFile) mapClass .getConstructor(String.class, Boolean.TYPE, FixedSizeWriteableFactory.class, FixedSizeWriteableFactory.class) .newInstance(filename, false, keyFactory, valueFactory); } catch (Exception e) { throw new WrappedIOException("Could not find a class for FSOMapFileLexicon", e); } return rtr; } @SuppressWarnings("unchecked") public FSOMapFileLexiconGeneric(String structureName, String path, String prefix, FixedSizeWriteableFactory _keyFactory, FixedSizeWriteableFactory _valueFactory, String termIdLookup, String dataFile) throws IOException { /* if dataSource is fileinmem, then the file will be wholly loaded into memory. * file means use on disk. 
Otherwise use as a class name */ super( loadMapFile( constructFilename(structureName, path, prefix, MAPFILE_EXT), _keyFactory, _valueFactory, dataFile)); this.keyFactory = _keyFactory; if ("aligned".equals(termIdLookup)) { setTermIdLookup(new IdIsIndex()); } else if ("file".equals(termIdLookup)) { setTermIdLookup(new OnDiskLookup(path, prefix, structureName)); } else if ("fileinmem".equals(termIdLookup)) { setTermIdLookup(new InMemoryLookup(path, prefix, structureName, this.map.size())); } else if ("disabled".equals(termIdLookup)) { setTermIdLookup(null); } else { throw new IOException("Unrecognised value ("+termIdLookup+") for termIdlookup for structure "+structureName); } } /** * Constructs a filename * @param structureName * @param path * @param prefix * @param extension * @return filename */ public static String constructFilename(String structureName, String path, String prefix, String extension) { return path + "/"+ prefix +"." + structureName + extension; } /** * An iterator over the lexicon */ public abstract static class MapFileLexiconIterator> implements Iterator>, Closeable { protected Iterator> parent; /** * Construct an instance of the class with * @param structureName * @param index * @throws IOException */ @SuppressWarnings("unchecked") public MapFileLexiconIterator(String structureName, IndexOnDisk index) throws IOException { this( structureName, index.getPath(), index.getPrefix(), (FixedSizeWriteableFactory)index.getIndexStructure(structureName+"-keyfactory"), (FixedSizeWriteableFactory)index.getIndexStructure(structureName+"-valuefactory")); } /** * Construct an instance of the class with * @param structureName * @param path * @param prefix * @param keyFactory * @param valueFactory * @throws IOException */ public MapFileLexiconIterator(String structureName, String path, String prefix, FixedSizeWriteableFactory keyFactory, FixedSizeWriteableFactory valueFactory) throws IOException { this(constructFilename(structureName, path, prefix, MAPFILE_EXT), 
keyFactory, valueFactory); } /** * Construct an instance of the class with * @param filename * @param keyFactory * @param valueFactory * @throws IOException */ public MapFileLexiconIterator(String filename, FixedSizeWriteableFactory keyFactory, FixedSizeWriteableFactory valueFactory) throws IOException { this(new FSOrderedMapFile.EntryIterator(filename, keyFactory, valueFactory)); } /** * Construct an instance of the class with * @param _parent */ public MapFileLexiconIterator(Iterator> _parent) { parent = _parent; } /** * {@inheritDoc} */ public boolean hasNext() { return parent.hasNext(); } /** * {@inheritDoc} */ public void remove() { parent.remove(); } /** * {@inheritDoc} */ public void close() throws IOException { if (parent instanceof Closeable) ((Closeable)parent).close(); } } /** Iterate through the values in order */ public static class MapFileLexiconEntryIterator> implements Iterator, Closeable, Skipable { protected Iterator> internalIterator; /** * Construct an instance of the class with * @param structureName * @param index * @throws IOException */ @SuppressWarnings("unchecked") public MapFileLexiconEntryIterator(String structureName, IndexOnDisk index) throws IOException { this( structureName.replaceFirst("-entry", ""), index.getPath(), index.getPrefix(), (FixedSizeWriteableFactory)index.getIndexStructure(structureName.replaceFirst("-entry", "")+"-keyfactory"), (FixedSizeWriteableFactory)index.getIndexStructure(structureName.replaceFirst("-entry", "")+"-valuefactory")); } /** * Construct an instance of the class with * @param structureName * @param path * @param prefix * @param keyFactory * @param valueFactory * @throws IOException */ public MapFileLexiconEntryIterator(String structureName, String path, String prefix, FixedSizeWriteableFactory keyFactory, FixedSizeWriteableFactory valueFactory) throws IOException { this(constructFilename(structureName, path, prefix, MAPFILE_EXT), keyFactory, valueFactory); } /** * Construct an instance of the class 
with * @param filename * @param keyFactory * @param valueFactory * @throws IOException */ public MapFileLexiconEntryIterator(String filename, FixedSizeWriteableFactory keyFactory, FixedSizeWriteableFactory valueFactory) throws IOException { //this(new FSOrderedMapFile.EntryIterator(filename, keyFactory, valueFactory)); this(getTotalIterator(filename, keyFactory, valueFactory)); } static > Iterator> getTotalIterator(String filename, FixedSizeWriteableFactory keyFactory, FixedSizeWriteableFactory valueFactory) throws IOException { Iterator> rtr = new FSOrderedMapFile.EntryIterator(filename, keyFactory, valueFactory); return rtr; } /** * Construct an instance of the class with * @param _internalIterator */ public MapFileLexiconEntryIterator(Iterator> _internalIterator) { internalIterator = _internalIterator; } /** * {@inheritDoc} */ public boolean hasNext() { return internalIterator.hasNext(); } /** * {@inheritDoc} */ public LexiconEntry next() { return internalIterator.next().getValue(); } /** * {@inheritDoc} */ public void remove() { internalIterator.remove(); } /** * {@inheritDoc} */ public void close() throws IOException { if (internalIterator instanceof java.io.Closeable) ((java.io.Closeable)internalIterator).close(); } /** * {@inheritDoc} */ public void skip(int numEntries) throws IOException { if (numEntries == 0) return; if (! (internalIterator instanceof Skipable)) throw new UnsupportedOperationException("Skipping not supported"); ((Skipable)internalIterator).skip(numEntries); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy