All Downloads are FREE. Search and download functionality uses the official Maven repository.

src.it.unimi.dsi.big.mg4j.util.TermMap Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.util;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2010-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.objects.AbstractObject2ObjectMap;
import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
import it.unimi.dsi.fastutil.objects.AbstractObjectSet;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import it.unimi.dsi.fastutil.objects.ObjectSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.big.mg4j.io.ByteArrayPostingList;
import it.unimi.dsi.big.mg4j.tool.Scan;

import java.util.Collection;
import java.util.NoSuchElementException;

/** A hash map from {@linkplain MutableString mutable strings} to 
 * {@linkplain ByteArrayPostingList byte-array posting lists} used
 * by {@link Scan} for indexing.
 * 
 * 

The map implemented by this class is very simple, and does not support {@link Collection#remove(Object)}. * It uses * */ public class TermMap extends AbstractObject2ObjectMap { private static final long serialVersionUID = 1L; private MutableString[] key; private ByteArrayPostingList[] value; private int mask; private int size; private int length; private final static long PRIME = ( 1L << 61 ) - 1; private final static long LOW = ( 1L << 32 ) - 1; private final static long HIGH = LOW << 32; private final static long multAdd( int x, long a, long b ) { final long a0 = ( a & LOW ) * x; final long a1 = ( a & HIGH ) * x; final long c0 = a0 + ( a1 << 32 ); final long c1 = ( a0 >>> 32 ) + a1; return ( c0 & PRIME ) + ( c1 >>> 29 ) + b; } private final static long rehash( final int x ) { // TODO: we should really use tabulation-based 5-way independent hashing. final long h = multAdd( x, 104659742703825433L, 8758810104009432107L ); return ( h & PRIME ) + ( h >>> 61 ); } public TermMap() { length = 1024; mask = length - 1; key = new MutableString[ length ]; value = new ByteArrayPostingList[ length ]; } private int findPos( final MutableString k, final int hash, final int rehash ) { int pos = rehash & mask; MutableString key[] = this.key, s; //int i = 0; while( ( s = key[ pos ] ) != null && ( s.hashCode() != hash || ! s.equals( k ) ) ) { pos = ( pos + 1 ) & mask; //i++; } //System.err.println( i ); return pos; } @Override public void clear() { length = 1024; mask = length - 1; size = 0; key = new MutableString[ length ]; value = new ByteArrayPostingList[ length ]; } @Override public ObjectSet keySet() { return new AbstractObjectSet() { @Override public ObjectIterator iterator() { return new AbstractObjectIterator() { private int i = 0; private int pos = -1; @Override public boolean hasNext() { return i < size; } @Override public MutableString next() { if ( ! 
hasNext() ) throw new NoSuchElementException(); while( key[ ++pos ] == null ); i++; return key[ pos ]; } }; } @Override public boolean contains( Object o ) { return get( o ) != null; } @Override public int size() { return size; } }; } @Override public ObjectSet values() { return new AbstractObjectSet() { @Override public ObjectIterator iterator() { return new AbstractObjectIterator() { private int i = 0; private int pos = -1; @Override public boolean hasNext() { return i < size; } @Override public ByteArrayPostingList next() { if ( ! hasNext() ) throw new NoSuchElementException(); while( key[ ++pos ] == null ); i++; return value[ pos ]; } }; } // TODO @Override public boolean contains( Object o ) { throw new UnsupportedOperationException(); } @Override public int size() { return size; } }; } @Override public ByteArrayPostingList put( MutableString k, ByteArrayPostingList v ) { int hash = k.hashCode(); int pos = findPos( k, hash, (int)rehash( hash ) ); if ( key[ pos ] != null ) return value[ pos ]; size++; key[ pos ] = k; value[ pos ] = v; if ( size * 4 / 3 > length ) { length *= 2; mask = length - 1; final MutableString newKey[] = new MutableString[ length ]; final ByteArrayPostingList[] newValue = new ByteArrayPostingList[ length ]; final MutableString[] key = this.key; final ByteArrayPostingList[] value = this.value; for( int i = key.length; i-- != 0; ) { if ( key[ i ] != null ) { hash = key[ i ].hashCode(); pos = (int)rehash( hash ) & mask; while( newKey[ pos ] != null ) pos = ( pos + 1 ) & mask; newKey[ pos ] = key[ i ]; newValue[ pos ] = value[ i ]; } } this.key = newKey; this.value = newValue; } return null; } @Override public int size() { return size; } @Override public ObjectSet> object2ObjectEntrySet() { return new AbstractObjectSet>() { @Override public ObjectIterator> iterator() { return new AbstractObjectIterator>() { private int i = 0; private int pos = -1; @Override public boolean hasNext() { return i < size; } @Override public Entry next() { if ( ! 
hasNext() ) throw new NoSuchElementException(); while( key[ ++pos ] == null ); i++; return new BasicEntry( key[ pos ], value[ pos ] ); } }; } @SuppressWarnings("unchecked") @Override public boolean contains( Object o ) { it.unimi.dsi.fastutil.objects.Object2ObjectMap.Entry e = (it.unimi.dsi.fastutil.objects.Object2ObjectMap.Entry)o; final ByteArrayPostingList byteArrayPostingList = get( e.getKey() ); return byteArrayPostingList == e.getValue(); } @Override public int size() { return size; } }; } @Override public ByteArrayPostingList get( final Object k ) { final MutableString s = (MutableString)k; final int hash = s.hashCode(); final int pos = findPos( s, hash, (int)rehash( hash ) ); return key[ pos ] != null ? value[ pos ] : null; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy