// JAR search and dependency download from the Maven repository.
// Source listing of it.unimi.dsi.big.mg4j.util.TermMap (Maven / Gradle / Ivy).
// Artifact: mg4j-big (all versions and documentation are available from the repository page).
package it.unimi.dsi.big.mg4j.util;
/*
* MG4J: Managing Gigabytes for Java (big)
*
* Copyright (C) 2010-2011 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.objects.AbstractObject2ObjectMap;
import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
import it.unimi.dsi.fastutil.objects.AbstractObjectSet;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import it.unimi.dsi.fastutil.objects.ObjectSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.big.mg4j.io.ByteArrayPostingList;
import it.unimi.dsi.big.mg4j.tool.Scan;
import java.util.Collection;
import java.util.NoSuchElementException;
/** A hash map from {@linkplain MutableString mutable strings} to
* {@linkplain ByteArrayPostingList byte-array posting lists} used
* by {@link Scan} for indexing.
*
* The map implemented by this class is very simple, and does not support {@link Collection#remove(Object)}.
* It uses
*
*/
public class TermMap extends AbstractObject2ObjectMap {
private static final long serialVersionUID = 1L;
private MutableString[] key;
private ByteArrayPostingList[] value;
private int mask;
private int size;
private int length;
private final static long PRIME = ( 1L << 61 ) - 1;
private final static long LOW = ( 1L << 32 ) - 1;
private final static long HIGH = LOW << 32;
private final static long multAdd( int x, long a, long b ) {
final long a0 = ( a & LOW ) * x;
final long a1 = ( a & HIGH ) * x;
final long c0 = a0 + ( a1 << 32 );
final long c1 = ( a0 >>> 32 ) + a1;
return ( c0 & PRIME ) + ( c1 >>> 29 ) + b;
}
private final static long rehash( final int x ) {
// TODO: we should really use tabulation-based 5-way independent hashing.
final long h = multAdd( x, 104659742703825433L, 8758810104009432107L );
return ( h & PRIME ) + ( h >>> 61 );
}
public TermMap() {
length = 1024;
mask = length - 1;
key = new MutableString[ length ];
value = new ByteArrayPostingList[ length ];
}
private int findPos( final MutableString k, final int hash, final int rehash ) {
int pos = rehash & mask;
MutableString key[] = this.key, s;
//int i = 0;
while( ( s = key[ pos ] ) != null && ( s.hashCode() != hash || ! s.equals( k ) ) ) {
pos = ( pos + 1 ) & mask;
//i++;
}
//System.err.println( i );
return pos;
}
@Override
public void clear() {
length = 1024;
mask = length - 1;
size = 0;
key = new MutableString[ length ];
value = new ByteArrayPostingList[ length ];
}
@Override
public ObjectSet keySet() {
return new AbstractObjectSet() {
@Override
public ObjectIterator iterator() {
return new AbstractObjectIterator() {
private int i = 0;
private int pos = -1;
@Override
public boolean hasNext() {
return i < size;
}
@Override
public MutableString next() {
if ( ! hasNext() ) throw new NoSuchElementException();
while( key[ ++pos ] == null );
i++;
return key[ pos ];
}
};
}
@Override
public boolean contains( Object o ) {
return get( o ) != null;
}
@Override
public int size() {
return size;
}
};
}
@Override
public ObjectSet values() {
return new AbstractObjectSet() {
@Override
public ObjectIterator iterator() {
return new AbstractObjectIterator() {
private int i = 0;
private int pos = -1;
@Override
public boolean hasNext() {
return i < size;
}
@Override
public ByteArrayPostingList next() {
if ( ! hasNext() ) throw new NoSuchElementException();
while( key[ ++pos ] == null );
i++;
return value[ pos ];
}
};
}
// TODO
@Override
public boolean contains( Object o ) {
throw new UnsupportedOperationException();
}
@Override
public int size() {
return size;
}
};
}
@Override
public ByteArrayPostingList put( MutableString k, ByteArrayPostingList v ) {
int hash = k.hashCode();
int pos = findPos( k, hash, (int)rehash( hash ) );
if ( key[ pos ] != null ) return value[ pos ];
size++;
key[ pos ] = k;
value[ pos ] = v;
if ( size * 4 / 3 > length ) {
length *= 2;
mask = length - 1;
final MutableString newKey[] = new MutableString[ length ];
final ByteArrayPostingList[] newValue = new ByteArrayPostingList[ length ];
final MutableString[] key = this.key;
final ByteArrayPostingList[] value = this.value;
for( int i = key.length; i-- != 0; ) {
if ( key[ i ] != null ) {
hash = key[ i ].hashCode();
pos = (int)rehash( hash ) & mask;
while( newKey[ pos ] != null ) pos = ( pos + 1 ) & mask;
newKey[ pos ] = key[ i ];
newValue[ pos ] = value[ i ];
}
}
this.key = newKey;
this.value = newValue;
}
return null;
}
@Override
public int size() {
return size;
}
@Override
public ObjectSet> object2ObjectEntrySet() {
return new AbstractObjectSet>() {
@Override
public ObjectIterator> iterator() {
return new AbstractObjectIterator>() {
private int i = 0;
private int pos = -1;
@Override
public boolean hasNext() {
return i < size;
}
@Override
public Entry next() {
if ( ! hasNext() ) throw new NoSuchElementException();
while( key[ ++pos ] == null );
i++;
return new BasicEntry( key[ pos ], value[ pos ] );
}
};
}
@SuppressWarnings("unchecked")
@Override
public boolean contains( Object o ) {
it.unimi.dsi.fastutil.objects.Object2ObjectMap.Entry e = (it.unimi.dsi.fastutil.objects.Object2ObjectMap.Entry)o;
final ByteArrayPostingList byteArrayPostingList = get( e.getKey() );
return byteArrayPostingList == e.getValue();
}
@Override
public int size() {
return size;
}
};
}
@Override
public ByteArrayPostingList get( final Object k ) {
final MutableString s = (MutableString)k;
final int hash = s.hashCode();
final int pos = findPos( s, hash, (int)rehash( hash ) );
return key[ pos ] != null ? value[ pos ] : null;
}
}