All downloads are free. Search and download functionality uses the official Maven repository.

it.unimi.dsi.util.InternedMutableStringSet Maven / Gradle / Ivy

Go to download

Blazegraph modifications to the DSI utils. These are forked from version 1.10.0 under LGPLv2.1.

There is a newer version: 2.1.4
Show newest version
package it.unimi.dsi.util;

/*		 
 * DSI utilities
 *
 * Copyright (C) 2006-2009 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream;
import it.unimi.dsi.io.OutputBitStream;

import java.io.IOException;

/** A set of interned mutable strings.
 *
 * 

This class extends {@link it.unimi.dsi.fastutil.objects.ObjectOpenHashSet} by * providing an {@link #intern(MutableString)} method with a semantics similar to * that of {@link String#intern()}. */ public class InternedMutableStringSet extends ObjectOpenHashSet { public final static class Term extends MutableString { private static final long serialVersionUID = 0L; public int lastDocument; public int lastPosition = -1; public final FastByteArrayOutputStream fbaos = new FastByteArrayOutputStream( 1 ); public final OutputBitStream obs = new OutputBitStream( fbaos, 0 ); public Term( MutableString s ) { super( s ); } public void addOccurrence( int document, int position ) throws IOException { obs.writeDelta( document - lastDocument ); if ( document != lastDocument ) lastPosition = -1; obs.writeDelta( position - lastPosition - 1 ); lastDocument = document; lastPosition = position; } } private static final long serialVersionUID = 0L; private int free; private int p; private int count; private byte[] state; public InternedMutableStringSet() { super(); } public InternedMutableStringSet( final int n, final float f ) { super( n, f ); } public InternedMutableStringSet( final int n ) { super( n ); } /** Returns an interned, canonical copy contained in this set of the specified mutable string. * *

The semantics of this method is essentially the same as that of * {@link java.util.Collection#add(Object)}, but * this method will return a mutable string * equal to s currently in this set. The string will * never be s, as in the case s is * not in this set a {@linkplain MutableString#compact() compact copy} * of s will be stored instead. * *

The purpose of this method is similar to that of {@link String#intern()}, * but obviously here the user has much greater control. * * @param s the mutable string that must be interned. * @return the mutable string equal to s stored in this set. */ public Term intern( final MutableString s ) { // Duplicate code from add()--keep in line! final int i = findInsertionPoint( s ); if ( i < 0 ) return (Term)(key[ -( i + 1 ) ]); if ( state[ i ] == FREE ) free--; state[ i ] = OCCUPIED; final Term t = (Term)( key[ i ] = new Term( s ) ); if ( ++count >= maxFill ) { int newP = Math.min( p + growthFactor(), PRIMES.length - 1 ); // Just to be sure that size changes when p is very small. while( PRIMES[ newP ] == PRIMES[ p ] ) newP++; rehash( newP ); // Table too filled, let's rehash } if ( free == 0 ) rehash( p ); return t; } //Copied from add(...) in the fastutil 6.5.11 private int findInsertionPoint(MutableString k) { int pos = ( (k) == null ? 0x87fcd5c : it.unimi.dsi.fastutil.HashCommon.murmurHash3( (k).hashCode() ^ mask ) ) & mask; // There's always an unused entry. while( used[ pos ] ) { if ( ( (key[ pos ]) == null ? (k) == null : (key[ pos ]).equals(k) ) ) return pos; pos = ( pos + 1 ) & mask; } // TODO Auto-generated method stub return pos; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy