All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.bits.PrefixCoderTransformationStrategy Maven / Gradle / Ivy

Go to download

The DSI utilities are a mishmash of classes accumulated during the last twenty years in projects developed at the DSI (Dipartimento di Scienze dell'Informazione, i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., Informatics Department), of the Università degli Studi di Milano.

There is a newer version: 2.7.3
Show newest version
package it.unimi.dsi.bits;

/*
 * DSI utilities
 *
 * Copyright (C) 2007-2019 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.compression.PrefixCodec;
import it.unimi.dsi.compression.PrefixCoder;
import it.unimi.dsi.fastutil.chars.Char2IntMap;
import it.unimi.dsi.fastutil.chars.Char2IntOpenHashMap;

/** A transformation strategy mapping strings using a {@linkplain PrefixCodec prefix-free encoder}.
 *
 * 

The actual encoding must be provided via a map from characters to symbols, and a set * of codewords. The default return value of the map will be used for unknown characters. * *

This strategy creates a new {@link LongArrayBitVector} each time {@link #toBitVector(CharSequence)} is invoked. */ public class PrefixCoderTransformationStrategy implements TransformationStrategy { private static final long serialVersionUID = 1; protected final BitVector[] codeWord; protected final Char2IntOpenHashMap char2symbol; protected final boolean prefixFree; /** Create a new transformation strategy based on a prefix-free coder. * * @param coder a prefix-free coder. * @param char2symbol a map from character to symbols (the default returned value will be used for unknown symbols). * @param prefixFree whether it is required that the resulting bit vectors are prefix-free: in this case, symbol 0 will * be appended to each string, and will not be allowed to appear in any string. */ public PrefixCoderTransformationStrategy(final PrefixCoder coder, final Char2IntOpenHashMap char2symbol, final boolean prefixFree) { this(coder.codeWords(), char2symbol, prefixFree); } protected PrefixCoderTransformationStrategy(final PrefixCoderTransformationStrategy transformationStrategy) { this(transformationStrategy.codeWord, transformationStrategy.char2symbol, transformationStrategy.prefixFree); } protected PrefixCoderTransformationStrategy(final BitVector[] codeWord, final Char2IntOpenHashMap char2symbol, final boolean prefixFree) { this.codeWord = codeWord; this.char2symbol = char2symbol; this.prefixFree = prefixFree; } @Override public LongArrayBitVector toBitVector(final CharSequence s) { final BitVector[] codeWord = this.codeWord; final Char2IntMap char2symbol = this.char2symbol; final int length = s.length(); int numBits = (int) (prefixFree ? 
codeWord[0].length() : 0); for(int i = length; i-- != 0;) numBits += codeWord[char2symbol.get(s.charAt(i))].length(); final LongArrayBitVector result = LongArrayBitVector.getInstance(numBits); for(int i = 0; i < s.length(); i++) result.append(codeWord[char2symbol.get(s.charAt(i))]); if (prefixFree) result.append(codeWord[0]); return result; } @Override public long length(final CharSequence s) { final BitVector[] codeWord = this.codeWord; final Char2IntMap char2symbol = this.char2symbol; final int length = s.length(); int numBits = (int) (prefixFree ? codeWord[0].length() : 0); for(int i = length; i-- != 0;) numBits += codeWord[char2symbol.get(s.charAt(i))].length(); return numBits; } @Override public long numBits() { long numBits = 0; for(int i = codeWord.length; i-- != 0;) numBits += codeWord[i].length(); return numBits; } @Override public PrefixCoderTransformationStrategy copy() { return new PrefixCoderTransformationStrategy(this); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy