All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.descriptor.DescriptorEncoder Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

package com.actelion.research.chem.descriptor;

/**
 * DescriptorEncoder encodes int[] based descriptors
 * into byte arrays that may be used to instantiate Strings
 */

public class DescriptorEncoder {
    // NOTE(review): presumably the largest count value an encoded descriptor may carry;
    // its use is not visible in this part of the file - confirm against the callers.
    public static final int MAX_COUNT_VALUE = 63;

    // Number of payload bits stored in every encoded byte (one code character).
    private static final int BITS = 6;

    // NOTE(review): use not visible in this part of the file - confirm.
    private static final int PAIR_BITS = 4;

    // CODE Strings must contain highest ASCII character as the end
    // The charset is given explicitly: getBytes() without argument uses the platform default
    // charset, which would make the code tables depend on the machine the class runs on.
    private static final byte[] sCode = "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
            .getBytes(java.nio.charset.StandardCharsets.US_ASCII);
    private static final byte[] sCodeMultipleMin = "!#$%&()*+,-./"
            .getBytes(java.nio.charset.StandardCharsets.US_ASCII);
    private static final byte[] sCodeMultipleMax = ":;<=>?[]^{|}~"
            .getBytes(java.nio.charset.StandardCharsets.US_ASCII);

    // Reverse lookup tables, indexed by the byte value of a code character.
    // sDecode is allocated in the constructor when it is still null.
    private static int[]  sDecode,sDecodeMultiple;

    // Buffer that is currently being written (encoding) or read (decoding).
    private byte[]  mBytes;

    // mByteIndex:     index of the current byte within mBytes
    // mAvailableBits: free bits left in the current byte while encoding
    // mTempData:      decoded value of the current byte while decoding
    // mByteMask:      single-bit mask selecting the next bit to read while decoding
    private int     mByteIndex,mAvailableBits,mTempData,mByteMask;

    // Decoded value of the current byte as buffered for decodeBitsLong().
	private int     mTempDataLong;

    /**
     * Creates an encoder and, when the static sDecode table has not been allocated yet,
     * starts to build it.
     * NOTE(review): this listing is damaged. The for-loop header below breaks off in the
     * middle of its condition and the statements that follow ('no', 'bits', 'return bits')
     * look like the tail of a different method whose header is missing. Restore the
     * original text from the upstream source before changing any code here.
     */
    public DescriptorEncoder() {
    	if (sDecode == null) {
    		// NOTE(review): sDecode is static, but the lock is taken on this instance, so two
    		// instances do not exclude each other here - confirm whether that is intended.
    		synchronized(this) {
		        // number of different values one group of BITS bits can take
		        int len = 1 << BITS;
		        assert len <= sCode.length : "Error in encoding, not enough characters.";
		
		        // table length is the byte value of the last sCode character plus one
		        sDecode = new int[sCode[sCode.length-1]+1];
		        // NOTE(review): text is missing from here on (see note above)
		        for (int i=0; i 0) {
			no >>= 1;
			bits++;
			}
		return bits;
		}

	/**
	 * Prepares this encoder for writing a new bit stream: allocates the output
	 * buffer and positions the writer on its first, still empty byte.
	 * @param bitCount number of bits the buffer has to hold; every byte takes BITS of them
	 */
	private void encodeStart(int bitCount) {
		// one byte for every started group of BITS bits
		int byteCount = (bitCount + BITS - 1) / BITS;
		mBytes = new byte[byteCount];
		mByteIndex = 0;
		mAvailableBits = BITS;
		}

    /**
     * Appends bits of an int value to the output buffer, one bit per loop pass,
     * beginning with bit (bits-1) of the value and ending with bit 0.
     * Whenever the current byte has no free bit left, its content is replaced by the
     * matching sCode character and writing continues in the next byte.
     * @param data value that provides the bits
     * @param bits number of bits to append; 0 appends nothing
     */
    private void encodeBits(int data, int bits) {
        int highestBit = (bits == 0) ? 0 : 1 << (bits - 1);
        for (int bit = highestBit; bit != 0; bit >>>= 1) {
            if (mAvailableBits == 0) {
                // current byte is complete: turn its value into a code character
                int value = mBytes[mByteIndex];
                mBytes[mByteIndex] = sCode[value];
                mByteIndex++;
                mAvailableBits = BITS;
                }

            // shift the byte content up and put the new bit into the lowest position
            int lowBit = ((data & bit) == 0) ? 0 : 1;
            mBytes[mByteIndex] = (byte)((mBytes[mByteIndex] << 1) | lowBit);
            mAvailableBits--;
            }
        }

	/**
	 * Appends bits of a long value to the output buffer, one bit per loop pass,
	 * beginning with bit (bits-1) of the value and ending with bit 0.
	 * Whenever the current byte has no free bit left, its content is replaced by the
	 * matching sCode character and writing continues in the next byte.
	 * @param data value that provides the bits
	 * @param bits number of bits to append; 0 appends nothing
	 */
	private void encodeBits(long data, int bits) {
		long highestBit = (bits == 0) ? 0L : 1L << (bits - 1);
		for (long bit = highestBit; bit != 0; bit >>>= 1) {
			if (mAvailableBits == 0) {
				// current byte is complete: turn its value into a code character
				int value = mBytes[mByteIndex];
				mBytes[mByteIndex] = sCode[value];
				mByteIndex++;
				mAvailableBits = BITS;
				}

			// shift the byte content up and put the new bit into the lowest position
			int lowBit = ((data & bit) == 0) ? 0 : 1;
			mBytes[mByteIndex] = (byte)((mBytes[mByteIndex] << 1) | lowBit);
			mAvailableBits--;
			}
		}

	/**
	 * Completes the byte that is currently being written: its content is shifted
	 * up by the number of still unused bits and then replaced by the matching
	 * sCode character.
	 */
	private void encodeBitsEnd() {
		final int last = mByteIndex;
		mBytes[last] <<= mAvailableBits;    // unused low bits stay 0
		int value = mBytes[last];
		mBytes[last] = sCode[value];
		}

    /**
     * Prepares this encoder for reading bits from the given code bytes.
     * The first byte is translated through sDecode and buffered for both the
     * int and the long reader; the bit mask is set to the highest of the BITS bits.
     * @param bytes encoded bytes to read from; the first element is accessed right away
     */
    private void decodeStart(byte[] bytes) {
        mBytes = bytes;
        mByteIndex = 0;

        int firstValue = sDecode[mBytes[0]];
        mTempData = firstValue;
        mTempDataLong = firstValue;

        mByteMask = 1 << (BITS - 1);
        }

    /**
     * Reads the given number of bits from the current read position and returns
     * them as an int; the bit read first ends up in the highest position.
     * When all bits of the buffered byte are used up, the next byte of mBytes
     * is translated through sDecode and buffered.
     * @param bits number of bits to read
     * @return the bits read, right aligned
     */
    private int decodeBits(int bits) {
        int value = 0;
        for (int remaining = bits; remaining != 0; remaining--) {
            if (mByteMask == 0) {
                // buffered byte is exhausted: load the next one
                mByteIndex++;
                mTempData = sDecode[mBytes[mByteIndex]];
                mByteMask = 1 << (BITS - 1);
                }

            int bit = ((mTempData & mByteMask) == 0) ? 0 : 1;
            value = (value << 1) | bit;
            mByteMask >>>= 1;
            }
        return value;
        }

	/**
	 * Reads the given number of bits from the current read position and returns
	 * them as a long; the bit read first ends up in the highest position.
	 * When all bits of the buffered byte are used up, the next byte of mBytes
	 * is translated through sDecode and buffered.
	 * <p>
	 * The bits are taken from mTempData, the same buffer decodeBits() reads and
	 * refreshes. Read position (mByteIndex, mByteMask) is shared between both
	 * methods, so reading from a separate buffer would return bits of a stale byte
	 * as soon as calls to decodeBits() and decodeBitsLong() are mixed on one stream.
	 * @param bits number of bits to read
	 * @return the bits read, right aligned
	 */
	private long decodeBitsLong(int bits) {
		long data = 0L;
		while (bits != 0) {
			if (mByteMask == 0) {
				mByteIndex++;
				mTempData = sDecode[mBytes[mByteIndex]];
				// kept in step for any code that still looks at the long buffer
				mTempDataLong = mTempData;
				mByteMask = 1 << (BITS - 1);
				}
			data <<= 1;
			if ((mTempData & mByteMask) != 0)
				data |= 1L;
			mByteMask >>>= 1;
			bits--;
			}
		return data;
		}

	/**
	 * NOTE(review): this listing is damaged. The for-loop header below breaks off in the
	 * middle of its condition, and the remaining statements (they use 'replacement', 'count',
	 * 'oldIndex', 'newIndex' and return an int although this method is declared to return
	 * byte[]) look like the tail of a different method whose header is missing.
	 * Restore the original text from the upstream source before changing any code here.
	 * @param bytes encoded bytes; only their number is read in the part that is still visible
	 */
	private byte[] decodeDuplicateBytes(byte[] bytes) {
        int length = bytes.length;
        // NOTE(review): text is missing from here on (see note above)
        for (int i=0; i replacement.length+1) {
                    // a run longer than the replacement table covers: write the table's last
                    // character and account for replacement.length+1 consumed bytes
                    mBytes[newIndex++] = replacement[replacement.length-1];
                    oldIndex += replacement.length+1;
                    count -= replacement.length+1;
                    }
                if (count > 1) {
                    // remaining run of 'count' bytes is written as one character, replacement[count-2]
                    mBytes[newIndex++] = replacement[count-2];
                    oldIndex += count;
                    continue;
                    }
                }

            // no run to replace at this position: keep the byte as it is
            mBytes[newIndex++] = mBytes[oldIndex++];
            }
        return newIndex;
        }
    }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy