org.apfloat.internal.FloatCarryCRTStepStrategy Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of apfloat Show documentation
High performance arbitrary precision arithmetic library
There is a newer version: 1.14.0
package org.apfloat.internal;

import java.math.BigInteger;

import org.apfloat.ApfloatRuntimeException;
import org.apfloat.spi.CarryCRTStepStrategy;
import org.apfloat.spi.DataStorage;
import static org.apfloat.internal.FloatModConstants.*;

/**
 * Class for performing the final steps of a three-modulus
 * Number Theoretic Transform based convolution. Works for the
 * float type.
 *
 * All access to this class must be externally synchronized.
 *
 * @since 1.7.0
 * @version 1.8.0
 * @author Mikko Tommila
 */

public class FloatCarryCRTStepStrategy
    extends FloatCRTMath
    implements CarryCRTStepStrategy
{
    /**
     * Creates a carry-CRT steps object using the specified radix.
     *
     * @param radix The radix that will be used.
     */

    public FloatCarryCRTStepStrategy(int radix)
    {
        super(radix);
    }

    public float[] crt(DataStorage resultMod0, DataStorage resultMod1, DataStorage resultMod2, DataStorage dataStorage, long size, long resultSize, long offset, long length)
        throws ApfloatRuntimeException
    {
        long skipSize = (offset == 0 ? size - resultSize + 1: 0);   // For the first block, ignore the first 1-3 elements
        long lastSize = (offset + length == size ? 1: 0);           // For the last block, add 1 element
        long nonLastSize = 1 - lastSize;                            // For the other than last blocks, move 1 element
        long subResultSize = length - skipSize + lastSize;

        long subStart = size - offset,
             subEnd = subStart - length,
             subResultStart = size - offset - length + nonLastSize + subResultSize,
             subResultEnd = subResultStart - subResultSize;

        DataStorage.Iterator src0 = resultMod0.iterator(DataStorage.READ, subStart, subEnd),
                             src1 = resultMod1.iterator(DataStorage.READ, subStart, subEnd),
                             src2 = resultMod2.iterator(DataStorage.READ, subStart, subEnd),
                             dst = dataStorage.iterator(DataStorage.WRITE, subResultStart, subResultEnd);

        float[] carryResult = new float[3],
                  sum = new float[3],
                  tmp = new float[3];

        // Preliminary carry-CRT calculation (happens in parallel in multiple blocks)
        for (long i = 0; i < length; i++)
        {
            float y0 = MATH_MOD_0.modMultiply(T0, src0.getFloat()),
                    y1 = MATH_MOD_1.modMultiply(T1, src1.getFloat()),
                    y2 = MATH_MOD_2.modMultiply(T2, src2.getFloat());

            multiply(M12, y0, sum);
            multiply(M02, y1, tmp);

            if (add(tmp, sum) != 0 ||
                compare(sum, M012) >= 0)
            {
                subtract(M012, sum);
            }

            multiply(M01, y2, tmp);

            if (add(tmp, sum) != 0 ||
                compare(sum, M012) >= 0)
            {
                subtract(M012, sum);
            }

            add(sum, carryResult);

            float result = divide(carryResult);

            // In the first block, ignore the first element (it's zero in full precision calculations)
            // and possibly one or two more in limited precision calculations
            if (i >= skipSize)
            {
                dst.setFloat(result);
                dst.next();
            }

            src0.next();
            src1.next();
            src2.next();
        }

        // Calculate the last words (in base math)
        float result0 = divide(carryResult);
        float result1 = carryResult[2];

        assert (carryResult[0] == 0);
        assert (carryResult[1] == 0);

        // Last block has one extra element (corresponding to the one skipped in the first block)
        if (subResultSize == length - skipSize + 1)
        {
            dst.setFloat(result0);
            dst.close();

            result0 = result1;
            assert (result1 == 0);
        }

        float[] results = { result1, result0 };

        return results;
    }

    public float[] carry(DataStorage dataStorage, long size, long resultSize, long offset, long length, float[] results, float[] previousResults)
        throws ApfloatRuntimeException
    {
        long skipSize = (offset == 0 ? size - resultSize + 1: 0);   // For the first block, ignore the first 1-3 elements
        long lastSize = (offset + length == size ? 1: 0);           // For the last block, add 1 element
        long nonLastSize = 1 - lastSize;                            // For the other than last blocks, move 1 element
        long subResultSize = length - skipSize + lastSize;

        long subResultStart = size - offset - length + nonLastSize + subResultSize,
             subResultEnd = subResultStart - subResultSize;

        // Get iterators for the previous block carries, and dst, padded with this block's carries
        // Note that size could be 1 but carries size is 2
        DataStorage.Iterator src = arrayIterator(previousResults);
        DataStorage.Iterator dst = compositeIterator(dataStorage.iterator(DataStorage.READ_WRITE, subResultStart, subResultEnd), subResultSize, arrayIterator(results));

        // Propagate base addition through dst, and this block's carries
        float carry = baseAdd(dst, src, 0, dst, previousResults.length);
        carry = baseCarry(dst, carry, subResultSize);
        dst.close();                                                    // Iterator likely was not iterated to end

        assert (carry == 0);

        return results;
    }

    private float baseCarry(DataStorage.Iterator srcDst, float carry, long size)
        throws ApfloatRuntimeException
    {
        for (long i = 0; i < size && carry > 0; i++)
        {
            carry = baseAdd(srcDst, null, carry, srcDst, 1);
        }

        return carry;
    }

    // Wrap an array in a simple reverse-order iterator, padded with zeros
    private static DataStorage.Iterator arrayIterator(final float[] data)
    {
        return new DataStorage.Iterator()
        {
            public boolean hasNext()
            {
                return true;
            }

            public void next()
            {
                this.position--;
            }

            public float getFloat()
            {
                assert (this.position >= 0);
                return data[this.position];
            }

            public void setFloat(float value)
            {
                assert (this.position >= 0);
                data[this.position] = value;
            }

            private static final long serialVersionUID = 1L;

            private int position = data.length - 1;
        };
    }

    // Composite iterator, made by concatenating two iterators
    private static DataStorage.Iterator compositeIterator(final DataStorage.Iterator iterator1, final long size, final DataStorage.Iterator iterator2)
    {
        return new DataStorage.Iterator()
        {
            public boolean hasNext()
            {
                return (this.position < size ? iterator1.hasNext() : iterator2.hasNext());
            }

            public void next()
                throws ApfloatRuntimeException
            {
                (this.position < size ? iterator1 : iterator2).next();
                this.position++;
            }

            public float getFloat()
                throws ApfloatRuntimeException
            {
                return (this.position < size ? iterator1 : iterator2).getFloat();
            }

            public void setFloat(float value)
                throws ApfloatRuntimeException
            {
                (this.position < size ? iterator1 : iterator2).setFloat(value);
            }

            public void close()
                throws ApfloatRuntimeException
            {
                (this.position < size ? iterator1 : iterator2).close();
            }

            private static final long serialVersionUID = 1L;

            private long position;
        };
    }

    private static final long serialVersionUID = 3192182234524626533L;

    private static final FloatModMath MATH_MOD_0,
                                        MATH_MOD_1,
                                        MATH_MOD_2;
    private static final float T0,
                                 T1,
                                 T2;
    private static final float[] M01,
                                   M02,
                                   M12,
                                   M012;

    static
    {
        MATH_MOD_0 = new FloatModMath();
        MATH_MOD_1 = new FloatModMath();
        MATH_MOD_2 = new FloatModMath();

        MATH_MOD_0.setModulus(MODULUS[0]);
        MATH_MOD_1.setModulus(MODULUS[1]);
        MATH_MOD_2.setModulus(MODULUS[2]);

        // Probably sub-optimal, but it's a one-time operation

        BigInteger base = BigInteger.valueOf(Math.abs((long) MAX_POWER_OF_TWO_BASE)),   // In int case the base is 0x80000000
                   m0 = BigInteger.valueOf((long) MODULUS[0]),
                   m1 = BigInteger.valueOf((long) MODULUS[1]),
                   m2 = BigInteger.valueOf((long) MODULUS[2]),
                   m01 = m0.multiply(m1),
                   m02 = m0.multiply(m2),
                   m12 = m1.multiply(m2);

        T0 = m12.modInverse(m0).floatValue();
        T1 = m02.modInverse(m1).floatValue();
        T2 = m01.modInverse(m2).floatValue();

        M01 = new float[2];
        M02 = new float[2];
        M12 = new float[2];
        M012 = new float[3];

        BigInteger[] qr = m01.divideAndRemainder(base);
        M01[0] = qr[0].floatValue();
        M01[1] = qr[1].floatValue();

        qr = m02.divideAndRemainder(base);
        M02[0] = qr[0].floatValue();
        M02[1] = qr[1].floatValue();

        qr = m12.divideAndRemainder(base);
        M12[0] = qr[0].floatValue();
        M12[1] = qr[1].floatValue();

        qr = m0.multiply(m12).divideAndRemainder(base);
        M012[2] = qr[1].floatValue();
        qr = qr[0].divideAndRemainder(base);
        M012[0] = qr[0].floatValue();
        M012[1] = qr[1].floatValue();
    }
}