All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.sux4j.bits.JacobsonBalancedParentheses Maven / Gradle / Ivy

Go to download

Sux4j is an implementation of succinct data structures in Java. It provides a number of related implementations covering ranking/selection over bit arrays, compressed lists and minimal perfect hashing.

There is a newer version: 5.4.1
Show newest version
package it.unimi.dsi.sux4j.bits;

/*		 
 * Sux4J: Succinct data structures for Java
 *
 * Copyright (C) 2010-2016 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.bits.BitVector;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.sux4j.mph.HollowTrieMonotoneMinimalPerfectHashFunction;
import it.unimi.dsi.sux4j.util.EliasFanoLongBigList;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.Arrays;
import java.util.Collections;


/** An implementation of Jacobson's balanced parentheses data structure.
 * 
 * Warning: this class is a stub implementing just those methods needed by a {@link HollowTrieMonotoneMinimalPerfectHashFunction}.
 * 
 * @author Sebastiano Vigna
 */

public class JacobsonBalancedParentheses implements BalancedParentheses {
	/** Formats a word as a multi-line debugging string.
	 * 
	 * <p>The result starts with a newline, followed by the 64 binary digits of the word
	 * (most significant first, zero-padded), then a line with the value of each 2-bit block and
	 * a line with the value of each 4-bit block, both printed from the most significant block down.
	 * 
	 * @param l the word to format.
	 * @param reverse if true, the bits of {@code l} are reversed (see {@link Long#reverse(long)}) before formatting.
	 * @return the formatted string.
	 */
	public static String binary( long l, boolean reverse ) {
		final long value = reverse ? Long.reverse( l ) : l;

		// Left-pad with zeroes, then keep only the last 64 characters.
		final MutableString out = new MutableString();
		out.append( "0000000000000000000000000000000000000000000000000000000000000000000000000" );
		out.append( Long.toBinaryString( value ) );
		out.delete( 0, out.length() - 64 );
		out.insert( 0, '\n' );
		out.append( '\n' );

		// One digit per 2-bit block, most significant block first.
		for( int shift = 62; shift >= 0; shift -= 2 ) {
			out.append( " " );
			out.append( Long.toHexString( ( value >>> shift ) & 0x3 ) );
		}
		out.append( '\n' );

		// One hexadecimal digit per 4-bit block, most significant block first.
		for( int shift = 60; shift >= 0; shift -= 4 ) {
			out.append( "   " );
			out.append( Long.toHexString( ( value >>> shift ) & 0xF ) );
		}
		out.append( '\n' );

		return out.toString();
	}

	private static final long serialVersionUID = 1L;
	/** Compile-time switch guarding the internal consistency checks of this class. */
	private static final boolean ASSERTS = false;
	/** Compile-time switch guarding debugging output on standard error. */
	private static final boolean DEBUG = false;
	/** Compile-time switch guarding the detailed debugging output of {@link #findFarClose(long, int)}. */
	private static final boolean DDEBUG = false;
	/** The backing array of {@link #bitVector}; transient, it is fetched again from the bit vector in {@code readObject()}. */
	private transient long[] bits;
	/** The bit vector representing the parentheses, as passed to the constructor. */
	protected final BitVector bitVector;
	/** Select structure over the positions of opening pioneers, or {@code null} if the constructor was invoked with {@code findClose} false. */
	private final SparseSelect openingPioneers;
	/** The rank structure obtained from {@link #openingPioneers}, or {@code null}. */
	private final SparseRank openingPioneersRank;
	/** For each opening pioneer, the distance from the pioneer to its matching closed parenthesis, or {@code null}. */
	private final EliasFanoLongBigList openingPioneerMatches;
	/** Select structure over the positions of closing pioneers, or {@code null} if the constructor was invoked with {@code findOpen} false. */
	private final SparseSelect closingPioneers;
	/** The rank structure obtained from {@link #closingPioneers}, or {@code null}. */
	private final SparseRank closingPioneersRank;
	/** For each closing pioneer, the distance from its matching open parenthesis to the pioneer, or {@code null}. */
	private final EliasFanoLongBigList closingPioneerMatches;

	/** Counts the far open parentheses in the lowest bits of a word.
	 * 
	 * <p>Set bits are open parentheses, clear bits are closed parentheses. The word is scanned
	 * from bit {@code l - 1} down to bit 0; an open parenthesis is far if no closed
	 * parenthesis at a higher scanned position is left to match it.
	 * 
	 * @param word a word.
	 * @param l the number of (lowest) bits of {@code word} to consider.
	 * @return the number of open parentheses without a match among the considered bits.
	 */
	public final static int countFarOpen( long word, int l ) {
		int farOpen = 0;
		// When negative, minus the number of closed parentheses still waiting for a match.
		int excess = 0;
		for( int bit = l; bit-- != 0; ) {
			final boolean open = ( word & 1L << bit ) != 0;
			if ( open ) {
				excess++;
				if ( excess > 0 ) farOpen++;
			}
			else if ( excess > 0 ) excess = -1;
			else excess--;
		}

		return farOpen;
	}

	/** Finds a far open parenthesis in the lowest bits of a word.
	 * 
	 * <p>The scan is the same performed by {@code countFarOpen()}: bits are examined from
	 * position {@code l - 1} down to position 0, and far open parentheses are numbered
	 * in the order in which they are met (i.e., starting from the highest position).
	 * 
	 * @param word a word.
	 * @param l the number of (lowest) bits of {@code word} to consider.
	 * @param k the index (starting from zero) of the far open parenthesis to locate.
	 * @return the bit position of the {@code k}-th far open parenthesis, or -1 if there are not enough.
	 */
	public final static int findFarOpen( long word, int l, int k ) {
		int excess = 0;
		int remaining = k;
		for( int bit = l; bit-- != 0; ) {
			final boolean open = ( word & 1L << bit ) != 0;
			if ( ! open ) {
				if ( excess > 0 ) excess = -1;
				else excess--;
				continue;
			}
			excess++;
			if ( excess > 0 ) {
				if ( remaining == 0 ) return bit;
				remaining--;
			}
		}

		return -1;
	}

	/** Counts the far closed parentheses in the lowest bits of a word.
	 * 
	 * <p>Set bits are open parentheses, clear bits are closed parentheses. The word is scanned
	 * from bit 0 up to bit {@code l - 1}; a closed parenthesis is far if no open
	 * parenthesis at a lower scanned position is left to match it.
	 * 
	 * @param word a word.
	 * @param l the number of (lowest) bits of {@code word} to consider.
	 * @return the number of closed parentheses without a match among the considered bits.
	 */
	public final static int countFarClose( long word, int l ) {
		int farClose = 0;
		// When negative, minus the number of open parentheses still waiting for a match.
		int excess = 0;
		int bit = 0;
		while( bit < l ) {
			final boolean open = ( word & 1L << bit ) != 0;
			if ( open ) {
				excess = excess > 0 ? -1 : excess - 1;
			}
			else {
				excess++;
				if ( excess > 0 ) farClose++;
			}
			bit++;
		}

		return farClose;
	}

	/** Finds a far closed parenthesis in a word by linear scan.
	 * 
	 * <p>All {@link Long#SIZE} bits are scanned from bit 0 upwards with the same rule used by
	 * {@code countFarClose()}; far closed parentheses are numbered in the order in which they are met.
	 * 
	 * @param word a word.
	 * @param k the index (starting from zero) of the far closed parenthesis to locate.
	 * @return the bit position of the {@code k}-th far closed parenthesis, or -1 if there are not enough.
	 */
	public final static int findFarClose2( long word, int k ) {
		int excess = 0;
		int remaining = k;
		int bit = 0;
		while( bit < Long.SIZE ) {
			final boolean open = ( word & 1L << bit ) != 0;
			if ( open ) {
				excess = excess > 0 ? -1 : excess - 1;
			}
			else {
				excess++;
				if ( excess > 0 ) {
					if ( remaining == 0 ) return bit;
					remaining--;
				}
			}
			bit++;
		}

		return -1;
	}

	
	/** A word with the lowest bit of each 4-bit block set. */
	public static final long ONES_STEP_4 = 0x1111111111111111L;
	/** A word with the highest bit of each 4-bit block set. */
	public static final long MSBS_STEP_4 = 0x8L * ONES_STEP_4;
	/** A word with the lowest bit of each byte set. */
	public static final long ONES_STEP_8 = 0x0101010101010101L;
	/** A word with the highest bit of each byte set. */
	public static final long MSBS_STEP_8 = 0x80L * ONES_STEP_8;
	/** A word with the lowest bit of each 16-bit block set. */
	private static final long ONES_STEP_16 = 0x0001000100010001L;
	/** A word with the highest bit of each 16-bit block set. */
	private static final long MSBS_STEP_16 = 0x8000800080008000L;
	/** A word with the lowest bit of each 32-bit block set. */
	private static final long ONES_STEP_32 = 0x0000000100000001L;
	/** A word with the highest bit of each 32-bit block set. */
	private static final long MSBS_STEP_32 = 0x8000000080000000L;
	
	/** Finds a far closed parenthesis in a word using broadword (branch-free) operations.
	 * 
	 * <p>The method first computes, for blocks of 2, 4, 8, 16 and 32 bits, the number of far
	 * open and far closed parentheses of each block (set bits are open parentheses); it then
	 * descends from 32-bit blocks down to 2-bit blocks, at each level deciding whether the
	 * lower half can be skipped and updating {@code k} accordingly.
	 * 
	 * <p>NOTE(review): this looks like the broadword counterpart of {@code findFarClose2()};
	 * the result when fewer than {@code k + 1} far closed parentheses exist has not been
	 * checked here &mdash; confirm before relying on it.
	 * 
	 * @param word a word.
	 * @param k the index (starting from zero) of the far closed parenthesis to locate.
	 * @return the bit position computed for the {@code k}-th far closed parenthesis.
	 */
	public final static int findFarClose( long word, int k ) {
		// 00 -> 00 01 -> 01 11 -> 10 10 -> 00
		if ( DDEBUG ) System.err.println( "Before: " + binary( word, true ) );
		// b1 holds the odd-position bits moved down by one, b0 the even-position bits.
		final long b1 = ( word & ( 0xA * ONES_STEP_4 ) ) >>> 1;
		final long b0 = word & ( 0x5 * ONES_STEP_4 );
		// Set for 2-bit blocks whose upper bit is set and whose lower bit is clear.
		final long lsb = ( b1 ^ b0 ) & b1;
		//System.err.println( "b0:" + binary( b0, true ) );
		//System.err.println( "b1:" + binary( b1, true ) );
		//System.err.println( "lsb:" + binary( lsb, true ) );
		
		// Per 2-bit block: 2 if both bits are set, 1 if only the upper bit is set, 0 otherwise.
		final long open2 = ( b1 & b0 ) << 1 | lsb;
		if ( DDEBUG ) System.err.println( "Open:" + binary( open2, false ) );
		// 00 -> 10 01 -> 01 11 -> 00 10 -> 00
		// Per 2-bit block: 2 if both bits are clear, 1 if only the upper bit is set, 0 otherwise.
		final long closed2 = ( ( b1 | b0 ) ^ ( 0x5 * ONES_STEP_4 ) ) << 1 | lsb; 
		if ( DDEBUG ) System.err.println( "Closed:" + binary( closed2, false ) );
		
		// Merge pairs of 2-bit blocks into 4-bit blocks: the open count of the lower
		// half is compared with the closed count of the upper half.
		final long open4eccess = ( open2 & ( 0x3 * ONES_STEP_4  ) );
		final long closed4eccess = ( closed2 & ( 0xC * ONES_STEP_4 ) ) >>> 2;

		//if ( DDEBUG ) System.err.println( "Open e:  " + binary( open4eccess, false ) );
		//if ( DDEBUG ) System.err.println( "Closed e:" + binary( closed4eccess, false ) );

		// Parallel subtraction by 4-bit block; the highest bit of each block acts as sign.
		long open4 = ( ( open4eccess | MSBS_STEP_4 ) - closed4eccess ) ^ MSBS_STEP_4;
		if ( DDEBUG ) System.err.println( "Diff (open4):" + binary( open4, false ) );

		// Mask keeping only the blocks in which the difference above is not negative.
		final long open4mask = ( ( ( ( open4 & MSBS_STEP_4 ) >>> 3 ) | MSBS_STEP_4 ) - ONES_STEP_4 ) ^ MSBS_STEP_4;
		
		if ( DDEBUG ) System.err.println( "Mask (open4)  : " + binary( open4mask, false ) );

		//open4eccess = ( open & ( 0xC * ONES_STEP_4  ) ) >>> 2;
		//closed4eccess = closed & ( 0x3 * ONES_STEP_4 );

		long closed4 = ( ( closed4eccess | MSBS_STEP_4 ) - open4eccess ) ^ MSBS_STEP_4;
		if ( DDEBUG ) System.err.println( "Diff (closed4):" + binary( closed4, false ) );
		final long closed4mask = ( ( ( ( closed4 & MSBS_STEP_4 ) >>> 3 ) | MSBS_STEP_4 ) - ONES_STEP_4 ) ^ MSBS_STEP_4;
		if ( DDEBUG ) System.err.println( "Mask (closed4): " + binary( closed4mask, false ) );

		// Far counts per 4-bit block: the count of one half plus the (non-negative) surplus of the other half.
		open4 = ( ( open2 & ( 0xC * ONES_STEP_4 ) ) >>> 2 ) + ( open4mask & open4 );
		closed4 = ( closed2 & ( 0x3 * ONES_STEP_4 ) ) + ( closed4mask & closed4 );
		
		if ( DDEBUG ) System.err.println( "Open 4:  " + binary( open4, false ) );
		if ( DDEBUG ) System.err.println( "Closed 4:" + binary( closed4, false ) );

		// Same merge step, from 4-bit blocks to bytes.
		final long open8eccess = ( open4 & ( 0xF * ONES_STEP_8 ) );
		final long closed8eccess = ( closed4 & ( 0xF0 * ONES_STEP_8 ) ) >>> 4;
		
		long open8  = ( ( open8eccess | MSBS_STEP_8 ) - closed8eccess ) ^ MSBS_STEP_8;
		final long open8mask =  ( ( ( ( open8 & MSBS_STEP_8 ) >>> 7 ) | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ MSBS_STEP_8;

		long closed8 = ( ( closed8eccess | MSBS_STEP_8 ) - open8eccess ) ^ MSBS_STEP_8;
		final long closed8mask = ( ( ( ( closed8 & MSBS_STEP_8 ) >>> 7 ) | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ MSBS_STEP_8;
		
		open8 = ( ( open4 & ( 0xF0 * ONES_STEP_8 ) ) >>> 4 ) + ( open8mask & open8 );
		closed8 = ( closed4 & ( 0xF * ONES_STEP_8 ) ) + ( closed8mask & closed8 );

		if ( DDEBUG ) System.err.println( "Open 8:  " + binary( open8, false ) );
		if ( DDEBUG ) System.err.println( "Closed 8:" + binary( closed8, false ) );

		// Same merge step, from bytes to 16-bit blocks.
		final long open16eccess = ( open8 & ( 0xFF * ONES_STEP_16 ) );
		final long closed16eccess = ( closed8 & ( 0xFF00 * ONES_STEP_16 ) ) >>> 8;
		
		long open16  = ( ( open16eccess | MSBS_STEP_16 ) - closed16eccess ) ^ MSBS_STEP_16;
		final long open16mask =  ( ( ( ( open16 & MSBS_STEP_16 ) >>> 15 ) | MSBS_STEP_16 ) - ONES_STEP_16 ) ^ MSBS_STEP_16;

		long closed16 = ( ( closed16eccess | MSBS_STEP_16 ) - open16eccess ) ^ MSBS_STEP_16;
		final long closed16mask = ( ( ( ( closed16 & MSBS_STEP_16 ) >>> 15 ) | MSBS_STEP_16 ) - ONES_STEP_16 ) ^ MSBS_STEP_16;
		
		open16 = ( ( open8 & ( 0xFF00 * ONES_STEP_16 ) ) >>> 8 ) + ( open16mask & open16 );
		closed16 = ( closed8 & ( 0xFF * ONES_STEP_16 ) ) + ( closed16mask & closed16 );

		if ( DDEBUG ) System.err.println( "Open 16:  " + binary( open16, false ) );
		if ( DDEBUG ) System.err.println( "Closed 16:" + binary( closed16, false ) );

		// Same merge step, from 16-bit blocks to 32-bit blocks.
		final long open32eccess = ( open16 & 0xFFFF * ONES_STEP_32 );
		final long closed32eccess = ( closed16 & ( 0xFFFF0000L * ONES_STEP_32 ) ) >>> 16;
		
		long open32  = ( ( open32eccess | MSBS_STEP_32 ) - closed32eccess ) ^ MSBS_STEP_32;
		final long open32mask =  ( ( ( ( open32 & MSBS_STEP_32 ) >>> 31 ) | MSBS_STEP_32 ) - ONES_STEP_32 ) ^ MSBS_STEP_32;

		long closed32 = ( ( closed32eccess | MSBS_STEP_32 ) - open32eccess ) ^ MSBS_STEP_32;
		final long closed32mask = ( ( ( ( closed32 & MSBS_STEP_32 ) >>> 31 ) | MSBS_STEP_32 ) - ONES_STEP_32 ) ^ MSBS_STEP_32;
		
		open32 = ( ( open16 & ( 0xFFFF0000L * ONES_STEP_32 ) ) >>> 16 )+ ( open32mask & open32 );
		closed32 = ( closed16 & ( 0xFFFF * ONES_STEP_32 ) ) + ( closed32mask & closed32 );
		
		if ( DDEBUG ) System.err.println( "Open 32:  " + binary( open32, false ) );
		if ( DDEBUG ) System.err.println( "Closed 32:" + binary( closed32, false ) );
		
		// Descent. Each checkN is -1 (all ones) if k is not smaller than the far closed count of the
		// lower N-bit block at the current shift, and 0 otherwise. In the first case the block is
		// skipped: k is decreased by its far closed count, increased by its far open count, and
		// shift is advanced by N.
		final long check32 = ( ( k - ( closed32 & 0xFFFFFFFFL ) ) >>> Long.SIZE - 1 ) - 1;
		long mask = check32 & 0xFFFFFFFFL;
		k -= closed32 & mask;
		k += open32 & mask;
		int shift = (int)( 32 & check32 );
		
		final long check16 = ( ( k - ( closed16 >>> shift & 0xFFFF ) ) >>> Long.SIZE - 1 ) - 1;
		mask = check16 & 0xFFFF;
		k -= closed16 >>> shift & mask;
		k += open16 >>> shift & mask;
		shift += 16 & check16;
		
		final long check8 = ( ( k - ( closed8 >>> shift & 0xFF ) ) >>> Long.SIZE - 1 ) - 1;
		mask = check8 & 0xFF;
		k -= closed8 >>> shift & mask;
		k += open8 >>> shift & mask;
		shift += 8 & check8;
		
		final long check4 = ( ( k - ( closed4 >>> shift & 0xF ) ) >>> Long.SIZE - 1 ) - 1;
		mask = check4 & 0xF;
		k -= closed4 >>> shift & mask;
		k += open4 >>> shift & mask;
		shift += 4 & check4;

		final long check2 = ( ( k - ( closed2 >>> shift & 0x3 ) ) >>> Long.SIZE - 1 ) - 1;
		mask = check2 & 0x3;
		k -= closed2 >>> shift & mask;
		k += open2 >>> shift & mask;
		shift += 2 & check2;

		// Resolve the position inside the final 2-bit block from the residual k and the bits at shift.
		return (int)( shift + k + ( ( word >>> shift & ( ( k << 1 ) | 1 ) ) << 1 ) );
	
	}
	
	/** Finds by linear scan the closed parenthesis matching the open parenthesis at bit 0 of a word.
	 * 
	 * <p>Bit 0 is not examined: it is assumed to be an open parenthesis. Starting from bit 1,
	 * set bits increase the nesting depth and clear bits decrease it.
	 * 
	 * @param word a word whose bit 0 represents an open parenthesis.
	 * @return the position of the first bit at which the depth goes back to zero, or 64 if there is none.
	 */
	public final static int findNearClose2( final long word ) {
		int depth = 1;
		int bit = 1;
		while( bit < 64 ) {
			depth += ( word & 1L << bit ) != 0 ? 1 : -1;
			if ( depth == 0 ) return bit;
			bit++;
		}
		return 64;
	}


	/** A word whose bytes contain, starting from the lowest one, the values 8, 16, &hellip;, 64 (the number of bits up to the end of each byte). */
	private final static long L = 0x4038302820181008L;

	/** Finds, using broadword operations, the closed parenthesis matching the open parenthesis at bit 0 of a word.
	 * 
	 * <p>The method computes the excess of closed parentheses at the end of each byte, and then
	 * walks backwards inside all bytes in parallel, one bit at a time, recording in
	 * {@code zeroes} where the excess is zero.
	 * 
	 * <p>In {@code findClose()} the argument is a word shifted so that the open parenthesis
	 * is at bit 0, and a result not smaller than the number of bits left in the word is treated as
	 * &ldquo;no match in this word&rdquo;.
	 * 
	 * @param word a word whose bit 0 represents an open parenthesis.
	 * @return the offset computed for the matching closed parenthesis.
	 */
	public final static int findNearClose( final long word ) {
		// Sideways addition: number of set bits, first by 2-bit block, then by 4-bit block.
		long byteSums = word - ( ( word & 0xa * ONES_STEP_4 ) >>> 1 );
		long zeroes, update;
		byteSums = ( byteSums & 3 * ONES_STEP_4 ) + ( ( byteSums >>> 2 ) & 3 * ONES_STEP_4 );
		//System.err.print( "**** " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
		byteSums = ( ( byteSums + ( byteSums >>> 4 ) ) & 0x0f * ONES_STEP_8 ) * ( ONES_STEP_8 << 1 ); // Twice the number of open parentheses (cumulative by byte)

		//System.err.print( "**** " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
        // TODO: this can be simplified
		byteSums = ( ( L | MSBS_STEP_8 ) - byteSums ) ^ ( ( L ^ ~byteSums ) & MSBS_STEP_8 ); // Closed excess per byte
		//System.err.print( "Closed excess: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();

		// Set up flags for excess values that are already zero
		// After the two statements below, update contains 0xFF in each byte whose excess is zero, and 0 elsewhere.
		update = ( ~( byteSums | ( ( byteSums | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >>> 7;
		update = ( ( update | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ ( ( update ^ ONES_STEP_8 ) | ~MSBS_STEP_8 );
		//System.err.print( "Updates: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(update >>> i * 8 & 0xFF) + " " ); System.err.println();
		// Bytes with zero excess get a marker (highest bit) and the in-byte position 7.
		zeroes = ( MSBS_STEP_8 | ONES_STEP_8 * 7 ) & update;
		//System.err.print( "Zeroes: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(zeroes >>> i * 8 & 0xFF) + " " ); System.err.println();
		
		// Each of the following stanzas undoes the contribution of one more bit of every byte
		// (bit 7 first, then 6, ..., down to 1): the per-byte excess is increased by one if the bit
		// is set and decreased by one if it is clear. Bytes whose excess becomes zero have their
		// byte in zeroes overwritten with the marker and a smaller in-byte position.
		byteSums += ( word >>> 7 & ONES_STEP_8 );
		byteSums = ( ( byteSums | MSBS_STEP_8 ) - ( ~( word >>> 7 ) & ONES_STEP_8 ) ) ^ ( ( byteSums ^ MSBS_STEP_8 ) & MSBS_STEP_8 );
		//System.err.print( "Sums: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
		update = ( ~( byteSums | ( ( byteSums | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >>> 7;
		update = ( ( update | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ ( ( update ^ ONES_STEP_8 ) | ~MSBS_STEP_8 );
		//System.err.print( "Updates: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(update >>> i * 8 & 0xFF) + " " ); System.err.println();
		zeroes = zeroes & ~update | ( MSBS_STEP_8 | ONES_STEP_8 * 6 ) & update;
		//System.err.print( "Zeroes: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(zeroes >>> i * 8 & 0xFF) + " " ); System.err.println();
		
		byteSums += ( word >>> 6 & ONES_STEP_8 );
		byteSums = ( ( byteSums | MSBS_STEP_8 ) - ( ~( word >>> 6 ) & ONES_STEP_8 ) ) ^ ( ( byteSums ^ MSBS_STEP_8 ) & MSBS_STEP_8 ); 
		//System.err.print( "Sums: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
		update = ( ~( byteSums | ( ( byteSums | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >>> 7;
		update = ( ( update | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ ( ( update ^ ONES_STEP_8 ) | ~MSBS_STEP_8 );
		//System.err.print( "Updates: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(update >>> i * 8 & 0xFF) + " " ); System.err.println();
		zeroes = zeroes & ~update | ( MSBS_STEP_8 | ONES_STEP_8 * 5 ) & update;
		//System.err.print( "Zeroes: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(zeroes >>> i * 8 & 0xFF) + " " ); System.err.println();
		
		byteSums += ( word >>> 5 & ONES_STEP_8 );
		byteSums = ( ( byteSums | MSBS_STEP_8 ) - ( ~( word >>> 5 ) & ONES_STEP_8 ) ) ^ ( ( byteSums ^ MSBS_STEP_8 ) & MSBS_STEP_8 ); 
		update = ( ~( byteSums | ( ( byteSums | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >>> 7;
		update = ( ( update | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ ( ( update ^ ONES_STEP_8 ) | ~MSBS_STEP_8 );
		zeroes = zeroes & ~update | ( MSBS_STEP_8 | ONES_STEP_8 * 4 ) & update;
		
		byteSums += ( word >>> 4 & ONES_STEP_8 );
		byteSums = ( ( byteSums | MSBS_STEP_8 ) - ( ~( word >>> 4 ) & ONES_STEP_8 ) ) ^ ( ( byteSums ^ MSBS_STEP_8 ) & MSBS_STEP_8 ); 
		update = ( ~( byteSums | ( ( byteSums | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >>> 7;
		update = ( ( update | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ ( ( update ^ ONES_STEP_8 ) | ~MSBS_STEP_8 );
		zeroes = zeroes & ~update | ( MSBS_STEP_8 | ONES_STEP_8 * 3 ) & update;
		
		byteSums += ( word >>> 3 & ONES_STEP_8 );
		byteSums = ( ( byteSums | MSBS_STEP_8 ) - ( ~( word >>> 3 ) & ONES_STEP_8 ) ) ^ ( ( byteSums ^ MSBS_STEP_8 ) & MSBS_STEP_8 ); 
		update = ( ~( byteSums | ( ( byteSums | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >>> 7;
		update = ( ( update | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ ( ( update ^ ONES_STEP_8 ) | ~MSBS_STEP_8 );
		zeroes = zeroes & ~update | ( MSBS_STEP_8 | ONES_STEP_8 * 2 ) & update;
		
		byteSums += ( word >>> 2 & ONES_STEP_8 );
		byteSums = ( ( byteSums | MSBS_STEP_8 ) - ( ~( word >>> 2 ) & ONES_STEP_8 ) ) ^ ( ( byteSums ^ MSBS_STEP_8 ) & MSBS_STEP_8 ); 
		update = ( ~( byteSums | ( ( byteSums | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >>> 7;
		update = ( ( update | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ ( ( update ^ ONES_STEP_8 ) | ~MSBS_STEP_8 );
		zeroes = zeroes & ~update | ( MSBS_STEP_8 | ONES_STEP_8 * 1 ) & update;
		
		byteSums += ( word >>> 1 & ONES_STEP_8 );
		byteSums = ( ( byteSums | MSBS_STEP_8 ) - ( ~( word >>> 1 ) & ONES_STEP_8 ) ) ^ ( ( byteSums ^ MSBS_STEP_8 ) & MSBS_STEP_8 ); 
		update = ( ~( byteSums | ( ( byteSums | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >>> 7;
		update = ( ( update | MSBS_STEP_8 ) - ONES_STEP_8 ) ^ ( ( update ^ ONES_STEP_8 ) | ~MSBS_STEP_8 );
		zeroes = zeroes & ~update | MSBS_STEP_8 & update;
		
		
		//for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
		//for( int i = 0; i < 8; i++ ) System.err.print( (byte)(zeroes >>> i * 8 & 0xFF) + " " ); System.err.println();
		
		// TODO: check that in this case MSB(x&-x) isn't better.
		// Bit offset of the lowest byte carrying a marker (64 if no byte carries one).
		final int block = Long.numberOfTrailingZeros( zeroes >>> 7 & ONES_STEP_8 );
		// A simple trick to return 127 if block < 0 (i.e., no match)
		// NOTE(review): the comment above looks stale. Long.numberOfTrailingZeros() never returns a
		// negative value (it returns 64 for zero), so block >> 8 is always 0 here; when no byte
		// is marked, the shift by 64 is a shift by 0 and the result appears to be 64 rather
		// than 127. The caller only checks that the result is too large, so this is harmless.
		return ( (int)( block + ( zeroes >>> block & 0x7F ) ) | ( block >> 8 ) ) & 0x7F;

		/*		//assert block != -1;
		//block = block == -1 ? 0 : block / 8;
		assert block >= 7;
		excess = excess >>> block - 7 & 0xFF;
		System.out.println( "LSB: " + Fast.leastSignificantBit( zeroes & MSBS_STEP_8 ) + " Block: " + block ); 
		System.out.println("Excess: " + excess );
		
		if ( excess != 0 ) {
			for( int i = 0; i < 8; i++ ) {
				if ( ( origWord & 1L << block - 7 + i ) != 0 ) excess++;
				else excess--;
				if ( excess == 0 ) {
					assert ( zeroes >>> block -7 & 0x7F ) - 1 == i : (( zeroes >>> block -7 & 0x7F ) - 1 )+ " != " + i;

					return block -7 + i;
				}
			}
		}
		else {
			//assert ( zeroes >>> block -7 & 0x7F ) - 1 == 1 : ( ( zeroes >>> block -7 & 0x7F ) - 1 ) + " != " + 1;
			return (int)( block - 7 + ( zeroes >>> block -7 & 0x7F ) - 1 );
		}
		
        return -1;*/
        
	}
	
	/** A word whose bytes contain, starting from the lowest one, the values 2, 10, 18, &hellip;, 58 (the bytes of 0, 8, &hellip;, 56 increased by 2). */
	private final static long L_ALT = 0x3830282018100800L + 0x0202020202020202L;

	/** An alternative broadword search for the closed parenthesis matching the open parenthesis at bit 0 of a word.
	 * 
	 * <p>Differently from {@code findNearClose()}, the per-byte counts are computed on
	 * {@code word << 6}, and the word is then consumed two bits at a time
	 * ({@code word >>= 2}, three times).
	 * 
	 * <p>NOTE(review): this method is not invoked anywhere in the visible part of this class, and
	 * its exact contract (in particular, the range of positions it can report and the value
	 * returned when there is no match) has not been verified here &mdash; confirm against
	 * {@code findNearClose2()} before using it.
	 * 
	 * @param word a word whose bit 0 is presumably an open parenthesis (as in {@code findNearClose()}).
	 * @return the offset computed for the matching closed parenthesis.
	 */
	public final static int findNearCloseAlt( long word ) {
		// Sideways addition on the word shifted left by six: set bits by 2-bit block, then by 4-bit block.
		long byteSums = ( word << 6 ) - ( ( ( word << 6 ) & 0xa * ONES_STEP_4 ) >>> 1 );
		long zeroes, update;
		byteSums = ( byteSums & 3 * ONES_STEP_4 ) + ( ( byteSums >>> 2 ) & 3 * ONES_STEP_4 );
		//System.err.print( "**** " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
		byteSums = ( ( byteSums + ( byteSums >>> 4 ) ) & 0x0f * ONES_STEP_8 ) * ( ONES_STEP_8 << 1 ); // Twice the number of open parentheses (cumulative by byte)

		//System.err.print( "**** " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
		byteSums = ( ( MSBS_STEP_8 | byteSums ) - L_ALT ) ^ MSBS_STEP_8; // Closed excess per byte
		//System.err.print( "Closed excess: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();

		// Set up flags for excess values that are already zero
		update = ( ~( ( ( byteSums - ONES_STEP_8 ) & MSBS_STEP_8 ) >>> 7 ) & ONES_STEP_8 ) - ONES_STEP_8;
		//System.err.print( "Updates: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(update >>> i * 8 & 0xFF) + " " ); System.err.println();
		// Flagged bytes get a marker (highest bit) and the value 1.
		zeroes = ( MSBS_STEP_8 | ONES_STEP_8 * 1 ) & update;
		//System.err.print( "Zeroes: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(zeroes >>> i * 8 & 0xFF) + " " ); System.err.println();
		
		// Each of the three stanzas below consumes two more bits of every byte, adjusts the per-byte
		// sums accordingly and ORs into zeroes a marker with the values 3, 5 and 7, respectively,
		// for the newly flagged bytes.
		word >>= 2;
		byteSums -= ONES_STEP_8 * 2 - ( ( word & ( ONES_STEP_8 << 1 ) ) + ( word << 1 & ( ONES_STEP_8 << 1 ) ) );
		//System.err.print( "Sums: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
		update = ( ~( ( ( ( byteSums | zeroes ) - ONES_STEP_8 ) ^ byteSums  ) >>> 7 ) & ONES_STEP_8 ) - ONES_STEP_8;
		//System.err.print( "Updates: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(update >>> i * 8 & 0xFF) + " " ); System.err.println();
		zeroes |= ( MSBS_STEP_8 | ONES_STEP_8 * 3 ) & update;
		//System.err.print( "Zeroes: " ); for( int i = 0; i < 8; i++ ) System.err.print( (byte)(zeroes >>> i * 8 & 0xFF) + " " ); System.err.println();

		word >>= 2;
		byteSums -= ONES_STEP_8 * 2 - ( ( word & ( ONES_STEP_8 << 1 ) ) + ( word << 1 & ( ONES_STEP_8 << 1 ) ) );
		update = ( ~( ( ( ( byteSums | zeroes )- ONES_STEP_8 )  ^ byteSums ) >>> 7 ) & ONES_STEP_8 ) - ONES_STEP_8;
		zeroes |= ( MSBS_STEP_8 | ONES_STEP_8 * 5 ) & update;
		
		word >>= 2;
		byteSums -= ONES_STEP_8 * 2 - ( ( word & ( ONES_STEP_8 << 1 ) ) + ( word << 1 & ( ONES_STEP_8 << 1 ) ) );
		update = ( ~( ( ( ( byteSums | zeroes )- ONES_STEP_8 )  ^ byteSums ) >>> 7 ) & ONES_STEP_8 ) - ONES_STEP_8;
		zeroes |= ( MSBS_STEP_8 | ONES_STEP_8 * 7 ) & update;
		
		//for( int i = 0; i < 8; i++ ) System.err.print( (byte)(byteSums >>> i * 8 & 0xFF) + " " ); System.err.println();
		//for( int i = 0; i < 8; i++ ) System.err.print( (byte)(zeroes >>> i * 8 & 0xFF) + " " ); System.err.println();
		
		// TODO: check that in this case MSB(x&-x) isn't better.
		// Bit offset of the lowest byte carrying a marker (64 if no byte carries one).
		final int block = Long.numberOfTrailingZeros( zeroes >>> 7 & ONES_STEP_8 );
		// A simple trick to return 127 if block < 0 (i.e., no match)
		// NOTE(review): Long.numberOfTrailingZeros() never returns a negative value, so
		// block >> 8 is always 0 here and the comment above looks stale.
		return ( (int)( block + ( zeroes >>> block & 0x3F ) ) | ( block >> 8 ) ) & 0x7F;

		/*		//assert block != -1;
		//block = block == -1 ? 0 : block / 8;
		assert block >= 7;
		excess = excess >>> block - 7 & 0xFF;
		System.out.println( "LSB: " + Fast.leastSignificantBit( zeroes & MSBS_STEP_8 ) + " Block: " + block ); 
		System.out.println("Excess: " + excess );
		
		if ( excess != 0 ) {
			for( int i = 0; i < 8; i++ ) {
				if ( ( origWord & 1L << block - 7 + i ) != 0 ) excess++;
				else excess--;
				if ( excess == 0 ) {
					assert ( zeroes >>> block -7 & 0x7F ) - 1 == i : (( zeroes >>> block -7 & 0x7F ) - 1 )+ " != " + i;

					return block -7 + i;
				}
			}
		}
		else {
			//assert ( zeroes >>> block -7 & 0x7F ) - 1 == 1 : ( ( zeroes >>> block -7 & 0x7F ) - 1 ) + " != " + 1;
			return (int)( block - 7 + ( zeroes >>> block -7 & 0x7F ) - 1 );
		}
		
        return -1;*/
        
	}

	/** Creates a new instance for the given bit vector, delegating to the four-argument
	 * constructor with all three flags ({@code findOpen}, {@code findClose}, {@code enclose}) set to true.
	 * 
	 * @param bv a bit vector representing a sequence of parentheses (set bits are open parentheses).
	 */
	public JacobsonBalancedParentheses( final BitVector bv ) {
		this( bv, true, true, true );
	}
	
	/** Creates a new instance for the parentheses stored in an array of longs; the array is
	 * wrapped using {@code LongArrayBitVector.wrap()}.
	 * 
	 * @param bits an array of longs containing the bits (set bits are open parentheses).
	 * @param length the number of valid bits in {@code bits}.
	 */
	public JacobsonBalancedParentheses( final long[] bits, final long length ) {
		this( LongArrayBitVector.wrap(  bits, length ) );
	}

	/** Creates a new instance, building only the structures required by the requested operations.
	 * 
	 * <p>Set bits represent open parentheses, clear bits closed parentheses. The bit vector is
	 * processed one word at a time: for each word, the far parentheses (those whose match lies in
	 * another word) are located, and the <em>pioneers</em> among them are recorded together with
	 * the distance from their match.
	 * 
	 * @param bitVector a bit vector representing a balanced sequence of parentheses.
	 * @param findOpen whether to build the closing-pioneer structures.
	 * @param findClose whether to build the opening-pioneer structures used by {@link #findClose(long)}.
	 * @param enclose only used when checking that at least one operation has been requested; no structure is built for it.
	 * @throws IllegalArgumentException if all flags are false, or if the parentheses are found to be unbalanced.
	 */
	public JacobsonBalancedParentheses( final BitVector bitVector, final boolean findOpen, final boolean findClose, final boolean enclose ) {
		if ( ! findOpen && ! findClose && ! enclose ) throw new IllegalArgumentException( "You must specify at least one implemented method" );
		this.bitVector = bitVector;
		this.bits = bitVector.bits();
		final long length = bitVector.length();
		final int numWords = (int)( ( length + Long.SIZE - 1 ) / Long.SIZE );
		
		// count[ i ] is the number of far parentheses of word i that are still unmatched;
		// residual[ i ] is the number of far parentheses of word i that have already been matched.
		final byte count[] = new byte[ numWords ];
		final byte residual[] = new byte[ numWords ];

		if ( DEBUG ) System.err.println( "Expression: " + bitVector );
		
		LongArrayList closingPioneers = null, closingPioneerMatches = null, openingPioneers = null, openingPioneerMatches = null;

		if ( findOpen ) {
			closingPioneers = new LongArrayList();
			closingPioneerMatches = new LongArrayList();
			// Left-to-right pass: far closed parentheses are matched against the far open parentheses of previous words.
			for( int block = 0; block < numWords; block++ ) {
				if ( DEBUG ) System.err.println( "Scanning word " + block + " (" + LongArrayBitVector.wrap( new long[] { bits[ block ] } ) + ")" );
				final int l = (int)Math.min( Long.SIZE, length - block * (long)Long.SIZE );

				if ( block > 0 ) {
					int excess = 0;
					int countFarClosing = countFarClose( bits[ block ], l );

					for( int j = 0; j < l; j++ ) {
						if ( ( bits[ block ] & 1L << j ) != 0 ) {
							if ( excess > 0 ) excess = -1;
							else --excess;
						}
						else {
							if ( ++excess > 0 ) {
								// Find block containing matching far open parenthesis
								int matchingBlock = block;
								do {
									// No previous word has an unmatched far open parenthesis left.
									if ( --matchingBlock < 0 ) throw new IllegalArgumentException( "Unbalanced parentheses" );
								} while( count[ matchingBlock ] == 0 );
								countFarClosing--;
								if ( --count[ matchingBlock ] == 0 || countFarClosing == 0 ) {
									// This is a closing pioneer
									// Positions must be computed using longs: block * Long.SIZE overflows an int for vectors of 2^31 bits or more.
									final long position = block * (long)Long.SIZE + j;
									if ( DEBUG ) System.err.println( "+) " + position + " " + Arrays.toString(  count ) );
									closingPioneers.add( position );
									closingPioneerMatches.add( position - ( matchingBlock * (long)Long.SIZE + findFarOpen( bits[ matchingBlock ], Long.SIZE, residual[ matchingBlock ] ) ) );
								}
								residual[ matchingBlock ]++;
							}
						}
					}
				}
				count[ block ] = (byte)countFarOpen( bits[ block ], l );
				if ( DEBUG ) System.err.println( "Stack updated: " + Arrays.toString(  count ) );
			}

			for( int i = count.length; i-- != 0; ) if ( count[ i ] != 0 ) throw new IllegalArgumentException( "Unbalanced parentheses" );
			if ( DEBUG ) System.err.println( "):" + closingPioneers );
			if ( DEBUG ) System.err.println( "):" + closingPioneerMatches );
		}

		if ( findClose ) {
			Arrays.fill( residual, (byte)0 );

			openingPioneers = new LongArrayList();
			openingPioneerMatches = new LongArrayList();

			// Right-to-left pass: far open parentheses are matched against the far closed parentheses of following words.
			for( int block = numWords; block-- != 0; ) {
				if ( DEBUG ) System.err.println( "Scanning word " + block + " (" + LongArrayBitVector.wrap( new long[] { bits[ block ] } ) + ")" );
				final int l = (int)Math.min( Long.SIZE, length - block * (long)Long.SIZE );

				if ( block != numWords -1 ) {
					int excess = 0;
					int countFarOpening = countFarOpen( bits[ block ], l );
					boolean somethingAdded = false;

					for( int j = l; j-- != 0; ) {
						if ( ( bits[ block ] & 1L << j ) == 0 ) {
							if ( excess > 0 ) excess = -1;
							else --excess;
						}
						else {
							if ( ++excess > 0 ) {
								// Find block containing matching far close parenthesis
								int matchingBlock = block;
								do {
									// No following word has an unmatched far closed parenthesis left.
									if ( ++matchingBlock >= numWords ) throw new IllegalArgumentException( "Unbalanced parentheses" );
								} while( count[ matchingBlock ] == 0 );
								countFarOpening--;
								if ( --count[ matchingBlock ] == 0 || countFarOpening == 0 ) {
									// This is an opening pioneer
									final long position = block * (long)Long.SIZE + j;
									if ( DEBUG ) System.err.println( "+( " + position + " " + Arrays.toString(  count ) );
									openingPioneers.add( position );
									openingPioneerMatches.add( - position + ( matchingBlock * (long)Long.SIZE + findFarClose( bits[ matchingBlock ], residual[ matchingBlock ]) ) );
									if ( ASSERTS ) somethingAdded = true;
								}
								residual[ matchingBlock ]++;
							}
						}					
					}				
					if ( ASSERTS ) assert somethingAdded || countFarOpen( bits[ block ], l ) == 0 : "No pioneers for block " + block + " " + LongArrayBitVector.wrap(  new long[] { bits[ block ] }, l ) + " (" + l + ") " + countFarOpen( bits[ block ], l );
				}
				count[ block ] = (byte)countFarClose( bits[ block ], l );
				if ( DEBUG ) System.err.println( "Stack updated: " + Arrays.toString(  count ) );
			}

			for( int i = count.length; i-- != 0; ) if ( count[ i ] != 0 ) throw new IllegalArgumentException( "Unbalanced parentheses" );
			if ( DEBUG ) System.err.println( "(:" + openingPioneers );
			if ( DEBUG ) System.err.println( "(:" + openingPioneerMatches );

			// Pioneers were collected right to left: put them in increasing order.
			Collections.reverse( openingPioneers );
			Collections.reverse( openingPioneerMatches );
		}

		this.closingPioneers = closingPioneers != null ? new SparseSelect( closingPioneers ) : null;
		this.closingPioneersRank = closingPioneers != null ? this.closingPioneers.getRank() : null;
		this.closingPioneerMatches = closingPioneers != null ? new EliasFanoLongBigList( closingPioneerMatches ) : null;
		this.openingPioneers = openingPioneers != null ? new SparseSelect( openingPioneers ) : null;
		this.openingPioneersRank = openingPioneers != null ? this.openingPioneers.getRank() : null;
		this.openingPioneerMatches = openingPioneers != null ? new EliasFanoLongBigList( openingPioneerMatches ) : null;
	}
	
	/** Not implemented.
	 * 
	 * @param pos ignored.
	 * @return never returns normally.
	 * @throws UnsupportedOperationException always.
	 */
	public long enclose( long pos ) {
		throw new UnsupportedOperationException();
	}

	/** Returns the position of the closed parenthesis matching the open parenthesis at a given position.
	 * 
	 * <p>The match is first searched inside the word containing {@code pos}. If it is not
	 * there, the last opening pioneer at or before {@code pos} is located: its match
	 * identifies the word containing the result, which is then found by counting far closed parentheses.
	 * 
	 * <p>Note that the pioneer structures used in the second case are {@code null} if this
	 * instance was built with {@code findClose} set to false.
	 * 
	 * @param pos the position of an open parenthesis.
	 * @return the position of the matching closed parenthesis.
	 * @throws IllegalArgumentException if the bit at position {@code pos} is not set.
	 */
	public long findClose( final long pos ) {
		if ( DEBUG ) System.err.println( "findClose(" + pos + ")..." );
		final int wordIndex = (int)( pos / Long.SIZE );
		final int bitIndex = (int)( pos & LongArrayBitVector.WORD_MASK );
		final long currentWord = bits[ wordIndex ];
		if ( ( currentWord & 1L << bitIndex ) == 0 ) throw new IllegalArgumentException();

		// Offset (from bitIndex) of the match, if it lies in the same word.
		final int nearOffset = findNearClose( currentWord >>> bitIndex );

		if ( ASSERTS ) {
			// Double check the broadword result with a linear scan.
			int depth = 1;
			int scan = bitIndex;
			while( ++scan < Long.SIZE ) {
				depth += ( currentWord & 1L << scan ) == 0 ? -1 : 1;
				if ( depth == 0 ) break;
			}

			assert ( depth != 0 ) == ( nearOffset < 0 || nearOffset >= Long.SIZE - bitIndex ) : "c: " + depth + " bit: " + (scan - bitIndex) + " result:" + nearOffset + " " + LongArrayBitVector.wrap(  new long[] { currentWord >>> bitIndex }, Long.SIZE - bitIndex ) + " (" +(Long.SIZE -bitIndex)+ ")";
			assert ( depth != 0 ) || ( scan == bitIndex + nearOffset ) : scan + " != " + ( bitIndex + nearOffset ) + " (bit:" + bitIndex + ")" + LongArrayBitVector.wrap(  new long[] { currentWord >>> bitIndex } );
		}

		if ( nearOffset < Long.SIZE - bitIndex ) {
			final long nearMatch = wordIndex * (long)Long.SIZE + bitIndex + nearOffset;
			if ( DEBUG ) System.err.println( "Returning in-word value: " + nearMatch );
			return nearMatch;
		}

		// The match is in another word: locate the last opening pioneer not after pos.
		final long pioneerIndex = openingPioneersRank.rank( pos + 1 ) - 1;
		final long pioneer = openingPioneers.select( pioneerIndex );
		final long match = pioneer + openingPioneerMatches.getLong( pioneerIndex );

		if ( pos == pioneer ) {
			if ( DEBUG ) System.err.println( "Returning exact pioneer match: " + match );
			return match;
		}
		
		if ( DEBUG ) System.err.println( "pioneer: " + pioneer + "; match: " + match );
		final int dist = (int)( pos - pioneer );
		
		if ( ASSERTS ) {
			assert wordIndex == pioneer / Long.SIZE : "pos: " + pos + " word:" + wordIndex + " pioneer: " + pioneer + " word:" + pioneer / Long.SIZE  + " result:" + nearOffset;
			assert wordIndex != match / Long.SIZE;
			assert pioneer < pos;
		}
		
		// Excess of open parentheses in the dist bits starting at the pioneer (the shift distance is taken modulo 64).
		final long belowPos = ( 1L << dist ) - 1;
		final int opens = Long.bitCount( ( currentWord >>> pioneer ) & belowPos );
		final int e = 2 * opens - dist; 
		if ( ASSERTS ) {
			assert e >= 1;
			int ee = 0;
			for( long p = pioneer; p < pos; p++ ) ee += ( bits[ (int)( p / Long.SIZE ) ] & 1L << p ) != 0 ? 1 : -1;
			assert ee == e: ee + " != " + e;
		}
		if ( DEBUG ) System.err.println( "eccess: " + e );
		
		final int matchWord = (int)( match / Long.SIZE );
		final int matchBit = (int)( match % Long.SIZE );
		
		// Closed minus open parentheses in the bits of the match word that precede the pioneer's match.
		final long belowMatch = ( 1L << matchBit ) - 1;
		final int numFarClose = matchBit - 2 * Long.bitCount( bits[ matchWord ] & belowMatch );

		if ( DEBUG ) System.err.println( "far close before match: " + numFarClose );
		return matchWord * (long)Long.SIZE + findFarClose( bits[ matchWord ], numFarClose - e );
	}

	/** Not implemented.
	 * 
	 * @param pos ignored.
	 * @return never returns normally.
	 * @throws UnsupportedOperationException always.
	 */
	public long findOpen( long pos ) {
		throw new UnsupportedOperationException();
	}

	/** Returns the overall number of bits reported by the pioneer structures of this instance.
	 * 
	 * <p>Structures that have not been built (i.e., that are {@code null}) contribute zero.
	 * 
	 * @return the sum of {@code numBits()} over the existing select, rank and match structures.
	 */
	public long numBits() {
		long total = 0;
		if ( openingPioneers != null ) {
			total += openingPioneers.numBits() + openingPioneersRank.numBits() + openingPioneerMatches.numBits();
		}
		if ( closingPioneers != null ) {
			total += closingPioneers.numBits() + closingPioneersRank.numBits() + closingPioneerMatches.numBits();
		}
		return total;
	}

	/** Deserialization hook: reads the non-transient fields and then fetches again the
	 * transient {@code bits} array from the deserialized bit vector.
	 * 
	 * @param s the stream this object is being read from.
	 * @throws IOException if thrown by {@code defaultReadObject()}.
	 * @throws ClassNotFoundException if thrown by {@code defaultReadObject()}.
	 */
	private void readObject( final ObjectInputStream s ) throws IOException, ClassNotFoundException {
		s.defaultReadObject();
		bits = bitVector.bits();
	}

	/** Returns the bit vector underlying this structure.
	 * 
	 * @return the bit vector passed at construction time (not a copy).
	 */
	public BitVector bitVector() {
		return bitVector;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy