All downloads are free. The search and download functionality uses the official Maven repository.

com.groupbyinc.common.jregex.Bitset Maven / Gradle / Ivy

There is a newer version: 198
Show newest version
/**
 * Copyright (c) 2001, Sergey A. Samokhodkin
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without modification, 
 * are permitted provided that the following conditions are met:
 * 
 * - Redistributions of source code must retain the above copyright notice, 
 * this list of conditions and the following disclaimer. 
 * - Redistributions in binary form 
 * must reproduce the above copyright notice, this list of conditions and the following 
 * disclaimer in the documentation and/or other materials provided with the distribution.
 * - Neither the name of jregex nor the names of its contributors may be used 
 * to endorse or promote products derived from this software without specific prior 
 * written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 * @version 1.2_01
 */

package jregex;

/**
 * Mutable set of 16-bit characters assembled step by step (single chars, ranges,
 * Unicode categories, unions and differences with other sets).
 *
 * <p>Two storage modes are used:
 * <ul>
 * <li><b>small</b> - one {@code boolean[BLOCK_SIZE]} array ({@link #block0}) indexed
 *     directly by the char code; used while every char added is below 256;</li>
 * <li><b>large</b> - a table of {@link Block}s ({@link #blocks}) indexed by the high
 *     byte of the char, each block being indexed by the low byte. The set switches to
 *     this mode when a range reaches 256 or above, when a category is added, or when
 *     it is combined with a large or inverted operand. There is no switch back except
 *     through {@link #reset()}.</li>
 * </ul>
 *
 * <p>{@code weight} tracks how many chars are currently set; the {@code positive} flag
 * only affects how the set is reported ({@link #getWeight()}, {@link #unify}), never
 * the stored bits.
 */
class Bitset implements UnicodeConstants{
   /** {@code categoryBits[cat][hi]} holds the low bytes of all chars of category {@code cat} whose high byte is {@code hi}. */
   private static final Block[][] categoryBits=new Block[CATEGORY_COUNT][BLOCK_COUNT];
   static{
      // classify every 16-bit code unit once, when the class is loaded
      for(int code=Character.MIN_VALUE;code<=Character.MAX_VALUE;code++){
         Block[] row=categoryBits[Character.getType((char)code)];
         int hi=(code>>8)&0xff;
         if(row[hi]==null) row[hi]=new Block();
         row[hi].set(code&0xff);
      }
   }
   
   /** false means the set is to be read as "everything except the stored chars". */
   private boolean positive=true;
   /** true once the {@link #blocks} table is in use instead of {@link #block0}. */
   private boolean isLarge=false;
   
   /** Small-mode storage (chars 0..255); created on first use, may be null. */
   boolean[] block0;
   /** Stand-in handed out by {@link #unify} when a small set never allocated its array. */
   private static final boolean[] emptyBlock0=new boolean[BLOCK_SIZE];
   
   /** Large-mode storage, indexed by the high byte of the char; entries may be null. */
   Block[] blocks;
   
   /** Number of chars currently stored (independent of {@link #positive}). */
   private int weight;
   
   /** Returns this object to its freshly constructed state: empty, positive, small mode. */
   final void reset(){
      weight=0;
      isLarge=false;
      blocks=null;
      block0=null;
      positive=true;
   }
   
   /**
    * Copies the content of {@code bs} into {@code term}: the term type and bit data
    * according to the storage mode, plus the inversion flag and the effective weight.
    */
   final static void unify(Bitset bs,Term term){
      if(!bs.isLarge){
         term.type=Term.BITSET;
         term.bitset=(bs.block0!=null)? bs.block0: emptyBlock0;
      }
      else{
         term.type=Term.BITSET2;
         term.bitset2=Block.toBitset2(bs.blocks);
      }
      term.inverse=!bs.positive;
      term.weight=bs.getWeight();
   }

   /** Sets whether the stored chars are the members ({@code true}) or the excluded chars ({@code false}). */
   final void setPositive(boolean b){
      positive=b;
   }
   
   /** @return the flag last given to {@link #setPositive} ({@code true} initially and after {@link #reset()}) */
   final boolean isPositive(){
      return positive;
   }
   
   /** @return {@code true} if the block table (large mode) is in use */
   final boolean isLarge(){
      return isLarge;
   }
   
   /**
    * Switches to large mode if not there yet. An existing small array becomes block 0
    * of the new table; it is wrapped, not copied.
    */
   private final void enableLargeMode(){
      if(isLarge) return;
      Block[] table=new Block[BLOCK_COUNT];
      if(block0!=null) table[0]=new Block(block0);
      this.blocks=table;
      isLarge=true;
   }
   
   /** @return the stored char count for a positive set, {@code MAX_WEIGHT} minus that count otherwise */
   final int getWeight(){
      if(positive) return weight;
      return MAX_WEIGHT-weight;
   }
   
   /**
    * Adds the "word" chars: categories Lu, Ll, Lt, Lo, Nd when {@code unicode},
    * otherwise a-z, A-Z, 0-9; the underscore is added in both cases.
    */
   final void setWordChar(boolean unicode){
      if(unicode){
         setCategory(Lu);
         setCategory(Ll);
         setCategory(Lt);
         setCategory(Lo);
         setCategory(Nd);
      }
      else{
         setRange('a','z');
         setRange('A','Z');
         setRange('0','9');
      }
      setChar('_');
   }
   
   /** Adds the digits: category Nd when {@code unicode}, otherwise 0-9. */
   final void setDigit(boolean unicode){
      if(!unicode){
         setRange('0','9');
         return;
      }
      setCategory(Nd);
   }
   
   /**
    * Adds the whitespace chars: categories Zs, Zp, Zl when {@code unicode}, otherwise
    * space, CR, LF, tab and form feed.
    */
   final void setSpace(boolean unicode){
      if(unicode){
         setCategory(Zs);
         setCategory(Zp);
         setCategory(Zl);
         return;
      }
      final String asciiSpaces=" \r\n\t\f";
      for(int k=0;k<asciiSpaces.length();k++){
         setChar(asciiSpaces.charAt(k));
      }
   }
   
   /**
    * Adds every char of the given category (an index into the table built from
    * {@code Character.getType}). Always forces large mode.
    */
   final void setCategory(int c){
      enableLargeMode(); // no-op when already large
      weight+=Block.add(this.blocks,categoryBits[c],0,BLOCK_COUNT-1,false);
   }
   
   /** Adds each char of {@code chars}, processed from the last char to the first. */
   final void setChars(String chars){
      int k=chars.length();
      while(--k>=0){
         setChar(chars.charAt(k));
      }
   }
   
   /** Adds a single char. */
   final void setChar(char c){
      setRange(c,c);
   }
   
   /**
    * Adds all chars from {@code c1} to {@code c2}, both inclusive; nothing is added
    * when {@code c1>c2}. Stays in small mode only if the set is small and
    * {@code c2<256}; otherwise the set is (or becomes) large.
    */
   final void setRange(char c1,char c2){
      if(!isLarge && c2<256){
         if(block0==null) block0=new boolean[BLOCK_SIZE];
         weight+=set(block0,true,c1,c2);
         return;
      }
      
      enableLargeMode(); // no-op when already large
      final Block[] table=this.blocks;
      int added=0;
      for(int code=c1;code<=c2;code++){
         int hi=(code>>8)&0xff;
         Block blk=table[hi];
         if(blk==null) table[hi]=blk=new Block();
         if(blk.set(code&0xff)) added++; // count only bits that were not set before
      }
      weight+=added;
   }
   
   /** Unites this set with {@code bs}, honouring the positive/negative flag of {@code bs}. */
   final void add(Bitset bs){
      add(bs,false);
   }
   
   /**
    * Unites this set with {@code bs}, or with its complement when {@code inverse}.
    * The stored bits of {@code bs} are taken inverted when exactly one of
    * "{@code bs} is negative" and {@code inverse} holds.
    */
   final void add(Bitset bs,boolean inverse){
      // (!positive ^ inverse) is the same as (positive == inverse)
      final boolean useComplement=(bs.positive==inverse);
      weight+=addImpl(this,bs,useComplement);
   }
   
   /**
    * Adds the bits of {@code bs2} (complemented if {@code inv}) to {@code bs1} and
    * returns the number of newly set bits. Both operands are switched to large mode
    * unless both are small and {@code inv} is false.
    */
   private final static int addImpl(Bitset bs1, Bitset bs2, boolean inv){
      if(inv || bs1.isLarge || bs2.isLarge){
         bs1.enableLargeMode();
         bs2.enableLargeMode();
         return Block.add(bs1.blocks,bs2.blocks,0,BLOCK_COUNT-1,inv);
      }
      // both small, no inversion: plain array union
      if(bs2.block0==null) return 0;
      if(bs1.block0==null) bs1.block0=new boolean[BLOCK_SIZE];
      return add(bs1.block0,bs2.block0,0,BLOCK_SIZE-1,false);
   }
   
   /** Removes the chars of {@code bs} from this set, honouring the positive/negative flag of {@code bs}. */
   final void subtract(Bitset bs){
      subtract(bs,false);
   }
   
   /**
    * Removes from this set the chars of {@code bs}, or of its complement when
    * {@code inverse}; the inversion rule is the same as in {@link #add(Bitset,boolean)}.
    */
   final void subtract(Bitset bs,boolean inverse){
      final boolean useComplement=(bs.positive==inverse);
      weight+=subtractImpl(this,bs,useComplement);
   }
   
   /**
    * Clears in {@code bs1} the bits of {@code bs2} (complemented if {@code inv}) and
    * returns the weight change reported by the underlying routine. Both operands are
    * switched to large mode unless both are small and {@code inv} is false.
    */
   private final static int subtractImpl(Bitset bs1,Bitset bs2,boolean inv){
      if(inv || bs1.isLarge || bs2.isLarge){
         bs1.enableLargeMode();
         bs2.enableLargeMode();
         return Block.subtract(bs1.blocks,bs2.blocks,0,BLOCK_COUNT-1,inv);
      }
      // both small, no inversion: nothing to do if either side has no data
      if(bs2.block0==null || bs1.block0==null) return 0;
      return subtract(bs1.block0,bs2.block0,0,BLOCK_SIZE-1,false);
   }
   
   /** Keeps only the chars also contained in {@code bs}. */
   final void intersect(Bitset bs){
      intersect(bs,false);
   }
   
   /**
    * Keeps only the chars also contained in {@code bs} (or in its complement when
    * {@code inverse}); implemented as subtraction of the opposite set.
    */
   final void intersect(Bitset bs,boolean inverse){
      subtract(bs,!inverse);
   }
   
   /**
    * Sets {@code bs1[i]} for every {@code i} in {@code [from,to]} where it is not yet
    * set and {@code bs2[i]} differs from {@code inv}.
    *
    * @return the number of positions switched to {@code true}
    */
   static final int add(boolean[] bs1,boolean[] bs2,int from,int to,boolean inv){
      int gained=0;
      for(int i=from;i<=to;i++){
         if(!bs1[i] && bs2[i]!=inv){
            bs1[i]=true;
            gained++;
         }
      }
      return gained;
   }
   
   /**
    * Clears {@code bs1[i]} for every {@code i} in {@code [from,to]} where it is set
    * and {@code bs2[i]} differs from {@code inv}.
    *
    * @return minus the number of positions switched to {@code false}
    */
   static final int subtract(boolean[] bs1,boolean[] bs2,int from,int to,boolean inv){
      int delta=0;
      for(int i=from;i<=to;i++){
         if(bs1[i] && bs2[i]!=inv){
            bs1[i]=false;
            delta--;
         }
      }
      return delta;
   }
   
   /**
    * Assigns {@code value} to {@code arr[from..to]} (inclusive).
    *
    * @return the number of positions changed, positive when setting to {@code true},
    *         negative when setting to {@code false}
    */
   static final int set(boolean[] arr,boolean value,int from,int to){
      final int step=value? 1: -1;
      int delta=0;
      for(int i=from;i<=to;i++){
         if(arr[i]!=value){
            arr[i]=value;
            delta+=step;
         }
      }
      return delta;
   }
   
   /**
    * Debug representation: an optional leading {@code ^} for a negative set, the
    * content as rendered by {@code CharacterClass}, then the effective weight in
    * parentheses.
    */
   public String toString(){
      StringBuffer text=new StringBuffer();
      if(!positive) text.append('^');
      
      if(!isLarge){
         if(block0!=null) text.append(CharacterClass.stringValue0(block0));
      }
      else{
         text.append(CharacterClass.stringValue2(Block.toBitset2(blocks)));
      }
      
      text.append('(');
      text.append(getWeight());
      text.append(')');
      return text.toString();
   }
}

class Block implements UnicodeConstants{
   // When true every position of the block counts as set: get() answers true and
   // set() reports "no change" without looking at the bits array.
   private boolean isFull;
   // Per-position flags; null until the first bit is stored (and again after the
   // block is cleared or becomes full).
   boolean[] bits;
   // True while the bits array may also be referenced from elsewhere; writers
   // call copyBits() first so the other holder is not affected.
   private boolean shared=false;
   
   /** Creates an empty block with no bits array allocated. */
   Block(){}
   
   /**
    * Wraps an existing array without copying it. The block is marked shared, so the
    * array is duplicated before the first modification made through this block.
    */
   Block(boolean[] bits){
      this.bits=bits;
      shared=true;
   }
   
   /**
    * Sets the bit at position {@code c}.
    *
    * @return {@code true} if the bit was not set before, {@code false} if the block
    *         is full or the bit was already set
    */
   final boolean set(int c){
      if(isFull) return false;
      
      if(this.bits==null){
         // first bit ever stored: allocate a private array
         boolean[] fresh=new boolean[BLOCK_SIZE];
         fresh[c]=true;
         this.bits=fresh;
         shared=false;
         return true;
      }
      
      if(this.bits[c]) return false;
      
      // never write into an array that somebody else may also hold
      boolean[] writable= shared? copyBits(this): this.bits;
      writable[c]=true;
      return true;
   }
   
   /**
    * Tests the bit at position {@code c}.
    *
    * @return {@code true} if the block is full or the bit is set; {@code false} if
    *         no bits array exists or the bit is clear
    */
   final boolean get(int c){
      if(isFull) return true;
      final boolean[] current=this.bits;
      return current!=null && current[c];
   }
   
   /**
    * Unites {@code targets[i]} with {@code addends[i]} (taken complemented when
    * {@code inv}) for every {@code i} in {@code [from,to]}. A {@code null} addend is
    * an empty block; a {@code null} target is created on demand.
    *
    * @return the sum of the per-block results, i.e. the number of bits newly set
    */
   final static int add(Block[] targets,Block[] addends,int from,int to,boolean inv){
      int gained=0;
      for(int idx=from;idx<=to;idx++){
         final Block addend=addends[idx];
         // an empty addend taken as-is, or a full one taken inverted, contributes nothing
         final boolean nothingToAdd= (addend==null)? !inv: (addend.isFull && inv);
         if(nothingToAdd) continue;
         
         Block target=targets[idx];
         if(target==null){
            target=new Block();
            targets[idx]=target;
         }
         else if(target.isFull){
            continue; // cannot grow any further
         }
         
         gained+=add(target,addend,inv);
      }
      return gained;
   }
   
   /**
    * Unites {@code target} with {@code addend}, or with the complement of
    * {@code addend} when {@code inv}. The caller guarantees {@code !target.isFull}.
    *
    * <p>A {@code null} addend, or one that is neither full nor has a bits array, is
    * treated as empty. When the effective addend covers the whole block the target is
    * switched to the compact "full" state (no bits array).
    *
    * <p>When an empty target takes over the addend's array without copying, BOTH
    * blocks are marked shared. Previously only the target was marked, so a later
    * {@code set()} on the addend wrote straight into the array the target was also
    * using, silently changing the target's content.
    *
    * @return the number of bits newly set in {@code target}
    */
   private final static int add(Block target,Block addend,boolean inv){
      final boolean addendFull= addend!=null && addend.isFull;
      final boolean[] addbits= (addend==null)? null: addend.bits;
      
      if(addendFull || addbits==null){
         // all-or-nothing addend: full taken as-is, or empty taken inverted, fills the block
         final boolean addsEverything= addendFull? !inv: inv;
         if(!addsEverything) return 0;
         int s=BLOCK_SIZE;
         if(target.bits!=null){
            s-=count(target.bits,0,BLOCK_SIZE-1); // bits already present are not new
         }
         target.isFull=true;
         target.bits=null;
         target.shared=false;
         return s;
      }
      
      boolean[] targetbits=target.bits;
      if(targetbits==null){
         if(!inv){
            // empty target: adopt the addend's array instead of copying it
            target.bits=addbits;
            target.shared=true;
            // the addend must copy before its next write as well; skip the write
            // when the flag is already set (e.g. blocks reused as read-only tables)
            if(!addend.shared) addend.shared=true;
            return count(addbits,0,BLOCK_SIZE-1);
         }
         target.bits=targetbits=new boolean[BLOCK_SIZE];
         target.shared=false;
      }
      else if(target.shared){
         targetbits=copyBits(target); // get a private array before modifying
      }
      return Bitset.add(targetbits,addbits,0,BLOCK_SIZE-1,inv);
   }
   
   /**
    * Removes from {@code targets[i]} the bits of {@code subtrahends[i]} (taken
    * complemented when {@code inv}) for every {@code i} in {@code [from,to]}.
    * Missing or empty targets are skipped; a {@code null} subtrahend is an empty
    * block, so with {@code inv} it wipes the target completely.
    *
    * @return the accumulated weight change: minus the number of bits cleared here,
    *         plus whatever the per-block routine reports
    */
   final static int subtract(Block[] targets,Block[] subtrahends,int from,int to,boolean inv){
      int delta=0;
      for(int idx=from;idx<=to;idx++){
         final Block target=targets[idx];
         if(target==null) continue;
         if(!target.isFull && target.bits==null) continue; // nothing stored
         
         final Block subtrahend=subtrahends[idx];
         if(subtrahend!=null){
            delta+=subtract(target,subtrahend,inv);
            continue;
         }
         
         if(!inv) continue; // subtracting nothing
         
         // inverted empty subtrahend == everything: drop the whole block content
         delta-= target.isFull? BLOCK_SIZE: count(target.bits,0,BLOCK_SIZE-1);
         target.isFull=false;
         target.bits=null;
         target.shared=false;
      }
      return delta;
   }
   
   /**
    * Removes from {@code target} the bits of {@code subtrahend}, or of its complement
    * when {@code inv}. The caller guarantees that {@code target} is full or has a
    * bits array, and that {@code subtrahend} is not {@code null}.
    *
    * <p>The result is the change in the number of set bits and is therefore zero or
    * negative on every path, matching {@code Bitset.subtract(boolean[],...)} and the
    * array-level {@code subtract(Block[],...)}, whose callers add it to a weight.
    * The "remove everything" paths used to return the positive count, which made the
    * weight grow when chars were removed.
    *
    * @return minus the number of bits cleared in {@code target}
    */
   private final static int subtract(Block target,Block subtrahend,boolean inv){
      final boolean[] subbits=subtrahend.bits;
      
      if(subtrahend.isFull || subbits==null){
         // all-or-nothing subtrahend: full taken as-is, or empty taken inverted, removes all
         final boolean removesEverything= subtrahend.isFull? !inv: inv;
         if(!removesEverything) return 0;
         final int removed= target.isFull? BLOCK_SIZE: count(target.bits,0,BLOCK_SIZE-1);
         target.isFull=false;
         target.bits=null;
         target.shared=false;
         return -removed;
      }
      
      if(target.isFull){
         // materialise the implicit "all set" state, then clear bit by bit
         boolean[] bits=fullBits(target.bits);
         int s=Bitset.subtract(bits,subbits,0,BLOCK_SIZE-1,inv);
         target.isFull=false;
         target.shared=false;
         target.bits=bits;
         return s;
      }
      
      // get a private array before modifying one that may be held elsewhere
      final boolean[] targetbits= target.shared? copyBits(target): target.bits;
      return Bitset.subtract(targetbits,subbits,0,BLOCK_SIZE-1,inv);
   }
   
   /**
    * Gives {@code block} its own copy of the first {@code BLOCK_SIZE} entries of its
    * current bits array and clears its shared flag.
    *
    * @return the new array, now installed as {@code block.bits}
    */
   private static boolean[] copyBits(Block block){
      final boolean[] privateCopy=new boolean[BLOCK_SIZE];
      System.arraycopy(block.bits,0,privateCopy,0,BLOCK_SIZE);
      block.shared=false;
      block.bits=privateCopy;
      return privateCopy;
   }
   
   /**
    * Overwrites the first {@code BLOCK_SIZE} entries of {@code bits} with the content
    * of {@code FULL_BITS} (presumably an all-true template declared outside this
    * view), allocating a new array when {@code bits} is {@code null}.
    *
    * @return the array that was filled
    */
   private static boolean[] fullBits(boolean[] bits){
      final boolean[] dest= (bits!=null)? bits: new boolean[BLOCK_SIZE];
      System.arraycopy(FULL_BITS,0,dest,0,BLOCK_SIZE);
      return dest;
   }
   
   /**
    * Returns a cleared array: a newly allocated one when {@code bits} is
    * {@code null}, otherwise {@code bits} itself after its first {@code BLOCK_SIZE}
    * entries were overwritten from {@code EMPTY_BITS} (presumably an all-false
    * template declared outside this view).
    */
   private static boolean[] emptyBits(boolean[] bits){
      if(bits!=null){
         System.arraycopy(EMPTY_BITS,0,bits,0,BLOCK_SIZE);
         return bits;
      }
      return new boolean[BLOCK_SIZE];
   }
   
   /**
    * Counts the {@code true} entries of {@code arr} between {@code from} and
    * {@code to}, both inclusive.
    *
    * @return the number of set positions; 0 when {@code from>to}
    */
   final static int count(boolean[] arr, int from, int to){
      int total=0;
      int idx=from;
      while(idx<=to){
         if(arr[idx++]) total++;
      }
      return total;
   }
   
   final static boolean[][] toBitset2(Block[] blocks){
      int len=blocks.length;
      boolean[][] result=new boolean[len][];
      for(int i=0;i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy