com.groupbyinc.common.jregex.Matcher Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of common-test Show documentation

${project.name}

There is a newer version: 198

/**
 * Copyright (c) 2001, Sergey A. Samokhodkin
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without modification, 
 * are permitted provided that the following conditions are met:
 * 
 * - Redistributions of source code must retain the above copyright notice, 
 * this list of conditions and the following disclaimer. 
 * - Redistributions in binary form 
 * must reproduce the above copyright notice, this list of conditions and the following 
 * disclaimer in the documentation and/or other materials provided with the distribution.
 * - Neither the name of jregex nor the names of its contributors may be used 
 * to endorse or promote products derived from this software without specific prior 
 * written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 * @version 1.2_01
 */

package jregex;

import java.util.*;
import java.io.*;

/**
 * Matcher instance is an automaton that actually performs matching. It provides the following methods:
 * <ul>
 * <li> searching for matching substrings : matcher.find() or matcher.findAll();
 * <li> testing whether a text matches a whole pattern : matcher.matches();
 * <li> testing whether the text matches the beginning of a pattern : matcher.matchesPrefix();
 * <li> searching with custom options : matcher.find(int options)
 * </ul>
 * <p>
 * <b>Obtaining results</b><br>
 * After the search succeeded, i.e. if one of the above methods returned <code>true</code>,
 * one may obtain information on the match:
 * <ul>
 * <li> may check whether some group is captured : matcher.isCaptured(int);
 * <li> may obtain start and end positions of the match and its length : matcher.start(int),matcher.end(int),matcher.length(int);
 * <li> may obtain match contents as String : matcher.group(int).
 * </ul>
 * The match prefix and suffix information can be obtained the same way.
 * The appropriate methods are grouped in MatchResult interface, which the Matcher class implements.
 * <p>
 * Matcher objects are not thread-safe, so only one thread may use a matcher instance at a time.
 * Note that Pattern objects are thread-safe (the same instance may be shared between
 * multiple threads), and the typical tactic in multithreaded applications is to have one Pattern instance per expression (a singleton),
 * and one Matcher object per thread.
 */

public class Matcher implements MatchResult{
  /* Matching options: bit flags that may be combined with bitwise OR and passed to find(int). */
  /**
   * The same effect as "^" without REFlags.MULTILINE.
   * @see Matcher#find(int)
   */
   public static final int ANCHOR_START=1;
   
  /**
   * The same effect as "\\G".
   * @see Matcher#find(int)
   */
   public static final int ANCHOR_LASTMATCH=2;
   
  /**
   * The same effect as "$" without REFlags.MULTILINE.
   * @see Matcher#find(int)
   */
   public static final int ANCHOR_END=4;
   
  /**
   * Experimental option: if the text ends before the end of the pattern is reached, report a match.
   * @see Matcher#find(int)
   */
   public static final int ACCEPT_INCOMPLETE=8;
   
   //Root term substituted by search() when ANCHOR_START is requested;
   //see search(ANCHOR_START|...)
   private static Term startAnchor=new Term(Term.START);
   
   //Root term substituted by search() when ANCHOR_LASTMATCH is requested;
   //see search(ANCHOR_LASTMATCH|...)
   private static Term lastMatchAnchor=new Term(Term.LAST_MATCH_END);
   
   //The pattern this matcher was created for; returned by pattern().
   private Pattern re;
   //Working arrays of the automaton; search() hands memregs and counters to SearchEntry.popState().
   private int[] counters;
   private MemReg[] memregs;
   private LAEntry[] lookaheads;
   //Sizes cached next to the arrays above (lookaheadCount is read from the Pattern in the constructor).
   private int counterCount;
   private int memregCount;
   private int lookaheadCount;
   
   //Target buffer. [offset,end) is the target region inside data;
   //[wOffset,wEnd) is the current match window, also expressed as absolute indexes into data
   //(start() and end() subtract offset). wEnd is -1 until a match is found.
   private char[] data;
   private int offset,end,wOffset,wEnd;
   //true when data may be referenced from outside (see targetChars() and setTarget(Matcher,int));
   //setAll() then allocates a fresh buffer instead of reusing data.
   private boolean shared;
   
   private SearchEntry top;           //stack entry; null makes search() start afresh from wOffset
   private SearchEntry first;         //object pool entry
   private SearchEntry defaultEntry;  //called when moving the window
   
   //Set by search(), cleared by flush(); tells find()/matches() that a search has already run.
   private boolean called;
   
   //Passed to first.reset() in flush().
   private int minQueueLength;
   
   //String view of the target; created by getString() when a large enough part is requested.
   private String cache;
   
   //cache may be longer than the actual data
   //and contrariwise; so cacheOffset may have both signs.
   //getString() subtracts cacheOffset from data indexes to obtain indexes into cache.
   //NOTE(review): the original comment said "cacheOffset is actually -(data offset)" - confirm the sign convention.
   private int cacheOffset,cacheLength;   
   
   //Lazily created holders for the PREFIX, SUFFIX and TARGET pseudo-groups (filled in by bounds()).
   private MemReg prefixBounds,suffixBounds,targetBounds;
   
   Matcher(Pattern regex){
      this.re=regex;
      //int memregCount=(memregs=new MemReg[regex.memregs]).length;
      //for(int i=0;i0){
         MemReg[] memregs=new MemReg[memregCount];
         for(int i=0;i0) counters=new int[counterCount];
      
      if((lookaheadCount=regex.lookaheads)>0){
         LAEntry[] lookaheads=new LAEntry[lookaheadCount];
         for(int i=0;i
   *   Matcher m=new Pattern("\\w+").matcher(myString);
   *   if(m.find())m.setTarget(m,m.SUFFIX); //forget all that is not a suffix
   *


   * Resets current search position to zero.
   * @param m  - a matcher that is a source of data
   * @param groupId - which group to take data from
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(char[],int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public final void setTarget(Matcher m, int groupId){
      // Locate the requested group inside the source matcher.
      final MemReg region=m.bounds(groupId);
      if(region==null){
         throw new IllegalArgumentException("group #"+groupId+" is not assigned");
      }
      // Borrow the source's buffer and narrow the target to the group's bounds.
      this.data=m.data;
      this.offset=region.in;
      this.end=region.out;
      // The source's String cache describes the same buffer, so take it over as well.
      this.cache=m.cache;
      this.cacheLength=m.cacheLength;
      this.cacheOffset=m.cacheOffset;
      // Two distinct matchers now reference one array: flag both as sharing it.
      if(this!=m){
         m.shared=true;
         this.shared=true;
      }
      init();
   }
   
   
  /**
   * Supplies a text to search in/match with.
   * Resets current search position to zero.
   * @param text - a data
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(char[],int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public void setTarget(String text){
      // The whole string is the target: start at 0 and span its full length.
      final int length=text.length();
      setTarget(text,0,length);
   }
   
  /**
   * Supplies a text to search in/match with, as a part of String.
   * Resets current search position to zero.
   * @param text - a data source
   * @param start - where the target starts
   * @param len - how long is the target
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(char[],int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public void setTarget(String text,int start,int len){
      char[] mychars=data;
      if(mychars==null || shared || mychars.lengthshared=false

   *   myMatcher.setTarget(myCharArray,x,y,false); //we declare that array contents is NEITHER shared NOR will be used later, so may modifications on it are permitted
   *

* then we should expect the array contents to be changed on subsequent setTarget(..) operations. * Such method may yield some increase in perfomanse in the case of multiple setTarget() calls. * Resets current search position to zero. * @param text - a data source * @param start - where the target starts * @param len - how long is the target * @param shared - if

true: data are shared or used later, don't modify it; if false: possible modifications of the text on subsequent setTarget() calls are perceived and allowed.
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(char[],int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public final void setTarget(char[] text,int start,int len,boolean shared){
      // Adopt the caller's array as-is; no copy is made here.
      this.shared=shared;
      this.data=text;
      this.offset=start;
      this.end=start+len;
      // Any cached String belonged to the previous target.
      this.cache=null;
      init();
   }
   
   
  /**
   * Supplies a text to search in/match with through a stream.
   * Resets current search position to zero.
   * @param in - a data stream;
   * @param len - how much characters should be read; if len is -1, read the entire stream.
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(char[],int,int)
   */
   public void setTarget(Reader in,int len)throws IOException{
      if(len<0){
         setAll(in);
         return;
      }
      char[] mychars=data;
      boolean shared=this.shared;
      if(mychars==null || shared || mychars.length=0){
         len-=c;
         count+=c;
         if(len==0) break;
      }
      setTarget(mychars,0,count,shared);
   }
   
   /*
    * Reads the reader to its end and installs everything read as the new target.
    * The current buffer is reused when it is exclusively ours and non-empty;
    * otherwise a fresh 1024-char buffer is allocated. The buffer is tripled
    * whenever it fills up.
    *
    * Throws IOException if the reader fails.
    */
   private void setAll(Reader in)throws IOException{
      char[] mychars=data;
      boolean shared=this.shared;
      // A zero-length buffer must not be reused: read(..,0) returns 0 rather than -1
      // and tripling a size of 0 stays 0, so the loop below would never terminate.
      if(mychars==null || shared || mychars.length==0){
         mychars=new char[1024];
         shared=false;
      }
      int free=mychars.length;
      int count=0;
      int c;
      while((c=in.read(mychars,count,free))>=0){
         free-=c;
         count+=c;
         if(free==0){
            // Buffer is full: move the data into one three times as large.
            int newsize=count*3;
            char[] newchars=new char[newsize];
            System.arraycopy(mychars,0,newchars,0,count);
            mychars=newchars;
            free=newsize-count;
            // The replacement array is private to this matcher.
            shared=false;
         }
      }
      setTarget(mychars,0,count,shared);
   }
   
   private final String getString(int start,int end){
      final String cached=cache;
      if(cached!=null){
         // Translate data indexes into indexes of the cached String.
         final int shift=cacheOffset;
         return cached.substring(start-shift,end-shift);
      }
      final int regionEnd=this.end;
      final int regionStart=this.offset;
      final int regionLen=regionEnd-regionStart;
      final char[] chars=this.data;
      final int wanted=end-start;
      if(wanted<(regionLen/3)){
         // Small slice: copy just the requested characters, leave the cache unset.
         return new String(chars,start,wanted);
      }
      // The request covers at least a third of the target:
      // build a String of the whole target once and serve substrings from it.
      final String whole=new String(chars,regionStart,regionLen);
      cache=whole;
      cacheOffset=regionStart;
      cacheLength=regionLen;
      return whole.substring(start-regionStart,end-regionStart);
   }
   
  /* Matching */
   
  /**
   * Tells whether the entire target matches the beginning of the pattern.
   * The whole pattern is also regarded as its beginning.<br>
   * This feature allows one to detect a mismatch by examining only the beginning part of
   * the target (if the beginning of the target doesn't match the beginning of the pattern, then the entire target
   * cannot match either).<br>
   * For example the following assertions yield <code>true</code>:<pre>
   *   Pattern p=new Pattern("abcd");
   *   p.matcher("").matchesPrefix();
   *   p.matcher("a").matchesPrefix();
   *   p.matcher("ab").matchesPrefix();
   *   p.matcher("abc").matchesPrefix();
   *   p.matcher("abcd").matchesPrefix();
   * </pre>
   * and the following yield <code>false</code>:<pre>
   *   p.matcher("b").matchesPrefix();
   *   p.matcher("abcdef").matchesPrefix();
   *   p.matcher("x").matchesPrefix();
   * </pre>
   * @return true if the entire target matches the beginning of the pattern
   */
   public final boolean matchesPrefix(){
      // Always test from the very beginning of the target.
      setPosition(0);
      // Anchored at both ends, but running out of text before the pattern ends counts as success.
      final int options=ANCHOR_START|ANCHOR_END|ACCEPT_INCOMPLETE;
      return search(options);
   }
   
  /**
   * Just an old name for matchesPrefix().
   * Retained for backwards compatibility.
   * @deprecated Replaced by matchesPrefix()
   */
   public final boolean isStart(){
      // Pure alias kept for old callers; all work happens in matchesPrefix().
      final boolean prefixMatches=matchesPrefix();
      return prefixMatches;
   }
   
  /**
   * Tells whether the current target matches the whole pattern.
   * For example the following yield <code>true</code>:<pre>
   *   Pattern p=new Pattern("\\w+");
   *   p.matcher("a").matches();
   *   p.matcher("ab").matches();
   *   p.matcher("abc").matches();
   * </pre>
   * and the following yield <code>false</code>:<pre>
   *   p.matcher("abc def").matches();
   *   p.matcher("bcd ").matches();
   *   p.matcher(" bcd").matches();
   *   p.matcher("#xyz#").matches();
   * </pre>
   * @return whether the current target matches the whole pattern.
   */
   public final boolean matches(){
      // A search has already run on this target: rewind before matching again.
      if(called){
         setPosition(0);
      }
      // Anchoring at both ends forces the match to cover the entire target.
      return search(ANCHOR_END|ANCHOR_START);
   }
   
  /**
   * Just a combination of setTarget(String) and matches().
   * @param s the target string;
   * @return whether the specified string matches the whole pattern.
   */
   public final boolean matches(String s){
      // Installing a new target also resets the search position.
      setTarget(s);
      final int wholeTarget=ANCHOR_START|ANCHOR_END;
      return search(wholeTarget);
   }
   
  /**
   * Allows to set a position the subsequent find()/find(int) will start from.
   * @param pos the position to start from;
   * @see Matcher#find()
   * @see Matcher#find(int)
   */
   public void setPosition(int pos){
      // Forget the previous match: no match end, no search performed yet.
      called=false;
      wEnd=-1;
      // pos is relative to the target start; wOffset is an absolute index into data.
      wOffset=offset+pos;
      flush();
   }
   
  /**
   * Searches through a target for a matching substring, starting from just after the end of last match.
   * If there wasn't any search performed, starts from zero.
   * @return true if a match found.
   */
   public final boolean find(){
      // Plain substring search is find(int) with no anchoring options.
      return find(0);
   }
   
  /**
   * Searches through a target for a matching substring, starting from just after the end of last match.
   * If there wasn't any search performed, starts from zero.
   * @param anchors a zero or a combination(bitwise OR) of ANCHOR_START,ANCHOR_END,ANCHOR_LASTMATCH,ACCEPT_INCOMPLETE
   * @return true if a match found.
   */
   public final boolean find(int anchors){
      // After a previous search, first step past the last match.
      if(called){
         skip();
      }
      final boolean found=search(anchors);
      return found;
   }
   
   
  /**
   * The same as  findAll(int), but with default behaviour;
   */
   public MatchIterator findAll(){
      final int defaultOptions=0;
      return findAll(defaultOptions);
   }
   
  /**
   * Returns an iterator over the matches found by subsequently calling find(options), the search starts from the zero position.
   */
   public MatchIterator findAll(final int options){
      // The iterator is a thin view over this matcher: every step is a find(options) call,
      // and the "element" handed out is the matcher itself in its current state.
      return new MatchIterator(){
         // true once find(options) has been run for the upcoming element
         private boolean probed=false;
         // result of that run
         private boolean available=false;

         private void probeIfNeeded(){
            if(probed) return;
            available=find(options);
            probed=true;
         }

         public boolean hasMore(){
            probeIfNeeded();
            return available;
         }

         public MatchResult nextMatch(){
            probeIfNeeded();
            if(!available) throw new NoSuchElementException();
            // Consume the element so that the next call searches again.
            probed=false;
            return Matcher.this;
         }

         public int count(){
            probeIfNeeded();
            if(!available) return 0;
            // The already-probed match counts as one; drain the rest.
            int total=1;
            while(find(options)) total++;
            probed=false;
            return total;
         }
      };
   }
   
  /**
   * Continues to search from where the last search left off.
   * The same as proceed(0).
   * @see Matcher#proceed(int)
   */
   public final boolean proceed(){
      final int noOptions=0;
      return proceed(noOptions);
   }
   
  /**
   * Continues to search from where the last search left off, using the specified options:<pre>
   * Matcher m=new Pattern("\\w+").matcher("abc");
   * while(m.proceed(0)){
   *    System.out.println(m.group(0));
   * }
   * </pre>
   * Output:<pre>
   * abc
   * ab
   * a
   * bc
   * b
   * c
   * </pre>
   * For example, let's find all odd numbers occurring in a text:<pre>
   *    Matcher m=new Pattern("\\d+").matcher("123");
   *    while(m.proceed(0)){
   *       String match=m.group(0);
   *       if(isOdd(Integer.parseInt(match))) System.out.println(match);
   *    }
   *    
   *    static boolean isOdd(int i){
   *       return (i&amp;1)>0;
   *    }
   * </pre>
   * This outputs:<pre>
   * 123
   * 1
   * 23
   * 3
   * </pre>
   * Note that using the find() method we would find '123' only.
   * @param options search options, some of ANCHOR_START|ANCHOR_END|ANCHOR_LASTMATCH|ACCEPT_INCOMPLETE; zero value(default) stands for usual search for substring.
   */
   public final boolean proceed(int options){
      // If a search has already run and left no alternative on the search stack,
      // nothing more can be tried at the current start position: move one char forward.
      if(called && top==null){
         wOffset++;
      }
      // Honour the caller's options; previously they were dropped and 0 was always used.
      return search(options);
   }
   
  /**
   * Sets the current search position just after the end of last match.
   */
   public final void skip(){
      final int we=wEnd;
      if(wOffset==we){
         // Empty match: restarting at the same place would find it again.
         // If no alternative is left on the search stack, step one char forward and reset;
         // otherwise keep the state untouched so the pending alternative gets its chance.
         if(top==null){
            wOffset++;
            flush();
         }
         return;
      }
      // wOffset is an absolute index into data, so "restart from the beginning"
      // means the target's start (offset), not index 0 of the underlying array.
      wOffset= we<0 ? offset : we;
      //rflush() works faster on simple regexes (with a small group/branch number)
      flush();
   }
   
   private final void init(){
      // Fresh target: no search performed, no match end, window starts at the target start.
      called=false;
      wEnd=-1;
      wOffset=offset;
      flush();
   }
   
  /**
   * Resets the internal state.
   */
   private final void flush(){
      // Drop the pending search stack and reset the reusable entries.
      top=null;
      defaultEntry.reset(0);
      first.reset(minQueueLength);
      // Mark the capture registers as unassigned; index 0 is not touched by this loop.
      // (The original ran this identical loop twice; one pass is sufficient.)
      for(int i=memregs.length-1;i>0;i--){
         MemReg mr=memregs[i];
         mr.in=mr.out=-1;
      }
      called=false;
   }
   
   //reverse flush
   //may work significantly faster,
   //need testing
   private final void rflush(){
      final MemReg[] regs=this.memregs;
      final int[] cnts=this.counters;
      // Detach the stack first, then unwind it entry by entry.
      SearchEntry cursor=top;
      top=null;
      for(;cursor!=null;){
         final SearchEntry below=cursor.sub;
         SearchEntry.popState(cursor,regs,cnts);
         cursor=below;
      }
      // Finally pop the state held by the default entry.
      SearchEntry.popState(defaultEntry,regs,cnts);
   }
   
  /**
   */
   public String toString(){
      // The textual form of a matcher is the text inside its current match window.
      final int from=wOffset, to=wEnd;
      return getString(from,to);
   }
   
   public Pattern pattern(){
      // The pattern this matcher was constructed with.
      return this.re;
   }
   
   public String target(){
      // The whole target region as a String.
      final int from=offset, to=end;
      return getString(from,to);
   }
   
  /**
   */
   public char[] targetChars(){
      // The buffer escapes to the caller: flag it as shared so that
      // later loads allocate a fresh array instead of reusing this one.
      this.shared=true;
      return this.data;
   }
   
  /**
   */
   public int targetStart(){
      // Index in targetChars() where the target begins.
      return this.offset;
   }
   
  /**
   */
   public int targetEnd(){
      // Index in targetChars() just past the last target character.
      return this.end;
   }
   
   public char charAt(int i){
      int in=this.wOffset;
      int out=this.wEnd;
      if(in<0 || out(mr.out-in)) throw new StringIndexOutOfBoundsException(""+i);
      return data[in+i];
   }
   
   public final int length(){
      // Width of the current match window.
      final int span=wEnd-wOffset;
      return span;
   }
   
  /**
   */
   public final int start(){
      // Convert the absolute window start into a position relative to the target start.
      final int relative=wOffset-offset;
      return relative;
   }
   
  /**
   */
   public final int end(){
      // Convert the absolute window end into a position relative to the target start.
      final int relative=wEnd-offset;
      return relative;
   }
   
  /**
   */
   public String prefix(){
      // Everything between the target start and the start of the match window.
      final int from=offset, to=wOffset;
      return getString(from,to);
   }
   
  /**
   */
   public String suffix(){
      // Everything between the end of the match window and the target end.
      final int from=wEnd, to=end;
      return getString(from,to);
   }
   
  /**
   */
   public int groupCount(){
      // One register exists per group, so the register array's size is the group count.
      final MemReg[] regs=this.memregs;
      return regs.length;
   }
   
  /**
   */
   public String group(int n){
      // bounds() yields null for a group that did not capture anything.
      final MemReg region=bounds(n);
      return region==null ? null : getString(region.in,region.out);
   }
   
  /**
   */
   public String group(String name){
      // Translate the name into a numeric id via the pattern, then reuse group(int).
      final Integer groupId=re.groupId(name);
      if(groupId!=null){
         return group(groupId.intValue());
      }
      throw new IllegalArgumentException("<"+name+"> isn't defined");
   }
   
  /**
   */
   public boolean getGroup(int n,TextBuffer tb){
      final MemReg region=bounds(n);
      if(region==null){
         // Group not captured: nothing is appended.
         return false;
      }
      // Append straight from the backing array, without building a String.
      final int from=region.in;
      tb.append(data,from,region.out-from);
      return true;
   }
   
  /**
   */
   public boolean getGroup(String name,TextBuffer tb){
      // Resolve the name to an id and delegate to the numeric overload.
      final Integer groupId=re.groupId(name);
      if(groupId!=null){
         return getGroup(groupId.intValue(),tb);
      }
      throw new IllegalArgumentException("unknown group: \""+name+"\"");
   }
   
  /**
   */
   public boolean getGroup(int n,StringBuffer sb){
      final MemReg region=bounds(n);
      if(region==null){
         // Group not captured: nothing is appended.
         return false;
      }
      // Append straight from the backing array, without building a String.
      final int from=region.in;
      sb.append(data,from,region.out-from);
      return true;
   }
   
  /**
   */
   public boolean getGroup(String name,StringBuffer sb){
      // Resolve the name to an id and delegate to the numeric overload.
      final Integer groupId=re.groupId(name);
      if(groupId!=null){
         return getGroup(groupId.intValue(),sb);
      }
      throw new IllegalArgumentException("unknown group: \""+name+"\"");
   }
   
  /**
   */
   public String[] groups(){
      MemReg[] memregs=this.memregs;
      String[] groups=new String[memregs.length];
      int in,out;
      MemReg mr;
      for(int i=0;i=0){
         mr=memregs[id];
      }
      else switch(id){
         case PREFIX:
            mr=prefixBounds;
            if(mr==null) prefixBounds=mr=new MemReg(PREFIX);
            mr.in=offset;
            mr.out=wOffset;
            break;
         case SUFFIX:
            mr=suffixBounds;
            if(mr==null) suffixBounds=mr=new MemReg(SUFFIX);
            mr.in=wEnd;
            mr.out=end;
            break;
         case TARGET:
            mr=targetBounds;
            if(mr==null) targetBounds=mr=new MemReg(TARGET);
            mr.in=offset;
            mr.out=end;
            break;
         default:
            throw new IllegalArgumentException("illegal group id: "+id+"; must either nonnegative int, or MatchResult.PREFIX, or MatchResult.SUFFIX");
      }
//System.out.println("  mr=["+mr.in+","+mr.out+"]");
      int in;
      if((in=mr.in)<0 || mr.out=0 && wEnd>=wOffset;
   }
   
  /**
   */
   public final boolean isCaptured(int id){
      // A group counts as captured exactly when bounds() can produce its region.
      final MemReg region=bounds(id);
      return region!=null;
   }
   
  /**
   */
   public final boolean isCaptured(String groupName){
      // Resolve the name to an id and delegate to the numeric overload.
      final Integer groupId=re.groupId(groupName);
      if(groupId!=null){
         return isCaptured(groupId.intValue());
      }
      throw new IllegalArgumentException("unknown group: \""+groupName+"\"");
   }
   
  /**
   */
   public final int length(int id){
      // NOTE(review): bounds() can return null (see group(int)); this then fails with
      // a NullPointerException - confirm that callers check isCaptured() first.
      final MemReg region=bounds(id);
      final int from=region.in;
      return region.out-from;
   }
   
  /**
   */
   public final int start(int id){
      // Group start converted from an absolute data index to a target-relative position.
      final MemReg region=bounds(id);
      return region.in-offset;
   }
   
  /**
   */
   public final int end(int id){
      // Group end converted from an absolute data index to a target-relative position.
      final MemReg region=bounds(id);
      return region.out-offset;
   }
   
   private final boolean search(int anchors){
      called=true;
      final int end=this.end;
      int offset=this.offset;
      char[] data=this.data;
      int wOffset=this.wOffset;
      int wEnd=this.wEnd;
      
      MemReg[] memregs=this.memregs;
      int[] counters=this.counters;
      LAEntry[] lookaheads=this.lookaheads;
      
      //int memregCount=memregs.length;
      //int cntCount=counters.length;
      int memregCount=this.memregCount;
      int cntCount=this.counterCount;
      
      SearchEntry defaultEntry=this.defaultEntry;
      SearchEntry first=this.first;
      SearchEntry top=this.top;
      SearchEntry actual=null;
      int cnt,regLen;
      int i;
      
      final boolean matchEnd=(anchors&ANCHOR_END)>0;
      final boolean allowIncomplete=(anchors&ACCEPT_INCOMPLETE)>0;
      
      Pattern re=this.re;
      Term root=re.root;
      Term term;
      if(top==null){
         if((anchors&ANCHOR_START)>0){
            term=re.root0;  //raw root
            root=startAnchor;
         }
         else if((anchors&ANCHOR_LASTMATCH)>0){
            term=re.root0;  //raw root
            root=lastMatchAnchor;
         }
         else{
            term=root;  //optimized root
         }
         i=wOffset;
         actual=first;
         SearchEntry.popState(defaultEntry,memregs,counters);
      }
      else{
         top=(actual=top).sub;
         term=actual.term;
         i=actual.index;
         SearchEntry.popState(actual,memregs,counters);
      }
      cnt=actual.cnt;
      regLen=actual.regLen;
      
      main:
      while(wOffset<=end){
         matchHere:
         for(;;){
     /*
     System.out.print("char: "+i+", term: ");
     System.out.print(term.toString());

     System.out.print(" // mrs:{");
     for(int dbi=0;dbiend) break;
                  }
                  term=term.next;
                  continue matchHere;
               }
               case Term.VOID:
                  term=term.next;
                  continue matchHere;
               
               case Term.CHAR:
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end || data[i]!=term.c) break;
//System.out.println("CHAR: "+data[i]+", i="+i);
                  i++;
                  term=term.next;
                  continue matchHere;
               
               case Term.ANY_CHAR:
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end) break;
                  i++;
                  term=term.next;
                  continue matchHere;
               
               case Term.ANY_CHAR_NE:
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end || (c=data[i])=='\r' || c=='\n') break;
                  i++;
                  term=term.next;
                  continue matchHere;
               
               case Term.END:
                  if(i>=end){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  break; 
                  
               case Term.END_EOL:  //perl's $
                  if(i>=end){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  else{
                     boolean matches=
                        i>=end |
                        ((i+1)==end && data[i]=='\n') |
                        ((i+2)==end && data[i]=='\r' && data[i+1]=='\n');
                        
                     if(matches){
                        term=term.next;
                        continue matchHere;
                     }
                     else break; 
                  }
                  
               case Term.LINE_END:
                  if(i>=end){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  else{
                     /*
                     if(((c=data[i])=='\r' || c=='\n') &&
                           (c=data[i-1])!='\r' && c!='\n'){
                        term=term.next;
                        continue matchHere;
                     }
                     */
                     //5 aug 2001
                     if((c=data[i])=='\r' || c=='\n'){
                        term=term.next;
                        continue matchHere;
                     }
                  }
                  break; 
                  
               case Term.START: //Perl's "^"
                  if(i==offset){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  //break; 
                  
                  //changed on 27-04-2002
                  //due to a side effect: if ALLOW_INCOMPLETE is enabled,
                  //the anchorStart moves up to the end and succeeds 
                  //(see comments at the last lines of matchHere, ~line 1830)
                  //Solution: if there are some entries on the stack ("^a|b$"),
                  //try them; otherwise it's a final 'no'
                  //if(top!=null) break;
                  //else break main;
                  
                  //changed on 25-05-2002
                  //rationale: if the term is startAnchor, 
                  //it's the root term by definition, 
                  //so if it doesn't match, the entire pattern 
                  //couldn't match too;
                  //otherwise we could have the following problem: 
                  //"c|^a" against "abc" finds only "a"
                  if(top!=null) break;
                  if(term!=startAnchor) break;
                  else break main;
                  
               case Term.LAST_MATCH_END:
                  if(i==wEnd){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  break main; //return false
                  
               case Term.LINE_START:
                  if(i==offset){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  else if(i=end) break;
                  c=data[i];
                  if(!(c<=255 && term.bitset[c])^term.inverse) break;
                  i++;
                  term=term.next;
                  continue matchHere;
               }
               case Term.BITSET2:{
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end) break;
                  c=data[i];
                  boolean[] arr=term.bitset2[c>>8];
                  if(arr==null || !arr[c&255]^term.inverse) break;
                  i++;
                  term=term.next;
                  continue matchHere;
               }
               case Term.BOUNDARY:{
                  boolean ch1Meets=false,ch2Meets=false;
                  boolean[] bitset=term.bitset;
                  test1:{
                     int j=i-1;
                     //if(j=end) break test1;
                     if(j=end) break test2;
                     if(i>=end) break test2;
                     c= data[i];
                     ch2Meets= (c<256 && bitset[c]);
                  }
                  if(ch1Meets^ch2Meets^term.inverse){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  else break;
               }
               case Term.UBOUNDARY:{
                  boolean ch1Meets=false,ch2Meets=false;
                  boolean[][] bitset2=term.bitset2;
                  test1:{
                     int j=i-1;
                     //if(j=end) break test1;
                     if(j>8];
                     ch1Meets= bits!=null && bits[c&0xff];
                  }
                  test2:{
                     //if(i=end) break test2;
                     if(i>=end) break test2;
                     c= data[i];
                     boolean[] bits=bitset2[c>>8];
                     ch2Meets= bits!=null && bits[c&0xff];
                  }
                  if(ch1Meets^ch2Meets^term.inverse){  //is boundary ^ inv
                     term=term.next;
                     continue matchHere;
                  }
                  else break;
               }
               case Term.DIRECTION:{
                  boolean ch1Meets=false,ch2Meets=false;
                  boolean[] bitset=term.bitset;
                  boolean inv=term.inverse;
//System.out.println("i="+i+", inv="+inv+", bitset="+CharacterClass.stringValue0(bitset));
                  int j=i-1;
                  //if(j>=offset && j=offset){
                     c= data[j];
                     ch1Meets= c<256 && bitset[c];
//System.out.println("    ch1Meets="+ch1Meets);
                  }
                  if(ch1Meets^inv) break;
                  
                  //if(i>=offset && i=offset && j=offset){
                     c= data[j];
                     boolean[] bits=bitset2[c>>8];
                     ch1Meets= bits!=null && bits[c&0xff];
                  }
                  if(ch1Meets^inv) break;
                  
                  //if(i>=offset && i>8];
                     ch2Meets= bits!=null && bits[c&0xff];
                  }
                  if(!ch2Meets^inv) break;
                  
                  term=term.next;
                  continue matchHere;
               }
               case Term.REG:{
                  MemReg mr=memregs[term.memreg];
                  int sampleOffset=mr.in;
                  int sampleOutside=mr.out;
                  int rLen;
                  if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){
                     break;
                  }
                  else if(rLen==0){
                     term=term.next;
                     continue matchHere;
                  }
                  
                  // don't prevent us from reaching the 'end'
                  if((i+rLen)>end) break;
                  
                  if(compareRegions(data,sampleOffset,i,rLen,end)){
                     i+=rLen;
                     term=term.next;
                     continue matchHere;
                  }
                  break;
               }
               case Term.REG_I:{
                  MemReg mr=memregs[term.memreg];
                  int sampleOffset=mr.in;
                  int sampleOutside=mr.out;
                  int rLen;
                  if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){
                     break;
                  }
                  else if(rLen==0){
                     term=term.next;
                     continue matchHere;
                  }
                  
                  // don't prevent us from reaching the 'end'
                  if((i+rLen)>end) break;
                  
                  if(compareRegionsI(data,sampleOffset,i,rLen,end)){
                     i+=rLen;
                     term=term.next;
                     continue matchHere;
                  }
                  break;
               }
               case Term.REPEAT_0_INF:{
//System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
                  //i+=(cnt=repeat(data,i,end,term.target));
                  if((cnt=repeat(data,i,end,term.target))<=0){
                     term=term.next;
                     continue;
                  }
                  i+=cnt;
                  
                  //branch out the backtracker (that is term.failNext, see Term.make*())
                  actual.cnt=cnt;
                  actual.term=term.failNext;
                  actual.index=i;
                  actual=(top=actual).on;
                  if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                  }
                  term=term.next;
                  continue;
               }
               case Term.REPEAT_MIN_INF:{
//System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
                  cnt=repeat(data,i,end,term.target);
                  if(cnt0 && compareRegions(data,i,sampleOffset,bitset,end)){
                     cnt++;
                     i+=bitset;
                     countBack--;
                  }
                  
                  if(cnt0){
                     cnt--;
                     i--;
                     actual.cnt=cnt;
                     actual.index=i;
                     actual.term=term;
                     actual=(top=actual).on;
                     if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                     }
                     term=term.next;
                     continue;
                  }
                  else break;
               
               case Term.BACKTRACK_MIN:
//System.out.println("<<");
                  cnt=actual.cnt;
                  if(cnt>term.minCount){
                     cnt--;
                     i--;
                     actual.cnt=cnt;
                     actual.index=i;
                     actual.term=term;
                     actual=(top=actual).on;
                     if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                     }
                     term=term.next;
                     continue;
                  }
                  else break;
               
               case Term.BACKTRACK_FIND_MIN:{
//System.out.print("<<<[cnt=");
                  cnt=actual.cnt;
//System.out.print(cnt+", minCnt=");
//System.out.print(term.minCount+", target=");
//System.out.print(term.target+"]");
                  int minCnt;
                  if(cnt>(minCnt=term.minCount)){
                     int start=i+term.distance;
                     if(start>end){
                        int exceed=start-end;
                        cnt-=exceed;
                        if(cnt<=minCnt) break;
                        i-=exceed;
                        start=end;
                     }
                     int back=findBack(data,i+term.distance,cnt-minCnt,term.target);
//System.out.print("[back="+back+"]");
                     if(back<0) break;
                     
                     //cnt-=back;
                     //i-=back;
                     if((cnt-=back)<=minCnt){
                        i-=back;
                        if(term.eat)i++;
                        term=term.next;
                        continue;
                     }
                     i-=back;
                     
                     actual.cnt=cnt;
                     actual.index=i;
                     
                     if(term.eat)i++;
                     
                     actual.term=term;
                     actual=(top=actual).on;
                     if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                     }
                     term=term.next;
                     continue;
                  }
                  else break;
               }
               
               case Term.BACKTRACK_FINDREG_MIN:{
//System.out.print("<<<[cnt=");
                  cnt=actual.cnt;
//System.out.print(cnt+", minCnt=");
//System.out.print(term.minCount+", target=");
//System.out.print(term.target);
//System.out.print("reg=<"+memregs[term.target.memreg].in+","+memregs[term.target.memreg].out+">]");
                  int minCnt;
                  if(cnt>(minCnt=term.minCount)){
                     int start=i+term.distance;
                     if(start>end){
                        int exceed=start-end;
                        cnt-=exceed;
                        if(cnt<=minCnt) break;
                        i-=exceed;
                        start=end;
                     }
                     MemReg mr=memregs[term.target.memreg];
                     int sampleOff=mr.in;
                     int sampleLen=mr.out-sampleOff;
                     //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg);
                     //int back=findBackReg(data,i+term.distance,sampleOff,sampleLen,cnt-minCnt,term.target,end);
                     //if(back<0) break;
                     /*@since 1.2*/
                     int back;
                     if(sampleOff<0 || sampleLen<0){ 
                     //the group is not def., as in the case of '(\w+)\1'
                     //treat as usual BACKTRACK_MIN
                        cnt--;
                        i--;
                        actual.cnt=cnt;
                        actual.index=i;
                        actual.term=term;
                        actual=(top=actual).on;
                        if(actual==null){
                           actual=new SearchEntry();
                           top.on=actual;
                           actual.sub=top;
                        }
                        term=term.next;
                        continue;
                     }
                     else if(sampleLen==0){
                        back=-1;
                     }
                     else{
                        back=findBackReg(data,i+term.distance,sampleOff,sampleLen,cnt-minCnt,term.target,end);
//System.out.print("[back="+back+"]");
                        if(back<0) break;
                     }
                     cnt-=back;
                     i-=back;
                     actual.cnt=cnt;
                     actual.index=i;
                     
                     if(term.eat)i+=sampleLen;
                     
                     actual.term=term;
                     actual=(top=actual).on;
                     if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                     }
                     term=term.next;
                     continue;
                  }
                  else break;
               }
               
               case Term.BACKTRACK_REG_MIN:
//System.out.println("<<");
                  cnt=actual.cnt;
                  if(cnt>term.minCount){
                     regLen=actual.regLen;
                     cnt--;
                     i-=regLen;
                     actual.cnt=cnt;
                     actual.index=i;
                     actual.term=term;
                     //actual.regLen=regLen;
                     actual=(top=actual).on;
                     if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                     }
                     term=term.next;
                     continue;
                  }
                  else break;
               
               case Term.GROUP_IN:{
                  memreg=term.memreg;
                  //memreg=0 is a regex itself; we don't need to handle it
                  //because regex bounds already are in wOffset and wEnd
                  if(memreg>0){
                     //MemReg mr=memregs[memreg];
                     //saveMemregState((top!=null)? top: defaultEntry,memreg,mr);
                     //mr.in=i;
                     
                     memregs[memreg].tmp=i; //assume
                  }
                  term=term.next;
                  continue;
               }
               case Term.GROUP_OUT:
                  memreg=term.memreg;
                  //see above
                  if(memreg>0){
                     //if(term.saveState)saveMemregState((top!=null)? top: defaultEntry,memreg,memregs);
                     
                     MemReg mr=memregs[memreg];
                     SearchEntry.saveMemregState((top!=null)? top: defaultEntry,memreg,mr);
                     mr.in=mr.tmp; //commit
                     mr.out=i;
                  }
                  term=term.next;
                  continue;
               
               case Term.PLOOKBEHIND_IN:{
                  int tmp=i-term.distance;
                  if(tmp0;c--,p1--,p2--){
         if(arr[p1]!=arr[p2]){
//System.out.println(" : no");
         	  return false;
         }
      }
//System.out.println(" : yes");
      return true;
   }
   
   /**
    * Case-insensitive comparison of two equally long regions of one array.
    * A character of the first region is accepted when it equals the lower-case,
    * upper-case or title-case form of the corresponding character of the second
    * region. Characters are visited from the last position towards the first.
    *
    * @param arr  the array holding both regions
    * @param off1 index of the first character of the first region
    * @param off2 index of the first character of the second region
    * @param len  number of characters to compare
    * @param out  exclusive upper bound; if the last index of either region is
    *             {@code >= out} the regions are reported as different
    * @return {@code true} if every character pair matches (always the case for a
    *         non-positive {@code len} once the bound check has passed)
    */
   private static final boolean compareRegionsI(char[] arr, int off1, int off2, int len,int out){
      final int last1=off1+len-1;
      final int last2=off2+len-1;
      // a region whose last character lies at or beyond 'out' can never match
      if(last1>=out || last2>=out) return false;

      for(int k=0;k<len;k++){
         final char sample=arr[last1-k];
         final char probe=arr[last2-k];
         final boolean same= sample==Character.toLowerCase(probe)
                          || sample==Character.toUpperCase(probe)
                          || sample==Character.toTitleCase(probe);
         if(!same) return false;
      }
      return true;
   }
   
   /**
    * Repeat while matches: advances a scan index from {@code off} and reports
    * how far it got.
    *
    * NOTE(review): in this copy of the file the body is incomplete - the
    * statement {@code while(i>8];} is not valid Java and only a
    * {@code Term.CHAR} case label is visible although the code under it indexes
    * a two-level boolean table. Text appears to have been lost between the loop
    * headers and the table lookups; restore it from the upstream source before
    * relying on this method.
    *
    * @param data characters being scanned
    * @param off  index at which scanning starts
    * @param out  presumably the exclusive end of the scanned range - confirm
    *             against the restored loop conditions
    * @param term the term to repeat; its {@code type} selects the scanning strategy
    * @return {@code i-off}, the distance the scan index advanced from {@code off}
    * @throws Error if {@code term.type} is not handled by the switch
    */
   private static final int repeat(char[] data,int off,int out,Term term){
//System.out.print("off="+off+", out="+out+", term="+term);
      switch(term.type){
         case Term.CHAR:{
            char c=term.c;
            int i=off;
            // NOTE(review): source text is missing on the next line (see method comment)
            while(i>8];
               if(arr!=null && arr[c&0xff]) break;
               else i++;
            }
            // NOTE(review): source text is missing on the next line as well
            else while(i>8];
               if(arr!=null && arr[c&0xff]) i++;
               else break;
            }
            return i-off;
         }
      }
      throw new Error("this kind of term can't be quantified:"+term.type);
   }
   
   /**
    * Repeat while doesn't match: advances a scan index from {@code off} and
    * reports how far it got.
    *
    * NOTE(review): in this copy of the file the body is incomplete - the
    * statement {@code while(i>8];} is not valid Java and only a
    * {@code Term.CHAR} case label is visible although the code under it indexes
    * a two-level boolean table. Text appears to have been lost between the loop
    * headers and the table lookups; restore it from the upstream source before
    * relying on this method.
    *
    * @param data characters being scanned
    * @param off  index at which scanning starts
    * @param out  scanning is refused ({@code -1}) when {@code off>=out}
    * @param term the term to look for; its {@code type} selects the scanning strategy
    * @return {@code -1} if {@code off>=out}, otherwise {@code i-off}, the distance
    *         the scan index advanced from {@code off}
    * @throws IllegalArgumentException if {@code term.type} is not handled by the switch
    */
   private static final int find(char[] data,int off,int out,Term term){
//System.out.print("off="+off+", out="+out+", term="+term);
      // nothing left to scan
      if(off>=out) return -1;
      switch(term.type){
         case Term.CHAR:{
            char c=term.c;
            int i=off;
            // NOTE(review): source text is missing on the next line (see method comment)
            while(i>8];
               if(arr!=null && arr[c&0xff]) break;
               else i++;
            }
            // NOTE(review): source text is missing on the next line as well
            else while(i>8];
               if(arr!=null && arr[c&0xff]) i++;
               else break;
            }
            return i-off;
         }
      }
      throw new IllegalArgumentException("can't seek this kind of term:"+term.type);
   }
   
   
   private static final int findReg(char[] data,int off,int regOff,int regLen,Term term,int out){
//System.out.print("off="+off+", out="+out+", term="+term);
      if(off>=out) return -1;
      int i=off;
      if(term.type==Term.REG){
         while(i255 || !arr[c]) break;
               if(i<=iMin) return -1;
            }
            return off-i;
         }
         case Term.BITSET2:{
            boolean[][] bitset2=term.bitset2;
            int i=off;
            char c;
            int iMin=off-maxCount;
            if(!term.inverse) for(;;){
               boolean[] arr=bitset2[(c=data[--i])>>8];
               if(arr!=null && arr[c&0xff]) break;
               if(i<=iMin) return -1;
            }
            else for(;;){
               boolean[] arr=bitset2[(c=data[--i])>>8];
               if(arr==null || arr[c&0xff]) break;
               if(i<=iMin) return -1;
            }
            return off-i;
         }
      }
      throw new IllegalArgumentException("can't find this kind of term:"+term.type);
   }
   
   /**
    * Walks backwards from {@code off}, one character at a time, looking for a
    * position at which the sample region {@code [regOff, regOff+regLen)} of
    * {@code data} occurs again. The first character of the sample is tested
    * inline; the remaining {@code regLen-1} characters are delegated to
    * {@code compareRegions} (for {@code Term.REG}) or {@code compareRegionsI}
    * (for {@code Term.REG_I}).
    *
    * Callers are expected to have handled {@code regLen==0} and
    * {@code maxCount==0} themselves.
    *
    * @param data     characters being scanned
    * @param off      index just past the first candidate position
    * @param regOff   index of the first character of the sample region
    * @param regLen   length of the sample region
    * @param maxCount the scan gives up once the index has reached {@code off-maxCount}
    * @param term     target term; only its {@code type} is consulted
    * @param out      bound forwarded to the region comparison helpers
    * @return {@code off-i} for the position {@code i} where the sample was found,
    *         or {@code -1} if the scan limit was reached first
    * @throws IllegalArgumentException if the term is neither {@code REG} nor {@code REG_I}
    */
   private static final int findBackReg(char[] data,int off,int regOff,int regLen,int maxCount,Term term,int out){
      final boolean exact= term.type==Term.REG;
      if(!exact && term.type!=Term.REG_I){
         throw new IllegalArgumentException("wrong findBackReg() target type :"+term.type);
      }

      final int lowest=off-maxCount;   // last index the scan is allowed to reach
      final int tailOff=regOff+1;      // sample without its first character
      final int tailLen=regLen-1;
      int pos=off;

      if(exact){
         /*@since 1.2*/
         final char head=data[regOff];
         do{
            pos--;
            if(data[pos]==head && compareRegions(data,pos+1,tailOff,tailLen,out)){
               return off-pos;
            }
         }while(pos>lowest);
         return -1;
      }

      /*@since 1.2*/
      final char raw=data[regOff];
      final char headLower=Character.toLowerCase(raw);
      final char headUpper=Character.toUpperCase(raw);
      final char headTitle=Character.toTitleCase(raw);
      do{
         pos--;
         final char cur=data[pos];
         final boolean headMatches= cur==headLower || cur==headUpper || cur==headTitle;
         if(headMatches && compareRegionsI(data,pos+1,tailOff,tailLen,out)){
            return off-pos;
         }
      }while(pos>lowest);
      return -1;
   }
   
   public String toString_d(){
      StringBuffer s=new StringBuffer();
      s.append("counters: ");
      s.append(counters==null? 0: counters.length);

      s.append("\r\nmemregs: ");
      s.append(memregs.length);
      for(int i=0;i0) on.reset(restQueue-1);
         else{
            this.on=null;
            on.sub=null;
         }
      }
      //sub=on=null;      
   }
}

/**
 * Bounds of one memory register (captured group) inside the matched data.
 * A value of {@code -1} in {@code in}/{@code out} marks the register as not set.
 */
class MemReg{
   /** Number of this register, as given to the constructor. */
   int index;

   /** Start offset of the captured region, {@code -1} while unset. */
   int in=-1;
   /** Offset just past the captured region, {@code -1} while unset. */
   int out=-1;
   /** Tentative start offset assumed at GROUP_IN; copied into {@code in} on commit. */
   int tmp=-1;

   /**
    * @param index the number of this register
    */
   MemReg(int index){
      this.index=index;
   }

   /**
    * Marks the register as unset. Only {@code in} and {@code out} are cleared;
    * {@code tmp} keeps its value.
    */
   void reset(){
      in=-1;
      out=-1;
   }
}

/**
 * Plain holder for an index together with two {@code SearchEntry} references.
 * NOTE(review): presumably a saved state for a look-ahead/look-behind construct
 * (name "LA") - confirm against the code that fills it in.
 */
class LAEntry{
   /** Saved index. */
   int index;
   /** Saved {@code top} search entry. */
   SearchEntry top;
   /** Saved {@code actual} search entry. */
   SearchEntry actual;
}    

    

    
            
    
            

    
        
            
                Related Artifacts
                
                     mysql-connector-java mysql
 facebook-messenger com.github.codedrinker
 selenium-java org.seleniumhq.selenium
 instagram-java com.github.sola92
 gson com.google.code.gson
 poi org.apache.poi
 httpclient org.apache.httpcomponents
 json org.json
 facebook-java-api com.google.code.facebook-java-api
 poi-ooxml org.apache.poi
 jackson-databind com.fasterxml.jackson.core
 junit junit
 primefaces org.primefaces
 ojdbc7 com.github.noraui
 jfoenix com.jfoenix
 testng org.testng
 json-simple com.googlecode.json-simple
 selenium-server org.seleniumhq.selenium
 itextpdf com.itextpdf
 spring-core org.springframework
                
            
        
        
            
                Related Groups
                
                     org.springframework
 org.apache.poi
 org.hibernate
 org.springframework.boot
 com.fasterxml.jackson.core
 com.itextpdf
 org.seleniumhq.selenium
 mysql
 org.finos.legend.engine
 org.apache.httpcomponents
 org.apache.logging.log4j
 org.openjfx
 org.apache.commons
 org.json
 com.google.guava
 com.google.zxing
 net.sf.jasperreports
 javax.xml.bind
 ojdbc
 com.google.code.facebook-java-api