All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.epam.deltix.util.text.WordMatcherBuilder Maven / Gradle / Ivy

/*
 * Copyright 2021 EPAM Systems, Inc
 *
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership. Licensed under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.epam.deltix.util.text;

import com.epam.deltix.util.collections.EmptyEnumeration;
import java.util.*;

/**
 *  Builds a decision tree for matching words from the given set.
 *  This matches about 13,600 times faster than java regular expressions.
 *  <p>
 *  Words are accumulated with {@link #add}; {@link #compile} then produces
 *  a compiled {@link WordMatcher}. The builder itself also implements
 *  {@link WordMatcher} by walking the tree directly, which is slower and
 *  is intended for testing.
 */
public class WordMatcherBuilder implements WordMatcher {
    /** Root of the decision tree; every added word is inserted from here. */
    private final Node  mRoot = new Node ();

    /** Length of the longest word added so far, or -1 if nothing was added. */
    private int         mMaxLength = -1;

    /**
     *  Creates an empty builder with no words in its vocabulary.
     */
    public WordMatcherBuilder () {
    }

    /**
     *  Creates a builder and adds every element of the supplied collection.
     *
     *  @param css      Words to add to the vocabulary.
     */
    public WordMatcherBuilder (Collection <? extends CharSequence> css) {
        for (CharSequence cs : css)
            add (cs);
    }

    /**
     *  Adds a word to the vocabulary and updates the tracked maximum
     *  word length.
     *
     *  @param s        Word to add.
     */
    public void             add (CharSequence s) {
        mMaxLength = Math.max (mMaxLength, s.length ());
        mRoot.add (s, 0);
    }

    /**
     *  Dumps the tree by delegating to the root node, passing an empty
     *  string as the initial argument (presumably a prefix or indent;
     *  confirm in Node).
     */
    public void             dump () {
        mRoot.dump ("");
    }

    /**
     *  Compiles the current tree directly into a {@link WordMatcher32},
     *  without attempting the 16-bit form first.
     *
     *  @return         The compiled matcher.
     */
    WordMatcher             compile32 () {
        return (new WordMatcher32 (mRoot, mMaxLength));
    }

    /**
     *  Compiles the current tree into a matcher. A {@link WordMatcher16}
     *  is attempted first; if its construction throws
     *  {@link WordMatcher16.CodeTooBigException}, a {@link WordMatcher32}
     *  is built instead.
     *
     *  @return         The compiled matcher.
     */
    public WordMatcher      compile () {
        try {
            return (new WordMatcher16 (mRoot, mMaxLength));
        } catch (WordMatcher16.CodeTooBigException x) {
            // Deliberate fallback, not an error: use the 32-bit form.
            return (new WordMatcher32 (mRoot, mMaxLength));
        }
    }

    /**
     *  A relatively slow, interpreted version of matching logic. Used for testing.
     *  Matches the entire sequence.
     *
     *  @param s        String to match
     *  @return         Whether it matches the vocabulary.
     */
    public boolean          matches (CharSequence s) {
        return (matches (s, 0, s.length ()));
    }

    /**
     *  A relatively slow, interpreted version of matching logic. Used for testing.
     *
     *  @param s        Character sequence containing the text to match
     *  @param offset   Index in {@code s} at which the text to match starts
     *  @param length   Length of the text to match
     *  @return         Whether it matches the vocabulary.
     */
    public boolean          matches (CharSequence s, int offset, int length) {
        return (mRoot.match (s, offset, length));
    }

    /**
     *  A relatively slow, interpreted version of matching logic. Used for testing.
     *
     *  @param bytes    Byte array containing the text to match
     *  @param offset   Index in {@code bytes} at which the text to match starts
     *  @param length   Length of the text to match
     *  @return         Whether it matches the vocabulary.
     */
    public boolean          matches (byte [] bytes, int offset, int length) {
        return (mRoot.match (bytes, offset, length));
    }

    /**
     *  Enumerates the words held by this builder.
     *
     *  @return         An empty enumeration if no word has been added yet,
     *                  otherwise an enumeration over the tree.
     */
    public Enumeration <CharSequence>   vocabulary () {
        // mMaxLength stays at -1 until the first add () call.
        if (mMaxLength < 0)
            return (new EmptyEnumeration <CharSequence> ());

        return (new NodeVocabularyEnumeration (mRoot, mMaxLength));
    }

    /**
     *  Small manual test: builds a vocabulary, compiles it to the 32-bit
     *  matcher, then prints each vocabulary word followed by whether the
     *  compiled matcher accepts it.
     *
     *  @param args     Ignored.
     */
    public static void main (String [] args) {
        WordMatcherBuilder  wmb = new WordMatcherBuilder ();

        wmb.add ("");
        wmb.add ("a");
        wmb.add ("ba");
        wmb.add ("bx");
        wmb.add ("ab");
        wmb.add ("ass");

        WordMatcher m = wmb.compile32 ();

        for (Enumeration <CharSequence> e = m.vocabulary (); e.hasMoreElements (); ) {
            CharSequence cs = e.nextElement ();

            System.out.println (cs);
            System.out.println (m.matches (cs));
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy