All downloads are FREE. The search and download functionality uses the official Maven repository.

org.jhotdraw.samples.teddy.regex.Matcher Maven / Gradle / Ivy

The newest version!
/*
 * @(#)Matcher.java
 *
 * Copyright (c) 2007 by the original authors of JHotDraw and all its
 * contributors. All rights reserved.
 *
 * You may not use, copy or modify this file, except in compliance with the 
 * license agreement you entered into with the copyright holders. For details
 * see accompanying license terms.
 */
package org.jhotdraw.samples.teddy.regex;

import javax.swing.text.*;

/**
 * Searches for an occurence of a case (in)sensitive text on a document.
 * This is a rather slow implementation that does not use advanced techniques
 * such as Boyer-Moore.
 *
 * @author  Werner Randelshofer
 * @version $Id: Matcher.java 718 2010-11-21 17:49:53Z rawcoder $
 */
public class Matcher {
    /**
     * The document to be examined.
     */
    private Document document;
    /**
     * The string to be matched.
     */
    private String findString;
    /**
     * The start index for the next findNext operation.
     */
    private int startIndex;
    
    /**
     * The array of lower case matching chars.
     */
    private char[] matchLowerCase;
    
    /**
     * The array of upper case matching chars.
     */
    private char[] matchUpperCase;
    
    /**
     * The match type.
     */
    private MatchType matchType;
    
    /**
     * Creates a new instance of Matcher which
     * performs a case sensitive search.
     *
     * @param document The document to be examined
     * @param findString The string to be searched.
     */
    public Matcher(Document document, String findString) {
        this(document, findString, true, MatchType.CONTAINS);
    }
    
    /**
     * Creates a new instance of Matcher
     *
     * @param document The document to be examined
     * @param findString The string to be searched.
     * @param matchCase Set to true for case sensitive search.
     * @param matchType Sets the match type.
     */
    public Matcher(Document document, String findString, boolean matchCase, MatchType matchType) {
        this.document = document;
        this.findString = findString;
        startIndex = 0;
        
        // Convert to chars for efficiency
        if (matchCase) {
            matchLowerCase = matchUpperCase = findString.toCharArray();
        } else {
            matchUpperCase = findString.toUpperCase().toCharArray();
            matchLowerCase = findString.toLowerCase().toCharArray();
        }
        
        this.matchType = matchType;
    }
    
    public String getFindString() {
        return findString;
    }
    
    public boolean isMatchCase() {
        return matchLowerCase == matchUpperCase;
    }
    public MatchType getMatchType() {
        return matchType;
    }
    
    /**
     * Sets the start index for the findNext(), findPrevious() methods.
     */
    public void setStartIndex(int newValue) {
        startIndex = newValue;
    }
    
    /**
     * Resets this matcher and then attempts to find the next
     * subsequence of the input sequence that matches the pattern,
     * starting at the specified index.
     *
     * @param startIndex the index from which to start the search.
     * @return the index of the first occurrence of the search string,
     * starting at the specified offset, or -1 if no occurrence was found.
     */
    public int findNext(int startIndex) {
        this.startIndex = startIndex;
        return findNext();
    }
    /**
     * Attempts to find the next subsequence of the
     * input sequence that matches the pattern.
     * 

* This method starts at the beginning of * the input sequence or, if a previous invocation * of the method was successful and the matcher has not * since been reset, at the first character not matched by * the previous match. * * @return the index of the first occurrence of the search string, * starting at the specified offset, or -1 if no occurrence was found. */ public int findNext() { // Don't match empty strings and don't match if we are at the end of the document. if (findString.length() == 0 || document.getLength() - findString.length() < startIndex) { return -1; } try { int nextMatch = 0; // index of next matching character // Iterate through all segments of the document starting from offset Segment text = new Segment(); text.setPartialReturn(true); int offset = startIndex; int nleft = document.getLength() - startIndex; while (nleft > 0) { document.getText(offset, nleft, text); // Iterate through the characters in the current segment char next = text.first(); for (text.first(); next != Segment.DONE; next = text.next()) { // Check if the current character matches with the next // search character. char current = text.current(); if (current == matchUpperCase[nextMatch] || current == matchLowerCase[nextMatch]) { nextMatch++; // Did we match all search characters? if (nextMatch == matchLowerCase.length) { int foundIndex = text.getIndex() - text.getBeginIndex() + offset - matchLowerCase.length + 1; if (matchType == MatchType.CONTAINS) { return foundIndex; // break; <- never reached } else if (matchType == MatchType.STARTS_WITH) { if (! isWordChar(foundIndex - 1)) { return foundIndex; } } else if (matchType == MatchType.FULL_WORD) { if (! isWordChar(foundIndex - 1) && ! 
isWordChar(foundIndex + matchLowerCase.length)) { return foundIndex; } } nextMatch = 0; } } else { nextMatch = 0; } } // Move forward to the next segment nleft -= text.count; offset += text.count; } return -1; } catch (BadLocationException e) { throw new IndexOutOfBoundsException(); } } /** * Resets this matcher and then attempts to find the previous * subsequence of the input sequence that matches the pattern, * starting at the specified index. * * @param startIndex the index from which to start the search. * @return the index of the first occurrence of the search string, * starting at the specified offset, or -1 if no occurrence was found. */ public int findPrevious(int startIndex) { this.startIndex = startIndex; return findPrevious(); } /** * Attempts to find the previous subsequence of the * input sequence that matches the pattern. *

* This method starts at the beginning of * the input sequence or, if a previous invocation * of the method was successful and the matcher has not * since been reset, at the first character not matched by * the previous match. * * @return the index of the first occurrence of the search string, * starting at the specified offset, or -1 if no occurrence was found. */ public int findPrevious() { // Don't match empty strings and don't match if we are at the beginning of the document. if (findString.length() == 0 || startIndex < findString.length() - 1) { //System.out.println("too close to start"); return -1; } try { int nextMatch = matchLowerCase.length - 1; // index of next matching character // For simplicity, we request all text of the document in a single // segment. Segment text = new Segment(); text.setPartialReturn(false); document.getText(0, startIndex + 1, text); // Iterate through the characters in the current segment char previous = text.last(); //System.out.println("previus isch "+previous); for (text.last(); previous != Segment.DONE; previous = text.previous()) { // Check if the current character matches with the next // search character. char current = text.current(); if (current == matchUpperCase[nextMatch] || current == matchLowerCase[nextMatch]) { nextMatch--; //System.out.println("matched "+nextMatch); // Did we match all search characters? if (nextMatch == -1) { int foundIndex = text.getIndex() - text.getBeginIndex(); //System.out.println("found index:"+foundIndex); if (matchType == MatchType.CONTAINS) { return foundIndex; } else if (matchType == MatchType.STARTS_WITH) { if (! isWordChar(foundIndex - 1)) { return foundIndex; } } else if (matchType == MatchType.FULL_WORD) { if (! isWordChar(foundIndex - 1) && ! 
isWordChar(foundIndex + matchLowerCase.length)) { return foundIndex; } } nextMatch = matchLowerCase.length - 1; } } else { nextMatch = matchLowerCase.length - 1; } } return -1; } catch (BadLocationException e) { throw new IndexOutOfBoundsException(); } } /** * Resets the startIndex of the matcher to 0. */ public void reset() { startIndex = 0; } private boolean isWordChar(int index) { try { char ch = document.getText(index, 1).charAt(0); return Character.isLetterOrDigit(ch); } catch (BadLocationException e) { return false; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy