All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.ctakes.sideeffect.util.PatternMatch Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.sideeffect.util;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regular expression pattern match class used in SideEffectAnnotator:
 * Constructor: 
 * PatternMatch(String expr, String in, List kw):
 *   "expr" is a regular expression. Note that each "KW" in it is replaced by the keywords of the kw argument joined with "|";
 *   "in" is a string to match;
 *   "kw" is a list of keywords;
 *   
 * PatternMatch(String expr, String in): 
 *   "expr" is a regular expression;
 *   "in" is a string to match.	
 *   
 * @author Mayo Clinic
 *
 */
public class PatternMatch {
    /**
     * Regex fragment substituted for "KW" when the keyword list is empty:
     * an empty negative lookahead, which can never match.
     */
    private static final String NEVER_MATCHES = "(?!)";

    /**
     * Matches a string that consists of exactly one connective: "and" or "or"
     * with one whitespace character on each side, or a comma that is bare,
     * followed by one whitespace character, or surrounded by one on each side.
     */
    private static final Pattern CONNECTIVE_ONLY =
            Pattern.compile("(\\sand\\s)|(\\sor\\s)|(,)|(,\\s)|(\\s,\\s)");

    /** Keywords substituted for "KW"; stays null when the two-argument constructor is used. */
    private List<String> keywords;
    /** The regular expression actually compiled into {@link #pat}. */
    private String regex;
    /** The text being matched; all begin/end offsets below index into this string. */
    private String input;
    /** Compiled form of the (possibly keyword-expanded) expression. */
    public Pattern pat;
    /** Matcher of {@link #pat} over the input; callers drive it directly (find(), group(), ...). */
    public Matcher mat;

    /**
     * Builds a matcher for {@code expr} after replacing every literal "KW" in it
     * with the keywords joined by "|", e.g. {@code (PSE).*(KW).*(DRUG)}.
     *
     * <p>The keywords are inserted verbatim, so regex syntax inside a keyword is
     * kept intact. With an empty keyword list "KW" becomes a fragment that never
     * matches, rather than failing while building the alternation.
     *
     * @param expr regular expression, optionally containing the placeholder "KW"
     * @param in   string to match against
     * @param kw   keyword alternatives for "KW"; must not be null
     */
    public PatternMatch(String expr, String in, List<String> kw) {
        StringBuilder alternation = new StringBuilder();
        boolean first = true;
        for (String keyword : kw) {
            if (!first) {
                alternation.append('|');
            }
            alternation.append(keyword);
            first = false;
        }
        String keywordRegex = kw.isEmpty() ? NEVER_MATCHES : alternation.toString();

        // String.replace is a literal substitution: unlike replaceAll it does not
        // interpret '\' and '$' in the replacement, which would corrupt keywords.
        regex = expr.replace("KW", keywordRegex);
        input = in;
        keywords = kw;
        pat = Pattern.compile(regex);
        mat = pat.matcher(input);
    }

    /**
     * Builds a matcher for {@code expr} as given, with no keyword substitution.
     *
     * @param expr regular expression
     * @param in   string to match against
     */
    public PatternMatch(String expr, String in) {
        regex = expr;
        input = in;
        pat = Pattern.compile(regex);
        mat = pat.matcher(input);
    }

    /**
     * Documented as: return true if "DRUG" is found in input.substring(begin, end).
     *
     * <p>NOTE(review): the pattern literal is ".*.*", which accepts any text that
     * has no line terminator. The marker token the documentation refers to seems
     * to be missing from the literal; confirm against the upstream source and
     * restore it.
     *
     * @param begin start offset in the input (inclusive)
     * @param end   end offset in the input (exclusive)
     * @return whether the substring matches the pattern
     */
    public boolean isDrugBetween(int begin, int end) {
        return input.substring(begin, end).matches(".*.*");
    }

    /**
     * Documented as: return true if "DRUG" is found in input.substring(begin, end),
     * storing the offsets of the first occurrence in {@code span}.
     *
     * <p>NOTE(review): the pattern literal is "()", which matches the empty string
     * at the start of the substring, so as written this returns true with
     * span = {begin, begin}. The marker token seems to be missing from the
     * literal; confirm against the upstream source and restore it.
     *
     * @param begin start offset in the input (inclusive)
     * @param end   end offset in the input (exclusive)
     * @param span  receives {start, end} of the first match, as offsets into the
     *              whole input; needs at least two elements
     * @return true if a match was found and {@code span} was filled in
     */
    public boolean isDrugBetween(int begin, int end, int[] span) {
        return firstMatchSpan("()", begin, end, span);
    }

    /**
     * Documented as: return true if "PSE" is found in input.substring(begin, end).
     *
     * <p>NOTE(review): the pattern literal is ".*.*", which accepts any text that
     * has no line terminator. The marker token the documentation refers to seems
     * to be missing from the literal; confirm against the upstream source and
     * restore it.
     *
     * @param begin start offset in the input (inclusive)
     * @param end   end offset in the input (exclusive)
     * @return whether the substring matches the pattern
     */
    public boolean isPseBetween(int begin, int end) {
        return input.substring(begin, end).matches(".*.*");
    }

    /**
     * Documented as: return true if "PSE" is found in input.substring(begin, end),
     * storing the offsets of the first occurrence in {@code span}.
     *
     * <p>NOTE(review): the pattern literal is "()", which matches the empty string
     * at the start of the substring, so as written this returns true with
     * span = {begin, begin}. The marker token seems to be missing from the
     * literal; confirm against the upstream source and restore it.
     *
     * @param begin start offset in the input (inclusive)
     * @param end   end offset in the input (exclusive)
     * @param span  receives {start, end} of the first match, as offsets into the
     *              whole input; needs at least two elements
     * @return true if a match was found and {@code span} was filled in
     */
    public boolean isPseBetween(int begin, int end, int[] span) {
        return firstMatchSpan("()", begin, end, span);
    }

    /**
     * Returns true unless input.substring(begin, end), after the marker removal
     * below, consists of exactly one connective ("and", "or" or a comma, with
     * the whitespace variants accepted by {@link #CONNECTIVE_ONLY}).
     *
     * <p>NOTE(review): the removal pattern is "|", which only matches empty
     * strings and therefore removes nothing. The original documentation says
     * DRUG and PSE are ignored, so the marker tokens seem to be missing from
     * the literal; confirm against the upstream source and restore them.
     *
     * @param begin start offset in the input (inclusive)
     * @param end   end offset in the input (exclusive)
     * @return false when only a single connective lies between the offsets,
     *         true otherwise
     */
    public boolean isDistantBetween(int begin, int end) {
        String between = input.substring(begin, end).replaceAll("|", "");
        return !CONNECTIVE_ONLY.matcher(between).matches();
    }

    /**
     * Finds the first match of {@code markerRegex} in input.substring(begin, end)
     * and, if there is one, writes its offsets into {@code span}.
     */
    private boolean firstMatchSpan(String markerRegex, int begin, int end, int[] span) {
        Matcher m = Pattern.compile(markerRegex).matcher(input.substring(begin, end));
        if (!m.find()) {
            return false;
        }
        // Matcher offsets are relative to the substring; shift them back to the input.
        span[0] = m.start() + begin;
        span[1] = m.end() + begin;
        return true;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy