org.jpmml.evaluator.TextMatcher Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pmml-evaluator Show documentation
Show all versions of pmml-evaluator Show documentation
JPMML class model evaluator
The newest version!
/*
* Copyright (c) 2021 Villu Ruusmann
*
* This file is part of JPMML-Evaluator
*
* JPMML-Evaluator is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* JPMML-Evaluator is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with JPMML-Evaluator. If not, see <http://www.gnu.org/licenses/>.
*/
package org.jpmml.evaluator;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.dmg.pmml.PMMLObject;
import org.dmg.pmml.TextIndex;
/**
 * A {@link TextTokenizer} that produces tokens by repeatedly <em>matching</em> the pattern
 * against the input: every successive match of the pattern becomes one token.
 *
 * @see TextIndex#getWordRE()
 */
public class TextMatcher extends TextTokenizer {

	/**
	 * Creates a matcher from a regular expression source string.
	 *
	 * @param wordRE The regular expression; compiled via {@link RegExUtil#compile}.
	 * @param context The PMML object handed to {@link RegExUtil#compile} together with the expression.
	 */
	public TextMatcher(String wordRE, PMMLObject context){
		this(RegExUtil.compile(wordRE, context));
	}

	/**
	 * Creates a matcher from an already compiled pattern.
	 *
	 * @param pattern The pattern whose matches become tokens.
	 */
	public TextMatcher(Pattern pattern){
		super(pattern);
	}

	/**
	 * Splits the input into the sequence of substrings matched by the pattern.
	 *
	 * @param string The text to tokenize. Only the empty string is short-circuited;
	 * a {@code null} value is not treated specially and is passed on to {@link Pattern#matcher}.
	 *
	 * @return {@link TokenizedString#EMPTY} if the input is empty or contains no match,
	 * otherwise a new {@link TokenizedString} holding the matches in order of occurrence.
	 */
	@Override
	public TokenizedString tokenize(String string){
		Pattern pattern = getPattern();

		if(("").equals(string)){
			return TokenizedString.EMPTY;
		}

		Matcher matcher = pattern.matcher(string);

		// Probe for the first match before allocating anything,
		// so that a match-free input shares the EMPTY instance
		if(!matcher.find()){
			return TokenizedString.EMPTY;
		}

		// Capacity heuristic: roughly one token per four characters, but never fewer than 16 slots
		List<String> tokens = new ArrayList<>(Math.max(string.length() / 4, 16));

		// The first match has already been located above, hence do-while
		do {
			tokens.add(matcher.group());
		} while(matcher.find());

		return new TokenizedString(tokens);
	}
}