All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpmml.translator.TextIndexUtil Maven / Gradle / Ivy

/*
 * Copyright (c) 2020 Villu Ruusmann
 *
 * This file is part of JPMML-Transpiler
 *
 * JPMML-Transpiler is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * JPMML-Transpiler is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with JPMML-Transpiler.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.jpmml.translator;

import java.util.Map;

import com.sun.codemodel.JAssignment;
import com.sun.codemodel.JExpression;
import com.sun.codemodel.JInvocation;
import com.sun.codemodel.JVar;
import org.dmg.pmml.DataType;
import org.dmg.pmml.DerivedField;
import org.dmg.pmml.OpType;
import org.dmg.pmml.TextIndex;
import org.jpmml.evaluator.TextUtil;
import org.jpmml.evaluator.TokenizedString;

/**
 * Static helpers for working with {@link TextIndex} elements and
 * term frequency (Tf) / term frequency-inverse document frequency (TfIdf) function invocations
 * during translation.
 *
 * This class is not instantiable.
 */
public class TextIndexUtil {

	private TextIndexUtil(){
	}

	/**
	 * Creates a new {@link TextIndex} that carries over the settings of the specified one.
	 *
	 * The following settings are copied: local term weights, case sensitivity, maximum Levenshtein distance,
	 * hit counting mode, word regular expression, word separator character regular expression, tokenization flag
	 * and text index normalizations (if any).
	 * The second constructor argument of the new element is left as <code>null</code>.
	 *
	 * @param textIndex The text index to copy settings from.
	 * @param name The first constructor argument of the new text index (presumably the name of its text field - confirm against the {@link TextIndex} constructor).
	 *
	 * @return The new text index.
	 */
	static
	public TextIndex toLocalTextIndex(TextIndex textIndex, String name){
		String wordRE = textIndex.getWordRE();

		// The word separator pattern is only carried over when there is no word pattern
		String wordSeparatorCharacterRE = (wordRE != null ? null : textIndex.getWordSeparatorCharacterRE());

		TextIndex result = new TextIndex(name, null)
			.setLocalTermWeights(textIndex.getLocalTermWeights())
			.setCaseSensitive(textIndex.isCaseSensitive())
			.setMaxLevenshteinDistance(textIndex.getMaxLevenshteinDistance())
			.setCountHits(textIndex.getCountHits())
			.setWordRE(wordRE)
			.setWordSeparatorCharacterRE(wordSeparatorCharacterRE)
			.setTokenize(textIndex.isTokenize());

		if(textIndex.hasTextIndexNormalizations()){
			(result.getTextIndexNormalizations()).addAll(textIndex.getTextIndexNormalizations());
		}

		return result;
	}

	/**
	 * Emits, via the translation context, the code that normalizes (when the text index declares normalizations)
	 * and tokenizes the text field value, and then computes its term frequency table
	 * using <code>TextUtil#termFrequencyTable</code>.
	 *
	 * @param assignmentTargetVar The variable to assign the term frequency table to.
	 * If <code>null</code>, then a new variable named after the text variable (suffix <code>FrequencyTable</code>) is declared instead.
	 * @param textIndex The text index, which supplies the text field and tells if normalizations are present.
	 * @param textIndexExpr The expression that is passed as the text index argument to the generated <code>TextUtil</code> invocations.
	 * @param vocabularyExpr The expression that is passed as the vocabulary argument to the generated <code>termFrequencyTable</code> invocation.
	 * @param maxLength The value that is passed as the last argument to the generated <code>termFrequencyTable</code> invocation.
	 * @param context The translation context that receives generated statements and declarations.
	 *
	 * @return The <code>termFrequencyTable</code> invocation expression if the assignment target variable was specified,
	 * the newly declared term frequency table variable otherwise.
	 */
	static
	public JExpression computeTermFrequencyTable(JVar assignmentTargetVar, TextIndex textIndex, JExpression textIndexExpr, JExpression vocabularyExpr, int maxLength, TranslationContext context){
		// XXX
		// A synthetic categorical string field stands in for the text field
		DerivedField textField = new DerivedField(textIndex.requireTextField(), OpType.CATEGORICAL, DataType.STRING, null);

		FieldInfo textFieldInfo = new FieldInfo(textField);

		StringRef textRef = (StringRef)context.ensureOperable(textFieldInfo, (method) -> true);

		JVar textVar = (JVar)textRef.getExpression();

		// The normalized text overwrites the text variable in place
		if(textIndex.hasTextIndexNormalizations()){
			JInvocation normalizeInvocation = context.staticInvoke(TextUtil.class, "normalize", textIndexExpr, textVar);

			context.add((JAssignment)textVar.assign(normalizeInvocation));
		}

		JInvocation tokenizeInvocation = context.staticInvoke(TextUtil.class, "tokenize", textIndexExpr, textVar);

		JVar textTokensVar = context.declare(context.ref(TokenizedString.class), textVar.name() + "Tokens", tokenizeInvocation);

		JInvocation termFrequencyTableInvocation = context.staticInvoke(TextUtil.class, "termFrequencyTable", textIndexExpr, textTokensVar, vocabularyExpr, maxLength);

		if(assignmentTargetVar != null){
			context.add((JAssignment)assignmentTargetVar.assign(termFrequencyTableInvocation));

			// NOTE(review): the invocation expression, not the assigned variable, is returned here - confirm that callers do not emit it for a second time
			return termFrequencyTableInvocation;
		}

		return context.declare(context.genericRef(Map.class, TokenizedString.class, Integer.class), textVar.name() + "FrequencyTable", termFrequencyTableInvocation);
	}

	/**
	 * Views the specified function invocation as a Tf function invocation.
	 *
	 * @param functionInvocation A Tf or TfIdf function invocation.
	 *
	 * @return The argument itself if it is a Tf function invocation,
	 * the result of {@link FunctionInvocation.TfIdf#getTf()} if it is a TfIdf function invocation.
	 *
	 * @throws IllegalArgumentException If the argument is neither a Tf nor a TfIdf function invocation (including <code>null</code>).
	 */
	static
	public FunctionInvocation.Tf asTf(FunctionInvocation functionInvocation){

		if(functionInvocation instanceof FunctionInvocation.Tf){
			return (FunctionInvocation.Tf)functionInvocation;
		} else

		if(functionInvocation instanceof FunctionInvocation.TfIdf){
			FunctionInvocation.TfIdf tfIdf = (FunctionInvocation.TfIdf)functionInvocation;

			return tfIdf.getTf();
		}

		throw unsupportedFunctionInvocation(functionInvocation);
	}

	/**
	 * Views the specified function invocation as a TfIdf function invocation.
	 *
	 * @param functionInvocation A Tf or TfIdf function invocation.
	 *
	 * @return The argument itself if it is a TfIdf function invocation,
	 * a wrapper with a constant weight of <code>1</code> if it is a Tf function invocation.
	 *
	 * @throws IllegalArgumentException If the argument is neither a Tf nor a TfIdf function invocation (including <code>null</code>).
	 */
	static
	public FunctionInvocation.TfIdf asTfIdf(FunctionInvocation functionInvocation){

		if(functionInvocation instanceof FunctionInvocation.Tf){
			FunctionInvocation.Tf tf = (FunctionInvocation.Tf)functionInvocation;

			// Promote the plain Tf to a TfIdf whose weight is the constant one
			return new FunctionInvocation.TfIdf(){

				@Override
				public FunctionInvocation.Tf getTf(){
					return tf;
				}

				@Override
				public Number getWeight(){
					return 1;
				}
			};
		} else

		if(functionInvocation instanceof FunctionInvocation.TfIdf){
			return (FunctionInvocation.TfIdf)functionInvocation;
		}

		throw unsupportedFunctionInvocation(functionInvocation);
	}

	/**
	 * Builds the exception for a function invocation that is neither Tf nor TfIdf, naming its runtime class in the message.
	 */
	static
	private IllegalArgumentException unsupportedFunctionInvocation(FunctionInvocation functionInvocation){
		String type = (functionInvocation != null ? (functionInvocation.getClass()).getName() : "null");

		return new IllegalArgumentException("Expected a Tf or TfIdf function invocation, got " + type);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy