All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ontotext.kim.model.AliasTextTransformer Maven / Gradle / Ivy

The newest version!
package com.ontotext.kim.model;

import org.apache.commons.collections.Transformer;

/**
 * This class implements specific text normalization logic used by the class
 * ParsingFrame. It is intended to remove insignificant specifics
 * of the stored/searched aliases before their comparison. Such specifics
 * are:
* - Multiple white spaces but no more than 10 at a time.
* - The case of letters (only in case-insensitive mode).
*
* This class is left open for addition of other normalization procedures * if such need arises. * * @author danko * */ public class AliasTextTransformer implements Transformer { private final boolean toLower; private boolean isWs(char c) { return Character.isWhitespace(c); } private boolean isUc(char c) { return Character.isUpperCase(c); } private char toLc(char c) { return Character.toLowerCase(c); } public Object transform(Object input) { if (input == null) return ""; String in = input.toString(); if (in.length() == 0) return ""; //trim int i=0, j=in.length()-1; while (i < j && isWs(in.charAt(i))) ++i; while (j > i && isWs(in.charAt(j))) --j; boolean changed = !(i==0 && j==in.length()-1); // remove consecutive whitespace but max 10 of them int mergedWS = 0; StringBuilder strip = null; if (changed) { // If change case is detected create the change buff strip = new StringBuilder(j-i+1); } for (int k = i; k<=j ; k++) { char c = in.charAt(k); boolean isWs = isWs(c); // This block detects the first change. If no change // criterion is fulfilled - no string manipulations are done if (!changed) { if (isWs) { if (mergedWS>0) // detects WS merger case changed=true; else mergedWS=1; } else { if (toLower && isUc(c)) // detects to-lower case changed = true; else mergedWS=0; } if (changed) { // If change case is detected create the change buff strip = new StringBuilder(j-i+1); // append the string so far strip.append(in.substring(i,k)); } else continue; } if (isWs && mergedWS<10) { if (mergedWS < 1) strip.append(c); mergedWS++; } else { if (toLower) strip.append(toLc(c)); else strip.append(c); mergedWS=0; } } return (changed)? strip.toString(): in; } public AliasTextTransformer(boolean toLower) { super(); this.toLower = toLower; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy