![JAR search and dependency download from the Maven repository](/logo.png)
org.cogroo.util.TextUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cogroo-ann Show documentation
Show all versions of cogroo-ann Show documentation
An API with pipes and annotators for NLP
/**
* Copyright (C) 2012 cogroo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cogroo.util;
import java.util.Arrays;
import java.util.List;
import org.cogroo.config.Analyzers;
import org.cogroo.text.Document;
import org.cogroo.text.Sentence;
import org.cogroo.text.Token;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
/**
* The TextUtils
class deals with the code prints.
*/
public class TextUtils {
public static String[] tokensToString(List tokens) {
String[] tokensString = new String[tokens.size()];
for (int i = 0; i < tokens.size(); i++) {
tokensString[i] = tokens.get(i).getLexeme();
}
return tokensString;
}
public static String[][] additionalContext(List tokens,
List analyzers) {
String[][] additionalContext = new String[analyzers.size()][tokens.size()];
for (int i = 0; i < analyzers.size(); i++) {
for (int j = 0; j < tokens.size(); j++) {
Object object = tokens.get(j).getAdditionalContext(analyzers.get(i));
if (object == null)
additionalContext[i][j] = null;
else
additionalContext[i][j] = (String) object;
}
}
return additionalContext;
}
/**
* @return the String
to be printed
*/
public static String nicePrint(Document document) {
boolean printAdditionalContext = false;
StringBuilder output = new StringBuilder();
output.append("Document text: ").append(document.getText()).append("\n\n");
if (document.getSentences() != null) {
int cont = 0;
for (Sentence sentence : document.getSentences()) {
cont++;
output.append("{Sentence ").append(cont).append(": ")
.append(sentence.getText()).append("\n");
List tokens = sentence.getTokens();
String format;
Joiner joiner = Joiner.on(", ");
if (tokens != null) {
String[] lexemes = new String[tokens.size()];
String[] posTags = new String[tokens.size()];
String[] features = new String[tokens.size()];
String[] lemmas = new String[tokens.size()];
String[] chunks = new String[tokens.size()];
String[] schunks = new String[tokens.size()];
output.append(" (token, class tag, feature tag, lexeme, chunks, function)\n");
for (int i = 0; i < tokens.size(); i++) {
Token t = tokens.get(i);
lexemes[i] = Strings.nullToEmpty(t.getLexeme());
posTags[i] = Strings.nullToEmpty(t.getPOSTag());
features[i] = Strings.nullToEmpty(t.getFeatures());
if (t.getLemmas() != null)
lemmas[i] = joiner.join(t.getLemmas());
else
lemmas[i] = "";
String head = "";
if(t.isChunkHead()) {
head = "*";
}
chunks[i] = t.getChunkTag() + head;
schunks[i] = t.getSyntacticTag();
}
format = " | %-" + maxSize(lexemes) + "s | %-" + maxSize(posTags)
+ "s | %-" + maxSize(features) + "s | %-" + maxSize(lemmas)
+ "s | %-" + maxSize(chunks) + "s | %-" + maxSize(schunks)
+ "s |\n";
for (int i = 0; i < tokens.size(); i++) {
output.append(String.format(format, lexemes[i], posTags[i],
features[i], lemmas[i], chunks[i], schunks[i]));
}
output.append("\n");
if (printAdditionalContext) {
String[][] addcontext = TextUtils.additionalContext(tokens, Arrays
.asList(Analyzers.CONTRACTION_FINDER, Analyzers.NAME_FINDER));
for (String[] line : addcontext) {
for (String col : line) {
output.append("[");
if (col == null) {
output.append("-");
} else {
output.append(col);
}
output.append("]");
}
output.append("\n");
}
}
}
output.append(" Syntax tree: \n ");
output.append(sentence.asTree().toSyntaxTree());
output.append("\n}\n");
}
}
return output.toString();
}
private static int maxSize(String[] string) {
int size = 1;
for (String string2 : string) {
if (size < string2.length()) {
size = string2.length();
}
}
return size;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy