All Downloads are FREE. Search and download functionalities are using the official Maven repository.

annis.CommonHelper Maven / Gradle / Ivy

There is a newer version: 4.0.0-beta.4
Show newest version
/*
 * Copyright 2011 SFB 632.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;

import com.google.common.base.Joiner;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.corpus_tools.salt.SALT_TYPE;
import org.corpus_tools.salt.SaltFactory;
import org.corpus_tools.salt.common.SCorpus;
import org.corpus_tools.salt.common.SCorpusGraph;
import org.corpus_tools.salt.common.SDocument;
import org.corpus_tools.salt.common.SDocumentGraph;
import org.corpus_tools.salt.common.SOrderRelation;
import org.corpus_tools.salt.common.STextualDS;
import org.corpus_tools.salt.common.STextualRelation;
import org.corpus_tools.salt.common.SToken;
import org.corpus_tools.salt.common.SaltProject;
import org.corpus_tools.salt.core.GraphTraverseHandler;
import org.corpus_tools.salt.core.SAnnotation;
import org.corpus_tools.salt.core.SFeature;
import org.corpus_tools.salt.core.SGraph;
import org.corpus_tools.salt.core.SGraph.GRAPH_TRAVERSE_TYPE;
import org.corpus_tools.salt.core.SLayer;
import org.corpus_tools.salt.core.SNode;
import org.corpus_tools.salt.core.SRelation;
import org.corpus_tools.salt.graph.Label;
import org.corpus_tools.salt.util.DataSourceSequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import annis.model.AnnisConstants;
import annis.service.objects.Match;

/**
 * Utility class for non-GUI operations.
 *
 * @author Thomas Krause {@literal }
 * @author Benjamin Weißenfels {@literal }
 */
public class CommonHelper {

    private final static Logger log = LoggerFactory.getLogger(CommonHelper.class);

    /**
     * Detects arabic characters in a string.
     *
     * 

* Every character is checked, if its bit representation lies between: * [1425, 1785] | [64286, 65019] | [65136, 65276] * *

* * @param str * The string to be checked. * @return returns true, if arabic characters are detected. */ public static boolean containsRTLText(String str) { if (str != null) { for (int i = 0; i < str.length(); i++) { char cc = str.charAt(i); // hebrew extended and basic, arabic basic and extendend if (cc >= 1425 && cc <= 1785) { return true; } // alphabetic presentations forms (hebrwew) to arabic presentation forms A else if (cc >= 64286 && cc <= 65019) { return true; } // arabic presentation forms B else if (cc >= 65136 && cc <= 65276) { return true; } } } return false; } /** * Calculates a {@link SOrderRelation} node chain of a {@link SDocumentGraph}. * *

* If no segmentation name is set, a list of sorted {@link SToken} will be * returned. *

* * @param segName * The segmentation name, for which the chain is computed. * @param graph * The salt document graph, which is traversed for the * segmentation. * * @return Returns a List of {@link SNode}, which is sorted by the * {@link SOrderRelation}. */ public static List getSortedSegmentationNodes(String segName, SDocumentGraph graph) { List token = new ArrayList(); if (segName == null) { // if no segmentation is given just return the sorted token list List unsortedToken = graph.getSortedTokenByText(); if (unsortedToken != null) { token.addAll(unsortedToken); } } else { // get the very first node of the order relation chain Set startNodes = new LinkedHashSet(); if (graph != null) { List orderRoots = graph.getRootsByRelation(SALT_TYPE.SORDER_RELATION); if (orderRoots != null) { // collect the start nodes of a segmentation chain of length 1 for (SNode n : orderRoots) { for (SRelation rel : n.getOutRelations()) { if (rel instanceof SOrderRelation) { // the type is the name of the relation if (segName.equals(rel.getType())) { startNodes.add(n); break; } } } } } } Set alreadyAdded = new HashSet(); // add all nodes on the order relation chain beginning from the start node for (SNode s : startNodes) { SNode current = s; while (current != null) { token.add(current); List> out = graph.getOutRelations(current.getId()); current = null; if (out != null) { for (SRelation e : out) { if (e instanceof SOrderRelation) { current = ((SOrderRelation) e).getTarget(); if (alreadyAdded.contains(current.getId())) { // abort if cycle detected current = null; } else { alreadyAdded.add(current.getId()); } break; } } } } } } return token; } public static Set getTokenAnnotationLevelSet(SDocumentGraph graph) { Set result = new TreeSet(); if (graph != null) { for (SToken n : graph.getTokens()) { for (SAnnotation anno : n.getAnnotations()) { result.add(anno.getQName()); } } } return result; } public static Set getTokenAnnotationLevelSet(SaltProject p) { Set result = new TreeSet(); for 
(SCorpusGraph corpusGraphs : p.getCorpusGraphs()) { for (SDocument doc : corpusGraphs.getDocuments()) { SDocumentGraph g = doc.getDocumentGraph(); result.addAll(getTokenAnnotationLevelSet(g)); } } return result; } /** * Gets the spannend/covered text for a token. This will get all * {@link STextualRelation} edges for a {@link SToken} from the * {@link SDocumentGraph} and calculates the appropiate substring from the * {@link STextualDS}. * * @param tok * The {@link SToken} which is overlapping the text sequence. * @return An empty {@link String} object, if there is no * {@link STextualRelation} */ public static String getSpannedText(SToken tok) { SGraph graph = tok.getGraph(); List> edges = graph.getOutRelations(tok.getId()); for (SRelation e : edges) { if (e instanceof STextualRelation) { STextualRelation textRel = (STextualRelation) e; return textRel.getTarget().getText().substring(textRel.getStart(), textRel.getEnd()); } } return ""; } /** * Checks a {@link SNode} if it is member of a specific {@link SLayer}. * * @param layerName * Specifies the layername to check. * @param node * Specifies the node to check. * @return true - it is true when the name of layername corresponds to the name * of any label of the SNode. */ public static boolean checkSLayer(String layerName, SNode node) { // robustness if (layerName == null || node == null) { return false; } Set sLayers = node.getLayers(); if (sLayers != null) { for (SLayer l : sLayers) { Collection





© 2015 - 2024 Weber Informatics LLC | Privacy Policy