All Downloads are FREE. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.trees.ud.CoNLLUUtils Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

The newest version!
package edu.stanford.nlp.trees.ud;

import java.util.*;

/**
 * Utility functions for reading and writing the FEATS and DEPS columns
 * of CoNLL-U files.
 *
 * <p>All methods are static and keep no state between calls.
 *
 * @author Sebastian Schuster
 */
public class CoNLLUUtils {

    /** Placeholder that marks an empty column in a CoNLL-U file. */
    private static final String EMPTY_COLUMN = "_";

    /**
     * Parses the value of the feature column in a CoNLL-U file
     * and returns them in a HashMap with the feature names as keys
     * and the feature values as values.
     *
     * <p>Each pair is split at its first {@code '='}, so a value may itself
     * contain {@code '='}. If a feature name occurs more than once, the last
     * occurrence wins.
     *
     * @param featureString The content of the feature column, e.g.
     *                      {@code "Case=Nom|Number=Sing"}. {@code null}, the
     *                      empty string and {@code "_"} all denote an empty
     *                      feature list.
     * @return A new, mutable HashMap with the feature values (empty if there
     *         are no features).
     * @throws IllegalArgumentException if a pair does not contain {@code '='}.
     */
    public static HashMap<String, String> parseFeatures(String featureString) {
        HashMap<String, String> features = new HashMap<>();
        if (isEmptyColumn(featureString)) {
            return features;
        }
        for (String pair : featureString.split("\\|")) {
            int sepPos = pair.indexOf('=');
            if (sepPos < 0) {
                throw new IllegalArgumentException(
                        "Malformed feature \"" + pair + "\" in \"" + featureString
                                + "\": expected Name=Value");
            }
            features.put(pair.substring(0, sepPos), pair.substring(sepPos + 1));
        }
        return features;
    }

    /**
     * Converts a feature HashMap to a feature string to be used
     * in a CoNLL-U file.
     *
     * <p>Features are written as {@code Name=Value} pairs joined by
     * {@code '|'} and ordered by feature name, ignoring case
     * (see {@link FeatureNameComparator}).
     *
     * @param features The features to serialize; may be {@code null}.
     * @return The feature string, or {@code "_"} if {@code features} is
     *         {@code null} or empty.
     */
    public static String toFeatureString(HashMap<String, String> features) {
        if (features == null || features.isEmpty()) {
            return EMPTY_COLUMN;
        }
        List<String> sortedKeys = new ArrayList<>(features.keySet());
        Collections.sort(sortedKeys, new FeatureNameComparator());
        return joinPairs(sortedKeys, features, '=');
    }

    /**
     * Parses the value of the extra dependencies column in a CoNLL-U file
     * and returns them in a HashMap with the governor indices as keys
     * and the relation names as values.
     *
     * <p>Each entry is split at its first {@code ':'}, so relation names with
     * subtypes such as {@code nmod:poss} are kept intact. Because the result
     * is keyed by governor index, only the last relation is kept if the same
     * governor occurs more than once.
     *
     * @param extraDepsString The content of the extra dependencies column,
     *                        e.g. {@code "2:nsubj|4:nmod:poss"}. {@code null},
     *                        the empty string and {@code "_"} all denote an
     *                        empty list.
     * @return A new, mutable HashMap with the additional dependencies (empty
     *         if there are none).
     * @throws IllegalArgumentException if an entry does not contain {@code ':'}.
     */
    public static HashMap<String, String> parseExtraDeps(String extraDepsString) {
        HashMap<String, String> extraDeps = new HashMap<>();
        if (isEmptyColumn(extraDepsString)) {
            return extraDeps;
        }
        for (String extraDepString : extraDepsString.split("\\|")) {
            int sepPos = extraDepString.indexOf(':');
            if (sepPos < 0) {
                throw new IllegalArgumentException(
                        "Malformed extra dependency \"" + extraDepString + "\" in \""
                                + extraDepsString + "\": expected governor:relation");
            }
            String gov = extraDepString.substring(0, sepPos);
            String reln = extraDepString.substring(sepPos + 1);
            extraDeps.put(gov, reln);
        }
        return extraDeps;
    }

    /**
     * Converts an extra dependencies hash map to a string to be used
     * in a CoNLL-U file.
     *
     * <p>Entries are written as {@code governor:relation} pairs joined by
     * {@code '|'}. They are ordered numerically by governor index, so
     * {@code 2} precedes {@code 10} and {@code 5.2} precedes {@code 5.10};
     * keys that are not numeric indices are placed last in lexicographic
     * order.
     *
     * @param extraDeps The additional dependencies, keyed by governor index;
     *                  may be {@code null}.
     * @return The extra dependencies string, or {@code "_"} if
     *         {@code extraDeps} is {@code null} or empty.
     */
    public static String toExtraDepsString(HashMap<String, String> extraDeps) {
        if (extraDeps == null || extraDeps.isEmpty()) {
            return EMPTY_COLUMN;
        }
        List<String> sortedKeys = new ArrayList<>(extraDeps.keySet());
        Collections.sort(sortedKeys, new GovernorIndexComparator());
        return joinPairs(sortedKeys, extraDeps, ':');
    }

    /** Returns whether a column value denotes "no entries" (null, empty or "_"). */
    private static boolean isEmptyColumn(String column) {
        return column == null || column.isEmpty() || column.equals(EMPTY_COLUMN);
    }

    /** Joins {@code key<separator>value} pairs with '|' in the given key order. */
    private static String joinPairs(List<String> orderedKeys, HashMap<String, String> values,
                                    char separator) {
        StringBuilder sb = new StringBuilder();
        for (String key : orderedKeys) {
            if (sb.length() > 0) {
                sb.append('|');
            }
            sb.append(key).append(separator).append(values.get(key));
        }
        return sb.toString();
    }

    /**
     * Orders feature names alphabetically, ignoring case.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so that the order does not
     * depend on the default locale of the JVM.
     */
    public static class FeatureNameComparator implements Comparator<String> {

        @Override
        public int compare(String featureName1, String featureName2) {
            return featureName1.toLowerCase(Locale.ROOT)
                    .compareTo(featureName2.toLowerCase(Locale.ROOT));
        }
    }

    /**
     * Orders governor indices numerically. An index is either an integer
     * ({@code "7"}) or an integer pair separated by a dot ({@code "7.1"});
     * the parts before and after the dot are each compared as integers.
     * Numeric indices sort before all other strings; remaining ties are
     * broken by plain string comparison so that the order is total.
     */
    private static class GovernorIndexComparator implements Comparator<String> {

        /** Longest digit run that is guaranteed to fit into a long. */
        private static final int MAX_DIGITS = 18;

        @Override
        public int compare(String index1, String index2) {
            long[] parts1 = parse(index1);
            long[] parts2 = parse(index2);
            if (parts1 == null || parts2 == null) {
                if (parts1 != null) {
                    return -1;
                }
                if (parts2 != null) {
                    return 1;
                }
                return index1.compareTo(index2);
            }
            int cmp = Long.compare(parts1[0], parts2[0]);
            if (cmp == 0) {
                cmp = Long.compare(parts1[1], parts2[1]);
            }
            // Distinct spellings of the same number (e.g. "05" and "5") must not compare equal.
            return cmp != 0 ? cmp : index1.compareTo(index2);
        }

        /** Splits an index into {major, minor}, or returns null if it is not numeric. */
        private static long[] parse(String index) {
            int dot = index.indexOf('.');
            String major = dot < 0 ? index : index.substring(0, dot);
            String minor = dot < 0 ? "0" : index.substring(dot + 1);
            if (!isDigits(major) || !isDigits(minor)) {
                return null;
            }
            return new long[] {Long.parseLong(major), Long.parseLong(minor)};
        }

        /** Returns whether s is a non-empty run of ASCII digits short enough to fit a long. */
        private static boolean isDigits(String s) {
            if (s.isEmpty() || s.length() > MAX_DIGITS) {
                return false;
            }
            for (int i = 0; i < s.length(); i++) {
                char c = s.charAt(i);
                if (c < '0' || c > '9') {
                    return false;
                }
            }
            return true;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy