All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.fbk.dkm.pikes.resources.FrameNet Maven / Gradle / Ivy

Go to download

A collection of Java classes for accessing and querying a number of NLP resources.

The newest version!
package eu.fbk.dkm.pikes.resources;

import com.google.common.base.Charsets;
import com.google.common.collect.*;
import com.google.common.io.Resources;
import eu.fbk.utils.core.CommandLine;
import eu.fbk.utils.core.CommandLine.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.stream.XMLStreamException;
import java.io.*;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class FrameNet {

    private static final Logger LOGGER = LoggerFactory.getLogger(FrameNet.class);

    private static final Map> RELATIONS;

    static {
        try {
            final Map> map = Maps.newHashMap();
            for (final Relation relation : Relation.values()) {
                map.put(relation, ImmutableMultimap.builder());
            }
            for (final String line : Resources.readLines(
                    FrameNet.class.getResource("FrameNet.tsv"), Charsets.UTF_8)) {
                final String[] tokens = line.split("\t");
                final Relation relation = Relation.valueOf(tokens[0]);
                final String from = tokens[1];
                final String to = tokens[2];
                map.get(relation).put(from, to);
                if (relation == Relation.USES) {
                    map.get(Relation.IS_USED_BY).put(to, from);
                } else if (relation == Relation.INHERITS_FROM) {
                    map.get(Relation.IS_INHERITED_BY).put(to, from);
                } else if (relation == Relation.PRECEDES) {
                    map.get(Relation.IS_PRECEDED_BY).put(to, from);
                } else if (relation == Relation.PERSPECTIVE_ON) {
                    map.get(Relation.IS_PERSPECTIVIZED_IN).put(to, from);
                } else if (relation == Relation.SUBFRAME_OF) {
                    map.get(Relation.HAS_SUBFRAME).put(to, from);
                }
            }
            final ImmutableMap.Builder> mapBuilder = ImmutableMap
                    .builder();
            for (final Map.Entry> entry : map
                    .entrySet()) {
                mapBuilder.put(entry.getKey(), entry.getValue().build());
            }
            RELATIONS = mapBuilder.build();

        } catch (final IOException ex) {
            throw new Error("Could not load eu.fbk.dkm.pikes.resources.FrameNet data from classpath", ex);
        }
    }

    public static Set getRelatedFrames(final boolean recursive,
            final String sourceFrameID, final Relation... relations) {
        final Set ids = Sets.newHashSet();
        final List queue = Lists.newLinkedList();
        queue.add(sourceFrameID);
        while (!queue.isEmpty()) {
            final String id = queue.remove(0);
            for (final Relation relation : relations) {
                for (final String relatedID : RELATIONS.get(relation).get(id)) {
                    if (ids.add(relatedID) && recursive) {
                        queue.add(relatedID);
                    }
                }
            }
        }
        return ids;
    }

    public static void main(final String[] args) throws IOException, XMLStreamException {

        try {
            final CommandLine cmd = CommandLine
                    .parser()
                    .withName("eu.fbk.dkm.pikes.resources.FrameNet")
                    .withHeader("Generate a TSV file with indexed eu.fbk.dkm.pikes.resources.FrameNet data")
                    .withOption("f", "frames", "the directory containing frame definitions",
                            "DIR", Type.DIRECTORY_EXISTING, true, false, true)
                    .withOption("o", "output", "output file", "FILE", Type.FILE, true, false, true)
                    .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

            final File dir = cmd.getOptionValue("f", File.class);
            final File output = cmd.getOptionValue("o", File.class);

            final Set lines = Sets.newHashSet();
            for (final File file : dir.listFiles()) {
                if (!file.getName().endsWith(".xml")) {
                    continue;
                }
                LOGGER.info("Processing {}", file);
                try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
                    String line = null;
                    String from = null;
                    Relation relation = null;
                    while ((line = reader.readLine()) != null) {
                        if (line.startsWith("") + 1;
                            final int end = line.indexOf('<', start);
                            final String to = line.substring(start, end).trim().replace(' ', '_');
                            if (relation == Relation.IS_USED_BY) {
                                lines.add(Relation.USES + "\t" + to + "\t" + from);
                            } else if (relation == Relation.IS_INHERITED_BY) {
                                lines.add(Relation.INHERITS_FROM + "\t" + to + "\t" + from);
                            } else if (relation == Relation.IS_PRECEDED_BY) {
                                lines.add(Relation.PRECEDES + "\t" + to + "\t" + from);
                            } else if (relation == Relation.IS_PERSPECTIVIZED_IN) {
                                lines.add(Relation.PERSPECTIVE_ON + "\t" + to + "\t" + from);
                            } else if (relation == Relation.HAS_SUBFRAME) {
                                lines.add(Relation.SUBFRAME_OF + "\t" + to + "\t" + from);
                            } else {
                                lines.add(relation + "\t" + from + "\t" + to);
                            }
                        }
                    }
                }
            }

            final List sortedLines = Ordering.natural().immutableSortedCopy(lines);
            try (Writer writer = new OutputStreamWriter(new BufferedOutputStream(
                    new FileOutputStream(output)), Charsets.UTF_8)) {
                for (final String line : sortedLines) {
                    writer.write(line);
                    writer.write('\n');
                }
            }

            LOGGER.info("Extracted {} relations", sortedLines.size());

        } catch (final Throwable ex) {
            CommandLine.fail(ex);
        }
    }

    public enum Relation {

        USES,

        IS_USED_BY,

        INHERITS_FROM,

        IS_INHERITED_BY,

        PRECEDES,

        IS_PRECEDED_BY,

        PERSPECTIVE_ON,

        IS_PERSPECTIVIZED_IN,

        SUBFRAME_OF,

        HAS_SUBFRAME,

        IS_CAUSATIVE_OF,

        IS_INCHOATIVE_OF,

        SEE_ALSO;

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy