/*
 * se.lth.cs.srl.util.Sentence2RDF
 * Part of the mate-tools Semantic Role Labeler ("srl" artifact).
 */
package se.lth.cs.srl.util;

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.TreeSet;

import se.lth.cs.srl.corpus.Sentence;
import se.lth.cs.srl.corpus.Word;

/**
 * Writes one {@link Sentence} as RDF in Turtle syntax.
 *
 * <p>The output describes a subject {@code gstruct:sentence_<inx>} of type
 * {@code gstruct:Sentence} with:
 * <ul>
 *   <li>{@code gstruct:inx} - the sentence index passed to the constructor;</li>
 *   <li>{@code gstruct:words} - one blank node per word (index 1 and up) carrying
 *       {@code gstruct:id}, {@code gstruct:form}, {@code gstruct:lemma},
 *       {@code gstruct:pos}, {@code gstruct:head} and {@code gstruct:deprel};</li>
 *   <li>{@code gstruct:predicates} - one blank node per predicate carrying
 *       {@code gstruct:id}, {@code gstruct:predsense} and {@code gstruct:args}
 *       (each argument having {@code gstruct:id} and {@code gstruct:argtype}).</li>
 * </ul>
 *
 * <p>Typical use: call {@link #printPrefix(PrintStream)} once, then
 * {@link #printRDF(PrintStream)} for every sentence, or use the one-shot
 * {@link #sentence2RDF(Sentence)}.
 *
 * <p>Original author's note: to load all the triples of a large corpus in Sesame,
 * set the Tomcat memory to 2g. Example query over the generated data (the
 * namespace IRIs were not recorded with the query and must be filled in):
 * <pre>{@code
 * PREFIX rdf:     <...>
 * PREFIX gstruct: <...>
 *
 * SELECT ?sent ?s ?n ?id ?f {
 *   ?sent rdf:type gstruct:Sentence.
 *   ?sent gstruct:predicates ?s.
 *   ?s gstruct:predsense "play.02".
 *   ?s gstruct:args ?n.
 *   ?n gstruct:argtype "AM-TMP".
 *   ?n gstruct:id ?id.
 *   ?sent gstruct:words ?n2.
 *   ?n2 gstruct:id ?id.
 *   ?n2 gstruct:form ?f
 * }
 * }</pre>
 *
 * @author pierre
 */
public class Sentence2RDF {

    /** The sentence that {@link #printRDF(PrintStream)} serializes. */
    public Sentence sentence;

    /** Index of the sentence; used in the subject name and as the {@code gstruct:inx} value. */
    public int inx;

    /**
     * Creates a serializer for one sentence.
     *
     * @param sentence the sentence to serialize
     * @param inx      the index of the sentence in the document
     */
    public Sentence2RDF(Sentence sentence, int inx) {
        this.sentence = sentence;
        this.inx = inx;
    }

    /**
     * Renders the arguments of one predicate as a comma-separated list of blank nodes,
     * one per line, ordered by {@code Word.WORD_LINEAR_ORDER_COMPARATOR}.
     *
     * @param argMap the predicate's arguments; each key is an argument word and each
     *               value is written as that word's {@code gstruct:argtype}
     * @return {@code "[]"} when the map is empty, otherwise the blank-node list
     *         without a trailing separator
     */
    public String makeArgString(Map<Word, ?> argMap) {
        if (argMap.isEmpty()) {
            return "[]";
        }
        // Sort the argument words with the project-supplied comparator before printing.
        TreeSet<Word> orderedArgs = new TreeSet<Word>(Word.WORD_LINEAR_ORDER_COMPARATOR);
        orderedArgs.addAll(argMap.keySet());

        StringBuilder result = new StringBuilder();
        for (Word word : orderedArgs) {
            if (result.length() > 0) {
                result.append(",\n");
            }
            result.append("\t\t\t[gstruct:id ").append(word.getIdx()).append(" ; ");
            result.append(" gstruct:argtype ").append(quote(argMap.get(word))).append("]");
        }
        return result.toString();
    }

    /**
     * Prints the Turtle description of {@link #sentence} to {@code out}: the sentence
     * header, its words and, when the sentence has predicates, the predicates with
     * their arguments. The statement is terminated with {@code "."}.
     *
     * @param out the stream to write to
     */
    public void printRDF(PrintStream out) {
        out.println("gstruct:sentence_" + inx + "\trdf:type\t" + "gstruct:Sentence ;");
        out.println("\tgstruct:inx\t" + inx + " ;");
        out.println("\tgstruct:words");
        printWords(out);

        if (sentence.getPredicates().isEmpty()) {
            out.println(".");
            return;
        }
        out.println(";");
        out.println("\tgstruct:predicates");
        printPredicates(out);
        out.println(".");
    }

    /** Prints one blank node per word, starting at index 1, separated by {@code ",\n"}. */
    private void printWords(PrintStream out) {
        int last = sentence.size() - 1;
        for (int i = 1; i <= last; i++) {
            String lemma = sentence.get(i).getLemma();
            String pos = sentence.get(i).getPOS();

            StringBuilder node = new StringBuilder("\t\t[");
            node.append("gstruct:id ").append(i).append(" ; ");
            node.append("gstruct:form ").append(quote(sentence.get(i).getForm())).append(" ; ");
            // The lemma is omitted when it is missing or the "_" placeholder.
            if (lemma != null && !lemma.equals("_")) {
                node.append("gstruct:lemma ").append(quote(lemma)).append(" ; ");
            }
            node.append("gstruct:pos ").append(quote(pos)).append(" ; ");
            // NOTE(review): the POS check guards gstruct:head, while gstruct:pos itself is
            // always written. Kept as in the original; confirm this is intended.
            if (pos != null && !pos.equals("_")) {
                node.append("gstruct:head ").append(quote(sentence.get(i).getHeadId())).append(" ; ");
            }
            node.append("gstruct:deprel ").append(quote(sentence.get(i).getDeprel()));
            node.append("]");
            if (i != last) {
                node.append(",\n");
            }
            out.print(node.toString());
        }
    }

    /** Prints one blank node per predicate (id, sense, arguments), separated by {@code ",\n"}. */
    private void printPredicates(PrintStream out) {
        int count = sentence.getPredicates().size();
        for (int i = 0; i < count; i++) {
            StringBuilder node = new StringBuilder("\t\t[");
            node.append("gstruct:id ").append(sentence.getPredicates().get(i).getIdx()).append(" ; ");
            node.append("gstruct:predsense ")
                .append(quote(sentence.getPredicates().get(i).getSense()))
                .append(" ; ");
            node.append("gstruct:args\n");
            node.append(makeArgString(sentence.getPredicates().get(i).getArgMap()));
            node.append("]");
            if (i != count - 1) {
                node.append(",\n");
            }
            out.print(node.toString());
        }
    }

    /**
     * Wraps a value in double quotes as a Turtle string literal, escaping the
     * characters that may not appear raw inside one: backslash, double quote,
     * line feed and carriage return. A {@code null} value is written as
     * {@code "null"}, matching plain string concatenation.
     */
    private static String quote(Object value) {
        String raw = String.valueOf(value);
        StringBuilder literal = new StringBuilder(raw.length() + 2);
        literal.append('"');
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            switch (c) {
                case '\\':
                    literal.append("\\\\");
                    break;
                case '"':
                    literal.append("\\\"");
                    break;
                case '\n':
                    literal.append("\\n");
                    break;
                case '\r':
                    literal.append("\\r");
                    break;
                default:
                    literal.append(c);
                    break;
            }
        }
        literal.append('"');
        return literal.toString();
    }

    /**
     * Prints the {@code @prefix} declarations for the {@code gstruct} and {@code rdf}
     * prefixes used by {@link #printRDF(PrintStream)}.
     *
     * @param out the stream to write to
     */
    public void printPrefix(PrintStream out) {
        // gstruct means grammatical structure
        // NOTE(review): both declarations lack the namespace IRI that Turtle requires
        // between the prefix name and the final "."; the intended IRIs are not visible
        // in this file and need to be restored.
        out.println("@prefix gstruct:  .");
        out.println("@prefix rdf:  .");

    }

    /**
     * Serializes a single sentence, prefix declarations included, to a string.
     * The sentence is always written with index 1.
     *
     * @param s the sentence to serialize
     * @return the prefix declarations, an empty line and the sentence description
     * @throws UnsupportedEncodingException if the UTF-8 encoding is not available
     */
    public static String sentence2RDF(Sentence s) throws UnsupportedEncodingException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream p = new PrintStream(baos, true, "UTF8");
        try {
            Sentence2RDF s2r = new Sentence2RDF(s, 1);
            s2r.printPrefix(p);
            p.println();
            s2r.printRDF(p);
        } finally {
            p.close();
        }
        return baos.toString("UTF8");
    }
}