// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/**********************************************************************************
* Copyright (c) 2011, Monnet Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Monnet Project nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE MONNET PROJECT BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*********************************************************************************/
package eu.monnetproject.translation.evaluation.evaluate;
import eu.monnetproject.label.LabelExtractor;
import eu.monnetproject.label.LabelExtractorFactory;
import eu.monnetproject.lang.Language;
import eu.monnetproject.lemon.LemonModel;
import eu.monnetproject.lemon.LemonModels;
import eu.monnetproject.lemon.LemonSerializer;
import eu.monnetproject.lemon.model.LexicalEntry;
import eu.monnetproject.lemon.model.Lexicon;
import eu.monnetproject.ontology.Entity;
import eu.monnetproject.ontology.Ontology;
import eu.monnetproject.translation.monitor.Messages;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLEncoder;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
/**
*
* @author John McCrae
*/
public class SimpleLexicalizer {
private final LabelExtractor labelExtractor;
private final LemonModel model = LemonSerializer.newInstance().create();
@SuppressWarnings("unchecked")
public SimpleLexicalizer(LabelExtractorFactory lef) {
this.labelExtractor = lef.getExtractor(Collections.EMPTY_LIST, false, false);
}
public Collection lexicalize(Ontology ontology) {
final HashSet processedPuns = new HashSet();
final HashMap lexica = new HashMap();
final HashMap counter = new HashMap();
final HashMap dupes = new HashMap();
for (Entity entity : ontology.getEntities()) {
if (entity.getURI() != null) {
if (processedPuns.contains(entity.getURI())) {
continue;
}
processedPuns.add(entity.getURI());
}
final Map> labels = labelExtractor.getLabels(entity);
for (Map.Entry> label : labels.entrySet()) {
final Language lang = label.getKey();
if(!counter.containsKey(lang)) {
counter.put(lang,0);
dupes.put(lang, 0);
}
counter.put(lang,counter.get(lang) +1);
if (lang.equals(LabelExtractor.NO_LANGUAGE)) {
// log.fine("Ignoring unlanged label \"" + label.getValue() + "\"");
} else {
if (!lexica.containsKey(lang)) {
lexica.put(lang, model.addLexicon(mkURI(ontology, lang), lang.toString()));
}
if (label.getValue().size() > 1) {
//log.info(label.getValue().size() + " labels for entity " + entity.getURI());
}
for (String l : label.getValue()) {
final int oldLexiconSize = lexica.get(lang).getEntrys().size();
final LexicalEntry entry = LemonModels.addEntryToLexicon(lexica.get(lang), mkURI(ontology, lang, l), l, entity.getURI());
if(lexica.get(lang).getEntrys().size() == oldLexiconSize) {
dupes.put(lang,dupes.get(lang)+1);
//Messages.info("senses: " + entry.getSenses().size());
}
}
}
}
}
for(Language l : counter.keySet()) {
Messages.info(l + " has [" + (counter.get(l) - dupes.get(l)) + "] " + counter.get(l) +" [unique] labels");
}
return lexica.values();
}
public Lexicon getBlankLexicon(Ontology ontology, Language lang) {
return model.addLexicon(mkURI(ontology, lang), lang.toString());
}
private URI mkURI(Ontology ontology, Language lang) {
final String ontoURIStr = ontology.getURI() == null ? "unknown:ontology#" : ontology.getURI().toString();
final int fragPoint = ontoURIStr.lastIndexOf("#") > 0 ? ontoURIStr.lastIndexOf("#") : ontoURIStr.length();
return URI.create(ontoURIStr.substring(0, fragPoint) + "#lexicon__" + lang);
}
private URI mkURI(Ontology ontology, Language lang, String l) {
final String ontoURIStr = ontology.getURI() == null ? "unknown:ontology#" : ontology.getURI().toString();
final int fragPoint = ontoURIStr.lastIndexOf("#") > 0 ? ontoURIStr.lastIndexOf("#") : ontoURIStr.length();
try {
return URI.create(ontoURIStr.substring(0, fragPoint) + "#lexicon__" + lang + "/" + URLEncoder.encode(l, "UTF-8"));
} catch (UnsupportedEncodingException x) {
throw new RuntimeException(x);
}
}
}