
net.sf.okapi.connectors.pensieve.PensieveTMConnector Maven / Gradle / Ivy
/*===========================================================================
Copyright (C) 2009-2014 by the Okapi Framework contributors
-----------------------------------------------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===========================================================================*/
package net.sf.okapi.connectors.pensieve;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.common.query.QueryResult;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.lib.translation.BaseConnector;
import net.sf.okapi.lib.translation.ITMQuery;
import net.sf.okapi.tm.pensieve.common.Metadata;
import net.sf.okapi.tm.pensieve.common.MetadataType;
import net.sf.okapi.tm.pensieve.common.TmHit;
import net.sf.okapi.tm.pensieve.seeker.ITmSeeker;
import net.sf.okapi.tm.pensieve.seeker.TmSeekerFactory;
import org.json.simple.JSONArray;
import org.json.simple.parser.JSONParser;
public class PensieveTMConnector extends BaseConnector implements ITMQuery {
/** Maximum number of hits requested from a fuzzy search; never set below 1 by setMaximumHits(). */
private int maxHits = 25;
/** Score threshold: a value of 100 or more selects an exact search, anything lower a fuzzy search. */
private int threshold = 95;
/** Results of the most recent query, in the order they were stored. */
private List<QueryResult> results;
/** Index of the next result to hand out, or -1 when there is nothing (left) to return. */
private int current = -1;
/** Connector settings (server vs. local mode, host, database directory). */
private Parameters params;
/** Seeker used for local (file-based) queries; created in open(). */
private ITmSeeker seeker;
/** Metadata passed along with every seeker search. */
private Metadata attrs;
/** Value used to resolve the root-directory variable in the database directory path. */
private String rootDir;
/** JSON parser used to read server responses; created in open() when the server mode is selected. */
private JSONParser parser;
/** Leading part of the server query URL, to which the "?q=..." query string is appended. */
private String basePart;
/** Value stored in the origin field of each result: the TM file name in local mode, null in server mode. */
private String origin;
/**
 * Creates a connector with a fresh set of parameters and an empty
 * metadata object for the seeker searches.
 */
public PensieveTMConnector() {
    this.params = new Parameters();
    this.attrs = new Metadata();
}
/**
 * Gets the display name of this connector.
 *
 * @return the fixed name of the connector.
 */
@Override
public String getName() {
    final String displayName = "Pensieve TM";
    return displayName;
}
/**
 * Builds a short description of the current settings: the host when the
 * server mode is selected, the database directory otherwise.
 *
 * @return the label ("Server: " or "Database: ") followed by the
 *         corresponding value, or the label alone when that value is empty.
 */
@Override
public String getSettingsDisplay() {
    final String label;
    final String value;
    if (params.getUseServer()) {
        label = "Server: ";
        value = params.getHost();
    } else {
        label = "Database: ";
        value = params.getDbDirectory();
    }
    // An empty value is shown as nothing at all after the label.
    return Util.isEmpty(value) ? label : label + value;
}
/**
 * Sets the maximum number of hits to request from a fuzzy search.
 *
 * @param max the requested maximum; any value below 1 is raised to 1.
 */
@Override
public void setMaximumHits(int max) {
    maxHits = Math.max(max, 1);
}
/**
 * Sets the score threshold used by the local query. The value is stored
 * as given, without any range check.
 *
 * @param threshold the new threshold; 100 or more makes the local query
 *                  use an exact search instead of a fuzzy one.
 */
@Override
public void setThreshold(int threshold) {
    this.threshold = threshold;
}
/**
 * Closes the seeker, if one has been created. Does nothing otherwise.
 */
@Override
public void close() {
    if (seeker == null) {
        return;
    }
    seeker.close();
}
/**
 * Indicates whether next() has a result to return.
 * As a side effect, the cursor is reset to -1 once it has moved past
 * the last stored result.
 *
 * @return true if a result is available, false if no query has been
 *         run yet or all results have been consumed.
 */
@Override
public boolean hasNext() {
    if (results != null) {
        if (current >= results.size()) {
            // Ran off the end of the list: nothing left to iterate.
            current = -1;
        }
        return current >= 0;
    }
    return false;
}
/**
 * Returns the result at the cursor and moves the cursor forward by one.
 *
 * @return the next result, or null if no query has been run or if the
 *         cursor is not on a valid entry (in which case it is reset to -1).
 */
@Override
public QueryResult next () {
    if ( results == null ) {
        return null;
    }
    if (( current > -1 ) && ( current < results.size() )) {
        // Explicit cast: this compiles whether the results field is
        // declared as a raw or as a parameterized list.
        return (QueryResult)results.get(current++);
    }
    // Cursor is not on a valid entry: reset it.
    current = -1;
    return null;
}
/**
 * Prepares the connector for querying, according to the current parameters.
 * In server mode a JSON parser is created and the origin is cleared.
 * In local mode any existing seeker is closed and a new file-based seeker
 * is created on the resolved database directory.
 */
@Override
public void open() {
    if (params.getUseServer()) {
        parser = new JSONParser();
        // Original note: "tmName is set when setting languages for the server".
        origin = null;
        return;
    }

    // Local mode: the TM must already exist, it is only queried.
    if (seeker != null) {
        seeker.close();
    }
    // Resolve the root-directory variable first, then the locale variables.
    origin = Util.fillRootDirectoryVariable(params.getDbDirectory(), rootDir);
    origin = LocaleId.replaceVariables(origin, srcLoc, trgLoc);
    seeker = TmSeekerFactory.createFileBasedTmSeeker(origin);
    // The origin reported in the results is just the file name part of the path.
    origin = Util.getFilename(origin, true);
}
/**
 * Runs a query for a plain text string. Any previous results are discarded.
 * The text is wrapped into a new text fragment and sent either to the
 * server lookup or to the local lookup, depending on the parameters.
 *
 * @param plainText the text to look up.
 * @return the value returned by the selected lookup (the local lookup
 *         returns the number of results stored).
 */
@Override
public int query(String plainText) {
    // Start from a clean state.
    results = new ArrayList<>();
    current = -1;

    final boolean remote = params.getUseServer();
    final TextFragment fragment = new TextFragment(plainText);
    return remote ? queryServer(fragment) : queryDirectory(fragment);
}
/**
 * Runs a query for a text fragment. Any previous results are discarded.
 * The fragment is sent either to the server lookup or to the local
 * lookup, depending on the parameters.
 *
 * @param text the fragment to look up.
 * @return the value returned by the selected lookup (the local lookup
 *         returns the number of results stored).
 */
@Override
public int query(TextFragment text) {
    // Start from a clean state.
    results = new ArrayList<>();
    current = -1;

    return params.getUseServer() ? queryServer(text) : queryDirectory(text);
}
/**
 * Direct query, using the seeker.
 * A threshold of 100 or more triggers an exact search; anything lower
 * triggers a fuzzy search limited by the threshold and the maximum number
 * of hits. Each hit is converted to a QueryResult and appended to the
 * result list; the cursor is placed on the first result if there is one.
 *
 * @param text the fragment to look up.
 * @return the number of entries in the result list.
 */
private int queryDirectory (TextFragment text) {
    // The element type must be declared for the typed for-each below to compile.
    List<TmHit> hits;
    if ( threshold >= 100 ) {
        hits = seeker.searchExact(text, attrs);
    }
    else {
        hits = seeker.searchFuzzy(text, threshold, maxHits, attrs);
    }
    // Convert to normalized results
    for ( TmHit hit : hits ) {
        Float score = hit.getScore();
        QueryResult qr = new QueryResult();
        qr.weight = getWeight();
        // The fractional part of the score is dropped.
        qr.setFuzzyScore(score.intValue());
        qr.source = hit.getTu().getSource().getContent();
        qr.target = hit.getTu().getTarget().getContent();
        qr.matchType = hit.getMatchType();
        qr.origin = origin;
        results.add(qr);
    }
    if ( !results.isEmpty() ) {
        current = 0;
    }
    return results.size();
}
// Indirect query, using the pensieve-server API
private int queryServer (TextFragment fragment) {
try {
// Check if there is actually text to translate
if ( !fragment.hasText(false) ) return 0;
//TODO: deal with inline codes
String qtext = fragment.toText();
// Create the connection and query
URL url = new URL(basePart + String.format("?q=%s", URLEncoder.encode(qtext, "UTF-8")));
URLConnection conn = url.openConnection();
// Get the response
JSONArray array = (JSONArray)parser.parse(new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8));
@SuppressWarnings("unchecked")
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy