All downloads are free. Search and download functionality uses the official Maven repository.

net.sf.okapi.connectors.globalsight.GlobalSightTMConnector Maven / Gradle / Ivy

/*===========================================================================
  Copyright (C) 2009 by the Okapi Framework contributors
-----------------------------------------------------------------------------
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
===========================================================================*/

package net.sf.okapi.connectors.globalsight;

import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.common.exceptions.OkapiNotImplementedException;
import net.sf.okapi.common.query.MatchType;
import net.sf.okapi.common.query.QueryResult;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextFragment.TagType;
import net.sf.okapi.lib.translation.BaseConnector;
import net.sf.okapi.lib.translation.ITMQuery;

import org.apache.axis.AxisFault;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.globalsight.www.webservices.Ambassador;
import com.globalsight.www.webservices.AmbassadorWebServiceSoapBindingStub;

public class GlobalSightTMConnector extends BaseConnector implements ITMQuery {

	/**
	 * Results of the last query, read by hasNext() and next().
	 * NOTE(review): declared as a raw List although next() returns its elements as
	 * QueryResult; presumably a list of QueryResult whose type argument was lost. Confirm.
	 */
	private List results;
	/** Index of the next result to return, or -1 when there is no current result. */
	private int current = -1;
	/** Maximum number of results kept by a query (see setMaximumHits). */
	private int maxHits = 25;
	/** Minimal fuzzy score an entry needs to be kept by a query (see setThreshold). */
	private int threshold = 75;
	/** Web service stub, created in open(). */
	private Ambassador gsWS;
	/** Login token returned by the service, stored without its trailing part (see open()). */
	private String gsToken;
	/** TM profile taken from the parameters in open() and passed to every search call. */
	private String gsTmProfile;
	/** Connector parameters (server URL, user name, password, TM profile). */
	private Parameters params;
	/** Parser used by the query methods to read the XML returned by the service. */
	private DocumentBuilder docBuilder;

	/**
	 * Creates a connector with default parameters and a non-validating XML parser.
	 * <p>The parser is later used by the query methods on XML text returned by the
	 * remote service, so resolution of external entities and external DTDs is
	 * switched off to prevent XML External Entity (XXE) processing.
	 *
	 * @throws OkapiException if the document builder cannot be configured or created.
	 */
	public GlobalSightTMConnector () {
		params = new Parameters();
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		factory.setValidating(false);
		try {
			// The parsed documents come from a remote server: never fetch external
			// entities or external DTD subsets while reading them.
			factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
			factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
			factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
			docBuilder = factory.newDocumentBuilder();
		}
		catch ( ParserConfigurationException e ) {
			throw new OkapiException("Error creating document builder.", e);
		}
	}

	/**
	 * Gets the name of this connector.
	 *
	 * @return the fixed connector name.
	 */
	@Override
	public String getName () {
		final String connectorName = "GlobalSight-TM";
		return connectorName;
	}

	/**
	 * Builds a two-line, human-readable summary of the current settings:
	 * the server URL on the first line and the TM profile on the second.
	 *
	 * @return the formatted settings summary.
	 */
	@Override
	public String getSettingsDisplay () {
		final String serverUrl = params.getServerURL();
		final String tmProfile = params.getTmProfile();
		return String.format("URL: %s\nTM profile: %s", serverUrl, tmProfile);
	}
	
	/**
	 * Closes the connector. This implementation performs no action.
	 */
	@Override
	public void close () {
		// No operation.
	}

	/**
	 * Indicates whether a result is available for {@link #next()}.
	 * When the cursor has moved past the last result it is reset to -1.
	 *
	 * @return true if a result is available, false if there is none or if
	 * no result list exists yet.
	 */
	@Override
	public boolean hasNext () {
		if ( results == null ) {
			return false;
		}
		// Reset the cursor once it has run past the end of the list.
		if ( current >= results.size() ) {
			current = -1;
		}
		return current >= 0;
	}

	/**
	 * Returns the result at the cursor and advances the cursor by one.
	 *
	 * @return the next result, or null if no result list exists or the cursor
	 * is outside the list (in which case the cursor is reset to -1).
	 */
	@Override
	public QueryResult next () {
		if ( results == null ) {
			return null;
		}
		final boolean outOfRange = ( current < 0 ) || ( current >= results.size() );
		if ( outOfRange ) {
			current = -1;
			return null;
		}
		// Hand out the current entry, then move the cursor forward.
		return results.get(current++);
	}

	/**
	 * Opens the connection to the service: creates the web service stub for the
	 * configured server URL, logs in with the configured user name and password,
	 * stores the login token and the TM profile, and creates an empty result list.
	 * <p>The part of the token starting at the last "+_+" marker is removed. If the
	 * token contains no such marker it is kept unchanged (previously this case
	 * failed with a StringIndexOutOfBoundsException).
	 *
	 * @throws OkapiException if the server URL is invalid, the stub cannot be
	 * created, or the login call fails.
	 */
	@Override
	public void open () {
		try {
			URL url = new URL(params.getServerURL());
			gsWS = new AmbassadorWebServiceSoapBindingStub(url, null);
			String token = gsWS.login(params.getUsername(), params.getPassword());
			// Remove the end part, but only when the marker is actually present
			int n = token.lastIndexOf("+_+");
			gsToken = ( n > -1 ) ? token.substring(0, n) : token;
			gsTmProfile = params.getTmProfile();
			results = new ArrayList<>();
		}
		// AxisFault must be caught before the more general RemoteException
		catch ( AxisFault e ) {
			throw new OkapiException("Error creating the GlobalSight Web services.", e);
		}
		catch ( RemoteException e ) {
			throw new OkapiException("Error when login.", e);
		}
		catch ( MalformedURLException e ) {
			throw new OkapiException("Invalid server URL.", e);
		}
	}

	/**
	 * Queries the TM for a text fragment and stores the matches so they can be
	 * read with hasNext() and next().
	 * <p>Previous results are cleared first, and a fragment without text returns 0
	 * without calling the service. Each entry element of the XML response yields one
	 * result: fuzzy score from the percentage element, origin from the tm element,
	 * source and target from the segment elements (see readSegment). Entries scoring
	 * below the threshold are skipped. The cursor is set to the first result when at
	 * least one result is kept.
	 * <p>NOTE(review): in this copy of the file the two for-loop headers and the
	 * format strings of the code conversion look truncated (text between angle
	 * brackets seems to have been stripped). Restore them from the upstream source.
	 *
	 * @param frag the fragment to look up.
	 * @return the number of results kept.
	 * @throws OkapiException if the service call fails or its response cannot be read.
	 */
	@Override
	public int query (TextFragment frag) {
		results.clear();
		if ( !frag.hasText() ) return 0;
		try {
			String text = frag.getCodedText();
			if ( frag.hasCode() ) {
				// Rebuild the text with each inline code replaced by a formatted
				// string built from the code id.
				StringBuilder tmp = new StringBuilder();
				Code code;
				// NOTE(review): truncated line, see the method comment.
				for ( int i=0; i", code.getId(), code.getId()));
						break;
					case TextFragment.MARKER_CLOSING:
						code = frag.getCode(text.charAt(++i));
						// NOTE(review): empty format string that still receives an argument; presumably truncated.
						tmp.append(String.format("", code.getId()-1));
						break;
					case TextFragment.MARKER_ISOLATED:
						code = frag.getCode(text.charAt(++i));
						// NOTE(review): empty format string that still receives an argument; presumably truncated.
						tmp.append(String.format("", code.getId()));
						break;
					default:
						tmp.append(text.charAt(i));
						break;
					}
				}
				text = tmp.toString();
			}

			// srcCode is not declared in this class; presumably inherited from BaseConnector (confirm).
			String xmlRes = gsWS.searchEntries(gsToken, gsTmProfile, text, srcCode);
			Document doc = docBuilder.parse(new InputSource(new StringReader(xmlRes)));
			NodeList list1 = doc.getElementsByTagName("entry");
			Element elem;
			NodeList list2, list3;
			QueryResult res;
			// NOTE(review): truncated line, see the method comment.
			for ( int i=0; i= maxHits ) break;
				
				elem = (Element)list1.item(i);
				list2 = elem.getElementsByTagName("percentage");
				res = new QueryResult();
				res.weight = getWeight();
				// The percent sign is removed and the value is converted from float to int
				res.setFuzzyScore(Float.valueOf(Util.getTextContent(list2.item(0)).replace("%", "")).intValue());
				// Skip entries scoring below the threshold
				if ( res.getFuzzyScore() < threshold ) continue;
				
				if ( res.getFuzzyScore() >= 100 ) res.matchType = MatchType.EXACT;
				else if ( res.getFuzzyScore() > 0 ) res.matchType = MatchType.FUZZY;
				
				list2 = elem.getElementsByTagName("tm");
				res.origin = Util.getTextContent(list2.item(0));

				list2 = elem.getElementsByTagName("source");
				list3 = ((Element)list2.item(0)).getElementsByTagName("segment");
				res.source = readSegment((Element)list3.item(0), frag);

				list2 = elem.getElementsByTagName("target");
				list3 = ((Element)list2.item(0)).getElementsByTagName("segment");
				res.target = readSegment((Element)list3.item(0), frag);


				results.add(res);
			}

		} catch ( RemoteException e ) {
			throw new OkapiException("Error querying TM.", e);
		} catch ( SAXException | IOException e ) {
			throw new OkapiException("Error with query results.", e);
		}
		// Position the cursor on the first result, if any
		if ( results.size() > 0 ) current = 0;
		return results.size();
	}

	@Override
	public List> batchQuery (List fragments) {
		throw new OkapiNotImplementedException();
	}
	
//	public int queryV5 (TextFragment frag) {
//		/* The GlobalSight TM Web service does not support query with inline codes
//		 * for the time being (v7.1.3), so we query plain text to get the best match 
//		 * possible. But queries with codes will never get an exact match even if one 
//		 * exists in the TM.
//		 */
//		results.clear();
//		if ( !frag.hasText() ) return 0;
//		try {
//			String qtext = frag.getCodedText();
//			StringBuilder tmpCodes = new StringBuilder();
//			if ( frag.hasCode() ) {
//				StringBuilder tmpText = new StringBuilder();
//				for ( int i=0; i= maxHits ) break;
//				
//				elem = (Element)list1.item(i);
//				list2 = elem.getElementsByTagName("percentage");
//				res = new QueryResult();
//				res.score = Integer.valueOf(Util.getTextContent(list2.item(0)).replace("%", ""));
//				if ( res.score < threshold ) continue;
//				
//				list2 = elem.getElementsByTagName("source");
//				list3 = ((Element)list2.item(0)).getElementsByTagName("segment");
//				res.source = readSegment((Element)list3.item(0), frag);
//
//				list2 = elem.getElementsByTagName("target");
//				list3 = ((Element)list2.item(0)).getElementsByTagName("segment");
//				res.target = readSegment((Element)list3.item(0), frag);
//				
//				// Query is done without codes, so any exact match result from a text
//				// with codes should be down-graded
//				if ( frag.hasCode() && res.score >= 100 ) {
//					res.score = 99;
//				}
//				results.add(res);
//			}
//
//		}
//		catch ( WebServiceException e ) {
//			throw new OkapiException("Error querying TM.", e);
//		}
//		catch ( RemoteException e ) {
//			throw new OkapiException("Error querying TM.", e);
//		}
//		catch ( SAXException e ) {
//			throw new OkapiException("Error with query results.", e);
//		}
//		catch ( IOException e ) {
//			throw new OkapiException("Error with query results.", e);
//		}
//		if ( results.size() > 0 ) current = 0;
//		return results.size();
//	}

	/**
	 * Queries the TM for a plain text string and stores the matches so they can be
	 * read with hasNext() and next().
	 * <p>Previous results are cleared first. Each entry element of the XML response
	 * yields one result with its fuzzy score, source and target; entries scoring
	 * below the threshold are skipped. Unlike the fragment-based query, this method
	 * sets neither the match type nor the origin of the results. The cursor is set
	 * to the first result when at least one result is kept.
	 * <p>NOTE(review): in this copy of the file the for-loop header looks truncated
	 * (text between angle brackets seems to have been stripped). Restore it from the
	 * upstream source.
	 *
	 * @param plainText the text to look up.
	 * @return the number of results kept.
	 * @throws OkapiException if the service call fails or its response cannot be read.
	 */
	@Override
	public int query (String plainText) {
		try {
			results.clear();
			// srcCode is not declared in this class; presumably inherited from BaseConnector (confirm).
			String xmlRes = gsWS.searchEntries(gsToken, gsTmProfile, plainText, srcCode);
			Document doc = docBuilder.parse(new InputSource(new StringReader(xmlRes)));
			NodeList list1 = doc.getElementsByTagName("entry");
			Element elem;
			NodeList list2;
			NodeList list3;
			QueryResult res;
			// NOTE(review): truncated line, see the method comment.
			for ( int i=0; i= maxHits ) break;
				elem = (Element)list1.item(i);
				list2 = elem.getElementsByTagName("percentage");
				res = new QueryResult();
				res.weight = getWeight();
				// The percent sign is removed and the value is converted from float to int
				res.setFuzzyScore(Float.valueOf(Util.getTextContent(list2.item(0)).replace("%", "")).intValue());
				// Skip entries scoring below the threshold
				if ( res.getFuzzyScore() < threshold ) continue;
				list2 = elem.getElementsByTagName("source");
				list3 = ((Element)list2.item(0)).getElementsByTagName("segment");
				// No original fragment exists for a plain text query, so null is passed
				res.source = readSegment((Element)list3.item(0), null);
				list2 = elem.getElementsByTagName("target");
				list3 = ((Element)list2.item(0)).getElementsByTagName("segment");
				res.target = readSegment((Element)list3.item(0), null);
				results.add(res);
			}
		} catch ( RemoteException e ) {
			throw new OkapiException("Error querying TM.", e);
		} catch ( SAXException | IOException e ) {
			throw new OkapiException("Error with query results.", e);
		}
		// Position the cursor on the first result, if any
		if ( results.size() > 0 ) current = 0;
		return results.size();
	}
	
	/**
	 * Builds a text fragment from a segment element of a query response.
	 * <p>NOTE(review): the loop over the child nodes is cut off in this copy of the
	 * file (the line of the for statement runs into a commented-out helper), so how
	 * the child nodes are converted is not visible here. Restore the rest of the
	 * method from the upstream source.
	 *
	 * @param elem the segment element whose child nodes are read.
	 * @param original the fragment that was queried. It can be null. The only uses
	 * visible here are commented out.
	 * @return presumably the fragment created at the start of the method; the return
	 * statement is not visible here (confirm).
	 */
	private TextFragment readSegment (Element elem,
		TextFragment original)
	{
		TextFragment tf = new TextFragment();
		NodeList list = elem.getChildNodes();
		int lastId = -1;
		int id = -1;
		Node node;
		Code code;
//		Code srcCode;
		// NOTE(review): raw Stack; the element type is not visible here.
		Stack stack = new Stack();
//		List oriCodes = null;
//		
//		if ( original != null ) {
//			oriCodes = original.getCodes();
//		}
		
		// Note that this parsing assumes non-overlapping codes.
		// NOTE(review): truncated line, see the method comment.
		for ( int i=0; i codes,
//		TagType tagType,
//		int rawIndex) // Starts at 0
//	{
//		if ( codes == null ) return null;
//		if (( rawIndex < 0 ) || ( rawIndex >= codes.size() )) {
//			return null;
//		}
//		Code code = codes.get(rawIndex);
//		if ( code.getTagType() == tagType ) {
//			return code.clone();
//		}
//		return null;
//	}
	
	/**
	 * Computes the index of a code from an optional attribute.
	 *
	 * @param lastIndex the index used previously.
	 * @param attr the attribute holding the index, or null if there is none.
	 * @return the attribute value parsed as an integer, or lastIndex plus one
	 * when the attribute is null.
	 * @throws NumberFormatException if the attribute value is not an integer.
	 */
	private int getRawIndex (int lastIndex, Node attr) {
		if ( attr != null ) {
			// Codes returned by GlobalSight are 0-based
			return Integer.parseInt(attr.getNodeValue());
		}
		return lastIndex + 1;
	}
	
	/**
	 * Removes an attribute. This implementation performs no action.
	 *
	 * @param name the name of the attribute (not used).
	 */
	@Override
	public void removeAttribute (String name) {
		// No operation.
	}
	
	/**
	 * Clears all attributes. This implementation performs no action.
	 */
	@Override
	public void clearAttributes () {
		// No operation.
	}

	/**
	 * Sets an attribute. This implementation performs no action.
	 *
	 * @param name the name of the attribute (not used).
	 * @param value the value of the attribute (not used).
	 */
	@Override
	public void setAttribute (String name, String value) {
		// No operation.
	}

	/**
	 * Converts a locale to the code form used by this connector.
	 *
	 * @param locale the locale to convert.
	 * @return the POSIX form of the locale, as given by
	 * {@link LocaleId#toPOSIXLocaleId()}.
	 */
	@Override
	protected String toInternalCode (LocaleId locale) {
		// TODO: Do we need to adjust the code to always have the country?
		final String posixCode = locale.toPOSIXLocaleId();
		return posixCode;
	}

	/**
	 * Defines the upper limit for the number of hits kept by a query.
	 * With this connector the limit can only lower the number of hits below
	 * the maximum defined in the active TM profile; it cannot raise it.
	 *
	 * @param max the maximum number of hits to keep.
	 */
	@Override
	public void setMaximumHits (int max) {
		this.maxHits = max;
	}

	/**
	 * Defines the minimal percentage a match needs to be kept by a query.
	 * The active TM profile also defines a threshold; this setting is applied
	 * by the connector to the entries the service returns.
	 *
	 * @param threshold the minimal percentage to keep a match.
	 */
	@Override
	public void setThreshold (int threshold) {
		this.threshold = threshold;
	}

	/**
	 * Gets the current upper limit for the number of hits kept by a query.
	 *
	 * @return the maximum number of hits.
	 */
	@Override
	public int getMaximumHits () {
		return this.maxHits;
	}

	/**
	 * Gets the minimal percentage a match needs to be kept by a query.
	 *
	 * @return the current threshold.
	 */
	@Override
	public int getThreshold () {
		return this.threshold;
	}

	/**
	 * Gets the parameters currently used by this connector.
	 *
	 * @return the current parameters object.
	 */
	@Override
	public IParameters getParameters () {
		return this.params;
	}

	/**
	 * Replaces the parameters used by this connector.
	 *
	 * @param params the new parameters; the object is cast to the Parameters
	 * class of this connector.
	 * @throws ClassCastException if the object is not a Parameters instance.
	 */
	@Override
	public void setParameters (IParameters params) {
		final Parameters typed = (Parameters) params;
		this.params = typed;
	}

	/**
	 * Sets the root directory. This implementation performs no action.
	 *
	 * @param rootDir the root directory (not used).
	 */
	@Override
	public void setRootDirectory (String rootDir) {
		// Not used
	}
}