All downloads are free. Search and download functionality uses the official Maven repository.

net.sf.okapi.lib.serialization.textunitflat.Proto2TextUnitFlat Maven / Gradle / Ivy

/*===========================================================================
  Copyright (C) 2016 by the Okapi Framework contributors
-----------------------------------------------------------------------------
  This library is free software; you can redistribute it and/or modify it 
  under the terms of the GNU Lesser General Public License as published by 
  the Free Software Foundation; either version 2.1 of the License, or (at 
  your option) any later version.

  This library is distributed in the hope that it will be useful, but 
  WITHOUT ANY WARRANTY; without even the implied warranty of 
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser 
  General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License 
  along with this library; if not, write to the Free Software Foundation, 
  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  See also the full LGPL text here: http://www.gnu.org/copyleft/lesser.html
===========================================================================*/

package net.sf.okapi.lib.serialization.textunitflat;

import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.annotation.AltTranslation;
import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
import net.sf.okapi.common.annotation.Note;
import net.sf.okapi.common.annotation.NoteAnnotation;
import net.sf.okapi.common.query.MatchType;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.Property;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextFragment.TagType;
import net.sf.okapi.common.resource.TextPart;
import net.sf.okapi.common.resource.TextUnit;
import net.sf.okapi.common.resource.WhitespaceStrategy;

import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;

/**
 * Converts the protobuf "flat text unit" messages
 * ({@code net.sf.okapi.proto.textunitflat.*}) back into the corresponding
 * Okapi resource objects ({@link TextUnit}, {@link TextContainer},
 * {@link TextFragment}, {@link Code}, ...).
 * <p>
 * All methods are static and keep no state between calls.
 */
public class Proto2TextUnitFlat {

	/**
	 * Maps a protobuf tag type to the Okapi {@link TagType}.
	 *
	 * @param tt the protobuf tag type.
	 * @return the matching Okapi tag type.
	 * @throws IllegalArgumentException if {@code tt} is not one of
	 *         CLOSING, OPENING or PLACEHOLDER.
	 */
	static TagType toTagType(net.sf.okapi.proto.textunitflat.TextFragment.TagType tt) {
		switch (tt) {
			case CLOSING:
				return TagType.CLOSING;
			case OPENING:
				return TagType.OPENING;
			case PLACEHOLDER:
				return TagType.PLACEHOLDER;
			default:
				// Fail explicitly, naming the offending value, instead of relying
				// on the lookup of a non-existent enum constant to throw.
				throw new IllegalArgumentException("Unsupported tag type: " + tt);
		}
	}

	/**
	 * Builds an Okapi {@link Code} from its protobuf representation.
	 *
	 * @param code the protobuf code.
	 * @return a new code carrying the tag type, code type, data, id, outer data,
	 *         flag, display text, original id, "added" marker, merged data
	 *         (only when present in the message) and properties of {@code code}.
	 */
	static Code toCode(net.sf.okapi.proto.textunitflat.Code code) {
		Code result = new Code(toTagType(code.getTagType()), code.getCodeType(), code.getData());
		result.setId(code.getId());
		result.setOuterData(code.getOuterData());
		result.setFlag(code.getFlag());
		result.setDisplayText(code.getDisplayText());
		result.setOriginalId(code.getOriginalId());
		result.setAdded(code.getAdded());

		// Merged data is copied only when the message actually carries it.
		if (code.hasMergedData()) {
			result.setMergedData(code.getMergedData());
		}

		for (net.sf.okapi.proto.textunitflat.Property property : code.getPropertiesList()) {
			result.setProperty(toProperty(property));
		}

		return result;
	}

	/**
	 * Builds an Okapi {@link TextFragment} from its protobuf representation.
	 * <p>
	 * The fragment is created from the message text, then every code is
	 * inserted at its recorded position and the markers are balanced.
	 *
	 * @param tf the protobuf text fragment.
	 * @return the rebuilt text fragment.
	 */
	static TextFragment toTextFragment(net.sf.okapi.proto.textunitflat.TextFragment tf) {
		TextFragment result = new TextFragment(tf.getText());
		int codeCount = 0;
		for (net.sf.okapi.proto.textunitflat.Code code : tf.getCodesList()) {
			// The insertion offset grows by two for every code already inserted.
			// NOTE(review): presumably each inserted code takes two characters of
			// coded text and the codes are listed by increasing position - confirm.
			result.insert(code.getPosition() + (codeCount * 2), toCode(code));
			codeCount++;
		}
		result.balanceMarkers();
		return result;
	}

	/**
	 * Builds an Okapi {@link TextPart} (or {@link Segment}, when the message is
	 * flagged as a segment) from its protobuf representation.
	 *
	 * @param part the protobuf text part.
	 * @return the rebuilt part, with properties, original id and whitespace
	 *         strategy copied over.
	 */
	private static TextPart toTextPart(net.sf.okapi.proto.textunitflat.TextPart part) {
		TextPart result;
		if (part.getSegment()) {
			result = new Segment(part.getId(), toTextFragment(part.getText()));
		} else {
			result = new TextPart(toTextFragment(part.getText()));
		}

		for (net.sf.okapi.proto.textunitflat.Property property : part.getPropertiesList()) {
			result.setProperty(toProperty(property));
		}
		result.setOriginalId(part.getOriginalId());
		result.setWhitespaceStrategy(toWhitespaceStrategy(part.getWhitespaceStrategy()));

		return result;
	}

	/**
	 * Maps a protobuf whitespace strategy to the Okapi
	 * {@link WhitespaceStrategy} using the numeric value of the protobuf enum:
	 * 1 is PRESERVE, 2 is NORMALIZE, and 0 or any other number is INHERIT.
	 *
	 * @param whitespaceStrategy the protobuf whitespace strategy.
	 * @return the matching Okapi whitespace strategy.
	 */
	private static WhitespaceStrategy toWhitespaceStrategy(net.sf.okapi.proto.textunitflat.TextPart.WhitespaceStrategy whitespaceStrategy) {
		switch (whitespaceStrategy.getNumber()) {
			case 1:
				return WhitespaceStrategy.PRESERVE;
			case 2:
				return WhitespaceStrategy.NORMALIZE;
			case 0:
			default:
				return WhitespaceStrategy.INHERIT;
		}
	}

	/**
	 * Builds an Okapi {@link Property} from its protobuf representation.
	 *
	 * @param prop the protobuf property.
	 * @return a new property with the same name, value and read-only flag.
	 */
	static Property toProperty(net.sf.okapi.proto.textunitflat.Property prop) {
		return new Property(prop.getName(), prop.getValue(), prop.getReadOnly());
	}

	/**
	 * Builds an Okapi {@link TextContainer} from its protobuf representation,
	 * including its parts, properties, alternate translations and notes.
	 * <p>
	 * An {@link AltTranslationsAnnotation} is attached only when the message
	 * holds at least one alternate translation; a {@link NoteAnnotation} is
	 * always attached, even when the message holds no note.
	 *
	 * @param tc the protobuf text container.
	 * @return the rebuilt text container.
	 */
	static TextContainer toTextContainer(net.sf.okapi.proto.textunitflat.TextContainer tc) {
		List<TextPart> parts = new ArrayList<>();
		for (net.sf.okapi.proto.textunitflat.TextPart part : tc.getPartsList()) {
			parts.add(toTextPart(part));
		}
		// WORKAROUND: Empty TextContainer constructor creates an empty segment.
		// add all the parts as list to avoid this side effect
		TextContainer result = new TextContainer(parts.toArray(new TextPart[0]));

		result.setId(tc.getId());
		result.setMimeType(tc.getMimeType());
		result.setName(tc.getName());
		result.setType(tc.getTuType());
		result.setIsTranslatable(tc.getTranslatable());
		result.setPreserveWhitespaces(tc.getPreserveWS());
		result.setHasBeenSegmentedFlag(tc.getSegApplied());

		for (net.sf.okapi.proto.textunitflat.Property property : tc.getPropertiesList()) {
			result.setProperty(toProperty(property));
		}

		if (tc.getAltTransCount() > 0) {
			AltTranslationsAnnotation ann = new AltTranslationsAnnotation();
			for (net.sf.okapi.proto.textunitflat.AltTranslation at : tc.getAltTransList()) {
				ann.add(toAltTrans(at));
			}
			result.setAnnotation(ann);
		}

		NoteAnnotation na = new NoteAnnotation();
		for (net.sf.okapi.proto.textunitflat.Note note : tc.getNotesList()) {
			na.add(toNote(note));
		}
		result.setAnnotation(na);

		return result;
	}

	/**
	 * Builds an Okapi {@link Note} from its protobuf representation.
	 *
	 * @param note the protobuf note.
	 * @return a new note with the same text, "annotates" target, priority,
	 *         author ("from") and xml:lang value.
	 */
	public static Note toNote(net.sf.okapi.proto.textunitflat.Note note) {
		Note result = new Note(note.getNote());
		result.setAnnotates(toAnnotates(note.getAnnotates()));
		result.setPriority(toPriority(note.getPriority()));
		result.setFrom(note.getFrom());
		result.setXmlLang(note.getXmlLang());

		return result;
	}

	/**
	 * Maps a protobuf "annotates" value to the Okapi {@link Note.Annotates}.
	 * <p>
	 * The protobuf constant name is compared case-insensitively with
	 * {@code "source"} and {@code "target"}; any other name maps to
	 * {@link Note.Annotates#GENERAL}.
	 * NOTE(review): the comparison ignores case because generated protobuf
	 * constants are usually upper-case, in which case a lowercase-only match
	 * would send every note to GENERAL - confirm against the proto definition.
	 *
	 * @param annotates the protobuf "annotates" value.
	 * @return the matching Okapi value.
	 */
	public static Note.Annotates toAnnotates(net.sf.okapi.proto.textunitflat.Note.Annotates annotates) {
		String name = annotates.name();
		if ("source".equalsIgnoreCase(name)) {
			return Note.Annotates.SOURCE;
		}
		if ("target".equalsIgnoreCase(name)) {
			return Note.Annotates.TARGET;
		}
		return Note.Annotates.GENERAL;
	}

	/**
	 * Maps a protobuf note priority to the Okapi {@link Note.Priority} using
	 * the numeric value of the protobuf enum: 0 to 8 map to ONE to NINE, and
	 * any other number maps to TEN.
	 *
	 * @param priority the protobuf priority.
	 * @return the matching Okapi priority.
	 */
	public static Note.Priority toPriority(net.sf.okapi.proto.textunitflat.Note.Priority priority) {
		switch (priority.getNumber()) {
			case 0: return Note.Priority.ONE;
			case 1: return Note.Priority.TWO;
			case 2: return Note.Priority.THREE;
			case 3: return Note.Priority.FOUR;
			case 4: return Note.Priority.FIVE;
			case 5: return Note.Priority.SIX;
			case 6: return Note.Priority.SEVEN;
			case 7: return Note.Priority.EIGHT;
			case 8: return Note.Priority.NINE;
			default: return Note.Priority.TEN;
		}
	}

	/**
	 * Maps a protobuf match type to the Okapi {@link MatchType} by comparing
	 * its string form with a fixed list of names.
	 * <p>
	 * Only ACCEPTED, EXACT_UNIQUE_ID, EXACT_LOCAL_CONTEXT, EXACT,
	 * EXACT_TEXT_ONLY and FUZZY are recognized; every other value maps to
	 * {@link MatchType#UKNOWN}.
	 * NOTE(review): confirm that collapsing all other match types to UKNOWN
	 * is intended.
	 *
	 * @param matchType the protobuf match type.
	 * @return the matching Okapi match type.
	 */
	public static MatchType toMatchType(net.sf.okapi.proto.textunitflat.AltTranslation.MatchType matchType) {
		switch (matchType.toString()) {
			case "ACCEPTED": return MatchType.ACCEPTED;
			case "EXACT_UNIQUE_ID": return MatchType.EXACT_UNIQUE_ID;
			case "EXACT_LOCAL_CONTEXT": return MatchType.EXACT_LOCAL_CONTEXT;
			case "EXACT": return MatchType.EXACT;
			case "EXACT_TEXT_ONLY": return MatchType.EXACT_TEXT_ONLY;
			case "FUZZY": return MatchType.FUZZY;
			default: return MatchType.UKNOWN;
		}
	}

	/**
	 * Builds an Okapi {@link AltTranslation} from its protobuf representation.
	 * <p>
	 * The source and target locales are parsed from their BCP-47 strings and
	 * the embedded text unit is converted with
	 * {@link #toTextUnit(net.sf.okapi.proto.textunitflat.TextUnit)}.
	 *
	 * @param altTrans the protobuf alternate translation.
	 * @return the rebuilt alternate translation.
	 */
	// assume only one Textfragment per TextContainer
	public static AltTranslation toAltTrans(net.sf.okapi.proto.textunitflat.AltTranslation altTrans) {
		LocaleId srcLoc = LocaleId.fromBCP47(altTrans.getSourceLocale());
		LocaleId trgLoc = LocaleId.fromBCP47(altTrans.getTargetLocale());
		AltTranslation result = new AltTranslation(
				srcLoc,
				trgLoc,
				toTextUnit(altTrans.getTextUnit()),
				toMatchType(altTrans.getType()),
				altTrans.getCombinedScore(),
				altTrans.getOrigin());
		result.setEngine(altTrans.getEngine());
		result.setFromOriginal(altTrans.getFromOriginal());
		result.setFuzzyScore(altTrans.getFuzzyScore());
		result.setQualityScore(altTrans.getQualityScore());
		result.setAltTransType(altTrans.getAltTransType());

		return result;
	}

	/**
	 * Builds an Okapi {@link TextUnit} from its protobuf representation,
	 * including its source, targets (keyed by BCP-47 locale string),
	 * properties and notes.
	 * <p>
	 * A {@link NoteAnnotation} is always attached, even when the message
	 * holds no note.
	 *
	 * @param tu the protobuf text unit.
	 * @return the rebuilt text unit.
	 */
	public static TextUnit toTextUnit(net.sf.okapi.proto.textunitflat.TextUnit tu) {
		// Created with an empty source text; the real source is set right after.
		TextUnit result = new TextUnit(tu.getId(), "");

		result.setSource(toTextContainer(tu.getSource()));
		result.setMimeType(tu.getMimeType());
		result.setName(tu.getName());
		result.setType(tu.getTuType());
		result.setIsTranslatable(tu.getTranslatable());
		result.setPreserveWhitespaces(tu.getPreserveWS());

		for (Entry<String, net.sf.okapi.proto.textunitflat.TextContainer> trg : tu.getTargetsMap().entrySet()) {
			result.setTarget(LocaleId.fromBCP47(trg.getKey()), toTextContainer(trg.getValue()));
		}

		for (net.sf.okapi.proto.textunitflat.Property property : tu.getPropertiesList()) {
			result.setProperty(toProperty(property));
		}

		NoteAnnotation na = new NoteAnnotation();
		for (net.sf.okapi.proto.textunitflat.Note note : tu.getNotesList()) {
			na.add(toNote(note));
		}
		result.setAnnotation(na);

		return result;
	}

	/**
	 * Converts every text unit of a protobuf {@code TextUnits} message.
	 *
	 * @param tus the protobuf list of text units.
	 * @return a new mutable list with the rebuilt text units, in message order
	 *         (empty when the message holds none).
	 */
	public static List<TextUnit> toTextUnits(net.sf.okapi.proto.textunitflat.TextUnits tus) {
		List<TextUnit> result = new ArrayList<>();
		for (net.sf.okapi.proto.textunitflat.TextUnit tu : tus.getTextUnitsList()) {
			result.add(toTextUnit(tu));
		}
		return result;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy