// net.sf.okapi.lib.serialization.textunitflat.Proto2TextUnitFlat (Maven / Gradle / Ivy)
/*===========================================================================
Copyright (C) 2016 by the Okapi Framework contributors
-----------------------------------------------------------------------------
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at
your option) any later version.
This library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
See also the full LGPL text here: http://www.gnu.org/copyleft/lesser.html
===========================================================================*/
package net.sf.okapi.lib.serialization.textunitflat;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.annotation.AltTranslation;
import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
import net.sf.okapi.common.annotation.Note;
import net.sf.okapi.common.annotation.NoteAnnotation;
import net.sf.okapi.common.query.MatchType;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.Property;
import net.sf.okapi.common.resource.Segment;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextFragment.TagType;
import net.sf.okapi.common.resource.TextPart;
import net.sf.okapi.common.resource.TextUnit;
import net.sf.okapi.common.resource.WhitespaceStrategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
/**
 * Converts the flat protobuf text-unit messages
 * ({@code net.sf.okapi.proto.textunitflat.*}) into their Okapi resource
 * counterparts ({@link TextUnit}, {@link TextContainer}, {@link TextFragment},
 * {@link Code}, ...).
 * <p>
 * Every method is static and keeps no state of its own: each one reads a single
 * protobuf message (or enum value) and builds a new Okapi object from it.
 */
public class Proto2TextUnitFlat {

    /**
     * Maps a protobuf tag type onto the Okapi {@link TagType}.
     *
     * @param tt the protobuf tag type
     * @return the matching Okapi tag type
     */
    static TagType toTagType(net.sf.okapi.proto.textunitflat.TextFragment.TagType tt) {
        switch (tt) {
            case CLOSING: return TagType.CLOSING;
            case OPENING: return TagType.OPENING;
            case PLACEHOLDER: return TagType.PLACEHOLDER;
            // NOTE(review): any other value is resolved through valueOf("ERROR").
            // Presumably TagType declares no ERROR constant, in which case this
            // throws IllegalArgumentException - confirm that this is intended.
            default: return TagType.valueOf("ERROR");
        }
    }

    /**
     * Builds an Okapi {@link Code} from a protobuf code message.
     * <p>
     * Tag type, code type and data go through the constructor; id, outer data,
     * flag, display text, original id, the "added" marker and all properties
     * are then copied over. Merged data is only copied when the message
     * reports that it has some.
     *
     * @param code the protobuf code
     * @return the new Okapi code
     */
    static Code toCode(net.sf.okapi.proto.textunitflat.Code code) {
        Code result = new Code(toTagType(code.getTagType()), code.getCodeType(), code.getData());
        result.setId(code.getId());
        result.setOuterData(code.getOuterData());
        result.setFlag(code.getFlag());
        result.setDisplayText(code.getDisplayText());
        result.setOriginalId(code.getOriginalId());
        result.setAdded(code.getAdded());
        if (code.hasMergedData()) {
            result.setMergedData(code.getMergedData());
        }
        for (net.sf.okapi.proto.textunitflat.Property property : code.getPropertiesList()) {
            result.setProperty(toProperty(property));
        }
        return result;
    }

    /**
     * Builds an Okapi {@link TextFragment} from a protobuf fragment: the plain
     * text is used as the starting content and every code is inserted at its
     * recorded position, then the markers are balanced.
     *
     * @param tf the protobuf text fragment
     * @return the new Okapi text fragment
     */
    static TextFragment toTextFragment(net.sf.okapi.proto.textunitflat.TextFragment tf) {
        TextFragment result = new TextFragment(tf.getText());
        int codeCount = 0;
        for (net.sf.okapi.proto.textunitflat.Code code : tf.getCodesList()) {
            // Recorded positions are shifted by two for every code inserted so
            // far - presumably because each inserted code occupies two
            // characters of the fragment's coded text and the positions refer
            // to the code-free text (TODO confirm). This relies on the codes
            // being listed in ascending position order.
            result.insert(code.getPosition() + (codeCount * 2), toCode(code));
            codeCount++;
        }
        result.balanceMarkers();
        return result;
    }

    /**
     * Builds an Okapi {@link TextPart} - or a {@link Segment} when the message
     * is flagged as a segment - and copies properties, original id and
     * whitespace strategy onto it.
     *
     * @param part the protobuf text part
     * @return the new text part or segment
     */
    private static TextPart toTextPart(net.sf.okapi.proto.textunitflat.TextPart part) {
        TextPart result;
        if (part.getSegment()) {
            result = new Segment(part.getId(), toTextFragment(part.getText()));
        } else {
            result = new TextPart(toTextFragment(part.getText()));
        }
        for (net.sf.okapi.proto.textunitflat.Property property : part.getPropertiesList()) {
            result.setProperty(toProperty(property));
        }
        result.setOriginalId(part.getOriginalId());
        result.setWhitespaceStrategy(toWhitespaceStrategy(part.getWhitespaceStrategy()));
        return result;
    }

    /**
     * Maps a protobuf whitespace strategy onto the Okapi
     * {@link WhitespaceStrategy} by its wire number: 1 is PRESERVE, 2 is
     * NORMALIZE, 0 and every other number are INHERIT.
     *
     * @param whitespaceStrategy the protobuf whitespace strategy
     * @return the matching Okapi whitespace strategy
     */
    private static WhitespaceStrategy toWhitespaceStrategy(net.sf.okapi.proto.textunitflat.TextPart.WhitespaceStrategy whitespaceStrategy) {
        switch (whitespaceStrategy.getNumber()) {
            case 1:
                return WhitespaceStrategy.PRESERVE;
            case 2:
                return WhitespaceStrategy.NORMALIZE;
            case 0:
            default:
                return WhitespaceStrategy.INHERIT;
        }
    }

    /**
     * Builds an Okapi {@link Property} from the name, value and read-only flag
     * of a protobuf property.
     *
     * @param prop the protobuf property
     * @return the new Okapi property
     */
    static Property toProperty(net.sf.okapi.proto.textunitflat.Property prop) {
        return new Property(prop.getName(), prop.getValue(), prop.getReadOnly());
    }

    /**
     * Builds an Okapi {@link TextContainer} from a protobuf container,
     * including its parts, attributes, properties, alternate translations and
     * notes.
     * <p>
     * An {@link AltTranslationsAnnotation} is attached only when the message
     * carries at least one alternate translation; a {@link NoteAnnotation} is
     * always attached, even when it ends up with no notes.
     *
     * @param tc the protobuf text container
     * @return the new Okapi text container
     */
    static TextContainer toTextContainer(net.sf.okapi.proto.textunitflat.TextContainer tc) {
        List<TextPart> parts = new ArrayList<>();
        for (net.sf.okapi.proto.textunitflat.TextPart part : tc.getPartsList()) {
            parts.add(toTextPart(part));
        }

        // WORKAROUND: Empty TextContainer constructor creates an empty segment.
        // add all the parts as list to avoid this side effect
        TextContainer result = new TextContainer(parts.toArray(new TextPart[0]));
        result.setId(tc.getId());
        result.setMimeType(tc.getMimeType());
        result.setName(tc.getName());
        result.setType(tc.getTuType());
        result.setIsTranslatable(tc.getTranslatable());
        result.setPreserveWhitespaces(tc.getPreserveWS());
        result.setHasBeenSegmentedFlag(tc.getSegApplied());

        for (net.sf.okapi.proto.textunitflat.Property property : tc.getPropertiesList()) {
            result.setProperty(toProperty(property));
        }

        if (tc.getAltTransCount() > 0) {
            AltTranslationsAnnotation ann = new AltTranslationsAnnotation();
            for (net.sf.okapi.proto.textunitflat.AltTranslation at : tc.getAltTransList()) {
                ann.add(toAltTrans(at));
            }
            result.setAnnotation(ann);
        }

        // NOTE(review): unlike the alternate translations above, the note
        // annotation is set unconditionally - confirm callers rely on that.
        NoteAnnotation na = new NoteAnnotation();
        for (net.sf.okapi.proto.textunitflat.Note note : tc.getNotesList()) {
            na.add(toNote(note));
        }
        result.setAnnotation(na);
        return result;
    }

    /**
     * Builds an Okapi {@link Note} from a protobuf note, copying the text, the
     * annotated side, the priority, the author ("from") and the language.
     *
     * @param note the protobuf note
     * @return the new Okapi note
     */
    public static Note toNote(net.sf.okapi.proto.textunitflat.Note note) {
        Note result = new Note(note.getNote());
        result.setAnnotates(toAnnotates(note.getAnnotates()));
        result.setPriority(toPriority(note.getPriority()));
        result.setFrom(note.getFrom());
        result.setXmlLang(note.getXmlLang());
        return result;
    }

    /**
     * Maps the protobuf "annotates" value onto {@link Note.Annotates} by
     * constant name: "source" gives SOURCE, "target" gives TARGET, anything
     * else gives GENERAL.
     * <p>
     * The name is compared case-insensitively so that the mapping works
     * whichever letter case the generated enum constants use.
     *
     * @param annotates the protobuf value
     * @return the matching Okapi value
     */
    public static Note.Annotates toAnnotates(net.sf.okapi.proto.textunitflat.Note.Annotates annotates) {
        String name = annotates.name();
        if ("source".equalsIgnoreCase(name)) {
            return Note.Annotates.SOURCE;
        }
        if ("target".equalsIgnoreCase(name)) {
            return Note.Annotates.TARGET;
        }
        return Note.Annotates.GENERAL;
    }

    /**
     * Maps a protobuf note priority onto {@link Note.Priority} by its wire
     * number: 0 to 8 give ONE to NINE, every other number gives TEN.
     *
     * @param priority the protobuf priority
     * @return the matching Okapi priority
     */
    public static Note.Priority toPriority(net.sf.okapi.proto.textunitflat.Note.Priority priority) {
        switch (priority.getNumber()) {
            case 0: return Note.Priority.ONE;
            case 1: return Note.Priority.TWO;
            case 2: return Note.Priority.THREE;
            case 3: return Note.Priority.FOUR;
            case 4: return Note.Priority.FIVE;
            case 5: return Note.Priority.SIX;
            case 6: return Note.Priority.SEVEN;
            case 7: return Note.Priority.EIGHT;
            case 8: return Note.Priority.NINE;
            default: return Note.Priority.TEN;
        }
    }

    /**
     * Maps a protobuf match type onto the Okapi {@link MatchType} using its
     * string form. Names not listed here map to {@code MatchType.UKNOWN}
     * (the constant is spelled that way in the referenced enum).
     *
     * @param matchType the protobuf match type
     * @return the matching Okapi match type
     */
    public static MatchType toMatchType(net.sf.okapi.proto.textunitflat.AltTranslation.MatchType matchType) {
        switch (matchType.toString()) {
            case "ACCEPTED": return MatchType.ACCEPTED;
            case "EXACT_UNIQUE_ID": return MatchType.EXACT_UNIQUE_ID;
            case "EXACT_LOCAL_CONTEXT": return MatchType.EXACT_LOCAL_CONTEXT;
            case "EXACT": return MatchType.EXACT;
            case "EXACT_TEXT_ONLY": return MatchType.EXACT_TEXT_ONLY;
            case "FUZZY": return MatchType.FUZZY;
            default: return MatchType.UKNOWN;
        }
    }

    /**
     * Builds an Okapi {@link AltTranslation} from a protobuf alternate
     * translation. Locales are parsed from their BCP-47 strings and the
     * embedded text unit is converted with {@link #toTextUnit}.
     * <p>
     * Original author's note: assumes only one TextFragment per TextContainer.
     *
     * @param altTrans the protobuf alternate translation
     * @return the new Okapi alternate translation
     */
    public static AltTranslation toAltTrans(net.sf.okapi.proto.textunitflat.AltTranslation altTrans) {
        LocaleId srcLoc = LocaleId.fromBCP47(altTrans.getSourceLocale());
        LocaleId trgLoc = LocaleId.fromBCP47(altTrans.getTargetLocale());
        AltTranslation result = new AltTranslation(
                srcLoc,
                trgLoc,
                toTextUnit(altTrans.getTextUnit()),
                toMatchType(altTrans.getType()),
                altTrans.getCombinedScore(),
                altTrans.getOrigin());
        result.setEngine(altTrans.getEngine());
        result.setFromOriginal(altTrans.getFromOriginal());
        result.setFuzzyScore(altTrans.getFuzzyScore());
        result.setQualityScore(altTrans.getQualityScore());
        result.setAltTransType(altTrans.getAltTransType());
        return result;
    }

    /**
     * Builds an Okapi {@link TextUnit} from a protobuf text unit: source
     * container, attributes, one target container per entry of the targets
     * map (keyed by BCP-47 locale string), properties and notes.
     * <p>
     * A {@link NoteAnnotation} is always attached, even when the message has
     * no notes.
     *
     * @param tu the protobuf text unit
     * @return the new Okapi text unit
     */
    public static TextUnit toTextUnit(net.sf.okapi.proto.textunitflat.TextUnit tu) {
        // Created with an empty source text; the real source is set right after.
        TextUnit result = new TextUnit(tu.getId(), "");
        result.setSource(toTextContainer(tu.getSource()));
        result.setMimeType(tu.getMimeType());
        result.setName(tu.getName());
        result.setType(tu.getTuType());
        result.setIsTranslatable(tu.getTranslatable());
        result.setPreserveWhitespaces(tu.getPreserveWS());

        for (Entry<String, net.sf.okapi.proto.textunitflat.TextContainer> trg : tu.getTargetsMap().entrySet()) {
            result.setTarget(LocaleId.fromBCP47(trg.getKey()), toTextContainer(trg.getValue()));
        }

        for (net.sf.okapi.proto.textunitflat.Property property : tu.getPropertiesList()) {
            result.setProperty(toProperty(property));
        }

        NoteAnnotation na = new NoteAnnotation();
        for (net.sf.okapi.proto.textunitflat.Note note : tu.getNotesList()) {
            na.add(toNote(note));
        }
        result.setAnnotation(na);
        return result;
    }

    /**
     * Converts every text unit of a protobuf {@code TextUnits} message, in
     * list order.
     *
     * @param tus the protobuf list wrapper
     * @return a new mutable list holding the converted text units
     */
    public static List<TextUnit> toTextUnits(net.sf.okapi.proto.textunitflat.TextUnits tus) {
        List<TextUnit> result = new ArrayList<>();
        for (net.sf.okapi.proto.textunitflat.TextUnit tu : tus.getTextUnitsList()) {
            result.add(toTextUnit(tu));
        }
        return result;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy