// net.sf.okapi.lib.serialization.step.OriginalDocumentTextUnitFlatMergerStep (Maven / Gradle / Ivy)
/*===========================================================================
Copyright (C) 2009-2013 by the Okapi Framework contributors
-----------------------------------------------------------------------------
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at
your option) any later version.
This library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
See also the full LGPL text here: http://www.gnu.org/copyleft/lesser.html
===========================================================================*/
package net.sf.okapi.lib.serialization.step;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.exceptions.OkapiFilterCreationException;
import net.sf.okapi.common.exceptions.OkapiMergeException;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.filters.IFilterConfigurationMapper;
import net.sf.okapi.common.io.InputStreamFromOutputStream;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.pipelinedriver.PipelineDriver;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.lib.merge.merge.SkeletonMergerWriter;
import net.sf.okapi.lib.merge.merge.TextUnitMergerSerialized;
import net.sf.okapi.lib.serialization.filter.TextUnitFlatFilter;
import net.sf.okapi.lib.merge.merge.TextUnitMerger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Tkit merger which re-filters the original source file to provide the
 * skeleton for merging. Uses lib-merge's {@link SkeletonMergerWriter} and {@link TextUnitMergerSerialized}.
 * <p>
 * The step receives the serialized text units as its raw document input and
 * the original source document as the second input; it returns a new raw
 * document whose content is read from the stream the merger writes to.
 *
 * @author jimh
 *
 */
public class OriginalDocumentTextUnitFlatMergerStep extends BasePipelineStep {
    private final Logger logger = LoggerFactory.getLogger(getClass());

    /** Filter used to re-read the original document; (re)created in {@link #handleRawDocument(Event)}. */
    private IFilter filter;
    private IFilterConfigurationMapper fcMapper;
    private String outputEncoding;
    private LocaleId trgLoc;
    /** The original source document (second pipeline input). */
    private RawDocument originalDocument;
    private SkeletonMergerWriter skelMergerWriter;
    /** Step parameters; assigned in the constructor and not otherwise read in this class. */
    private Parameters params;

    /**
     * Creates the step with default {@link Parameters} and a
     * {@link SkeletonMergerWriter} backed by a {@link TextUnitMergerSerialized}.
     */
    public OriginalDocumentTextUnitFlatMergerStep() {
        params = new Parameters();
        skelMergerWriter = new SkeletonMergerWriter(new TextUnitMergerSerialized());
    }

    /**
     * Creates the step with a caller-supplied {@link SkeletonMergerWriter}.
     *
     * @param skelMergerWriter the writer to use instead of the default one
     */
    public OriginalDocumentTextUnitFlatMergerStep(SkeletonMergerWriter skelMergerWriter) {
        this();
        this.skelMergerWriter = skelMergerWriter;
    }

    @Override
    public String getName() {
        return "Original Document TextUnit Flat Json Merger";
    }

    @Override
    public String getDescription() {
        return "TextUnit Flat Json merger which re-filters the original source file to provide the skeleton for merging.";
    }

    /**
     * Sets the encoding passed to the merger writer and used for the returned raw document.
     *
     * @param outputEncoding name of the output encoding
     */
    @StepParameterMapping(parameterType = StepParameterType.OUTPUT_ENCODING)
    public void setOutputEncoding(String outputEncoding) {
        this.outputEncoding = outputEncoding;
    }

    /**
     * Target locales. Currently only the first locale in the list is used.
     *
     * @param targetLocales list of target locales; must contain at least one entry
     * @throws IndexOutOfBoundsException if the list is empty
     */
    @StepParameterMapping(parameterType = StepParameterType.TARGET_LOCALES)
    public void setTargetLocales(final List<LocaleId> targetLocales) {
        this.trgLoc = targetLocales.get(0);
    }

    /**
     * This is the original source document
     *
     * @param secondInput Original source document
     */
    @StepParameterMapping(parameterType = StepParameterType.SECOND_INPUT_RAWDOC)
    public void setSecondInput(final RawDocument secondInput) {
        this.originalDocument = secondInput;
    }

    /**
     * The {@link IFilterConfigurationMapper} set in the {@link PipelineDriver}
     *
     * @param fcMapper mapper used to create the filter for the original document
     */
    @StepParameterMapping(parameterType = StepParameterType.FILTER_CONFIGURATION_MAPPER)
    public void setFilterConfigurationMapper(final IFilterConfigurationMapper fcMapper) {
        this.fcMapper = fcMapper;
    }

    /**
     * Merges the text units carried by the incoming raw document into the
     * original document.
     * <p>
     * A filter is created for the original document from its filter
     * configuration id and opened on it; the merger writer is then configured
     * with that filter, the target locale and the output encoding. The merge
     * itself happens inside the {@code produce} callback of an
     * {@link InputStreamFromOutputStream}: events read from a
     * {@link TextUnitFlatFilter} opened on the incoming raw document are fed to
     * the merger writer, whose output is the stream's sink.
     *
     * @param event the raw document event holding the serialized text units
     * @return a new {@link EventType#RAW_DOCUMENT} event wrapping the stream of merged output
     * @throws OkapiFilterCreationException if no filter can be created for the
     *         original document's filter configuration id
     */
    @SuppressWarnings("resource")
    @Override
    protected Event handleRawDocument(final Event event) {
        filter = fcMapper.createFilter(originalDocument.getFilterConfigId(), filter);
        if (filter == null) {
            throw new OkapiFilterCreationException(String.format(
                    "Cannot create the filter or load the configuration for '%s'",
                    originalDocument.getFilterConfigId()));
        }
        filter.open(originalDocument);
        skelMergerWriter.setFilter(filter);
        skelMergerWriter.setOptions(trgLoc, outputEncoding);

        final TextUnitFlatFilter textUnitFlatFilter = new TextUnitFlatFilter();
        final InputStreamFromOutputStream<Void> is = new InputStreamFromOutputStream<Void>() {
            // Failure captured in produce() and re-thrown from afterClose()
            OkapiMergeException error;

            @Override
            protected Void produce(OutputStream sink) throws Exception {
                try {
                    skelMergerWriter.setOutput(sink);
                    textUnitFlatFilter.open(event.getRawDocument());
                    while (textUnitFlatFilter.hasNext()) {
                        skelMergerWriter.handleEvent(textUnitFlatFilter.next());
                    }
                } catch (Exception e) {
                    // Remember the failure instead of throwing here; it is surfaced in afterClose()
                    error = new OkapiMergeException("Error merging from original file", e);
                } finally {
                    // Nested so that every resource gets a close attempt even
                    // when an earlier close() throws
                    try {
                        textUnitFlatFilter.close();
                    } finally {
                        try {
                            skelMergerWriter.close();
                        } finally {
                            originalDocument.close();
                        }
                    }
                }
                return null;
            }

            @Override
            protected void afterClose() {
                // check for filter/merger error
                // throw the original exception
                if (error != null) {
                    try {
                        close();
                    } catch (IOException e) {
                        logger.error("Error closing InputStreamFromOutputStream", e);
                    }
                    throw error;
                }
            }
        };

        // Writer step closes the RawDocument
        return new Event(EventType.RAW_DOCUMENT,
                new RawDocument(is, outputEncoding, trgLoc));
    }

    /**
     * @return the filter
     */
    public IFilter getFilter() {
        return filter;
    }

    /**
     * @param filter the filter to set
     */
    public void setFilter(IFilter filter) {
        this.filter = filter;
    }

    /**
     * Get the current {@link SkeletonMergerWriter}
     *
     * @return the skeletonMergerWriter
     */
    public SkeletonMergerWriter getSkelMergerWriter() {
        return skelMergerWriter;
    }

    /**
     * Set the {@link SkeletonMergerWriter}. Must call before handleRawDocument.
     * Use this method to update {@link TextUnitMerger} {@link net.sf.okapi.lib.merge.merge.Parameters}
     * {@link SkeletonMergerWriter#setParameters(IParameters)}
     *
     * @param skelMergerWriter the writer to use for merging
     */
    public void setSkelMergerWriter(SkeletonMergerWriter skelMergerWriter) {
        this.skelMergerWriter = skelMergerWriter;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy