All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.okapi.lib.serialization.step.OriginalDocumentTextUnitFlatMergerStep Maven / Gradle / Ivy

There is a newer version: 1.47.0
Show newest version
/*===========================================================================
  Copyright (C) 2009-2013 by the Okapi Framework contributors
-----------------------------------------------------------------------------
  This library is free software; you can redistribute it and/or modify it 
  under the terms of the GNU Lesser General Public License as published by 
  the Free Software Foundation; either version 2.1 of the License, or (at 
  your option) any later version.

  This library is distributed in the hope that it will be useful, but 
  WITHOUT ANY WARRANTY; without even the implied warranty of 
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser 
  General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License 
  along with this library; if not, write to the Free Software Foundation, 
  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  See also the full LGPL text here: http://www.gnu.org/copyleft/lesser.html
===========================================================================*/

package net.sf.okapi.lib.serialization.step;

import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

import net.sf.okapi.common.Event;
import net.sf.okapi.common.EventType;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.exceptions.OkapiFilterCreationException;
import net.sf.okapi.common.exceptions.OkapiMergeException;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.filters.IFilterConfigurationMapper;
import net.sf.okapi.common.io.InputStreamFromOutputStream;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.pipelinedriver.PipelineDriver;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.lib.merge.merge.SkeletonMergerWriter;
import net.sf.okapi.lib.merge.merge.TextUnitMergerSerialized;
import net.sf.okapi.lib.serialization.filter.TextUnitFlatFilter;
import net.sf.okapi.lib.merge.merge.TextUnitMerger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tkit merger which re-filters the original source file to provide the
 * skeleton for merging. Uses lib-merge's {@link SkeletonMergerWriter} and {@link TextUnitMergerSerialized}.
 * 
 * @author jimh
 * 
 */
public class OriginalDocumentTextUnitFlatMergerStep extends BasePipelineStep {
	private final Logger logger = LoggerFactory.getLogger(getClass());

	private IFilter filter;
	private IFilterConfigurationMapper fcMapper;
	private String outputEncoding;
	private LocaleId trgLoc;
	private RawDocument originalDocument;
	private SkeletonMergerWriter skelMergerWriter;
	private Parameters params;

	public OriginalDocumentTextUnitFlatMergerStep() {
		params = new Parameters();
		skelMergerWriter = new SkeletonMergerWriter(new TextUnitMergerSerialized());
	}

	public OriginalDocumentTextUnitFlatMergerStep(SkeletonMergerWriter skelMergerWriter) {
		this();
		this.skelMergerWriter = skelMergerWriter;
	}
	
	@Override
	public String getName() {
		return "Original Document TextUnit Flat Json Merger";
	}

	@Override
	public String getDescription() {
		return "TextUnit Flat Json merger which re-filters the original source file to provide the skeleton for merging.";
	}

	@StepParameterMapping(parameterType = StepParameterType.OUTPUT_ENCODING)
	public void setOutputEncoding(String outputEncoding) {
		this.outputEncoding = outputEncoding;
	}
	
	/**
	 * Target locales. Currently only the first locale in the list is used.
	 * 
	 * @param targetLocales
	 */
	@StepParameterMapping(parameterType = StepParameterType.TARGET_LOCALES)
	public void setTargetLocales(final List targetLocales) {
		this.trgLoc = targetLocales.get(0);
	}
	
	/**
	 * This is the original source document
	 * 
	 * @param secondInput Original source document
	 */
	@StepParameterMapping(parameterType = StepParameterType.SECOND_INPUT_RAWDOC)
	public void setSecondInput(final RawDocument secondInput) {
		this.originalDocument = secondInput;
	}

	/**
	 * The {@link IFilterConfigurationMapper} set in the {@link PipelineDriver}
	 * 
	 * @param fcMapper
	 */
	@StepParameterMapping(parameterType = StepParameterType.FILTER_CONFIGURATION_MAPPER)
	public void setFilterConfigurationMapper(final IFilterConfigurationMapper fcMapper) {
		this.fcMapper = fcMapper;
	}
	
	@SuppressWarnings("resource")
	@Override
	protected Event handleRawDocument(final Event event) {
		filter = fcMapper.createFilter(originalDocument.getFilterConfigId(), filter);
		if (filter == null) {
			throw new OkapiFilterCreationException(String.format(
					"Cannot create the filter or load the configuration for '%s'",
					originalDocument.getFilterConfigId()));
		}
		filter.open(originalDocument);
		skelMergerWriter.setFilter(filter);
		skelMergerWriter.setOptions(trgLoc, outputEncoding);

		final TextUnitFlatFilter textUnitFlatFilter = new TextUnitFlatFilter();

		final InputStreamFromOutputStream is = new InputStreamFromOutputStream() {
			OkapiMergeException error;

			@Override
			protected Void produce(OutputStream sink) throws Exception {
				try {
					skelMergerWriter.setOutput(sink);
					textUnitFlatFilter.open(event.getRawDocument());
					while (textUnitFlatFilter.hasNext()) {
						skelMergerWriter.handleEvent(textUnitFlatFilter.next());
					}
				} catch (Exception e) {
					error = new OkapiMergeException("Error merging from original file", e);
				} finally {
					textUnitFlatFilter.close();
					skelMergerWriter.close();
					originalDocument.close();
				}

				return null;
			}
			@Override
			protected void afterClose () {
				// check for filter/merger error
				// throw the original exception
				if (error != null) {
					try {
						close();
					} catch (IOException e) {
						logger.error("Error closing InputStreamFromOutputStream", e);
					}
					throw error;
				}
			}
		};
						
		// Writer step closes the RawDocument
		return new Event(EventType.RAW_DOCUMENT,
				new RawDocument(is, outputEncoding, trgLoc));
	}

	/**
	 * @return the filter
	 */
	public IFilter getFilter() {
		return filter;
	}

	/**
	 * @param filter the filter to set
	 */
	public void setFilter(IFilter filter) {
		this.filter = filter;
	}

	/**
	 * Get the current {@link SkeletonMergerWriter}
	 *
	 * @return the skeletonMergerWriter
	 */
	public SkeletonMergerWriter getSkelMergerWriter() {
		return skelMergerWriter;
	}

	/**
	 * Set t {@link SkeletonMergerWriter}. Must call before handleRawDocument.
	 * Use this method to update {@link TextUnitMerger} {@link net.sf.okapi.lib.merge.merge.Parameters}
	 * {@link SkeletonMergerWriter#setParameters(IParameters)}
	 *
	 * @param skelMergerWriter
	 */
	public void setSkelMergerWriter(SkeletonMergerWriter skelMergerWriter) {
		this.skelMergerWriter = skelMergerWriter;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy