All downloads are free. The search and download functionality uses the official Maven repository.

net.sf.okapi.steps.common.codesimplifier.PostSegmentationCodeSimplifierStep Maven / Gradle / Ivy

The newest version!
/*===========================================================================
  Copyright (C) 2011 by the Okapi Framework contributors
-----------------------------------------------------------------------------
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
===========================================================================*/

package net.sf.okapi.steps.common.codesimplifier;

import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.MimeTypeMapper;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiBadStepInputException;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.core.simplifierrules.ParseException;
import net.sf.okapi.core.simplifierrules.SimplifierRules;

/**
 * Simplify inline codes by merging adjacent codes (where safe) and trimming start and end codes.
 * 

* WARNING: An error will be thrown if the TextUnits have not been segmented. This step must be run post-segmentation as trimmed codes are moved to inter-segment {@link TextPart}s. * {@link TextPart}s will be created if needed. *

*/ @UsingParameters(Parameters.class) public class PostSegmentationCodeSimplifierStep extends BasePipelineStep { private Parameters params; public PostSegmentationCodeSimplifierStep() { super(); params = new Parameters(); } @Override public String getDescription() { return "Merges adjacent inline codes in the source and target part of a text unit." + " Also where possible, moves leading and trailing codes of the source to inter-segment TextParts." + " Expects: filter events. Sends back: filter events."; } @Override public String getName() { return "Post-segmentation Inline Codes Simplifier"; } @Override public Parameters getParameters() { return params; } @Override public void setParameters(IParameters params) { this.params = (Parameters) params; // validate simplify rules if (Util.isEmpty(this.params.getRules())) { return; } SimplifierRules r = new SimplifierRules(this.params.getRules(), new Code()); try { r.parse(); } catch (ParseException e) { throw new OkapiBadStepInputException("Simplifier Rules are Invalid", e); } } @Override protected Event handleTextUnit(Event event) { ITextUnit tu = event.getTextUnit(); boolean segmentation = MimeTypeMapper.isSegmentationSupported(tu.getMimeType()); // don't trim codes for segmented formats like XLIFF as this changes the segment boundaries from the original if (!segmentation) { TextUnitUtil.simplifyCodesPostSegmentation(tu, getParameters().getRules(), params.getRemoveLeadingTrailingCodes(), params.getMergeCodes()); } return super.handleTextUnit(event); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy