All downloads are free. The search and download features use the official Maven repository.

net.sf.okapi.common.resource.Segments Maven / Gradle / Ivy

/*===========================================================================
  Copyright (C) 2010 by the Okapi Framework contributors
-----------------------------------------------------------------------------
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
===========================================================================*/


package net.sf.okapi.common.resource;

import net.sf.okapi.common.IResource;
import net.sf.okapi.common.Range;
import net.sf.okapi.common.Util;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

public class Segments implements ISegments {
	// assume segments are always aligned when created
	private AlignmentStatus alignmentStatus = AlignmentStatus.ALIGNED;
	private TextContainer parent;
	private List parts;

	public Segments() {
	}

    /**
	 * Creates an uninitialized Segments object.
	 * 

* IMPORTANT: setParts() must be called with a non-null argument before * calling any other methods. * * @param parent the parent {@link TextContainer}. */ public Segments(TextContainer parent) { this.parent = parent; } /** * Sets the list of TextPart objects in which the segments for this Segments * object are located. Parts must be set after construction before any other * methods are invoked. * * @param parts the list of {@link TextPart}s where the segments are stored. */ public void setParts(List parts) { this.parts = parts; } @Override public Iterator iterator() { return new Iterator() { int current = foundNext(-1); private int foundNext (int start) { for ( int i=start+1; i asList() { final ArrayList segments = new ArrayList<>(); for ( final TextPart part : parts ) { if ( part.isSegment() ) { segments.add((Segment)part); } } return segments; } @Override public void swap(int segIndex1, int segIndex2) { final int partIndex1 = getPartIndex(segIndex1); final int partIndex2 = getPartIndex(segIndex2); if (( partIndex1 == -1 ) || ( partIndex2 == -1 )) { return; // At least one index is wrong: do nothing } final TextPart tmp = parts.get(partIndex1); parts.set(partIndex1, parts.get(partIndex2)); parts.set(partIndex2, tmp); } @Override public void append(Segment segment, boolean collapseIfPreviousEmpty) { append(segment, null, collapseIfPreviousEmpty); } @Override public void append(Segment segment) { append(segment, true); } @Override public void append(Segment segment, String textBefore, boolean collapseIfPreviousEmpty) { // Add the text before if needed if ( !Util.isEmpty(textBefore) ) { if (( parts.get(parts.size()-1).getContent().isEmpty() ) && !parts.get(parts.size()-1).isSegment() ) { parts.set(parts.size()-1, new TextPart(textBefore)); } else { parts.add(new TextPart(textBefore)); } } // If the last segment is empty and at the end of the content: re-use it if ( collapseIfPreviousEmpty ) { if (( parts.get(parts.size()-1).getContent().isEmpty() ) && 
parts.get(parts.size()-1).isSegment() ) { parts.set(parts.size()-1, segment); } else { parts.add(segment); } } else { parts.add(segment); } validateSegmentId(segment); parent.setHasBeenSegmentedFlag(true); } @Override public void append(Segment segment, String textBefore) { append(segment, textBefore, true); } @Override public void append(TextFragment fragment, boolean collapseIfPreviousEmpty) { append(new Segment(null, fragment), collapseIfPreviousEmpty); } @Override public void append(TextFragment fragment) { append(fragment, true); } @Override public void set(int index, Segment seg) { final int n = getPartIndex(index); if ( n < -1 ) { throw new IndexOutOfBoundsException("Invalid segment index: "+index); } parts.set(n, seg); validateSegmentId(seg); } @Override public void insert(int index, Segment seg) { // If the index is the one after the last segment: we append if ( index == count() ) { append(seg, true); return; } // Otherwise it has to exist final int n = getPartIndex(index); if ( n < -1 ) { throw new IndexOutOfBoundsException("Invalid segment index: "+index); } parts.add(n, seg); validateSegmentId(seg); } @Override public int create (List ranges) { return create(ranges, false); } @Override public int create(List ranges, boolean allowEmptySegments) { return create(ranges, allowEmptySegments, MetaCopyStrategy.DEFAULT); } @Override public int create (List ranges, boolean allowEmptySegments, MetaCopyStrategy strategy) { // Do nothing if null or empty if (( ranges == null ) || ranges.isEmpty() ) return 0; List originalRanges = new ArrayList<>(); // If the current content is a single segment we start from it TextFragment holder; if ( parts.size() == 1 ) { holder = parts.get(0).getContent(); originalRanges.add(new Range(0, holder.length(), parts.get(0).getId())); } else { holder = createJoinedContent(originalRanges, true); } // clone the current parts List originalParts = new ArrayList<>(); for(TextPart p : parts) { originalParts.add(p.clone()); } // Reset the 
segments parts.clear(); // Extract the segments using the ranges int start = 0; int id = 0; for ( final Range range : ranges ) { if ( range.end == -1 ) { range.end = holder.text.length(); } // Check boundaries if ( range.end < range.start ) { throw new InvalidPositionException(String.format( "Invalid segment boundaries: start=%d, end=%d.", range.start, range.end)); } if ( start > range.start ) { throw new InvalidPositionException("Invalid range order."); } if ( range.end == range.start ) { // If empty segments are not allowed, we skip this one if ( !allowEmptySegments ) continue; // Otherwise we proceed } // If there is an interstice: creates the corresponding part if ( start < range.start ) { parts.add(new TextPart(holder.subSequence(start, range.start))); } // Create the part for the segment // Use existing id if possible, otherwise use local counter TextPart p; // if the range does not store the original part then we assume this is a Segment if (range.part == null) { p = new Segment(((range.id == null) ? String.valueOf(id++) : range.id), holder.subSequence(range.start, range.end)); validateSegmentId((Segment) p); } else { // since the range carries a part this normally means it's a case where // the TextContainer was already segmented when we calculated the ranges, and we want to remember the // original TextParts (main use case is ITextUnitMerger) if (range.part.isSegment()) { p = new Segment(((range.part.id == null) ? String.valueOf(id++) : range.part.id), holder.subSequence(range.start, range.end)); validateSegmentId((Segment) p); } else { p = new TextPart(((range.part.id == null) ? 
String.valueOf(id++) : range.part.id), holder.subSequence(range.start, range.end)); } } parts.add(p); start = range.end; parent.setHasBeenSegmentedFlag(true); } // Check if we have remaining text after the last segment if ( start < holder.text.length() ) { if ( start == 0 ) { // If the remainder is the whole content: make it a segment if ( parts.size() > 0 ) { parts.add(new TextPart(holder.subSequence(start, -1))); } else { parts.add(new Segment(String.valueOf(id), holder)); } // That is the only segment: no need to validate the id } else { // Otherwise: make it an interstice parts.add(new TextPart(holder.subSequence(start, -1))); } } switch(strategy) { case DEEPEN: // split segments inherit parent metadata and id's are adjusted deepenCopyMetaData(originalParts, originalRanges, ranges); break; case IDENTITY: identityCopyMetadata(ranges); break; case DEFAULT: // default case - currently do nothing break; } return parts.size(); } /** * Copy metadata for use cases where the original segments were the exact same as the new ones. * Must be a one to one match between ranges and parts. */ private void identityCopyMetadata(List ranges) { assert(ranges.size() == parts.size()); for (int pi = 0; pi < ranges.size(); pi++) { Range r = ranges.get(pi); TextPart part = parts.get(pi); part.id = r.part.id; part.originalId = r.part.originalId; part.whitespaceStrategy = r.part.whitespaceStrategy; IResource.copy(r.part, part); } } /** * Copy metadata for deepen existing segmentation use case. Split segments inherit meta from parent segments. */ private void deepenCopyMetaData(List originalParts, List originalRanges, List ranges) { for (int pi = 0; pi < ranges.size(); pi++) { Range r = ranges.get(pi); for (int oi = 0; oi < originalRanges.size(); oi++) { Range op = originalRanges.get(oi); TextPart part = parts.get(pi); TextPart originalPart = originalParts.get(oi); if (op.equals(r)) { // Range may already have an id, if so keep it part.id = Util.isEmpty(originalPart.id) ? 
part.id : originalPart.id; part.originalId = originalPart.originalId; part.whitespaceStrategy = originalPart.whitespaceStrategy; IResource.copy(originalPart, part); } else if (op.contains(r)) { // use case for deepening segmentation (split segments) etc. part.id = Util.isEmpty(originalPart.id) ? part.id : String.format("%s.%d", originalPart.id, pi); part.whitespaceStrategy = originalPart.whitespaceStrategy; IResource.copy(originalPart, part); } } } } @Override public int create(int start, int end) { final ArrayList range = new ArrayList<>(); range.add(new Range(start, end)); return create(range); } @Override public int count() { int count = 0; for ( final TextPart part : parts ) { if ( part.isSegment() ) { count++; } } return count; } @Override public TextFragment getFirstContent() { for ( final TextPart part : parts ) { if ( part.isSegment() ) { return part.getContent(); } } // Should never occur return null; } @Override public TextFragment getLastContent() { for ( int i=parts.size()-1; i>=0; i-- ) { if ( parts.get(i).isSegment() ) { return parts.get(i).getContent(); } } // Should never occur return null; } @Override public Segment getLast() { for ( int i=parts.size()-1; i>=0; i-- ) { if ( parts.get(i).isSegment() ) { return (Segment)parts.get(i); } } // Should never occur return null; } @Override public Segment get(String id) { for ( final TextPart part : parts ) { if ( part.isSegment() ) { if ( ((Segment)part).id.equals(id) ) return (Segment)part; } } // Should never occur return null; } @Override public Segment get(int index) { int tmp = -1; for ( final TextPart part : parts ) { if ( part.isSegment() ) { if ( ++tmp == index ) { return (Segment)part; } } } throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + ++tmp); } @Override public void joinAll() { // Merge but don't remember the ranges //parent.setContent(createJoinedContent(null)); joinAll(null); } @Override public void joinAll(boolean keepCodeIds) { 
parent.setContent(createJoinedContent(null, keepCodeIds)); } @Override public void joinAll(List ranges) { parent.setContent(createJoinedContent(ranges)); } @Override public List getRanges() { final List ranges = new ArrayList<>(); createJoinedContent(ranges); return ranges; } @Override public List getRanges(boolean keepCodeIds) { final List ranges = new ArrayList<>(); createJoinedContent(ranges, keepCodeIds); return ranges; } @Override public int joinWithNext(int segmentIndex) { // Check if we have something to join to if ( parts.size() == 1 ) { return 0; // Nothing to do } // Find the part for the segment index final int start = getPartIndex(segmentIndex); // Check if we have a segment at such index if ( start == -1 ) { return 0; // Not found } // Find the next segment int end = -1; for ( int i=start+1; i ranges) { return createJoinedContent(ranges, false); } private TextFragment createJoinedContent(List ranges, boolean keepCodeIds) { // Clear the ranges if needed if ( ranges != null ) { ranges.clear(); } // Join all segment into a new TextFragment int start = 0; final TextFragment tf = new TextFragment(); for ( final TextPart part : parts ) { if (ranges != null) { Range r = new Range(start, start + part.text.text.length(), part.id); // remember original part as create(List...) nukes all original TextPart metadata r.part = part; ranges.add(r); } start += part.text.text.length(); tf.append(part.getContent(), keepCodeIds); } return tf; } public TextContainer getParent() { return parent; } public List getParts() { return parts; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy