// net.sf.okapi.common.resource.Segments (Maven / Gradle / Ivy)
/*===========================================================================
Copyright (C) 2010 by the Okapi Framework contributors
-----------------------------------------------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===========================================================================*/
package net.sf.okapi.common.resource;
import net.sf.okapi.common.IResource;
import net.sf.okapi.common.Range;
import net.sf.okapi.common.Util;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
public class Segments implements ISegments {
// Segments are assumed to be aligned when this object is created.
private AlignmentStatus alignmentStatus = AlignmentStatus.ALIGNED;
// Owning container; remains null when the no-argument constructor is used.
private TextContainer parent;
// Ordered parts (segments and non-segment parts). Remains null until setParts() is called.
private List<TextPart> parts;
/**
 * Creates a Segments object with neither a parent nor a list of parts assigned.
 */
public Segments() {
    // Nothing to initialize.
}
/**
 * Creates a Segments object attached to its parent container, with no parts yet.
 * <p>
 * IMPORTANT: {@link #setParts(List)} must be given a non-null list before any
 * other method of this object is called.
 *
 * @param parent the {@link TextContainer} these segments belong to.
 */
public Segments(TextContainer parent) {
    this.parent = parent;
}
/**
 * Assigns the list of parts that holds the segments of this object.
 * The list is stored by reference (it is not copied), and it has to be
 * assigned after construction, before any other method is used.
 *
 * @param parts the list of {@link TextPart}s in which the segments live.
 */
public void setParts(List parts) {
    this.parts = parts;
}
// Returns an iterator built on an anonymous Iterator class. Its 'current' field
// is initialized with foundNext(-1), i.e. with a scan that starts at part index 0.
@Override
public Iterator iterator() {
return new Iterator() {
int current = foundNext(-1);
// Scans the parts starting just after 'start'.
private int foundNext (int start) {
// NOTE(review): the next line looks truncated by text extraction. The rest of
// the loop in foundNext(), the remaining members of the anonymous Iterator and
// the declaration line of asList() appear to be missing between "i" and
// "asList()". Restore them from the upstream file before compiling.
for ( int i=start+1; i asList() {
// What follows is the body of asList(): it gathers, in part order, every part
// for which isSegment() is true (cast to Segment) into a newly created list.
final ArrayList segments = new ArrayList<>();
for ( final TextPart part : parts ) {
if ( part.isSegment() ) {
segments.add((Segment)part);
}
}
return segments;
}
/**
 * Exchanges the positions of two segments inside the list of parts.
 * Nothing happens when either segment index cannot be resolved to a part.
 *
 * @param segIndex1 segment index of the first segment.
 * @param segIndex2 segment index of the second segment.
 */
@Override
public void swap(int segIndex1, int segIndex2) {
    final int first = getPartIndex(segIndex1);
    final int second = getPartIndex(segIndex2);
    // Both indices must resolve to a part, otherwise the call is a no-op
    if (( first != -1 ) && ( second != -1 )) {
        // List.set() hands back the element it replaced: the former 'first' part
        final TextPart displaced = parts.set(first, parts.get(second));
        parts.set(second, displaced);
    }
}
/**
 * Appends a segment without any text placed before it.
 *
 * @param segment the segment to append.
 * @param collapseIfPreviousEmpty true to replace a trailing empty segment
 * instead of adding after it.
 */
@Override
public void append(Segment segment, boolean collapseIfPreviousEmpty) {
    final String noTextBefore = null;
    append(segment, noTextBefore, collapseIfPreviousEmpty);
}
/**
 * Appends a segment, replacing a trailing empty segment if there is one.
 *
 * @param segment the segment to append.
 */
@Override
public void append(Segment segment) {
    final boolean collapseIfPreviousEmpty = true;
    append(segment, collapseIfPreviousEmpty);
}
/**
 * Appends a segment at the end of the parts, optionally preceded by a
 * non-segment text part.
 * <p>
 * When {@code textBefore} is not empty it is stored as a new {@link TextPart};
 * that part replaces the current last part if the last part is an empty
 * non-segment part, otherwise it is added at the end. The segment then
 * replaces the last part if {@code collapseIfPreviousEmpty} is true and the
 * last part is an empty segment; otherwise it is added at the end.
 * An empty list of parts is handled: both the text and the segment are simply added.
 * <p>
 * After insertion the segment id is validated and the parent container is
 * flagged as having been segmented.
 *
 * @param segment the segment to append.
 * @param textBefore text to place before the segment (can be null or empty).
 * @param collapseIfPreviousEmpty true to re-use a trailing empty segment.
 */
@Override
public void append(Segment segment,
    String textBefore,
    boolean collapseIfPreviousEmpty) {
    // Add the text before if needed
    if ( !Util.isEmpty(textBefore) ) {
        final int lastIdx = parts.size() - 1;
        // lastIdx is -1 for an empty list: there is nothing to re-use then
        if (( lastIdx >= 0 )
            && parts.get(lastIdx).getContent().isEmpty()
            && !parts.get(lastIdx).isSegment() )
        {
            parts.set(lastIdx, new TextPart(textBefore));
        }
        else {
            parts.add(new TextPart(textBefore));
        }
    }

    // Look at the tail again: the text part above may have changed it
    final int tailIdx = parts.size() - 1;
    final boolean reuseTail = collapseIfPreviousEmpty
        && ( tailIdx >= 0 )
        && parts.get(tailIdx).getContent().isEmpty()
        && parts.get(tailIdx).isSegment();

    // If the last segment is empty and at the end of the content: re-use it
    if ( reuseTail ) {
        parts.set(tailIdx, segment);
    }
    else {
        parts.add(segment);
    }

    // The id is validated once the segment is part of the list
    validateSegmentId(segment);
    parent.setHasBeenSegmentedFlag(true);
}
/**
 * Appends a segment preceded by some text, replacing a trailing empty
 * segment if there is one.
 *
 * @param segment the segment to append.
 * @param textBefore text to place before the segment (can be null or empty).
 */
@Override
public void append(Segment segment, String textBefore) {
    final boolean collapseIfPreviousEmpty = true;
    append(segment, textBefore, collapseIfPreviousEmpty);
}
/**
 * Wraps a fragment into a new segment created with a null id, and appends it.
 *
 * @param fragment the content of the new segment.
 * @param collapseIfPreviousEmpty true to replace a trailing empty segment
 * instead of adding after it.
 */
@Override
public void append(TextFragment fragment, boolean collapseIfPreviousEmpty) {
    final Segment wrapped = new Segment(null, fragment);
    append(wrapped, collapseIfPreviousEmpty);
}
/**
 * Appends a fragment as a new segment, replacing a trailing empty segment
 * if there is one.
 *
 * @param fragment the content of the new segment.
 */
@Override
public void append(TextFragment fragment) {
    final boolean collapseIfPreviousEmpty = true;
    append(fragment, collapseIfPreviousEmpty);
}
/**
 * Replaces the segment found at a given segment index, then validates the
 * id of the new segment.
 *
 * @param index the segment index of the segment to replace.
 * @param seg the new segment.
 * @throws IndexOutOfBoundsException if no segment exists at the given index.
 */
@Override
public void set(int index, Segment seg) {
    final int n = getPartIndex(index);
    // getPartIndex() reports "no such segment" with -1, so any negative value is invalid.
    // (The former test "n < -1" never matched and let parts.set(-1, ...) fail instead.)
    if ( n < 0 ) {
        throw new IndexOutOfBoundsException("Invalid segment index: "+index);
    }
    parts.set(n, seg);
    validateSegmentId(seg);
}
/**
 * Inserts a segment at a given segment index, then validates its id.
 * An index equal to {@link #count()} appends the segment (re-using a trailing
 * empty segment if there is one).
 *
 * @param index the segment index where to insert; must denote an existing
 * segment or be equal to the number of segments.
 * @param seg the segment to insert.
 * @throws IndexOutOfBoundsException if the index is neither an existing
 * segment index nor the number of segments.
 */
@Override
public void insert(int index, Segment seg) {
    // If the index is the one after the last segment: we append
    if ( index == count() ) {
        append(seg, true);
        return;
    }
    // Otherwise it has to exist
    final int n = getPartIndex(index);
    // getPartIndex() reports "no such segment" with -1, so any negative value is invalid.
    // (The former test "n < -1" never matched and let parts.add(-1, ...) fail instead.)
    if ( n < 0 ) {
        throw new IndexOutOfBoundsException("Invalid segment index: "+index);
    }
    parts.add(n, seg);
    validateSegmentId(seg);
}
/**
 * Re-segments the content from a list of ranges, skipping empty ranges.
 *
 * @param ranges the ranges of the new segments.
 * @return the value returned by {@link #create(List, boolean)}.
 */
@Override
public int create (List ranges) {
    final boolean allowEmptySegments = false;
    return create(ranges, allowEmptySegments);
}
/**
 * Re-segments the content from a list of ranges, using the DEFAULT
 * metadata copy strategy.
 *
 * @param ranges the ranges of the new segments.
 * @param allowEmptySegments true to keep ranges whose start equals their end.
 * @return the value returned by the three-argument create method.
 */
@Override
public int create(List ranges, boolean allowEmptySegments) {
    final MetaCopyStrategy strategy = MetaCopyStrategy.DEFAULT;
    return create(ranges, allowEmptySegments, strategy);
}
/**
 * Rebuilds the list of parts from a list of ranges expressed over the current content.
 * <p>
 * The current content is taken from the single existing part, or obtained by
 * joining all parts. The existing parts are cloned, the list is cleared, and one
 * part is created per accepted range, plus a plain TextPart for each gap between
 * ranges and for any text left after the last range.
 * <p>
 * Side effects visible in this method: a range whose end is -1 has its end
 * overwritten with the length of the content text, and the parent is flagged as
 * segmented each time a part is created from a range. The part list has already
 * been cleared when a boundary check throws.
 *
 * @param ranges the ranges to use; null or empty makes the method return 0 without changes.
 * @param allowEmptySegments when false, ranges whose start equals their end are skipped.
 * @param strategy how metadata is copied to the new parts once they are built.
 * @return the number of parts (segments and non-segment parts alike) in the rebuilt list,
 * or 0 if ranges is null or empty.
 * @throws InvalidPositionException if a range ends before it starts, or starts
 * before the end of the previously processed range.
 */
@Override
public int create (List ranges, boolean allowEmptySegments, MetaCopyStrategy strategy)
{
// Do nothing if null or empty
if (( ranges == null ) || ranges.isEmpty() ) return 0;
// Ranges describing the parts as they are before re-segmentation
List originalRanges = new ArrayList<>();
// If the current content is a single segment we start from it
TextFragment holder;
if ( parts.size() == 1 ) {
holder = parts.get(0).getContent();
originalRanges.add(new Range(0, holder.length(), parts.get(0).getId()));
}
else {
// Several parts (or none): join them; originalRanges is filled by the call
holder = createJoinedContent(originalRanges, true);
}
// clone the current parts
// (must happen before parts.clear() below; the clones are used by the DEEPEN strategy)
List originalParts = new ArrayList<>();
for(TextPart p : parts) {
originalParts.add(p.clone());
}
// Reset the segments
parts.clear();
// Extract the segments using the ranges
// 'start' is the end offset of the last processed range; 'id' is the local id counter
int start = 0;
int id = 0;
for ( final Range range : ranges ) {
// An end of -1 stands for the end of the content; the Range object itself is updated
if ( range.end == -1 ) {
range.end = holder.text.length();
}
// Check boundaries
if ( range.end < range.start ) {
throw new InvalidPositionException(String.format(
"Invalid segment boundaries: start=%d, end=%d.", range.start, range.end));
}
if ( start > range.start ) {
throw new InvalidPositionException("Invalid range order.");
}
if ( range.end == range.start ) {
// If empty segments are not allowed, we skip this one
if ( !allowEmptySegments ) continue;
// Otherwise we proceed
}
// If there is an interstice: creates the corresponding part
if ( start < range.start ) {
parts.add(new TextPart(holder.subSequence(start, range.start)));
}
// Create the part for the segment
// Use existing id if possible, otherwise use local counter
// (the counter only advances when it is actually used)
TextPart p;
// if the range does not store the original part then we assume this is a Segment
if (range.part == null) {
p = new Segment(((range.id == null) ? String.valueOf(id++) : range.id),
holder.subSequence(range.start, range.end));
validateSegmentId((Segment) p);
} else {
// since the range carries a part this normally means it's a case where
// the TextContainer was already segmented when we calculated the ranges, and we want to remember the
// original TextParts (main use case is ITextUnitMerger)
if (range.part.isSegment()) {
p = new Segment(((range.part.id == null) ? String.valueOf(id++) : range.part.id),
holder.subSequence(range.start, range.end));
validateSegmentId((Segment) p);
} else {
// Non-segment original part: re-created as a plain TextPart, its id is not validated
p = new TextPart(((range.part.id == null) ? String.valueOf(id++) : range.part.id),
holder.subSequence(range.start, range.end));
}
}
// The id is validated above, before the new part joins the list
parts.add(p);
start = range.end;
parent.setHasBeenSegmentedFlag(true);
}
// Check if we have remaining text after the last segment
if ( start < holder.text.length() ) {
if ( start == 0 ) { // If the remainder is the whole content: make it a segment
// start is still 0, so any part created so far comes from an empty range at offset 0;
// in that case the whole content is added after it as a plain TextPart
if ( parts.size() > 0 ) {
parts.add(new TextPart(holder.subSequence(start, -1)));
}
else {
parts.add(new Segment(String.valueOf(id), holder));
}
// That is the only segment: no need to validate the id
}
else { // Otherwise: make it an interstice
parts.add(new TextPart(holder.subSequence(start, -1)));
}
}
// Copy metadata onto the new parts according to the requested strategy
switch(strategy) {
case DEEPEN:
// split segments inherit parent metadata and id's are adjusted
deepenCopyMetaData(originalParts, originalRanges, ranges);
break;
case IDENTITY:
identityCopyMetadata(ranges);
break;
case DEFAULT:
// default case - currently do nothing
break;
}
// Number of parts, not number of segments
return parts.size();
}
/**
 * Copies metadata for use cases where the original segments are exactly the same as the new ones.
 * There must be a one to one match between ranges and parts: the range at position
 * {@code i} is applied to the part at position {@code i}.
 * <p>
 * For each pair, the id, the original id, the whitespace strategy and the resource-level
 * data (through {@code IResource.copy}) of the part carried by the range are copied
 * onto the new part. Each range is dereferenced through {@code r.part}, so a range that
 * carries no part causes a NullPointerException.
 *
 * @param ranges the ranges used to create the current parts, each carrying its original part.
 */
private void identityCopyMetadata(List<Range> ranges) {
    assert(ranges.size() == parts.size());
    for (int pi = 0; pi < ranges.size(); pi++) {
        Range r = ranges.get(pi);
        TextPart part = parts.get(pi);
        part.id = r.part.id;
        part.originalId = r.part.originalId;
        part.whitespaceStrategy = r.part.whitespaceStrategy;
        IResource.copy(r.part, part);
    }
}
/**
 * Copies metadata for the "deepen existing segmentation" use case: parts produced by
 * splitting inherit metadata from the original part they come from.
 * <p>
 * Every new range is compared with every original range:
 * <ul>
 * <li>equal ranges: the new part takes the original id (unless the original id is empty),
 * the original id field, the whitespace strategy and the resource-level data;</li>
 * <li>new range contained in an original range: the new part takes an id made of the
 * original id and the range position (unless the original id is empty), the
 * whitespace strategy and the resource-level data.</li>
 * </ul>
 * The new part is looked up with the position of the range in {@code ranges}.
 *
 * @param originalParts clones of the parts as they were before re-segmentation.
 * @param originalRanges ranges of the original parts, in the same order as {@code originalParts}.
 * @param ranges the ranges used to create the current parts.
 */
private void deepenCopyMetaData(List<TextPart> originalParts, List<Range> originalRanges, List<Range> ranges) {
    for (int pi = 0; pi < ranges.size(); pi++) {
        Range r = ranges.get(pi);
        for (int oi = 0; oi < originalRanges.size(); oi++) {
            Range op = originalRanges.get(oi);
            // Looked up inside the inner loop on purpose: nothing is accessed
            // when there are no original ranges
            TextPart part = parts.get(pi);
            TextPart originalPart = originalParts.get(oi);
            if (op.equals(r)) {
                // Range may already have an id, if so keep it
                part.id = Util.isEmpty(originalPart.id) ? part.id : originalPart.id;
                part.originalId = originalPart.originalId;
                part.whitespaceStrategy = originalPart.whitespaceStrategy;
                IResource.copy(originalPart, part);
            } else if (op.contains(r)) {
                // use case for deepening segmentation (split segments) etc.
                part.id = Util.isEmpty(originalPart.id) ? part.id : String.format("%s.%d", originalPart.id, pi);
                part.whitespaceStrategy = originalPart.whitespaceStrategy;
                IResource.copy(originalPart, part);
            }
        }
    }
}
/**
 * Re-segments the content with a single range.
 *
 * @param start the start position of the range.
 * @param end the end position of the range.
 * @return the value returned by {@link #create(List)}.
 */
@Override
public int create(int start, int end) {
    final List<Range> single = new ArrayList<>();
    final Range only = new Range(start, end);
    single.add(only);
    return create(single);
}
/**
 * Counts the parts that are segments.
 *
 * @return the number of segments.
 */
@Override
public int count() {
    int total = 0;
    for ( int i = 0; i < parts.size(); i++ ) {
        if ( parts.get(i).isSegment() ) {
            total++;
        }
    }
    return total;
}
/**
 * Gets the content of the first segment.
 *
 * @return the content of the first part that is a segment, or null if no part is a segment.
 */
@Override
public TextFragment getFirstContent() {
    TextFragment found = null;
    for ( int i = 0; i < parts.size(); i++ ) {
        final TextPart candidate = parts.get(i);
        if ( candidate.isSegment() ) {
            found = candidate.getContent();
            break;
        }
    }
    // Stays null only when there is no segment at all (should never occur)
    return found;
}
/**
 * Gets the content of the last segment.
 *
 * @return the content of the last part that is a segment, or null if no part is a segment.
 */
@Override
public TextFragment getLastContent() {
    int i = parts.size();
    // Walk backward from the end of the list
    while ( --i >= 0 ) {
        final TextPart candidate = parts.get(i);
        if ( candidate.isSegment() ) {
            return candidate.getContent();
        }
    }
    // Should never occur
    return null;
}
/**
 * Gets the last segment.
 *
 * @return the last part that is a segment, or null if no part is a segment.
 */
@Override
public Segment getLast() {
    int i = parts.size();
    // Walk backward from the end of the list
    while ( --i >= 0 ) {
        final TextPart candidate = parts.get(i);
        if ( candidate.isSegment() ) {
            return (Segment)candidate;
        }
    }
    // Should never occur
    return null;
}
/**
 * Gets the first segment that has a given id.
 * Segments that have no id (null) are skipped rather than dereferenced.
 *
 * @param id the id to look for.
 * @return the first segment whose id equals the given one, or null if there is none
 * (which includes the case of a null {@code id}).
 */
@Override
public Segment get(String id) {
    for ( final TextPart part : parts ) {
        if ( !part.isSegment() ) {
            continue;
        }
        final Segment candidate = (Segment)part;
        // A segment may carry a null id: compare without dereferencing it blindly
        if (( candidate.id != null ) && candidate.id.equals(id) ) {
            return candidate;
        }
    }
    // No segment carries the requested id
    return null;
}
/**
 * Gets the segment at a given segment index (non-segment parts are not counted).
 *
 * @param index the zero-based segment index.
 * @return the segment at that index.
 * @throws IndexOutOfBoundsException if there is no segment at that index; the
 * message reports the requested index and the number of segments.
 */
@Override
public Segment get(int index) {
    // Number of segments met so far, i.e. the segment index of the next one
    int seen = 0;
    for ( final TextPart part : parts ) {
        if ( !part.isSegment() ) {
            continue;
        }
        if ( seen == index ) {
            return (Segment)part;
        }
        seen++;
    }
    throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + seen);
}
/**
 * Joins all parts into a single content, without reporting the ranges
 * of the parts that were joined.
 */
@Override
public void joinAll() {
    // A null list means: do not remember the ranges
    joinAll(null);
}
/**
 * Joins all parts into a single content and sets it as the content of the parent.
 *
 * @param keepCodeIds passed as is to the fragment-append operation used to
 * build the joined content.
 */
@Override
public void joinAll(boolean keepCodeIds) {
    final TextFragment joined = createJoinedContent(null, keepCodeIds);
    parent.setContent(joined);
}
/**
 * Joins all parts into a single content and sets it as the content of the parent.
 *
 * @param ranges a list passed to the joining helper to receive the ranges of
 * the joined parts; can be null.
 */
@Override
public void joinAll(List ranges) {
    final TextFragment joined = createJoinedContent(ranges);
    parent.setContent(joined);
}
/**
 * Gets the ranges reported by the joining helper for the current parts.
 * Only the ranges are kept; the joined content built along the way is discarded.
 *
 * @return a new list of ranges.
 */
@Override
public List getRanges() {
    final List<Range> collected = new ArrayList<>();
    createJoinedContent(collected);
    return collected;
}
/**
 * Gets the ranges of the current parts as they would be after joining all of them.
 * Only the ranges are kept; the joined content built along the way is discarded.
 *
 * @param keepCodeIds passed as is to the fragment-append operation used while joining.
 * @return a new list with one range per part.
 */
@Override
public List getRanges(boolean keepCodeIds) {
    final List<Range> collected = new ArrayList<>();
    createJoinedContent(collected, keepCodeIds);
    return collected;
}
// Joins the segment at the given segment index with the next one.
// Returns 0 without changing anything when there is only one part, or when
// getPartIndex() finds no segment at the given index (it returns -1 then).
@Override
public int joinWithNext(int segmentIndex) {
// Check if we have something to join to
if ( parts.size() == 1 ) {
return 0; // Nothing to do
}
// Find the part for the segment index
final int start = getPartIndex(segmentIndex);
// Check if we have a segment at such index
if ( start == -1 ) {
return 0; // Not found
}
// Find the next segment
int end = -1;
// NOTE(review): the next line looks truncated by text extraction. The rest of
// joinWithNext(), any members that followed it (getPartIndex() and
// validateSegmentId() are called in this file but not visible) and the start of
// the declaration of createJoinedContent(List) appear to be missing between "i"
// and "ranges)". Restore them from the upstream file before compiling.
for ( int i=start+1; i ranges) {
// Body of the one-argument createJoinedContent(): same as the two-argument
// form with keepCodeIds set to false.
return createJoinedContent(ranges, false);
}
/**
 * Builds a new fragment made of the content of all parts, in order.
 * The parts themselves are not modified.
 * <p>
 * When a list is provided, it is cleared and then filled with one range per part.
 * Each range spans the part inside the joined text (offsets are computed from the
 * lengths of the parts' underlying text buffers), carries the id of the part, and
 * keeps a reference to the part itself.
 *
 * @param ranges list to fill with the ranges of the parts, or null if the ranges are not needed.
 * @param keepCodeIds passed as is to the fragment-append operation.
 * @return the new joined fragment (empty if there are no parts).
 */
private TextFragment createJoinedContent(List<Range> ranges, boolean keepCodeIds) {
    // Clear the ranges if needed
    if ( ranges != null ) {
        ranges.clear();
    }
    // Join all segment into a new TextFragment
    int start = 0;
    final TextFragment tf = new TextFragment();
    for ( final TextPart part : parts ) {
        // Measured before the append below
        final int length = part.text.text.length();
        if ( ranges != null ) {
            final Range r = new Range(start, start + length, part.id);
            // remember original part as create(List...) nukes all original TextPart metadata
            r.part = part;
            ranges.add(r);
        }
        start += length;
        tf.append(part.getContent(), keepCodeIds);
    }
    return tf;
}
/**
 * Gets the container these segments belong to.
 *
 * @return the parent {@link TextContainer}; null if none was given at construction.
 */
public TextContainer getParent() {
    return this.parent;
}
/**
 * Gets the list of parts in which the segments are stored.
 * The list itself is returned, not a copy.
 *
 * @return the list given to {@link #setParts(List)}; null if it was never set.
 */
public List getParts() {
    return this.parts;
}
}