com.basistech.rosette.dm.Sentence Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of adm-model Show documentation
ADM data model.
There is a newer version: 3.0.3
/*
* Copyright 2014 Basis Technology Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.basistech.rosette.dm;

import java.io.Serializable;
import java.util.Map;

/**
 * A Sentence. By convention (influenced by the sentence boundary rules from
 * Unicode TR#29), a sentence should include trailing whitespace after an
 * end-of-sentence marker.  For example, for the string "Hello.  World "
 * with two spaces before "World" and one space after:
 * <pre>
 * 012345678901234
 * Hello.  World
 * </pre>
 * the first sentence is at offsets [0, 8), and the second at [8, 14).
 * <p>
 * Note that sentences have no properties of their own.
 */
public class Sentence extends Attribute implements Serializable {
    private static final long serialVersionUID = 222L;

    // NOTE(review): 'Map' here and 'Attribute.Builder' below are raw types; the
    // generic parameters may have been lost upstream -- confirm against Attribute.

    /**
     * Creates a sentence spanning the given character range. All three
     * arguments are handed unchanged to the {@code Attribute} constructor.
     *
     * @param startOffset start character offset
     * @param endOffset end character offset
     * @param extendedProperties extended properties forwarded to the superclass
     */
    protected Sentence(int startOffset, int endOffset, Map extendedProperties) {
        super(startOffset, endOffset, extendedProperties);
    }

    /**
     * Builder that produces {@link Sentence} instances.
     */
    public static class Builder extends Attribute.Builder {
        /**
         * Starts a builder from the two required character offsets.
         *
         * @param startOffset start character offset
         * @param endOffset end character offset
         */
        public Builder(int startOffset, int endOffset) {
            super(startOffset, endOffset);
        }

        /**
         * Starts a builder from the 'traditional Rosette' token offset array:
         * an int[] whose even-numbered items are token start offsets and whose
         * odd-numbered items are token end offsets. The sentence begins at the
         * start offset of token {@code tokenStartIndex} and finishes at the end
         * offset of token {@code tokenEndIndex - 1}.
         *
         * @param tokenOffsets array of token start/end offsets
         * @param tokenStartIndex index of the first token of the sentence
         * @param tokenEndIndex index one past the last token of the sentence
         * @adm.ignore
         */
        public Builder(int[] tokenOffsets, int tokenStartIndex, int tokenEndIndex) {
            // Token i occupies slots [2*i] (start) and [2*i + 1] (end), so the end
            // offset of token (tokenEndIndex - 1) lives at slot 2*tokenEndIndex - 1.
            this(tokenOffsets[tokenStartIndex * 2], tokenOffsets[tokenEndIndex * 2 - 1]);
        }

        /**
         * Starts a builder pre-populated from an existing sentence.
         *
         * @param toCopy the sentence whose contents are copied
         * @adm.ignore
         */
        public Builder(Sentence toCopy) {
            super(toCopy);
        }

        /**
         * Produces a new sentence reflecting the builder's current state.
         *
         * @return the new sentence
         */
        public Sentence build() {
            // Values are captured in the same order the constructor call would
            // evaluate them: offsets first, then the extended properties.
            final int begin = startOffset;
            final int end = endOffset;
            final Map extras = buildExtendedProperties();
            return new Sentence(begin, end, extras);
        }

        /**
         * Returns this builder, typed as the concrete {@code Sentence.Builder}.
         *
         * @return this builder
         */
        @Override
        protected Builder getThis() {
            return Builder.this;
        }

    }
}