All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.fop.text.linebreak.LineBreakStatus Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id: LineBreakStatus.java 1056518 2011-01-07 21:22:40Z adelmelle $ */

package org.apache.fop.text.linebreak;

/**
 * This class is meant for supporting the Unicode line breaking algorithm.
 * See: UTR 14
 *
 */
public class LineBreakStatus {

    /** Constant indicating a Direct Break */
    public static final byte DIRECT_BREAK = LineBreakUtils.DIRECT_BREAK;
    /** Constant indicating an Indirect Break */
    public static final byte INDIRECT_BREAK = LineBreakUtils.INDIRECT_BREAK;
    /** Constant indicating a Combining Indirect Break */
    public static final byte COMBINING_INDIRECT_BREAK = LineBreakUtils.COMBINING_INDIRECT_BREAK;
    /** Constant indicating a Combining Prohibited Break */
    public static final byte COMBINING_PROHIBITED_BREAK = LineBreakUtils.COMBINING_PROHIBITED_BREAK;
    /** Constant indicating a Prohibited Break */
    public static final byte PROHIBITED_BREAK = LineBreakUtils.PROHIBITED_BREAK;
    /** Constant indicating a Explicit Break */
    public static final byte EXPLICIT_BREAK = LineBreakUtils.EXPLICIT_BREAK;

    private byte leftClass;
    private boolean hadSpace;

    /**
     * Resets the class to the same state as if new LineBreakStatus() had just been called.
     */
    public LineBreakStatus() {
        reset();
    }


    /**
     * Reset the status.
     * This method will reset the status to the initial state. It is meant
     * for recycling objects.
     */
    public void reset() {
        leftClass = -1;
        hadSpace = false;
    }

    /**
     * Check whether a line break may happen according to the rules described in
     * the Unicode Line Breaking
     * Algorithm. The function returns the line breaking status of the point
     * before the given character.
     * The algorithm is the table-driven algorithm, as described in
     * 
     * Unicode Technical Report #14.
     * The pair table is taken from {@link LineBreakUtils}.
     *
     * TODO: Better handling for AI, SA, SG and XX line break classes.
     *
     * @param c the character to check
     * @return the break action to be taken
     *          one of: {@link #DIRECT_BREAK},
     *                  {@link #INDIRECT_BREAK},
     *                  {@link #COMBINING_INDIRECT_BREAK},
     *                  {@link #COMBINING_PROHIBITED_BREAK},
     *                  {@link #PROHIBITED_BREAK},
     *                  {@link #EXPLICIT_BREAK}
     */
    public byte nextChar(char c) {

        byte currentClass = LineBreakUtils.getLineBreakProperty(c);

        /* Initial conversions */
        switch (currentClass) {
            case 0: // Unassigned codepoint: same treatment as AI
            case LineBreakUtils.LINE_BREAK_PROPERTY_AI:
            case LineBreakUtils.LINE_BREAK_PROPERTY_SG:
            case LineBreakUtils.LINE_BREAK_PROPERTY_XX:
                // LB 1: Resolve AI, ... SG and XX into other line breaking classes
                //       depending on criteria outside the scope of this algorithm.
                //       In the absence of such criteria, it is recommended that
                //       classes AI, ... SG and XX be resolved to AL
                currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
                break;

            case LineBreakUtils.LINE_BREAK_PROPERTY_SA:
                // LB 1: Resolve ... SA ... into other line breaking classes
                //       depending on criteria outside the scope of this algorithm.
                //       In the absence of such criteria, it is recommended that
                //       ... SA be resolved to AL, except that characters of
                //       class SA that have General_Category Mn or Mc be resolved to CM
                switch (Character.getType(c)) {
                    case Character.COMBINING_SPACING_MARK: //General_Category "Mc"
                    case Character.NON_SPACING_MARK: //General_Category "Mn"
                        currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_CM;
                        break;
                    default:
                        currentClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
                }

            default:
                //nop
        }

        /* Check 1: First character or initial character after a reset/mandatory break? */
        switch (leftClass) {
            case -1:
                //first character or initial character after a reset()
                leftClass = currentClass;
                if (leftClass == LineBreakUtils.LINE_BREAK_PROPERTY_CM) {
                    // LB 10: Treat any remaining combining marks as AL
                    leftClass = LineBreakUtils.LINE_BREAK_PROPERTY_AL;
                }
                // LB 2: Never break at the start of text
                return PROHIBITED_BREAK;

            case LineBreakUtils.LINE_BREAK_PROPERTY_BK:
            case LineBreakUtils.LINE_BREAK_PROPERTY_LF:
            case LineBreakUtils.LINE_BREAK_PROPERTY_NL:
                //first character after mandatory break
                // LB 4: Always break after hard line breaks
                // LB 5: Treat ... LF and NL has hard line breaks
                reset();
                leftClass = currentClass;
                return EXPLICIT_BREAK;

            case LineBreakUtils.LINE_BREAK_PROPERTY_CR:
                //first character after a carriage return:
                // LB 5: Treat CR followed by LF, as well as CR ... as hard line breaks
                // If current is LF, then fall through to Check 2 (see below),
                // and the hard break will be signaled for the character after LF (see above)
                if (currentClass != LineBreakUtils.LINE_BREAK_PROPERTY_LF) {
                    reset();
                    leftClass = currentClass;
                    return EXPLICIT_BREAK;
                }

            default:
                //nop
        }

        /* Check 2: current is a mandatory break or space? */
        switch (currentClass) {
            case LineBreakUtils.LINE_BREAK_PROPERTY_BK:
            case LineBreakUtils.LINE_BREAK_PROPERTY_LF:
            case LineBreakUtils.LINE_BREAK_PROPERTY_NL:
            case LineBreakUtils.LINE_BREAK_PROPERTY_CR:
                // LB 6: Do not break before a hard break
                leftClass = currentClass;
                return PROHIBITED_BREAK;

            case LineBreakUtils.LINE_BREAK_PROPERTY_SP:
                // LB 7: Do not break before spaces ...
                // Zero-width spaces are in the pair-table (see below)
                hadSpace = true;
                return PROHIBITED_BREAK;

            default:
                //nop
        }

        /* Normal treatment, if the first two checks did not return */
        boolean savedHadSpace = hadSpace;
        hadSpace = false;
        byte breakAction = LineBreakUtils.getLineBreakPairProperty(leftClass, currentClass);
        switch (breakAction) {
            case PROHIBITED_BREAK:
            case DIRECT_BREAK:
                leftClass = currentClass;
                return breakAction;

            case INDIRECT_BREAK:
                leftClass = currentClass;
                if (savedHadSpace) {
                    return INDIRECT_BREAK;
                } else {
                    return PROHIBITED_BREAK;
                }

            case COMBINING_INDIRECT_BREAK:
                if (savedHadSpace) {
                    leftClass = currentClass;
                    return COMBINING_INDIRECT_BREAK;
                } else {
                    return PROHIBITED_BREAK;
                }

            case COMBINING_PROHIBITED_BREAK:
                if (savedHadSpace) {
                    leftClass = currentClass;
                }
                return COMBINING_PROHIBITED_BREAK;

            default:
                assert false;
                return breakAction;
        }
    }

    /**
     * for debugging only
     */
    /*
    public static void main(String args[]) {
        LineBreakStatus lbs = new LineBreakStatus();
        lbs.nextChar('\n');
        lbs.nextChar('\n');
        lbs.nextChar('x');
    }
    */
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy