All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.beezle.mork.scanner.Minimizer Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 1&1 Internet AG, http://www.1and1.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package net.sf.beezle.mork.scanner;

import net.sf.beezle.mork.regexpr.Range;
import net.sf.beezle.sushi.util.IntArrayList;
import net.sf.beezle.sushi.util.IntBitSet;

/**
 * Minimization of a finite automaton. Requires a complete deterministic
 * automaton. Algorithm is taken from [Hopcroft80]. SableCC 2.7 and JFlex 1.1.2
 * use the same algorithm. State partitioning as described in most compiler text
 * books is too slow.
 *
 * <p>The constructor seeds a triangular table of state pairs with the pairs that
 * are distinct because of their end-state status; {@link #run()} then fills in the
 * remaining pairs and builds the minimized automaton. After {@link #run()},
 * {@link #getNewSi(int)} maps a state index of the original automaton to the
 * corresponding state index of the result.
 */
public class Minimizer {
    /** Marker: the pair is known to be distinct. Compared by identity, never modified. */
    private static final IntArrayList YES = new IntArrayList();

    /** Marker: the pair is known to be not distinct. Compared by identity, never modified. */
    private static final IntArrayList NO = new IntArrayList();

    /** UNKNOWN has to be the default initialization for reference-type array elements: null */
    private static final IntArrayList UNKNOWN = null;

    /** Largest state index that fits into one half of a packed pair, see {@link #pair(int, int)}. */
    private static final int MAX_STATE_INDEX = 0xffff;

    /** The automaton to be minimized. */
    private final FA fa;

    /** fa.size(); */
    private final int size;

    /**
     * indexed by [leftSi][rightSi]
     * inv: leftSi > rightSi
     * YES: [leftSi][rightSi] is distinct
     * NO:  [leftSi][rightSi] not distinct
     * UNKNOWN: unknown
     * else: distinct is unknown; if it turns out to be distinct, all pairs in the list are distinct
     * @inv distinct[si].length = si
     */
    private final IntArrayList[][] distinct;

    /** old2new[faSi] = resultSi.  assigned by collect */
    private final int[] old2new;

    /**
     * Prepares the minimization of the specified automaton and marks all pairs
     * that are distinct without looking at transitions: an end state paired with a
     * non-end state, and two end states for which {@code Label.sameSymbols} is false.
     *
     * @param fa the automaton to minimize; requires a complete deterministic automaton
     */
    public Minimizer(FA fa) {
        int leftSi, rightSi;

        this.fa = fa;
        size = fa.size();
        old2new = new int[size];
        distinct = new IntArrayList[size][];
        for (leftSi = 0; leftSi < size; leftSi++) {
            distinct[leftSi] = new IntArrayList[leftSi];
            // initialized to null. That takes an extra test during minimization, but
            // it saves a huge amount of memory
        }
        for (leftSi = fa.getFirstEnd(); leftSi != -1; leftSi = fa.getNextEnd(leftSi)) {
            // partners with a smaller index: the end state is the row
            for (rightSi = 0; rightSi < leftSi; rightSi++) {
                if (!fa.isEnd(rightSi) || !Label.sameSymbols(fa, leftSi, rightSi)) {
                    distinct[leftSi][rightSi] = YES;
                }
            }
            // partners with a larger index: the end state is the column, because the
            // table only stores [larger][smaller]
            for (rightSi = leftSi + 1; rightSi < size; rightSi++) {
                if (!fa.isEnd(rightSi) || !Label.sameSymbols(fa, leftSi, rightSi)) {
                    distinct[rightSi][leftSi] = YES;
                }
            }
        }
    }

    /**
     * Maps a state of the original automaton to its state in the minimized automaton.
     * The mapping is assigned by {@link #run()}; calling this method earlier yields
     * the array's initial value.
     *
     * @param si state index in the original automaton
     * @return state index in the minimized automaton
     */
    public int getNewSi(int si) {
        return old2new[si];
    }

    /**
     * Computes the minimized automaton. The caller has to ensure a complete
     * deterministic automaton.
     *
     * @return the minimized automaton
     * @throws IllegalArgumentException if the automaton is not complete, not
     *         deterministic, or if a state index that has to be recorded as a
     *         dependent pair does not fit into 16 bits
     */
    public FA run() {
        distinguish();
        return collect();
    }

    /**
     * Builds the result automaton from the filled pair table: every set of states
     * that are not marked distinct from each other becomes one result state.
     * Also fills {@link #old2new}.
     */
    private FA collect() {
        int leftSi, rightSi;
        int faSi, resultSi;
        FA result;
        IntBitSet states; // old states
        int resultSize;
        int ti, maxTi;

        result = new FA();
        for (leftSi = 0; leftSi < size; leftSi++) {
            // collect all states that are not known to differ from leftSi
            states = new IntBitSet();
            for (rightSi = 0; rightSi < leftSi; rightSi++) {
                if (distinct[leftSi][rightSi] != YES) {
                    // distinct may well be unknown -- this happens if some other state switches
                    // to "NO", depending states are not notified
                    states.add(rightSi);
                }
            }
            // leftSi == rightSi  => distinct == NO
            states.add(rightSi++);
            for (; rightSi < size; rightSi++) {
                if (distinct[rightSi][leftSi] != YES) {
                    // distinct may well be unknown -- this happens if some other state switches
                    // to "NO", depending states are not notified
                    states.add(rightSi);
                }
            }

            // only the first state of each set creates a result state
            if (result.find(states) == -1) {
                resultSi = result.add(states);
                for (faSi = states.first(); faSi != -1; faSi = states.next(faSi)) {
                    old2new[faSi] = resultSi;
                }
            }
        }

        resultSize = result.size();
        for (resultSi = 0; resultSi < resultSize; resultSi++) {
            // the first old state of the set serves as representative for start flag,
            // end flag and transitions
            states = (IntBitSet) result.get(resultSi).getLabel();
            faSi = states.first();
            if (faSi == -1) {
                throw new IllegalStateException("result state " + resultSi + " has no original states");
            }

            if (fa.getStart() == faSi) {
                result.setStart(resultSi);
            }
            if (fa.isEnd(faSi)) {
                result.setEnd(resultSi);
            }

            maxTi = fa.get(faSi).size();
            for (ti = 0; ti < maxTi; ti++) {
                result.get(resultSi).add(
                    old2new[fa.get(faSi).getEnd(ti)], (Range) fa.get(faSi).getInput(ti));
            }
        }

        Label.combineLabels(result, fa);

        return result;
    }

    /** Runs {@link #distinguish(int, int)} for every pair that is not already marked distinct. */
    private void distinguish() {
        int leftSi, rightSi;
        IntArrayList d;

        // I tested the four combinations of running the following loop upwards or downwards:
        // the differences are small, and the following seems best:
        for (leftSi = 0; leftSi < size; leftSi++) {
            for (rightSi = 0; rightSi < leftSi; rightSi++) {
                d = distinct[leftSi][rightSi];
                if (d == YES) {
                    // already done
                } else {
                    // d != NO because only distinguish assigns NO -- and it has not been called
                    // for this index
                    distinguish(leftSi, rightSi);
                }
            }
        }
    }

    /**
     * Distinguish. pre: leftSi > rightSi
     *
     * <p>Compares the successors of both states for every pair of touching input
     * ranges. If a successor pair is known to be distinct, the pair is marked
     * distinct (together with all pairs depending on it). If a successor pair is
     * still undecided, this pair is recorded in the successor pair's list, to be
     * marked later if the successor pair turns out to be distinct. If all successor
     * pairs are known to be not distinct, the pair is marked NO.
     *
     * @throws IllegalArgumentException if leftSi has no transitions, or if some
     *         range of leftSi is not touched by any range of rightSi
     */
    private void distinguish(int leftSi, int rightSi) {
        int leftTi, maxLeftTi, rightTi, maxRightTi;
        int leftEndSi, rightEndSi;
        Range leftRange, rightRange;
        IntArrayList tmp;
        boolean foundUnknown;  // true, if a state with unknown distinct is found

        maxLeftTi = fa.get(leftSi).size();
        if (maxLeftTi == 0) {
            throw new IllegalArgumentException("fa not complete");
        }
        // does not change while the transitions of leftSi are processed
        maxRightTi = fa.get(rightSi).size();
        foundUnknown = false;
    leftTransitions:
        for (leftTi = 0; leftTi < maxLeftTi; leftTi++) {
            leftRange = (Range) fa.get(leftSi).getInput(leftTi);
            leftEndSi = fa.get(leftSi).getEnd(leftTi);
            for (rightTi = 0; rightTi < maxRightTi; rightTi++) {
                rightRange = (Range) fa.get(rightSi).getInput(rightTi);
                if (leftRange.touches(rightRange)) {
                    // first touching range found, process the whole run of touching ranges
                    do {
                        rightEndSi = fa.get(rightSi).getEnd(rightTi);
                        tmp = getCheckedDistinctAndAllocate(leftEndSi, rightEndSi);
                        if (tmp == YES) {
                            // (leftEndSi, rightEndSi) are known to differ.
                            setDistinct(leftSi, rightSi);
                            return;
                        } else if (tmp == NO) {
                            // do nothing
                        } else {
                            foundUnknown = true;
                            // distinct for (leftEndSi, rightEndSi) is
                            // not known. Set (leftSi, rightSi) on its list.
                            tmp.add(pair(leftSi, rightSi));
                        }
                        rightTi++;
                        if (rightTi == maxRightTi) {
                            continue leftTransitions;
                        }
                        rightRange = (Range) fa.get(rightSi).getInput(rightTi);
                    } while (leftRange.touches(rightRange));
                    // ranges are sorted; thus, all touching ranges follow without gaps
                    continue leftTransitions;
                }
            }
            throw new IllegalArgumentException("not a cdfa");
        }
        if (!foundUnknown) {
            // all follow-up states are "NO"
            distinct[leftSi][rightSi] = NO;
        }
    }

    /**
     * Mark the pair to be distinct. Also marks all pairs depending on it, directly
     * or indirectly. Implemented with a worklist instead of recursion, because the
     * length of a dependency chain is bounded only by the number of state pairs.
     */
    private void setDistinct(int leftSi, int rightSi) {
        IntArrayList work;
        IntArrayList dependents;
        int next, pair;
        int i, max;

        // The detached list of the first pair is no longer referenced by the table,
        // so it can be reused as the worklist.
        work = detachAsDistinct(leftSi, rightSi);
        if (work == null) {
            return;
        }
        // work grows while it is processed, thus size() is re-evaluated every round
        for (next = 0; next < work.size(); next++) {
            pair = work.get(next);
            dependents = detachAsDistinct(left(pair), right(pair));
            if (dependents != null) {
                max = dependents.size();
                for (i = 0; i < max; i++) {
                    work.add(dependents.get(i));
                }
            }
        }
    }

    /**
     * Marks a single pair distinct.
     *
     * @return the list of dependent pairs that was stored for the pair, or null if
     *         the pair was already distinct or had no list of its own. The shared
     *         NO marker is never returned, so callers may modify the result.
     */
    private IntArrayList detachAsDistinct(int leftSi, int rightSi) {
        IntArrayList old;

        old = distinct[leftSi][rightSi];
        if (old == YES) {
            return null;
        }
        distinct[leftSi][rightSi] = YES;
        if (old == UNKNOWN || old == NO) {
            return null;
        }
        return old;
    }

    /**
     * Gets the table entry for two states given in any order. An UNKNOWN entry is
     * replaced by a new, empty list of dependent pairs first, thus the result is
     * never null.
     *
     * @return YES, NO (in particular if both states are the same), or the list of
     *         pairs depending on this pair
     */
    private IntArrayList getCheckedDistinctAndAllocate(int leftSi, int rightSi) {
        IntArrayList tmp;

        if (leftSi > rightSi) {
            tmp = distinct[leftSi][rightSi];
            if (tmp == UNKNOWN) {
                tmp = new IntArrayList();
                distinct[leftSi][rightSi] = tmp;
            }
            return tmp;
        } else if (leftSi == rightSi) {
            return NO;
        } else {
            // the table only stores [larger][smaller]
            tmp = distinct[rightSi][leftSi];
            if (tmp == UNKNOWN) {
                tmp = new IntArrayList();
                distinct[rightSi][leftSi] = tmp;
            }
            return tmp;
        }
    }

    /**
     * Packs two state indexes into one int: left in the upper 16 bits, right in the
     * lower 16 bits.
     *
     * @throws IllegalArgumentException if one of the indexes is negative or exceeds
     *         {@link #MAX_STATE_INDEX}; packing it would silently yield a different pair
     */
    private static int pair(int left, int right) {
        if (((left | right) & ~MAX_STATE_INDEX) != 0) {
            throw new IllegalArgumentException("state index out of range 0.." + MAX_STATE_INDEX
                    + ": " + left + ", " + right);
        }
        return left << 16 | right;
    }

    /** Extracts the left state index from a packed pair. */
    private static int left(int pair) {
        return pair >>> 16;
    }

    /** Extracts the right state index from a packed pair. */
    private static int right(int pair) {
        return pair & 0xffff;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy