All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xerial.xml.index.IntervalIndexer Maven / Gradle / Ivy

The newest version!
/*--------------------------------------------------------------------------
 *  Copyright 2004 Taro L. Saito
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *--------------------------------------------------------------------------*/
//--------------------------------------
// XerialJ
//
// IntervalIndexer.java
// Since: 2005/09/09 13:06:28
//
// $URL$
// $Author$
//--------------------------------------
package org.xerial.xml.index;

import org.xerial.util.ArrayDeque;
import org.xerial.util.Deque;
import org.xerial.util.Pair;
import org.xerial.util.StringUtil;
import org.xerial.xml.XMLException;
import org.xerial.xml.pullparser.AbstractSAXEventHandler;
import org.xmlpull.v1.XmlPullParser;

/**
 * Label XML documents with start and end intervals
 * 
 * @author leo
 * 
 */
public class IntervalIndexer extends AbstractSAXEventHandler {
    private final static String EMPTY_STRING = "";

    // (start, end) stack
    Deque> _startOrderStack;
    int _currentDepth = 0;
    int _startOrder = 0;
    LWIndexWriter _writer;

    int STARTORDER_INCREMENT = 1;
    int MINIMUM_INTERAVAL = 1;

    public class LWIndex implements XMLNode {
        int start;
        int end;
        int level;

        /**
         * @param end
         * @param level
         * @param start
         */
        public LWIndex(int start, int end, int level) {
            this.start = start;
            this.end = end;
            this.level = level;
        }

        public String outputAsTabDelimited() {
            return StringUtil.concatinateWithTab(start, end, level);
        }
    }

    public IntervalIndexer(LWIndexWriter writer) {
        _writer = writer;
    }

    @Override
    public void endDocument(XmlPullParser parser) throws XMLException {
        // tear down
        popStack(parser);
    }

    @Override
    public void endTag(XmlPullParser parser) throws XMLException {
        popStack(parser);
    }

    @Override
    public void startDocument(XmlPullParser parser) throws XMLException {
        // initialize 
        _startOrderStack = new ArrayDeque>();
        _startOrder = 0;
        _currentDepth = 0;
        pushStack();
    }

    @Override
    public void startTag(XmlPullParser parser) throws XMLException {
        pushStack();
    }

    @Override
    public void text(XmlPullParser parser) throws XMLException {
        Pair currentNode = _startOrderStack.removeLast();
        String text = parser.getText();
        _startOrderStack.addLast(new Pair(currentNode.getFirst(), currentNode
                .getSecond() == null ? text : currentNode.getSecond() + text));
    }

    private void pushStack() {
        _startOrderStack.addLast(new Pair(_startOrder, EMPTY_STRING));
        _startOrder += STARTORDER_INCREMENT;
        _currentDepth++;
    }

    private void popStack(XmlPullParser parser) {
        int endOrder = _startOrder + MINIMUM_INTERAVAL;

        Pair currentNode = _startOrderStack.removeLast();
        // output node data
        _writer.write(new LWIndex(currentNode.getFirst(), endOrder, _currentDepth), parser
                .getName(), currentNode.getSecond());

        _startOrder = endOrder + STARTORDER_INCREMENT;
        _currentDepth--;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy