All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.handler.tagger.TagLL Maven / Gradle / Ivy

/*
 * This software was produced for the U. S. Government
 * under Contract No. W15P7T-11-C-F600, and is
 * subject to the Rights in Noncommercial Computer Software
 * and Noncommercial Computer Software Documentation
 * Clause 252.227-7014 (JUN 1995)
 *
 * Copyright 2013 The MITRE Corporation. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.tagger;

import java.io.IOException;

import org.apache.lucene.util.BytesRef;

/**
 * This is a Tag -- a startOffset, endOffset and value.
 * 

* A Tag starts without a value in an * "advancing" state. {@link #advance(org.apache.lucene.util.BytesRef, int)} * is called with subsequent words and then eventually it won't advance any * more, and value is set (could be null). *

* A Tag is also a doubly-linked-list (hence the LL in the name). All tags share * a reference to the head via a 1-element array, which is potentially modified * if any of the linked-list methods are called. Tags in the list should have * equal or increasing start offsets. */ public class TagLL{ private final TagLL[] head;//a shared pointer to the head; 1 element TagLL prevTag, nextTag; // linked list private TermPrefixCursor cursor; final int startOffset;//inclusive int endOffset;//exclusive Object value;//null means unset /** optional boolean used by some TagClusterReducer's */ boolean mark = false; TagLL(TagLL[] head, TermPrefixCursor cursor, int startOffset, int endOffset, Object value) { this.head = head; this.cursor = cursor; this.startOffset = startOffset; this.endOffset = endOffset; this.value = value; } /** * Advances this tag with "word" at offset "offset". If this tag is not in * an advancing state then it does nothing. If it is advancing and prior to * advancing further it sees a value, then a non-advancing tag may be inserted * into the LL as side-effect. If this returns false (it didn't advance) and * if there is no value, then it will also be removed. * * * @param word The next word or null if at an end * @param offset The last character in word's offset in the underlying * stream. If word is null then it's meaningless. * * @return Whether it advanced or not. */ boolean advance(BytesRef word, int offset) throws IOException { if (!isAdvancing()) return false; Object iVal = cursor.getDocIds(); if (word != null && cursor.advance(word)) { if (iVal != null) { addBeforeLL(new TagLL(head, null, startOffset, endOffset, iVal)); } assert offset >= endOffset; endOffset = offset; return true; } else { this.value = iVal; this.cursor = null; if (iVal == null) removeLL(); return false; } } /** Removes this tag from the chain, connecting prevTag and nextTag. Does not * modify "this" object's pointers, so the caller can refer to nextTag after * removing it. */ public void removeLL() { if (head[0] == this) head[0] = nextTag; if (prevTag != null) { prevTag.nextTag = nextTag; } if (nextTag != null) { nextTag.prevTag = prevTag; } } void addBeforeLL(TagLL tag) { assert tag.startOffset <= startOffset; if (prevTag != null) { assert prevTag.startOffset <= tag.startOffset; prevTag.nextTag = tag; tag.prevTag = prevTag; } else { assert head[0] == this; head[0] = tag; } prevTag = tag; tag.nextTag = this; } void addAfterLL(TagLL tag) { assert tag.startOffset >= startOffset; if (nextTag != null) { assert nextTag.startOffset >= tag.startOffset; nextTag.prevTag = tag; tag.nextTag = nextTag; } nextTag = tag; tag.prevTag = this; } public int charLen() { return endOffset - startOffset; } public TagLL getNextTag() { return nextTag; } public TagLL getPrevTag() { return prevTag; } public int getStartOffset() { return startOffset; } public int getEndOffset() { return endOffset; } public boolean overlaps(TagLL other) { //don't use >= or <= because startOffset is inclusive while endOffset is exclusive if (startOffset < other.startOffset) return endOffset > other.startOffset; else return startOffset < other.endOffset; } boolean isAdvancing() { return cursor != null; } @Override public String toString() { return (prevTag != null ? '*' : '-') + "|" + (nextTag != null ? '*' : '-') + " " + startOffset + " to " + endOffset + (isAdvancing() ? '+' : " #" + value); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy