All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.solr.handler.tagger.TagClusterReducer Maven / Gradle / Ivy

There is a newer version: 9.6.1
Show newest version
/*
 * This software was produced for the U. S. Government
 * under Contract No. W15P7T-11-C-F600, and is
 * subject to the Rights in Noncommercial Computer Software
 * and Noncommercial Computer Software Documentation
 * Clause 252.227-7014 (JUN 1995)
 *
 * Copyright 2013 The MITRE Corporation. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.tagger;

public interface TagClusterReducer {
  /**
   * Reduces the linked-list of tags to only those that should be emitted.
   *
   * @param head never null; a 1-element array holding the head tag of the list, which is
   *     not null either
   */
  void reduce(TagLL[] head);

  /** Reducer that does nothing to the list, so every tag is kept. */
  TagClusterReducer ALL = new TagClusterReducer() {
    @Override
    public void reduce(TagLL[] head) {
      // intentionally empty: nothing is removed
    }
  };

  /**
   * Reducer that drops tags lying within the span of another tag.
   *
   * <p>A tag is removed when some earlier tag in the list ends at or after it; an earlier
   * tag is removed when it starts at the same offset as the current tag but ends sooner.
   * The list is expected to be ordered by {@code startOffset} (this is asserted).
   */
  TagClusterReducer NO_SUB = new TagClusterReducer() {
    @Override
    public void reduce(TagLL[] head) {
      // Walk forward over every tag after the head.
      for (TagLL current = head[0].nextTag; current != null; current = current.nextTag) {
        // Walk backwards over the tags preceding 'current', nearest first.
        for (TagLL earlier = current.prevTag; earlier != null; earlier = earlier.prevTag) {
          assert earlier.startOffset <= current.startOffset;
          if (earlier.endOffset >= current.endOffset) {
            // 'earlier' starts no later and ends no sooner, so 'current' can be removed.
            current.removeLL();
            break;
          }
          if (earlier.startOffset == current.startOffset) {
            // Same start, but 'earlier' ends sooner: it is the one to remove.
            earlier.removeLL();
            // keep scanning backwards; 'current' is still valid
          }
        }
      }
    }
  };

  /**
   * Reducer that repeatedly selects the longest not-yet-marked tag, marks it, and removes
   * every unmarked tag overlapping it, until no unmarked tag is left.
   *
   * <p>When several unmarked tags share the greatest {@code charLen()}, the one furthest
   * along the list is selected, because the comparison uses {@code >=}.
   */
  TagClusterReducer LONGEST_DOMINANT_RIGHT = new TagClusterReducer() {
    @Override
    public void reduce(TagLL[] head) {
      // Shortcut for the common case of a single tag: there is nothing to reduce.
      if (head[0].nextTag == null) {
        return;
      }

      for (;;) {
        // Pick the longest tag that has not been marked yet (last one wins on ties).
        TagLL winner = null;
        for (TagLL candidate = head[0]; candidate != null; candidate = candidate.nextTag) {
          if (candidate.mark) {
            continue;
          }
          if (winner == null || candidate.charLen() >= winner.charLen()) {
            winner = candidate;
          }
        }
        if (winner == null) {
          break; // every remaining tag is already marked
        }

        // Mark it so it is kept and never selected again.
        winner.mark = true;

        // Remove the unmarked tags that overlap the winner.
        for (TagLL other = head[0]; other != null; other = other.nextTag) {
          if (!other.mark) {
            if (other.overlaps(winner)) {
              other.removeLL();
            } else if (other.startOffset >= winner.endOffset) {
              // Starts at or past the winner's end: no subsequent tag can overlap either
              // (presumably relies on the list being ordered by startOffset).
              break;
            }
          }
        }
      }

      // Everything still in the list should be marked by now; spot-check the head.
      assert head[0].mark;
    }
  };
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy