All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.solr.handler.tagger.TagClusterReducer Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * This software was produced for the U. S. Government
 * under Contract No. W15P7T-11-C-F600, and is
 * subject to the Rights in Noncommercial Computer Software
 * and Noncommercial Computer Software Documentation
 * Clause 252.227-7014 (JUN 1995)
 *
 * Copyright 2013 The MITRE Corporation. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.tagger;

public interface TagClusterReducer {
  /**
   * Prunes a linked list of tags so that only the tags that should be emitted stay linked.
   *
   * @param head never null; a single-element array whose sole entry, the head tag of the list, is
   *     never null either
   */
  void reduce(TagLL[] head);

  /** Leaves the list exactly as it was given: no tag is removed. */
  TagClusterReducer ALL =
      new TagClusterReducer() {
        @Override
        public void reduce(TagLL[] head) {
          // intentionally empty: every tag is kept
        }
      };

  /**
   * Removes "sub tags". A tag is removed when some tag before it in the list ends at or after its
   * own end offset; a preceding tag is removed when it starts at the same offset as a later tag
   * but ends before it. The list is expected to be ordered by ascending start offset (this is
   * asserted while scanning).
   */
  TagClusterReducer NO_SUB =
      new TagClusterReducer() {
        @Override
        public void reduce(TagLL[] head) {
          // Forward pass, beginning with the tag that follows the head.
          TagLL current = head[0].nextTag;
          while (current != null) {
            // Backward pass over the tags that precede 'current'.
            TagLL earlier = current.prevTag;
            while (earlier != null) {
              assert earlier.startOffset <= current.startOffset;
              if (earlier.endOffset >= current.endOffset) {
                // An earlier tag ends at or after 'current' does, so 'current' is dropped.
                current.removeLL();
                break;
              }
              if (earlier.startOffset == current.startOffset) {
                // Same start but an earlier end: the preceding tag is the one to drop.
                // 'current' stays in place, so the backward scan simply carries on.
                earlier.removeLL();
              }
              earlier = earlier.prevTag;
            }
            current = current.nextTag;
          }
        }
      };

  /**
   * Repeatedly picks the longest tag that has not been marked yet (when several share the greatest
   * length, the one latest in list order wins), marks it, and removes every unmarked tag that
   * overlaps it. Stops once no unmarked tag is left in the list.
   */
  TagClusterReducer LONGEST_DOMINANT_RIGHT =
      new TagClusterReducer() {
        @Override
        public void reduce(TagLL[] head) {
          // Fast path for the common case of a list holding a single tag.
          if (head[0].nextTag == null) {
            return;
          }

          for (; ; ) {
            // Select the longest unmarked tag; '>=' lets a later tag of equal length win.
            TagLL winner = null;
            TagLL scan = head[0];
            while (scan != null) {
              if (!scan.mark && (winner == null || scan.charLen() >= winner.charLen())) {
                winner = scan;
              }
              scan = scan.nextTag;
            }
            if (winner == null) {
              break; // nothing unmarked remains
            }

            // Marked tags are the ones that survive the sweeps below.
            winner.mark = true;

            // Sweep out the unmarked tags that overlap the winner.
            TagLL candidate = head[0];
            while (candidate != null) {
              if (!candidate.mark) {
                if (candidate.overlaps(winner)) {
                  candidate.removeLL();
                } else if (candidate.startOffset >= winner.endOffset) {
                  break; // starts at or past the winner's end; no later tag can overlap it
                }
              }
              candidate = candidate.nextTag;
            }
          }

          // Every tag still linked should be marked by now; only the head is checked here.
          assert head[0].mark;
        }
      };
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy