org.apache.solr.handler.tagger.TagClusterReducer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Show all versions of solr-core Show documentation
Apache Solr (module: core)
/*
* This software was produced for the U. S. Government
* under Contract No. W15P7T-11-C-F600, and is
* subject to the Rights in Noncommercial Computer Software
* and Noncommercial Computer Software Documentation
* Clause 252.227-7014 (JUN 1995)
*
* Copyright 2013 The MITRE Corporation. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.tagger;
public interface TagClusterReducer {
/**
* Reduces the linked-list to only those tags that should be emitted
*
* @param head not null; 1-element array to head which isn't null either
*/
void reduce(TagLL[] head);
static final TagClusterReducer ALL =
new TagClusterReducer() {
@Override
public void reduce(TagLL[] head) {}
};
static final TagClusterReducer NO_SUB =
new TagClusterReducer() {
@Override
public void reduce(TagLL[] head) {
// loop forward over all tags
for (TagLL tag = head[0].nextTag; tag != null; tag = tag.nextTag) {
// loop backwards over prev tags from this tag
for (TagLL tPrev = tag.prevTag; tPrev != null; tPrev = tPrev.prevTag) {
assert tPrev.startOffset <= tag.startOffset;
// if a previous tag's endOffset is <= this one's, tForward can be removed
if (tPrev.endOffset >= tag.endOffset) {
tag.removeLL();
break;
} else if (tPrev.startOffset == tag.startOffset) {
tPrev.removeLL();
// continue; 'tag' is still valid
}
}
}
}
};
static final TagClusterReducer LONGEST_DOMINANT_RIGHT =
new TagClusterReducer() {
@Override
public void reduce(TagLL[] head) {
// --Optimize for common single-tag case
if (head[0].nextTag == null) return;
while (true) {
// --Find longest not already marked
TagLL longest = null;
for (TagLL t = head[0]; t != null; t = t.nextTag) {
if (!t.mark && (longest == null || t.charLen() >= longest.charLen())) longest = t;
}
if (longest == null) break;
// --Mark longest (so we return it eventually)
longest.mark = true;
// --Remove tags overlapping this longest
for (TagLL t = head[0]; t != null; t = t.nextTag) {
if (t.mark) continue;
if (t.overlaps(longest)) {
t.removeLL();
} else if (t.startOffset >= longest.endOffset) {
break; // no subsequent can possibly overlap
}
}
} // loop
// all-remaining should be marked
// for (TagLL t = head; t != null; t = t.nextTag) {
// assert t.mark;
//// if (!t.mark) {
//// t.removeLL();
//// if (head == t)
//// head = t.nextTag;
//// }
// }
assert head[0].mark;
}
};
}