org.apache.lucene.analysis.synonym.SynonymMap Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.analysis.synonym;
import org.apache.lucene.analysis.CharArrayMap;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.Version;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
 * Mapping rules for use with {@link SynonymFilter}.
 * <p>
 * The map is a tree: each node optionally carries the replacement tokens
 * ({@link #synonyms}) for the sequence of strings that leads to it, and a
 * {@link #submap} keyed by the next string of longer sequences.
 */
public class SynonymMap {
    /**
     * Child nodes keyed by the next string to match; {@code null} until the first child is added.
     *
     * @lucene.internal
     */
    public CharArrayMap<SynonymMap> submap; // recursive: Map<String, SynonymMap>
    /**
     * Replacement tokens registered for the sequence ending at this node; {@code null} if none.
     *
     * @lucene.internal
     */
    public Token[] synonyms;
    /** Bit set built from {@link #INCLUDE_ORIG} and {@link #IGNORE_CASE}. */
    int flags;
    static final int INCLUDE_ORIG = 0x01;
    static final int IGNORE_CASE = 0x02;

    /** Creates an empty map with no flags set. */
    public SynonymMap() {
    }

    /**
     * Creates an empty map.
     *
     * @param ignoreCase if {@code true}, the {@link #IGNORE_CASE} flag is set on this map
     */
    public SynonymMap(boolean ignoreCase) {
        if (ignoreCase) {
            flags |= IGNORE_CASE;
        }
    }

    /**
     * @return {@code true} if the {@link #INCLUDE_ORIG} flag is set on this node
     */
    public boolean includeOrig() {
        return (flags & INCLUDE_ORIG) != 0;
    }

    /**
     * @return {@code true} if the {@link #IGNORE_CASE} flag is set on this node
     */
    public boolean ignoreCase() {
        return (flags & IGNORE_CASE) != 0;
    }

    /**
     * Registers a mapping from a sequence of strings to a list of replacement tokens.
     *
     * @param singleMatch   {@code List<String>}, the sequence of strings to match
     * @param replacement   {@code List<Token>}, the list of tokens to use on a match
     * @param includeOrig   sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
     * @param mergeExisting merge the replacement tokens with any other mappings that exist
     * @throws RuntimeException if a mapping for {@code singleMatch} already exists and
     *                          {@code mergeExisting} is {@code false}
     */
    public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
        // Walk (and lazily create) one tree level per string of the match sequence.
        SynonymMap currMap = this;
        for (String str : singleMatch) {
            if (currMap.submap == null) {
                // For now the match version is hardcoded to LUCENE_31.
                // Would be nice to fix, but we shouldn't store a version in each submap!
                currMap.submap = new CharArrayMap<SynonymMap>(Version.LUCENE_31, 1, ignoreCase());
            }
            SynonymMap map = currMap.submap.get(str);
            if (map == null) {
                map = new SynonymMap();
                // Children inherit only the case-sensitivity flag from this map.
                map.flags |= flags & IGNORE_CASE;
                currMap.submap.put(str, map);
            }
            currMap = map;
        }

        if (currMap.synonyms != null && !mergeExisting) {
            throw new RuntimeException("SynonymFilter: there is already a mapping for " + singleMatch);
        }

        List<Token> superset = currMap.synonyms == null
            ? replacement
            : mergeTokens(Arrays.asList(currMap.synonyms), replacement);
        currMap.synonyms = superset.toArray(new Token[superset.size()]);
        if (includeOrig) {
            currMap.flags |= INCLUDE_ORIG;
        }
    }

    /**
     * Renders this node as {@code <[tok1,tok2,...],submap>}. The bracketed part is omitted when
     * {@link #synonyms} is {@code null}, and {@code ,ORIG} is appended inside the brackets when
     * the {@link #INCLUDE_ORIG} flag is set.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("<");
        if (synonyms != null) {
            sb.append("[");
            for (int i = 0; i < synonyms.length; i++) {
                if (i != 0) {
                    sb.append(',');
                }
                sb.append(synonyms[i]);
            }
            if ((flags & INCLUDE_ORIG) != 0) {
                sb.append(",ORIG");
            }
            sb.append("],");
        }
        sb.append(submap);
        sb.append(">");
        return sb.toString();
    }

    /**
     * Produces a {@code List<Token>} from a {@code List<String>}.
     * <p>
     * Each token is created with start and end offsets of 0 and the type {@code "SYNONYM"}.
     *
     * @param strings the token texts, in order
     * @return a new list holding one token per input string
     */
    public static List<Token> makeTokens(List<String> strings) {
        List<Token> ret = new ArrayList<Token>(strings.size());
        for (String str : strings) {
            Token newTok = new Token(str, 0, 0, "SYNONYM");
            ret.add(newTok);
        }
        return ret;
    }

    /**
     * Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
     * the tokens end up at the same position.
     *
     * Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same position)
     * Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n)
     * <p>
     * If either list is {@code null}, the result simply contains the tokens of {@code lst2}
     * followed by those of {@code lst1} (whichever are non-null), uncopied. Otherwise every
     * token in the result is a fresh copy carrying the source token's text, offsets and type.
     *
     * @param lst1 first token list, may be {@code null}
     * @param lst2 second token list, may be {@code null}
     * @return a new list containing the merged tokens
     */
    public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
        ArrayList<Token> result = new ArrayList<Token>();
        if (lst1 == null || lst2 == null) {
            if (lst2 != null) {
                result.addAll(lst2);
            }
            if (lst1 != null) {
                result.addAll(lst1);
            }
            return result;
        }

        // pos is the absolute position of the last emitted token; pos1/pos2 are the absolute
        // positions of the current (not yet emitted) token of each list.
        int pos = 0;
        Iterator<Token> iter1 = lst1.iterator();
        Iterator<Token> iter2 = lst2.iterator();
        Token tok1 = iter1.hasNext() ? iter1.next() : null;
        Token tok2 = iter2.hasNext() ? iter2.next() : null;
        int pos1 = tok1 != null ? tok1.getPositionIncrement() : 0;
        int pos2 = tok2 != null ? tok2.getPositionIncrement() : 0;
        while (tok1 != null || tok2 != null) {
            // Drain list 1 while its current token is not positioned after list 2's current token.
            while (tok1 != null && (pos1 <= pos2 || tok2 == null)) {
                Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
                tok.copyBuffer(tok1.buffer(), 0, tok1.length());
                tok.setPositionIncrement(pos1 - pos);
                result.add(tok);
                pos = pos1;
                tok1 = iter1.hasNext() ? iter1.next() : null;
                pos1 += tok1 != null ? tok1.getPositionIncrement() : 0;
            }
            // Then drain list 2 under the symmetric condition.
            while (tok2 != null && (pos2 <= pos1 || tok1 == null)) {
                Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
                tok.copyBuffer(tok2.buffer(), 0, tok2.length());
                tok.setPositionIncrement(pos2 - pos);
                result.add(tok);
                pos = pos2;
                tok2 = iter2.hasNext() ? iter2.next() : null;
                pos2 += tok2 != null ? tok2.getPositionIncrement() : 0;
            }
        }
        return result;
    }
}