All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opensearch.index.mapper.TokenCountFieldMapper Maven / Gradle / Ivy

/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.index.mapper;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import static org.opensearch.common.xcontent.support.XContentMapValues.nodeIntegerValue;

/**
 * A {@link FieldMapper} that takes a string and writes a count of the tokens in that string
 * to the index.  In most ways the mapper acts just like an {@link NumberFieldMapper}.
 */
public class TokenCountFieldMapper extends ParametrizedFieldMapper {
    public static final String CONTENT_TYPE = "token_count";

    private static TokenCountFieldMapper toType(FieldMapper in) {
        return (TokenCountFieldMapper) in;
    }

    public static class Builder extends ParametrizedFieldMapper.Builder {

        private final Parameter index = Parameter.indexParam(m -> toType(m).index, true);
        private final Parameter hasDocValues = Parameter.docValuesParam(m -> toType(m).hasDocValues, true);
        private final Parameter store = Parameter.storeParam(m -> toType(m).store, false);

        private final Parameter analyzer = Parameter.analyzerParam("analyzer", true, m -> toType(m).analyzer, () -> null);
        private final Parameter nullValue = new Parameter<>(
            "null_value",
            false,
            () -> null,
            (n, c, o) -> o == null ? null : nodeIntegerValue(o),
            m -> toType(m).nullValue
        ).acceptsNull();
        private final Parameter enablePositionIncrements = Parameter.boolParam(
            "enable_position_increments",
            false,
            m -> toType(m).enablePositionIncrements,
            true
        );

        private final Parameter> meta = Parameter.metaParam();

        public Builder(String name) {
            super(name);
        }

        @Override
        protected List> getParameters() {
            return Arrays.asList(index, hasDocValues, store, analyzer, nullValue, enablePositionIncrements, meta);
        }

        @Override
        public TokenCountFieldMapper build(BuilderContext context) {
            if (analyzer.getValue() == null) {
                throw new MapperParsingException("Analyzer must be set for field [" + name + "] but wasn't.");
            }
            MappedFieldType ft = new TokenCountFieldType(
                buildFullName(context),
                index.getValue(),
                store.getValue(),
                hasDocValues.getValue(),
                nullValue.getValue(),
                meta.getValue()
            );
            return new TokenCountFieldMapper(name, ft, multiFieldsBuilder.build(this, context), copyTo.build(), this);
        }
    }

    static class TokenCountFieldType extends NumberFieldMapper.NumberFieldType {

        TokenCountFieldType(
            String name,
            boolean isSearchable,
            boolean isStored,
            boolean hasDocValues,
            Number nullValue,
            Map meta
        ) {
            super(name, NumberFieldMapper.NumberType.INTEGER, isSearchable, isStored, hasDocValues, false, nullValue, meta);
        }

        @Override
        public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
            if (hasDocValues() == false) {
                return lookup -> List.of();
            }
            return new DocValueFetcher(docValueFormat(format, null), searchLookup.doc().getForField(this));
        }
    }

    public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n));

    private final boolean index;
    private final boolean hasDocValues;
    private final boolean store;
    private final NamedAnalyzer analyzer;
    private final boolean enablePositionIncrements;
    private final Integer nullValue;

    protected TokenCountFieldMapper(
        String simpleName,
        MappedFieldType defaultFieldType,
        MultiFields multiFields,
        CopyTo copyTo,
        Builder builder
    ) {
        super(simpleName, defaultFieldType, multiFields, copyTo);
        this.analyzer = builder.analyzer.getValue();
        this.enablePositionIncrements = builder.enablePositionIncrements.getValue();
        this.nullValue = builder.nullValue.getValue();
        this.index = builder.index.getValue();
        this.hasDocValues = builder.hasDocValues.getValue();
        this.store = builder.store.getValue();
    }

    @Override
    protected void parseCreateField(ParseContext context) throws IOException {
        final String value;
        if (context.externalValueSet()) {
            value = context.externalValue().toString();
        } else {
            value = context.parser().textOrNull();
        }

        if (value == null && nullValue == null) {
            return;
        }

        final int tokenCount;
        if (value == null) {
            tokenCount = nullValue;
        } else {
            tokenCount = countPositions(analyzer, name(), value, enablePositionIncrements);
        }

        context.doc().addAll(NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, index, hasDocValues, store));
    }

    /**
     * Count position increments in a token stream.  Package private for testing.
     * @param analyzer analyzer to create token stream
     * @param fieldName field name to pass to analyzer
     * @param fieldValue field value to pass to analyzer
     * @param enablePositionIncrements should we count position increments ?
     * @return number of position increments in a token stream
     * @throws IOException if tokenStream throws it
     */
    static int countPositions(Analyzer analyzer, String fieldName, String fieldValue, boolean enablePositionIncrements) throws IOException {
        try (TokenStream tokenStream = analyzer.tokenStream(fieldName, fieldValue)) {
            int count = 0;
            PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                if (enablePositionIncrements) {
                    count += position.getPositionIncrement();
                } else {
                    count += Math.min(1, position.getPositionIncrement());
                }
            }
            tokenStream.end();
            if (enablePositionIncrements) {
                count += position.getPositionIncrement();
            }
            return count;
        }
    }

    /**
     * Name of analyzer.
     * @return name of analyzer
     */
    public String analyzer() {
        return analyzer.name();
    }

    /**
     * Indicates if position increments are counted.
     * @return true if position increments are counted
     */
    public boolean enablePositionIncrements() {
        return enablePositionIncrements;
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }

    @Override
    public ParametrizedFieldMapper.Builder getMergeBuilder() {
        return new Builder(simpleName()).init(this);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy