All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.stratio.cassandra.lucene.schema.mapping.Mapper Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2014 Stratio (http://stratio.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.stratio.cassandra.lucene.schema.mapping;

import com.google.common.base.MoreObjects;
import com.stratio.cassandra.lucene.IndexException;
import com.stratio.cassandra.lucene.column.Column;
import com.stratio.cassandra.lucene.column.Columns;
import com.stratio.cassandra.lucene.schema.analysis.StandardAnalyzers;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.math.BigInteger;
import java.net.InetAddress;
import java.util.*;
import java.util.function.Function;

import static java.util.stream.Collectors.toList;

/**
 * Class for mapping between Cassandra's columns and Lucene documents.
 *
 * @author Andres de la Pena {@literal }
 */
public abstract class Mapper {

    private static final Logger logger = LoggerFactory.getLogger(Mapper.class);

    /** A no-action analyzer for not tokenized {@link Mapper} implementations. */
    static final String KEYWORD_ANALYZER = StandardAnalyzers.KEYWORD.toString();

    static final List> TEXT_TYPES = Collections.singletonList(String.class);

    static final List> INTEGER_TYPES = Arrays.asList(
            String.class, Byte.class, Short.class, Integer.class, Long.class, BigInteger.class);

    static final List> NUMERIC_TYPES = Arrays.asList(String.class, Number.class);

    static final List> DATE_TYPES = Arrays.asList(
            String.class, Integer.class, Long.class, BigInteger.class, Date.class, UUID.class);

    static final List> NUMERIC_TYPES_WITH_DATE = Arrays.asList(String.class, Number.class, Date.class);

    static final List> PRINTABLE_TYPES = Arrays.asList(
            String.class, Number.class, UUID.class, Boolean.class, InetAddress.class);

    static final List> EMPTY_TYPE_LIST = Collections.emptyList();

    /** The store field in Lucene default option. */
    static final Store STORE = Store.NO;

    /** If the field must be validated when no specified. */
    static final boolean DEFAULT_VALIDATED = false;

    /** The name of the Lucene field. */
    public final String field;

    /** If the field produces doc values. */
    public final Boolean docValues;

    /** If the field must be validated. */
    public final Boolean validated;

    /** The name of the analyzer to be used. */
    public final String analyzer;

    /** The names of the columns to be mapped. */
    public final List mappedColumns;

    /** The names of the columns to be mapped. */
    public final List mappedCells;

    /** The supported column value data types. */
    public final List> supportedTypes;

    /** The explicitly excluded column value data types. */
    public final List> excludedTypes;

    /** If this mapper support collections. */
    public final Boolean supportsCollections;

    /**
     * Builds a new {@link Mapper} supporting the specified types for indexing.
     *
     * @param field the name of the field
     * @param docValues if the mapper supports doc values
     * @param validated if the field must be validated
     * @param analyzer the name of the analyzer to be used
     * @param mappedColumns the names of the columns to be mapped
     * @param supportedTypes the supported column value data types
     * @param excludedTypes the explicitly excluded value data types
     * @param supportsCollections if this mapper supports collections
     */
    protected Mapper(String field,
                     Boolean docValues,
                     Boolean validated,
                     String analyzer,
                     List mappedColumns,
                     List> supportedTypes,
                     List> excludedTypes,
                     Boolean supportsCollections) {
        if (StringUtils.isBlank(field)) {
            throw new IndexException("Field name is required");
        }
        this.field = field;
        this.docValues = docValues;
        this.validated = validated == null ? DEFAULT_VALIDATED : validated;
        this.analyzer = analyzer;
        this.mappedColumns = mappedColumns.stream().filter(Objects::nonNull).collect(toList()); // Remove nulls
        this.mappedCells = this.mappedColumns.stream().map(Column::parseCellName).collect(toList());
        this.supportedTypes = supportedTypes;
        this.excludedTypes= excludedTypes;
        this.supportsCollections = supportsCollections;
    }

    /**
     * Returns the Lucene {@link IndexableField}s resulting from the mapping of the specified {@link Columns}.
     *
     * @param columns the columns
     * @return a list of indexable fields
     */
    public abstract List indexableFields(Columns columns);

    /**
     * Returns the Lucene {@link IndexableField}s resulting from the mapping of the specified {@link Columns}, ignoring
     * any mapping errors.
     *
     * @param columns the columns
     * @return a list of indexable fields
     */
    public List bestEffortIndexableFields(Columns columns) {
        return bestEffort(columns, this::indexableFields);
    }

     List bestEffort(T base, Function> mapping) {
        try {
            return mapping.apply(base);
        } catch (IndexException e) {
            logger.warn("Error in Lucene index:\n\t" +
                        "while mapping : {}\n\t" +
                        "with mapper   : {}\n\t" +
                        "caused by     : {}", base, this, e.getMessage());
            return Collections.emptyList();
        }
    }

    /**
     * Validates the specified {@link Columns} if {#validated}.
     *
     * @param columns the columns to be validated
     */
    public void validate(Columns columns) {
        if (validated) {
            indexableFields(columns);
        }
    }

    /**
     * Returns the {@link SortField} resulting from the mapping of the specified object.
     *
     * @param name the name of the sorting field
     * @param reverse {@code true} the sort must be reversed, {@code false} otherwise
     * @return the sort field
     */
    public abstract SortField sortField(String name, boolean reverse);

    /**
     * Returns if this maps the specified cell.
     *
     * @param cell the cell name
     * @return {@code true} if this maps the column, {@code false} otherwise
     */
    public boolean mapsCell(String cell) {
        return mappedCells.stream().anyMatch(x -> x.equals(cell));
    }

    void validateTerm(String name, BytesRef term) {
        int maxSize = IndexWriter.MAX_TERM_LENGTH;
        int size = term.length;
        if (size > maxSize) {
            throw new IndexException("Discarding immense term in field='{}', " +
                                     "Lucene only allows terms with at most " +
                                     "{} bytes in length; got {} bytes: {}...",
                                     name, maxSize, size, term.utf8ToString().substring(0, 10));
        }
    }

    protected MoreObjects.ToStringHelper toStringHelper(Object self) {
        return MoreObjects.toStringHelper(self).add("field", field).add("validated", validated);
    }

    /** {@inheritDoc} */
    @Override
    public String toString() {
        return toStringHelper(this).toString();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy