All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.craftercms.search.commons.service.impl.TokenizedElementParser Maven / Gradle / Ivy

There is a newer version: 4.2.0
Show newest version
/*
 * Copyright (C) 2007-2019 Crafter Software Corporation. All Rights Reserved.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.craftercms.search.commons.service.impl;

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.craftercms.search.commons.service.ElementParser;
import org.craftercms.search.commons.service.ElementParserService;
import org.craftercms.search.commons.utils.BooleanUtils;
import org.dom4j.Attribute;
import org.dom4j.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Implementation of {@link ElementParser} that parses elements marked with a "tokenized" attribute. This attribute
 * indicates that the field should be tokenized and analyzed by the search engine, even though by definition it
 * isn't (like _s fields), so a copy of the field is created with a field name that can actually be tokenized
 * (like those ending with _t).
 *
 * @param <T> the type of document for the search engine
 *
 * @author Dejan Brkic
 * @author Alfonso Vásquez
 */
public class TokenizedElementParser<T> implements ElementParser<T> {

    private static final Logger logger = LoggerFactory.getLogger(TokenizedElementParser.class);

    /** Attribute name used when none is configured through {@link #setTokenizedAttributeName(String)}. */
    public static final String DEFAULT_TOKENIZED_ATTRIBUTE_NAME = "tokenized";

    /** Name of the attribute that marks an element as one that should be tokenized. */
    protected String tokenizedAttributeName;
    /** Maps a non-tokenized field suffix (key) to the suffix of its tokenized counterpart (value). */
    protected Map<String, String> fieldSuffixMappings;

    /**
     * Creates a parser that looks for the {@value #DEFAULT_TOKENIZED_ATTRIBUTE_NAME} attribute and maps
     * {@code _s} to {@code _t} and {@code _smv} to {@code _tmv}.
     */
    public TokenizedElementParser() {
        tokenizedAttributeName = DEFAULT_TOKENIZED_ATTRIBUTE_NAME;
        fieldSuffixMappings = new HashMap<>(2);

        fieldSuffixMappings.put("_s", "_t");
        fieldSuffixMappings.put("_smv", "_tmv");
    }

    /**
     * Sets the name of the attribute that marks an element as tokenized.
     *
     * @param tokenizedAttributeName the attribute name to look for on each element
     */
    public void setTokenizedAttributeName(String tokenizedAttributeName) {
        this.tokenizedAttributeName = tokenizedAttributeName;
    }

    /**
     * Replaces the suffix mappings used to derive the name of the tokenized copy. The map is stored as is
     * (no defensive copy is made).
     *
     * @param fieldSuffixMappings original suffix (key) to tokenized suffix (value)
     */
    public void setFieldSuffixMappings(Map<String, String> fieldSuffixMappings) {
        this.fieldSuffixMappings = fieldSuffixMappings;
    }

    /**
     * Handles an element whose tokenized attribute evaluates to {@code true}: the attribute is removed from the
     * element, a renamed copy is parsed if the element name ends with one of the configured suffixes, and
     * finally the element itself is parsed again through the parser service.
     *
     * @param element         the element to inspect; its tokenized attribute is removed when it is handled
     * @param fieldName       the field name of the element, used here only for logging
     * @param parentFieldName the parent field name, passed through to the parser service
     * @param doc             the search engine document, passed through to the parser service
     * @param parserService   the service used to parse the tokenized copy and the original element
     * @return {@code true} if the element was marked as tokenized and has been handled, {@code false} otherwise
     */
    @Override
    public boolean parse(Element element, String fieldName, String parentFieldName, T doc,
                         ElementParserService<T> parserService) {
        Attribute tokenizedAttribute = element.attribute(tokenizedAttributeName);
        if (tokenizedAttribute == null || !BooleanUtils.toBoolean(tokenizedAttribute.getValue())) {
            return false;
        }

        logger.debug("Parsing element '{}' marked to tokenize", fieldName);

        // Remove the attribute so that at the end the element can be parsed as a normal element. This is done
        // before the copy is created, so the copy is made from an element that no longer has the attribute.
        element.remove(tokenizedAttribute);

        String elementName = element.getName();

        for (Map.Entry<String, String> mapping : fieldSuffixMappings.entrySet()) {
            String suffix = mapping.getKey();
            if (elementName.endsWith(suffix)) {
                // Strip only the trailing suffix: the same character sequence may also appear earlier in the
                // name (e.g. "a_s_title_s"), and cutting at the first occurrence would truncate the name.
                String newElementName = StringUtils.removeEnd(elementName, suffix) + mapping.getValue();

                Element tokenizedElement = element.createCopy(newElementName);

                logger.debug("Created new element for tokenized search: {}", tokenizedElement.getName());

                parserService.parse(tokenizedElement, parentFieldName, doc);

                // Only the first matching suffix is applied.
                break;
            }
        }

        parserService.parse(element, parentFieldName, doc);

        return true;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy