All downloads are free. The search and download functionality uses the official Maven repository.

org.opencms.search.fields.CmsSearchFieldConfiguration Maven / Gradle / Ivy

Go to download

OpenCms is an enterprise-ready, easy to use website content management system based on Java and XML technology. Offering a complete set of features, OpenCms helps content managers worldwide to create and maintain beautiful websites fast and efficiently.

There is a newer version: 18.0
Show newest version
/*
 * File   : $Source$
 * Date   : $Date$
 * Version: $Revision$
 *
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.search.fields;

import org.opencms.file.CmsObject;
import org.opencms.file.CmsProperty;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsResource;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.loader.CmsLoaderException;
import org.opencms.main.CmsException;
import org.opencms.main.OpenCms;
import org.opencms.relations.CmsCategoryService;
import org.opencms.search.CmsSearchIndex;
import org.opencms.search.I_CmsSearchDocument;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.util.CmsStringUtil;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.uninverting.UninvertingReader.Type;

/**
 * Abstract implementation for OpenCms search field configurations.

* * @since 8.5.0 */ public class CmsSearchFieldConfiguration implements Comparable { /** A list of fields that should be lazy-loaded. */ public static final List LAZY_FIELDS = new ArrayList(); /** The name for the standard field configuration. */ public static final String STR_STANDARD = "standard"; static { LAZY_FIELDS.add(CmsSearchField.FIELD_CONTENT); LAZY_FIELDS.add(CmsSearchField.FIELD_CONTENT_BLOB); } /** The description of the configuration. */ private String m_description; /** Map to lookup the configured {@link CmsSearchField} instances by name. */ private Map m_fieldLookup; /** The list of configured {@link CmsSearchField} names. */ private List m_fieldNames; /** The list of configured {@link CmsSearchField} instances. */ private List m_fields; /** The current index. */ private CmsSearchIndex m_index; /** The name of the configuration. */ private String m_name; /** * Creates a new, empty field configuration.

*/ public CmsSearchFieldConfiguration() { m_fields = new ArrayList(); } /** * Returns the locale extended name for the given lookup String.

* * @param lookup the lookup String * @param locale the locale * * @return the locale extended name for the given lookup String */ public static final String getLocaleExtendedName(String lookup, Locale locale) { if (locale == null) { return lookup; } return getLocaleExtendedName(lookup, locale.toString()); } /** * Returns the locale extended name for the given lookup String.

* * @param lookup the lookup String * @param locale the locale * * @return the locale extended name for the given lookup String */ public static final String getLocaleExtendedName(String lookup, String locale) { StringBuffer result = new StringBuffer(32); result.append(lookup); result.append('_'); result.append(locale); return result.toString(); } /** * Creates a space separated list of all parent folders of the given root path.

* * @param rootPath the root path to get the parent folder list for * * @return a space separated list of all parent folders of the given root path */ public static String getParentFolderTokens(String rootPath) { if (CmsStringUtil.isEmpty(rootPath)) { return "/"; } StringBuffer result = new StringBuffer(128); String folderName = CmsResource.getFolderPath(rootPath); for (int i = 0; i < folderName.length(); i++) { char c = folderName.charAt(i); if (c == '/') { if (result.length() > 0) { result.append(' '); } result.append(folderName.substring(0, i + 1)); } } return result.toString(); } /** * Adds a field to this search field configuration.

* * @param field the field to add */ public void addField(CmsSearchField field) { if (field != null) { m_fields.add(field); } } /** * Adds fields.

* * @param fields the fields to add */ public void addFields(Collection fields) { for (CmsSearchField field : fields) { if (!getFieldNames().contains(field.getName())) { addField(field); } } } /** To allow sorting on a field the field must be added to the map given to {@link org.apache.lucene.uninverting.UninvertingReader#wrap(org.apache.lucene.index.DirectoryReader, Map)}. * The method adds the configured fields. * @param uninvertingMap the map to which the fields are added. */ public void addUninvertingMappings(Map uninvertingMap) { for (String fieldName : getFieldNames()) { uninvertingMap.put(fieldName, Type.SORTED); } } /** * @see java.lang.Comparable#compareTo(java.lang.Object) */ public int compareTo(CmsSearchFieldConfiguration obj) { return m_name.compareTo(obj.getName()); } /** * Creates the Lucene Document with this field configuration for the provided VFS resource, search index and content.

* * This triggers the indexing process for the given VFS resource according to the configuration * of the provided index.

* * The provided index resource contains the basic contents to index. * The provided search index contains the configuration what to index, such as the locale and * possible special field mappings.

* * @param cms the OpenCms user context used to access the OpenCms VFS * @param resource the resource to create the Lucene document from * @param index the search index to create the Document for * @param extraction the plain text content extracted from the document * * @return the Search Document for the given VFS resource and the given search index * * @throws CmsException if something goes wrong */ public I_CmsSearchDocument createDocument( CmsObject cms, CmsResource resource, CmsSearchIndex index, I_CmsExtractionResult extraction) throws CmsException { m_index = index; I_CmsSearchDocument document = m_index.createEmptyDocument(resource); List propertiesSearched = cms.readPropertyObjects(resource, true); List properties = cms.readPropertyObjects(resource, false); document = appendContentBlob(document, cms, resource, extraction, properties, propertiesSearched); document = appendPath(document, cms, resource, extraction, properties, propertiesSearched); document = appendType(document, cms, resource, extraction, properties, propertiesSearched); document = appendFileSize(document, cms, resource, extraction, properties, propertiesSearched); document = appendDates(document, cms, resource, extraction, properties, propertiesSearched); document = appendLocales(document, cms, resource, extraction, properties, propertiesSearched); document = appendProperties(document, cms, resource, extraction, properties, propertiesSearched); document = appendCategories(document, cms, resource, extraction, properties, propertiesSearched); document = appendFieldMappings(document, cms, resource, extraction, properties, propertiesSearched); document = appendAdditionalValuesToDcoument( document, cms, resource, extraction, properties, propertiesSearched); document = setBoost(document, cms, resource, extraction, properties, propertiesSearched); return document; } /** * @see java.lang.Object#equals(java.lang.Object) */ @Override public boolean equals(Object obj) { if (obj == this) { return true; 
} if ((obj instanceof CmsSearchFieldConfiguration)) { return ((CmsSearchFieldConfiguration)obj).getName().equals(m_name); } return false; } /** * Returns the description of this field configuration.

* * @return the description of this field configuration */ public String getDescription() { return m_description; } /** * Returns the configured {@link CmsSearchField} instance with the given name.

* * @param name the search field name to look up * * @return the configured {@link CmsSearchField} instance with the given name */ public CmsSearchField getField(String name) { if (m_fieldLookup == null) { // lazy initialize the field names m_fieldLookup = new HashMap(); for (CmsSearchField field : m_fields) { m_fieldLookup.put(field.getName(), field); } } return m_fieldLookup.get(name); } /** * Returns the list of configured field names (Strings).

* * @return the list of configured field names (Strings) */ public List getFieldNames() { if (m_fieldNames == null) { // lazy initialize the field names m_fieldNames = new ArrayList(); for (CmsSearchField field : m_fields) { m_fieldNames.add(field.getName()); } } // create a copy of the list to prevent changes in other classes return new ArrayList(m_fieldNames); } /** * Returns the list of configured {@link CmsSearchField} instances.

* * @return the list of configured {@link CmsSearchField} instances */ public List getFields() { return m_fields; } /** * Returns the index.

* * @return the index */ public CmsSearchIndex getIndex() { return m_index; } /** * Returns the name of this field configuration.

* * @return the name of this field configuration */ public String getName() { return m_name; } /** * @see java.lang.Object#hashCode() */ @Override public int hashCode() { return m_name.hashCode(); } /** * Initializes this field configuration.

*/ public void init() { // nothing to do here } /** * Sets the description of this field configuration.

* * @param description the description to set */ public void setDescription(String description) { m_description = description; } /** * Sets the index.

* * @param index the index to set */ public void setIndex(CmsSearchIndex index) { m_index = index; } /** * Sets the name of this field configuration.

* * @param name the name to set */ public void setName(String name) { m_name = name; } /** * Overriding this method allows to append some 'extra' values/fields to a document * without overriding the {@link #createDocument} method itself.

* * The method {@link #createDocument} reads all properties of the current resource which is * an expensive operation. In order to avoid reading those properties twice, this method has been introduced.

* * Compared with all the other appender methods the name of this method is generic.

* * In this default implementation the document is returned unchanged.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extraction the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by resource category information */ protected I_CmsSearchDocument appendAdditionalValuesToDcoument( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extraction, List properties, List propertiesSearched) { return document; } /** * Extends the given document by resource category information based on properties.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by resource category information * * @throws CmsException if something goes wrong */ protected I_CmsSearchDocument appendCategories( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) throws CmsException { CmsCategoryService categoryService = CmsCategoryService.getInstance(); document.addCategoryField(categoryService.readResourceCategories(cms, resource)); return document; } /** * Extends the given document by a field that contains the extracted content blob.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by a field that contains the extracted content blob */ protected I_CmsSearchDocument appendContentBlob( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) { if (extractionResult != null) { byte[] data = extractionResult.getBytes(); if (data != null) { document.addContentField(data); } } return document; } /** * Extends the given document by fields for date of creation, content and last modification.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by fields for date of creation, content and last modification */ protected I_CmsSearchDocument appendDates( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) { document.addDateField(CmsSearchField.FIELD_DATE_CREATED, resource.getDateCreated(), true); document.addDateField(CmsSearchField.FIELD_DATE_LASTMODIFIED, resource.getDateLastModified(), true); document.addDateField(CmsSearchField.FIELD_DATE_CONTENT, resource.getDateContent(), false); return document; } /** * Extends the given document by the mappings for the given field.

* * @param document the document to extend * @param field the field to create the mappings for * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by the mappings for the given field */ protected I_CmsSearchDocument appendFieldMapping( I_CmsSearchDocument document, CmsSearchField field, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) { StringBuffer text = new StringBuffer(); for (I_CmsSearchFieldMapping mapping : field.getMappings()) { String mapResult = mapping.getStringValue(cms, resource, extractionResult, properties, propertiesSearched); if (mapResult != null) { if (text.length() > 0) { text.append('\n'); } text.append(mapResult); } } if (text.length() > 0) { document.addSearchField(field, text.toString()); } return document; } /** * Extends the given document by the configured field mappings.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by the configured field mappings */ protected I_CmsSearchDocument appendFieldMappings( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) { for (CmsSearchField field : getFields()) { document = appendFieldMapping( document, field, cms, resource, extractionResult, properties, propertiesSearched); } return document; } /** * Extends the given document by the "size" field.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by the resource locales */ protected I_CmsSearchDocument appendFileSize( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) { document.addFileSizeField(resource.getLength()); return document; } /** * Extends the given document by the "res_locales" field.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extraction the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by the resource locales */ protected I_CmsSearchDocument appendLocales( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extraction, List properties, List propertiesSearched) { return document; } /** * Extends the given document by fields for VFS path lookup.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by fields for VFS path lookup */ protected I_CmsSearchDocument appendPath( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) { document.addPathField(resource.getRootPath()); document.addRootPathField(resource.getRootPath()); return document; } /** * Appends all direct properties, that are not empty or white space only to the document.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extraction the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by resource category information */ protected I_CmsSearchDocument appendProperties( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extraction, List properties, List propertiesSearched) { return document; } /** * Extends the given document by a field that contains the resource type name.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by a field that contains the resource type name * * @throws CmsLoaderException in case of errors identifying the resource type name */ protected I_CmsSearchDocument appendType( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) throws CmsLoaderException { // add the resource type to the document I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resource.getTypeId()); String typeName = "VFS"; if (type != null) { typeName = type.getTypeName(); } document.addTypeField(typeName); // add the file name suffix to the document String resName = CmsResource.getName(resource.getRootPath()); int index = resName.lastIndexOf('.'); if ((index != -1) && (resName.length() > index)) { document.addSuffixField(resName.substring(index + 1)); } return document; } /** * Extends the given document with a boost factor.

* * @param document the document to extend * @param cms the OpenCms context used for building the search index * @param resource the resource that is indexed * @param extractionResult the plain text extraction result from the resource * @param properties the list of all properties directly attached to the resource (not searched) * @param propertiesSearched the list of all searched properties of the resource * * @return the document extended by a boost factor */ protected I_CmsSearchDocument setBoost( I_CmsSearchDocument document, CmsObject cms, CmsResource resource, I_CmsExtractionResult extractionResult, List properties, List propertiesSearched) { String value; // set individual document boost factor for the search float boost = CmsSearchField.BOOST_DEFAULT; // note that the priority property IS searched, so you can easily flag whole folders as "high" or "low" value = CmsProperty.get(CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY, propertiesSearched).getValue(); if (value != null) { value = value.trim().toLowerCase(); if (value.equals(I_CmsSearchDocument.SEARCH_PRIORITY_MAX_VALUE)) { boost = 2.0f; } else if (value.equals(I_CmsSearchDocument.SEARCH_PRIORITY_HIGH_VALUE)) { boost = 1.5f; } else if (value.equals(I_CmsSearchDocument.SEARCH_PRIORITY_LOW_VALUE)) { boost = 0.5f; } } if (boost != CmsSearchField.BOOST_DEFAULT) { // set individual document boost factor if required document.setBoost(boost); } return document; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy