org.opencms.search.solr.CmsSolrDocumentXmlContent Maven / Gradle / Ivy
Show all versions of opencms-test Show documentation
/*
* File : $Source$
* Date : $Date$
* Version: $Revision$
*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.search.solr;
import org.opencms.file.CmsFile;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsResource;
import org.opencms.file.types.CmsResourceTypeXmlContent;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.i18n.CmsLocaleManager;
import org.opencms.jsp.CmsJspTagContainer;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.search.CmsIndexException;
import org.opencms.search.CmsSearchIndex;
import org.opencms.search.documents.A_CmsVfsDocument;
import org.opencms.search.documents.CmsIndexNoContentException;
import org.opencms.search.documents.Messages;
import org.opencms.search.extractors.CmsExtractionResult;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.search.fields.CmsSearchField;
import org.opencms.search.fields.CmsSearchFieldConfiguration;
import org.opencms.search.galleries.CmsGalleryNameMacroResolver;
import org.opencms.util.CmsStringUtil;
import org.opencms.xml.A_CmsXmlDocument;
import org.opencms.xml.CmsXmlContentDefinition;
import org.opencms.xml.CmsXmlUtils;
import org.opencms.xml.content.CmsXmlContent;
import org.opencms.xml.content.CmsXmlContentFactory;
import org.opencms.xml.content.I_CmsXmlContentHandler;
import org.opencms.xml.types.CmsXmlNestedContentDefinition;
import org.opencms.xml.types.I_CmsXmlContentValue;
import org.opencms.xml.types.I_CmsXmlSchemaType;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import com.google.common.collect.Sets;
/**
* Special document text extraction factory for Solr index.
*
* @since 8.5.0
*/
public class CmsSolrDocumentXmlContent extends A_CmsVfsDocument {
/**
 * Helper that selects the string used as gallery name for one locale of an XML content and
 * resolves the macros contained in that string.
 *
 * The candidates are, in order of precedence: the content value mapped to the gallery name, the
 * default value of that field, the content value mapped to the title and the default value of the
 * title field. If none of them is set, the title property of the content file is used instead.
 */
private static class GalleryNameChooser {

    /** CMS context for this instance. */
    private CmsObject m_cms;

    /** Current XML content. */
    private A_CmsXmlDocument m_content;

    /** Default value of field mapped to gallery name. */
    private String m_defaultGalleryNameValue;

    /** Default value of field mapped to title. */
    private String m_defaultTitleValue;

    /** Current locale. */
    private Locale m_locale;

    /** Content value mapped to gallery name. */
    private String m_mappedGalleryNameValue;

    /** Content value mapped to title. */
    private String m_mappedTitleValue;

    /**
     * Creates a new chooser for the given content and locale.
     *
     * @param cms the CMS context
     * @param content the XML content
     * @param locale the locale in the XML content
     */
    public GalleryNameChooser(CmsObject cms, A_CmsXmlDocument content, Locale locale) {

        m_cms = cms;
        m_content = content;
        m_locale = locale;
    }

    /**
     * Determines the gallery name.
     *
     * All candidate values are expected to have been passed in through the setters before this
     * method is called. The first candidate that is not empty wins and has its macros resolved;
     * the title property used as last resort is returned without macro resolution.
     *
     * @return the gallery name
     *
     * @throws CmsException if something goes wrong
     */
    public String getGalleryName() throws CmsException {

        // Prioritize gallery name over title, and actual content values over defaults
        String[] candidates = {
            m_mappedGalleryNameValue,
            m_defaultGalleryNameValue,
            m_mappedTitleValue,
            m_defaultTitleValue};
        for (int i = 0; i < candidates.length; i++) {
            String candidate = candidates[i];
            if (CmsStringUtil.isEmptyOrWhitespaceOnly(candidate)) {
                continue;
            }
            return new CmsGalleryNameMacroResolver(m_cms, m_content, m_locale).resolveMacros(candidate);
        }
        // no candidate available, fall back to the title property of the file itself
        return m_cms.readPropertyObject(m_content.getFile(), CmsPropertyDefinition.PROPERTY_TITLE, false).getValue();
    }

    /**
     * Stores the default value of the field mapped to the gallery name.
     *
     * @param defaultGalleryNameValue the default value to remember
     */
    public void setDefaultGalleryNameValue(String defaultGalleryNameValue) {

        m_defaultGalleryNameValue = defaultGalleryNameValue;
    }

    /**
     * Stores the default value of the field mapped to the title.
     *
     * @param defaultTitleValue the default value to remember
     */
    public void setDefaultTitleValue(String defaultTitleValue) {

        m_defaultTitleValue = defaultTitleValue;
    }

    /**
     * Stores the content value mapped to the gallery name.
     *
     * @param mappedGalleryNameValue the content value to remember
     */
    public void setMappedGalleryNameValue(String mappedGalleryNameValue) {

        m_mappedGalleryNameValue = mappedGalleryNameValue;
    }

    /**
     * Stores the content value mapped to the title.
     *
     * @param mappedTitleValue the content value to remember
     */
    public void setMappedTitleValue(String mappedTitleValue) {

        m_mappedTitleValue = mappedTitleValue;
    }
}
/**
 * Mapping name used to indicate that the value should be used for the gallery name.
 * Content handler mappings are compared against this name during extraction.
 */
public static final String MAPPING_GALLERY_NAME = "galleryName";
/** The solr document type name for xml-contents. */
public static final String TYPE_XMLCONTENT_SOLR = "xmlcontent-solr";
/** The log object for this class, used to report values that could not be extracted. */
private static final Log LOG = CmsLog.getLog(CmsSolrDocumentXmlContent.class);
/**
 * Creates a new document factory with the given document type name.
 *
 * The name is handed to the super class unchanged.
 *
 * @param name the name for the document type
 */
public CmsSolrDocumentXmlContent(String name) {

    super(name);
}
/**
 * Collects all possible XPaths of simple (non-nested) values for a content definition.
 *
 * The type sequence of the definition is walked recursively: nested content definitions are
 * descended into, every other type contributes its path to the result set. The collected paths
 * are built as <code>path + "/" + typeName</code> and carry no index.
 *
 * @param cms the CMS context to use
 * @param def the content definition
 * @param path the path of the given content definition
 * @param result the set used to collect the XPaths
 */
public static void collectSchemaXpathsForSimpleValues(
    CmsObject cms,
    CmsXmlContentDefinition def,
    String path,
    Set<String> result) {

    // typed list: a raw List cannot be iterated with an I_CmsXmlSchemaType loop variable
    List<I_CmsXmlSchemaType> nestedTypes = def.getTypeSequence();
    for (I_CmsXmlSchemaType nestedType : nestedTypes) {
        String subPath = path + "/" + nestedType.getName();
        if (nestedType instanceof CmsXmlNestedContentDefinition) {
            // complex type: descend into the nested definition using the extended path
            CmsXmlContentDefinition nestedDef = ((CmsXmlNestedContentDefinition)nestedType).getNestedContentDefinition();
            collectSchemaXpathsForSimpleValues(cms, nestedDef, subPath, result);
        } else {
            result.add(subPath);
        }
    }
}
/**
 * Extracts the content of a single XML content resource without forcing a locale.
 *
 * This is a shortcut for the four-argument variant called with <code>null</code> as forced locale.
 *
 * @param cms the cms context
 * @param resource the resource
 * @param index the used index
 *
 * @return the extraction result
 *
 * @throws CmsException in case reading or unmarshalling the content fails
 */
public static CmsExtractionResult extractXmlContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
throws CmsException {

    // no locale is forced, so the locales of the content itself are extracted
    final Locale forceLocale = null;
    return extractXmlContent(cms, resource, index, forceLocale);
}
/**
* Extracts the content of a single XML content resource.
*
* @param cms the cms context
* @param resource the resource
* @param index the used index
* @param forceLocale if set, only the content values for the given locale will be extracted
*
* @return the extraction result
*
* @throws CmsException in case reading or unmarshalling the content fails
*/
public static CmsExtractionResult extractXmlContent(
CmsObject cms,
CmsResource resource,
CmsSearchIndex index,
Locale forceLocale)
throws CmsException {
// un-marshal the content
CmsFile file = cms.readFile(resource);
if (file.getLength() <= 0) {
throw new CmsIndexNoContentException(
Messages.get().container(Messages.ERR_NO_CONTENT_1, resource.getRootPath()));
}
A_CmsXmlDocument xmlContent = CmsXmlContentFactory.unmarshal(cms, file);
// initialize some variables
Map> items = new HashMap>();
Map fieldMappings = new HashMap();
List contentLocales = forceLocale != null
? Collections.singletonList(forceLocale)
: xmlContent.getLocales();
Locale resourceLocale = index.getLocaleForResource(cms, resource, contentLocales);
LinkedHashMap localeItems = null;
// loop over the locales of the content
for (Locale locale : contentLocales) {
GalleryNameChooser galleryNameChooser = new GalleryNameChooser(cms, xmlContent, locale);
localeItems = new LinkedHashMap();
StringBuffer textContent = new StringBuffer();
// store the locales of the content as space separated field
// loop over the available element paths of the current content locale
List paths = xmlContent.getNames(locale);
for (String xpath : paths) {
// try to get the value extraction for the current element path
String extracted = null;
I_CmsXmlContentValue value = xmlContent.getValue(xpath, locale);
try {
extracted = value.getPlainText(cms);
if (CmsStringUtil.isEmptyOrWhitespaceOnly(extracted) && value.isSimpleType()) {
// no text value for simple type, so take the string value as item
extracted = value.getStringValue(cms);
}
} catch (Exception e) {
// it can happen that a exception is thrown while extracting a single value
LOG.warn(Messages.get().container(Messages.LOG_EXTRACT_VALUE_2, xpath, resource), e);
}
// put the extraction to the items and to the textual content
if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
localeItems.put(xpath, extracted);
}
if (value.getContentDefinition().getContentHandler().isSearchable(value)
&& CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
// value is search-able and the extraction is not empty, so added to the textual content
textContent.append(extracted);
textContent.append('\n');
}
List mappings = xmlContent.getHandler().getMappings(value.getPath());
if (mappings.size() > 0) {
// mappings are defined, lets check if we have mappings that interest us
for (String mapping : mappings) {
if (mapping.startsWith(I_CmsXmlContentHandler.MAPTO_PROPERTY)) {
// this is a property mapping
String propertyName = mapping.substring(mapping.lastIndexOf(':') + 1);
if (CmsPropertyDefinition.PROPERTY_TITLE.equals(propertyName)
|| CmsPropertyDefinition.PROPERTY_DESCRIPTION.equals(propertyName)) {
if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
// search index field names and property names are different ["Title" vs. "title"]
String fieldName = null;
if (CmsPropertyDefinition.PROPERTY_TITLE.equals(propertyName)) {
galleryNameChooser.setMappedTitleValue(extracted);
} else {
// if field is not title, it must be description
fieldName = CmsSearchField.FIELD_DESCRIPTION;
fieldMappings.put(
CmsSearchFieldConfiguration.getLocaleExtendedName(fieldName, locale) + "_s",
extracted);
}
}
}
} else if (mapping.equals(MAPPING_GALLERY_NAME)) {
galleryNameChooser.setMappedGalleryNameValue(value.getPlainText(cms));
}
}
}
}
Set xpaths = Sets.newHashSet();
collectSchemaXpathsForSimpleValues(cms, xmlContent.getContentDefinition(), "", xpaths);
for (String xpath : xpaths) {
// mappings always are stored with indexes, so we add them to the xpath
List mappings = xmlContent.getHandler().getMappings(CmsXmlUtils.createXpath(xpath, 1));
for (String mapping : mappings) {
if (mapping.equals(MAPPING_GALLERY_NAME)
|| mapping.equals(
I_CmsXmlContentHandler.MAPTO_PROPERTY + CmsPropertyDefinition.PROPERTY_TITLE)) {
String defaultValue = xmlContent.getHandler().getDefault(
cms,
xmlContent.getFile(),
null,
xpath,
locale);
if (mapping.equals(MAPPING_GALLERY_NAME)) {
galleryNameChooser.setDefaultGalleryNameValue(defaultValue);
} else {
galleryNameChooser.setDefaultTitleValue(defaultValue);
}
}
}
}
final String galleryTitleFieldKey = CmsSearchFieldConfiguration.getLocaleExtendedName(
CmsSearchField.FIELD_TITLE_UNSTORED,
locale) + "_s";
final String galleryNameValue = galleryNameChooser.getGalleryName();
fieldMappings.put(galleryTitleFieldKey, galleryNameValue);
// handle the textual content
if (textContent.length() > 0) {
// add the textual content with a localized key to the items
//String key = CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale);
//items.put(key, textContent.toString());
// use the default locale of this resource as general text content for the extraction result
localeItems.put(I_CmsExtractionResult.ITEM_CONTENT, textContent.toString());
}
items.put(locale, localeItems);
}
// if the content is locale independent, it should have only one content locale, but that should be indexed for all available locales.
// TODO: One could think of different indexing behavior, i.e., index only for getDefaultLocales(cms,resource)
// But using getAvailableLocales(cms,resource) does not work, because locale-available is set to "en" for all that content.
if ((xmlContent instanceof CmsXmlContent) && ((CmsXmlContent)xmlContent).isLocaleIndependent()) {
if (forceLocale != null) {
items.put(forceLocale, localeItems);
} else {
for (Locale l : OpenCms.getLocaleManager().getAvailableLocales()) {
items.put(l, localeItems);
}
}
}
// add the locales that have been indexed for this document as item and return the extraction result
// fieldMappings.put(CmsSearchField.FIELD_RESOURCE_LOCALES, locales.toString().trim());
return new CmsExtractionResult(resourceLocale, items, fieldMappings);
}
/**
 * Extracts the content of the given resource together with the content of its detail-only
 * container pages and merges everything into one result.
 *
 * For each detail-only resource the locale is derived from the name of its parent folder. Any
 * failure during extraction is wrapped into a {@link CmsIndexException}.
 *
 * @see org.opencms.search.documents.CmsDocumentXmlContent#extractContent(org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.CmsSearchIndex)
 */
@Override
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
throws CmsException {

    logContentExtraction(resource, index);
    try {
        I_CmsExtractionResult result = null;
        // typed list of the extraction results of all detail-only container pages
        List<I_CmsExtractionResult> ex = new ArrayList<I_CmsExtractionResult>();
        for (CmsResource detailContainers : CmsJspTagContainer.getDetailOnlyResources(cms, resource)) {
            CmsSolrDocumentContainerPage containerpageExtractor = new CmsSolrDocumentContainerPage("");
            // the name of the parent folder (without the trailing character) is used as locale name
            String localeTemp = detailContainers.getRootPath();
            localeTemp = CmsResource.getParentFolder(localeTemp);
            localeTemp = CmsResource.getName(localeTemp);
            localeTemp = localeTemp.substring(0, localeTemp.length() - 1);
            Locale locale = CmsLocaleManager.getLocale(localeTemp);
            if (CmsJspTagContainer.useSingleLocaleDetailContainers(
                OpenCms.getSiteManager().getSiteRoot(resource.getRootPath()))
                && locale.equals(CmsLocaleManager.getDefaultLocale())) {
                // in case of single locale detail containers do not force the locale
                locale = null;
            }
            I_CmsExtractionResult containersExtractionResult = containerpageExtractor.extractContent(
                cms,
                detailContainers,
                index,
                locale);
            // only use the locales of the resource itself, not the ones of the detail containers page
            containersExtractionResult.getContentItems().remove(CmsSearchField.FIELD_RESOURCE_LOCALES);
            ex.add(containersExtractionResult);
        }
        result = extractXmlContent(cms, resource, index);
        result = result.merge(ex);
        return result;
    } catch (Throwable t) {
        throw new CmsIndexException(Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource), t);
    }
}
/**
 * Returns the document keys for the given resource types and mime types.
 *
 * If the resource types contain the wildcard <code>"*"</code>, it is replaced by the names of all
 * configured XML content resource types before the call is delegated to the super class.
 *
 * @see org.opencms.search.documents.I_CmsDocumentFactory#getDocumentKeys(java.util.List, java.util.List)
 */
@Override
public List<String> getDocumentKeys(List<String> resourceTypes, List<String> mimeTypes) throws CmsException {

    if (resourceTypes.contains("*")) {
        // we need to find all configured XML content types
        List<String> allTypes = new ArrayList<String>();
        for (I_CmsResourceType resourceType : OpenCms.getResourceManager().getResourceTypes()) {
            if ((resourceType instanceof CmsResourceTypeXmlContent)
                // either we need a configured schema, or another class name (which must then contain an inline schema)
                && (((CmsResourceTypeXmlContent)resourceType).getConfiguration().containsKey(
                    CmsResourceTypeXmlContent.CONFIGURATION_SCHEMA)
                    || !CmsResourceTypeXmlContent.class.equals(resourceType.getClass()))) {
                // add the XML content resource type name
                allTypes.add(resourceType.getTypeName());
            }
        }
        resourceTypes = allTypes;
    }
    return super.getDocumentKeys(resourceTypes, mimeTypes);
}
/**
 * Reports that the extraction result of this factory does NOT depend on a locale, since the
 * Solr index content is stored in multiple languages.
 *
 * @return always <code>false</code>
 *
 * @see org.opencms.search.documents.I_CmsDocumentFactory#isLocaleDependend()
 */
public boolean isLocaleDependend() {

    return false;
}
/**
 * Reports that this document factory uses the cache.
 *
 * @return always <code>true</code>
 *
 * @see org.opencms.search.documents.I_CmsDocumentFactory#isUsingCache()
 */
public boolean isUsingCache() {

    return true;
}
}