All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencms.search.extractors.CmsExtractionResult Maven / Gradle / Ivy

Go to download

OpenCms is an enterprise-ready, easy to use website content management system based on Java and XML technology. Offering a complete set of features, OpenCms helps content managers worldwide to create and maintain beautiful websites fast and efficiently.

There is a newer version: 18.0
Show newest version
/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.search.extractors;

import org.opencms.util.CmsStringUtil;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * The result of a document text extraction.

* * This data structure contains the extracted text as well as (optional) * meta information extracted from the document.

* * @since 6.0.0 */ public class CmsExtractionResult implements I_CmsExtractionResult, Serializable { /** UID required for safe serialization. */ private static final long serialVersionUID = 1465447302192195154L; /** The extracted individual content items. */ private Map> m_contentItems; /** The locales of the content. */ private Collection m_locales; /** The default locale of the content. Can be null for unilingual extraction results. */ private Locale m_defaultLocale; /** The extracted values directly added to the index. */ private Map m_fieldMappings; /** The serialized version of this object. */ private byte[] m_serializedVersion; /** Creates a new multilingual extraction result. * @param defaultLocale the default (best fitting) locale of the result. * @param multilingualContentItems the content items for the different locales * @param fieldMappings special mappings to search fields with values extracted from the content */ public CmsExtractionResult( Locale defaultLocale, Map> multilingualContentItems, Map fieldMappings) { m_defaultLocale = defaultLocale; m_contentItems = null != multilingualContentItems ? removeNullEntries(multilingualContentItems) : new HashMap>(1); // ensure that a version for the default locale is present if (null == m_contentItems.get(m_defaultLocale)) { m_contentItems.put(m_defaultLocale, new LinkedHashMap()); } m_fieldMappings = null != fieldMappings ? fieldMappings : new HashMap(); // set the locales m_locales = new HashSet(); for (Locale locale : m_contentItems.keySet()) { if (null != locale) { m_locales.add(locale); } } } /** * Creates a new extraction result without meta information and without additional fields.

* * @param content the extracted content */ public CmsExtractionResult(String content) { this(content, null, null); m_contentItems.get(m_defaultLocale).put(ITEM_RAW, content); } /** * Creates a new unilingual extraction result.

* * @param content the extracted content * @param contentItems the individual extracted content items */ public CmsExtractionResult(String content, LinkedHashMap contentItems) { this(content, contentItems, null); } /** * Creates a new unilingual extraction result.

* * @param content the extracted content * @param contentItems the individual extracted content items * @param fieldMappings extraction results that should directly be indexed */ public CmsExtractionResult( String content, LinkedHashMap contentItems, Map fieldMappings) { m_defaultLocale = null; m_locales = new HashSet(); m_contentItems = new LinkedHashMap>(1); if (fieldMappings != null) { m_fieldMappings = fieldMappings; } else { m_fieldMappings = new HashMap(); } if (contentItems != null) { m_contentItems.put(m_defaultLocale, contentItems); } else { m_contentItems.put(m_defaultLocale, new LinkedHashMap()); } if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(content)) { m_contentItems.get(m_defaultLocale).put(ITEM_CONTENT, content); } } /** * Creates an extraction result from a serialized byte array.

* * @param bytes the serialized version of the extraction result * * @return extraction result created from the serialized byte array */ public static final CmsExtractionResult fromBytes(byte[] bytes) { Object obj = null; if (bytes != null) { // create an object out of the byte array try { ByteArrayInputStream in = new ByteArrayInputStream(bytes); ObjectInputStream oin = new ObjectInputStream(in); obj = oin.readObject(); oin.close(); } catch (Exception e) { // ignore, null is not an instance of CmsExtractionResult } if (obj instanceof CmsExtractionResult) { CmsExtractionResult result = (CmsExtractionResult)obj; result.m_serializedVersion = bytes; return result; } } return null; } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#getBytes() */ public byte[] getBytes() { // check if we have a cached version of the serialized object available if (m_serializedVersion != null) { return m_serializedVersion; } try { // serialize this object and return ByteArrayOutputStream out = new ByteArrayOutputStream(512); ObjectOutputStream oout = new ObjectOutputStream(out); oout.writeObject(this); oout.close(); m_serializedVersion = out.toByteArray(); } catch (Exception e) { // ignore, serialized version will be null } return m_serializedVersion; } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#getContent() */ public String getContent() { return m_contentItems.get(m_defaultLocale).get(ITEM_CONTENT); } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#getContent(java.util.Locale) */ public String getContent(Locale locale) { Map localeItems = m_contentItems.get(locale); return null == localeItems ? null : localeItems.get(ITEM_CONTENT); } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#getContentItems() */ public LinkedHashMap getContentItems() { return m_contentItems.get(m_defaultLocale); } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#getContentItems(java.util.Locale) */ public LinkedHashMap getContentItems(Locale locale) { LinkedHashMap localeItems = m_contentItems.get(locale); return null == localeItems ? new LinkedHashMap() : localeItems; } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#getDefaultLocale() */ public Locale getDefaultLocale() { return m_defaultLocale; } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#getFieldMappings() */ public Map getFieldMappings() { return m_fieldMappings; } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#getLocales() */ public Collection getLocales() { return m_locales; } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#merge(java.util.List) */ public I_CmsExtractionResult merge(List extractionResults) { //prepare copy Map> contentItems = new HashMap>( m_contentItems.size()); for (Locale locale : m_contentItems.keySet()) { LinkedHashMap originalLocalValues = m_contentItems.get(locale); LinkedHashMap localeValues = new LinkedHashMap(originalLocalValues); contentItems.put(locale, localeValues); } HashMap fieldMappings = new HashMap(m_fieldMappings.size()); for (String fieldMapping : m_fieldMappings.keySet()) { fieldMappings.put(fieldMapping, m_fieldMappings.get(fieldMapping)); } //merge content from the other extraction results for (Locale locale : contentItems.keySet()) { Map localeValues = contentItems.get(locale); for (I_CmsExtractionResult result : extractionResults) { if (result.getLocales().contains(locale)) { Map resultLocaleValues = result.getContentItems(locale); for (String item : Arrays.asList(ITEMS_TO_MERGE)) { localeValues = mergeItem(item, localeValues, resultLocaleValues); } } } } return new CmsExtractionResult(m_defaultLocale, contentItems, fieldMappings); } /** * @see org.opencms.search.extractors.I_CmsExtractionResult#release() */ public void release() { if (!m_contentItems.isEmpty()) { m_contentItems.clear(); } m_contentItems = null; m_serializedVersion = null; } /** Merges the item from the resultLocaleValues into the corresponding item of the localeValues. * @param item the item to merge * @param localeValues the values where the item gets merged into * @param resultLocaleValues the values where the item to merge is read from * @return the modified localeValues with the merged item */ private Map mergeItem( String item, Map localeValues, Map resultLocaleValues) { if (resultLocaleValues.get(item) != null) { if (localeValues.get(item) != null) { localeValues.put(item, localeValues.get(item) + " " + resultLocaleValues.get(item)); } else { localeValues.put(item, resultLocaleValues.get(item)); } } return localeValues; } /** Replaces all null values with empty maps. * @param multilingualContentItems the map where replacement should take place * @return the map with all null values replaced with empty maps. */ private Map> removeNullEntries( Map> multilingualContentItems) { for (Locale locale : multilingualContentItems.keySet()) { if (null == multilingualContentItems.get(locale)) { multilingualContentItems.put(locale, new LinkedHashMap()); } } return multilingualContentItems; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy