All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencms.search.CmsLuceneDocument Maven / Gradle / Ivy

Go to download

OpenCms is an enterprise-ready, easy to use website content management system based on Java and XML technology. Offering a complete set of features, OpenCms helps content managers worldwide to create and maintain beautiful websites fast and efficiently.

There is a newer version: 18.0
Show newest version
/*
 * File   : $Source$
 * Date   : $Date$
 * Version: $Revision$
 *
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.search;

import org.opencms.main.CmsRuntimeException;
import org.opencms.main.OpenCms;
import org.opencms.relations.CmsCategory;
import org.opencms.search.fields.CmsLuceneField;
import org.opencms.search.fields.CmsSearchField;
import org.opencms.search.fields.CmsSearchFieldConfiguration;

import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.apache.tika.io.IOUtils;

/**
 * A Lucene search document implementation.

*/ public class CmsLuceneDocument implements I_CmsSearchDocument { /** * Type for a stored-only field. */ public static final FieldType NOT_STORED_ANALYSED_TYPE; /** * Type for a stored-and analyzed fields. */ public static final FieldType STORED_ANALYSED_TYPE; /** * Type for a stored-only field. */ public static final FieldType STORED_NOT_ANALYSED_TYPE; static { STORED_ANALYSED_TYPE = new FieldType(); STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); STORED_ANALYSED_TYPE.setOmitNorms(false); STORED_ANALYSED_TYPE.setStored(true); STORED_ANALYSED_TYPE.setTokenized(true); STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false); STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false); STORED_ANALYSED_TYPE.setStoreTermVectors(false); STORED_ANALYSED_TYPE.freeze(); } static { NOT_STORED_ANALYSED_TYPE = new FieldType(); NOT_STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); NOT_STORED_ANALYSED_TYPE.setOmitNorms(false); NOT_STORED_ANALYSED_TYPE.setStored(false); NOT_STORED_ANALYSED_TYPE.setTokenized(true); NOT_STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false); NOT_STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false); NOT_STORED_ANALYSED_TYPE.setStoreTermVectors(false); NOT_STORED_ANALYSED_TYPE.freeze(); } static { STORED_NOT_ANALYSED_TYPE = new FieldType(); STORED_NOT_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS); STORED_NOT_ANALYSED_TYPE.setOmitNorms(false); STORED_NOT_ANALYSED_TYPE.setStored(true); STORED_NOT_ANALYSED_TYPE.setTokenized(false); STORED_NOT_ANALYSED_TYPE.setStoreTermVectorPositions(false); STORED_NOT_ANALYSED_TYPE.setStoreTermVectorOffsets(false); STORED_NOT_ANALYSED_TYPE.setStoreTermVectors(false); STORED_NOT_ANALYSED_TYPE.freeze(); } /** The Lucene document. */ private Document m_doc; /** The fields stored in this document. */ private Map m_fields; /** Holds the score for this document. */ private float m_score; /** * Public constructor.

* * @param doc the Lucene document */ public CmsLuceneDocument(Document doc) { m_doc = doc; m_fields = new HashMap(); } /** * Generate a list of date terms for the optimized date range search.

* * @param date the date for get the date terms for * * @return a list of date terms for the optimized date range search * * @see CmsSearchIndex#getDateRangeSpan(long, long) */ public static String getDateTerms(long date) { Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone()); cal.setTimeInMillis(date); String day = CmsSearchIndex.DATES[cal.get(5)]; String month = CmsSearchIndex.DATES[(cal.get(2) + 1)]; String year = String.valueOf(cal.get(1)); StringBuffer result = new StringBuffer(); result.append(year); result.append(month); result.append(day); result.append(' '); result.append(year); result.append(month); result.append(' '); result.append(year); return result.toString(); } /** * @see org.opencms.search.I_CmsSearchDocument#addCategoryField(java.util.List) */ public void addCategoryField(List categories) { if ((categories != null) && (categories.size() > 0)) { StringBuffer categoryBuffer = new StringBuffer(128); for (CmsCategory category : categories) { categoryBuffer.append(category.getPath()); categoryBuffer.append(' '); } if (categoryBuffer.length() > 0) { Field field = new Field( CmsSearchField.FIELD_CATEGORY, categoryBuffer.toString().toLowerCase(), STORED_ANALYSED_TYPE); field.setBoost(0); add(field); } } else { // synthetic "unknown" category if no category property defined for resource Field field = new Field( CmsSearchField.FIELD_CATEGORY, CmsSearchCategoryCollector.UNKNOWN_CATEGORY, STORED_ANALYSED_TYPE); add(field); } } /** * @see org.opencms.search.I_CmsSearchDocument#addContentField(byte[]) */ public void addContentField(byte[] data) { Field field = new StoredField(CmsSearchField.FIELD_CONTENT_BLOB, data); m_doc.add(field); } /** * @see org.opencms.search.I_CmsSearchDocument#addContentLocales(java.util.Collection) */ public void addContentLocales(Collection locales) { // Lucene documents are not localized by defualt: Nothing to do here } /** * @see org.opencms.search.I_CmsSearchDocument#addDateField(java.lang.String, long, 
boolean) */ public void addDateField(String name, long date, boolean analyzed) { Field field = new Field( name, DateTools.dateToString(new Date(date), DateTools.Resolution.MILLISECOND), STORED_NOT_ANALYSED_TYPE); field.setBoost(0.0F); add(field); if (analyzed) { field = new Field( name + CmsSearchField.FIELD_DATE_LOOKUP_SUFFIX, getDateTerms(date), NOT_STORED_ANALYSED_TYPE); add(field); } } /** * @see org.opencms.search.I_CmsSearchDocument#addFileSizeField(int) */ public void addFileSizeField(int length) { // a default lucene implementation does not have a field for the file size } /** * @see org.opencms.search.I_CmsSearchDocument#addPathField(java.lang.String) */ public void addPathField(String rootPath) { String parentFolders = CmsSearchFieldConfiguration.getParentFolderTokens(rootPath); Field field = new Field(CmsSearchField.FIELD_PARENT_FOLDERS, parentFolders, NOT_STORED_ANALYSED_TYPE); field.setBoost(0.0F); add(field); } /** * @see org.opencms.search.I_CmsSearchDocument#addResourceLocales(java.util.Collection) */ public void addResourceLocales(Collection locales) { // A default lucene document has only one locale. 
} /** * @see org.opencms.search.I_CmsSearchDocument#addRootPathField(java.lang.String) */ public void addRootPathField(String rootPath) { add(new StringField(CmsSearchField.FIELD_PATH, rootPath, Field.Store.YES)); } /** * @see org.opencms.search.I_CmsSearchDocument#addSearchField(org.opencms.search.fields.CmsSearchField, java.lang.String) */ public void addSearchField(CmsSearchField field, String value) { if (field instanceof CmsLuceneField) { add(((CmsLuceneField)field).createField(value)); } else { throw (new CmsRuntimeException( Messages.get().container(Messages.LOG_INVALID_FIELD_CLASS_1, field.getClass().getName()))); } } /** * @see org.opencms.search.I_CmsSearchDocument#addSuffixField(java.lang.String) */ public void addSuffixField(String suffix) { add(new StringField(CmsSearchField.FIELD_SUFFIX, suffix, Field.Store.YES)); } /** * @see org.opencms.search.I_CmsSearchDocument#addTypeField(java.lang.String) */ public void addTypeField(String typeName) { add(new StringField(CmsSearchField.FIELD_TYPE, typeName, Field.Store.YES)); } /** * @see org.opencms.search.I_CmsSearchDocument#getContentBlob() */ public byte[] getContentBlob() { IndexableField fieldContentBlob = m_doc.getField(CmsSearchField.FIELD_CONTENT_BLOB); if (fieldContentBlob != null) { try { if (fieldContentBlob.readerValue() != null) { return IOUtils.toByteArray(fieldContentBlob.readerValue()); } } catch (IOException e) { // TODO: } } return null; } /** * @see org.opencms.search.I_CmsSearchDocument#getDocument() */ public Object getDocument() { return m_doc; } /** * @see org.opencms.search.I_CmsSearchDocument#getFieldNames() */ public List getFieldNames() { List result = new ArrayList(); for (IndexableField field : m_doc.getFields()) { result.add(field.name()); } return result; } /** * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsDate(java.lang.String) */ public Date getFieldValueAsDate(String fieldName) { String contentDate = getFieldValueAsString(fieldName); if (contentDate != null) { 
try { return new Date(DateTools.stringToTime(contentDate)); } catch (ParseException e) { // ignore and assume the given field name does not refer a date field } } return null; } /** * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsString(java.lang.String) */ public String getFieldValueAsString(String fieldName) { IndexableField fieldValue = m_doc.getField(fieldName); if (fieldValue != null) { return fieldValue.stringValue(); } return null; } /** * @see org.opencms.search.I_CmsSearchDocument#getMultivaluedFieldAsStringList(java.lang.String) */ public List getMultivaluedFieldAsStringList(String fieldName) { return Collections.singletonList(getFieldValueAsString(fieldName)); } /** * @see org.opencms.search.I_CmsSearchDocument#getPath() */ public String getPath() { return getFieldValueAsString(CmsSearchField.FIELD_PATH); } /** * @see org.opencms.search.I_CmsSearchDocument#getScore() */ public float getScore() { return m_score; } /** * @see org.opencms.search.I_CmsSearchDocument#getType() */ public String getType() { return getFieldValueAsString(CmsSearchField.FIELD_TYPE); } /** * @see org.opencms.search.I_CmsSearchDocument#setBoost(float) */ public void setBoost(float boost) { m_doc.removeFields(CmsSearchField.FIELD_META); m_doc.removeFields(CmsSearchField.FIELD_CONTENT); m_doc.removeFields(CmsSearchField.FIELD_DESCRIPTION); m_doc.removeFields(CmsSearchField.FIELD_KEYWORDS); m_doc.removeFields(CmsSearchField.FIELD_TITLE); m_doc.removeFields(CmsSearchField.FIELD_TITLE_UNSTORED); Field f = m_fields.get(CmsSearchField.FIELD_META); if (f != null) { f.setBoost(boost); m_doc.add(f); } f = m_fields.get(CmsSearchField.FIELD_CONTENT); if (f != null) { f.setBoost(boost); m_doc.add(f); } f = m_fields.get(CmsSearchField.FIELD_DESCRIPTION); if (f != null) { f.setBoost(boost); m_doc.add(f); } f = m_fields.get(CmsSearchField.FIELD_KEYWORDS); if (f != null) { f.setBoost(boost); m_doc.add(f); } f = m_fields.get(CmsSearchField.FIELD_TITLE); if (f != null) { 
f.setBoost(boost); m_doc.add(f); } f = m_fields.get(CmsSearchField.FIELD_META); if (f != null) { f.setBoost(boost); m_doc.add(f); } f = m_fields.get(CmsSearchField.FIELD_TITLE_UNSTORED); if (f != null) { f.setBoost(boost); m_doc.add(f); } } /** * @see org.opencms.search.I_CmsSearchDocument#setScore(float) */ public void setScore(float score) { m_score = score; } /** * Adds a field to this document.

* * @param f the field to add */ private void add(Field f) { m_fields.put(f.name(), f); m_doc.add(f); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy