All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.big.mg4j.document.PropertyBasedDocumentFactory Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.document;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2005-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMaps;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.util.Properties;

import java.lang.reflect.InvocationTargetException;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;

/** A document factory initialised by default properties.
 * 
 * 

Many document factories need a number of default values that are used when * the metadata passed to * {@link it.unimi.dsi.big.mg4j.document.DocumentFactory#getDocument(java.io.InputStream,Reference2ObjectMap)} is * not sufficient or lacks some key. This abstract class provides a common base for all such factories. * *

All concrete implementations of this class should have: *

    *
  1. an empty constructor; *
  2. a constructor taking a {@link it.unimi.dsi.fastutil.objects.Reference2ObjectMap} * having {@link java.lang.Enum} keys; *
  3. a constructor taking a {@link Properties} object; *
  4. a constructor taking a string array. *
* *

In the third case, the properties will be parsed by the {@link #parseProperties(Properties)} * method. In the fourth case, by the {@link #parseProperties(String[])} method. * *

Since all implementations are expected to provide such constructors, corresponding * {@linkplain #getInstance(Class, String[]) static factory methods} have been provided to * simplify factory instantiation. * *

If the implementation needs to read and parse some key, it must override the * {@link #parseProperty(String, String[], Reference2ObjectMap)} method. * *

Keys are specified with a dotted notation. The last dot-separated token is the actual key. The prefix is used * to select properties: only properties with a prefix that is a prefix of the current class name are considered. * Moreover, if a property with a completely specified prefix (i.e., a prefix that is a class name) is not parsed * an exception will be thrown. * *

This class provide helpers methods {@link #resolve(Enum, Reference2ObjectMap)} and {@link #resolveNotNull(Enum, Reference2ObjectMap)} * to help in writing implementations of {@link it.unimi.dsi.big.mg4j.document.DocumentFactory#getDocument(java.io.InputStream,Reference2ObjectMap)} that * handle default metadata correctly. */ public abstract class PropertyBasedDocumentFactory extends AbstractDocumentFactory { private static final long serialVersionUID = 1L; private static final Logger LOGGER = Util.getLogger( PropertyBasedDocumentFactory.class ); /** Case-insensitive keys for metadata passed to * {@link it.unimi.dsi.big.mg4j.document.DocumentFactory#getDocument(java.io.InputStream,it.unimi.dsi.fastutil.objects.Reference2ObjectMap)}. * *

The keys in this class are general-purpose keys that are meaningful for most factories. * Specific factory implementations might choose to interpret more keys, but then it is * up to the {@link it.unimi.dsi.big.mg4j.document.DocumentSequence} that uses the factory to * provide data for those keys. * *

Note that the metadata map is a reference map. We cannot use * an {@link java.util.EnumMap} because we do not know in advance the enum(s) whose items will be put * in the map. */ public static enum MetadataKeys { /** The tag for a document title (a character sequence). */ TITLE, /** The tag for a document uri (a character sequence). */ URI, /** The tag for MIME type metadata (a string). */ MIMETYPE, /** The tag for charset encoding metadata (a string normalised through {@link java.nio.charset.Charset#forName(java.lang.String)}). */ ENCODING, /** The tag for the optional name of a {@linkplain WordReader word reader} class. */ WORDREADER, /** The tag for locale metadata (a {@link java.util.Locale}). */ LOCALE }; /** The set of default metadata for this factory. It is initalised by {@link #parseProperties(Properties)}. * */ protected Reference2ObjectMap,Object> defaultMetadata; public static PropertyBasedDocumentFactory getInstance( final Class klass, final String[] property ) throws InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { return (PropertyBasedDocumentFactory)( klass.getConstructor( new Class[] { String[].class } ).newInstance( new Object[] { property } ) ); } public static PropertyBasedDocumentFactory getInstance( final Class klass, final Properties properties ) throws InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { return (PropertyBasedDocumentFactory)( klass.getConstructor( Properties.class ).newInstance( properties ) ); } public static PropertyBasedDocumentFactory getInstance( final Class klass ) throws InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { return (PropertyBasedDocumentFactory)( klass.getConstructor().newInstance() ); } public static PropertyBasedDocumentFactory getInstance( final Class klass, final Reference2ObjectMap,Object> metadata ) throws InstantiationException, IllegalAccessException, 
InvocationTargetException, NoSuchMethodException { return (PropertyBasedDocumentFactory)( klass.getConstructor( new Class[] { Reference2ObjectMap.class } ).newInstance( new Object[] { metadata } ) ); } private void logMetadata() { LOGGER.debug( this.getClass().getName() + " initialised with default metadata " + defaultMetadata ); } protected PropertyBasedDocumentFactory( final Reference2ObjectMap,Object>defaultMetadata ) { this.defaultMetadata = defaultMetadata; logMetadata(); } protected PropertyBasedDocumentFactory( final Properties properties ) throws ConfigurationException { this.defaultMetadata = parseProperties( properties ); logMetadata(); } protected PropertyBasedDocumentFactory( final String[] property ) throws ConfigurationException { this.defaultMetadata = parseProperties( property ); logMetadata(); } @SuppressWarnings("unchecked") protected PropertyBasedDocumentFactory() { this.defaultMetadata = Reference2ObjectMaps.EMPTY_MAP; logMetadata(); } /** A utility method checking whether the downcased name of an {@link Enum} is equal to a given string. * *

This class uses an {@link Enum} ({@link MetadataKeys}) to store valid property keys. We follow * both the uppercase naming convention for enums and the lowercase naming convention for properties, * and this method encapsulates the method calls that necessary to correctly handle key parsing. * * @param enumKey a key expressed as an {@link Enum}. * @param key a key expressed as a string. * @return true if key is equal to the downcased {@linkplain Enum#name() name} of enumKey. */ public static boolean sameKey( final Enum enumKey, final String key ) { return key.equals( enumKey.name().toLowerCase() ); } /** Parses a property with given key and value, adding it to the given map. * *

Currently this implementation just parses the {@link MetadataKeys#LOCALE} property. *

Subclasses should do their own parsing, returing true in case of success and * returning super.parseProperty() otherwise. * * @param key the property key. * @param valuesUnused the property value; this is an array, because properties may have a list of comma-separated values. * @param metadataUnused the metadata map. * @return true if the property was parsed correctly, false if it was ignored. * */ protected boolean parseProperty( final String key, final String[] valuesUnused, final Reference2ObjectMap,Object> metadataUnused ) throws ConfigurationException { if ( sameKey( MetadataKeys.LOCALE, key ) ) throw new ConfigurationException( "Locales are currently unsupported" ); return false; } /** This method checks that the array of values contains just one element, and returns the element. * * @param key the property name (used to build the exception message). * @param values the array of values. * @return the only value (if the array contains exactly one element). * @throws ConfigurationException iff values does not contain a single element. */ protected static String ensureJustOne( final String key, final String[] values ) throws ConfigurationException { if ( values.length != 1 ) throw new ConfigurationException( "Property " + key + " should have just one value" ); return values[ 0 ]; } /** Scans the property set, parsing the properties that concern this class. * * @param properties a set of properties. * @return a metadata map. */ @SuppressWarnings("unchecked") public Reference2ObjectMap,Object> parseProperties( final Properties properties ) throws ConfigurationException { String key, qualifier, className = this.getClass().getName(); int lastDot; Reference2ObjectArrayMap,Object> metadata = new Reference2ObjectArrayMap,Object>(); for( Iterator i = properties.getKeys(); i.hasNext(); ) { key = i.next().toString(); lastDot = key.lastIndexOf( '.' ); qualifier = lastDot == -1 ? "" : key.substring( 0, lastDot ); if ( className.startsWith( qualifier ) && ! 
parseProperty( key.substring( lastDot + 1 ), properties.getStringArray( key ), metadata ) && className.equals( qualifier ) ) throw new ConfigurationException( "Unknown property " + key ); } return metadata.isEmpty() ? Reference2ObjectMaps.EMPTY_MAP : metadata; } /** Parses the given list of properties either as key=value specs (value may * be a list of comma-separated values), or as filenames. * * @param property an array of strings specifying properties. * @return a metadata map. */ public Reference2ObjectMap,Object> parseProperties( final String[] property ) throws ConfigurationException { final Reference2ObjectArrayMap,Object> metadata = new Reference2ObjectArrayMap,Object>(); Properties properties; int pos; for( int i = 0; i < property.length; i++ ) { if ( ( pos = property[ i ].indexOf( '=' ) ) != - 1 ) { properties = new Properties(); properties.addProperty( property[ i ].substring( 0, pos ), property[ i ].substring( pos + 1 ) ); } else properties = new Properties( property[ i ] ); metadata.putAll( parseProperties( properties ) );; } return metadata; } /** Resolves the given key against the given metadata, falling back to the default metadata. * * @param key a key. * @param metadata a metadata map. * @return the value returned by metadata for key, or the value * returned by {@link #defaultMetadata} for key if the former is null (the latter, * of course, might be null). */ protected Object resolve( final Enum key, final Reference2ObjectMap,Object> metadata ) { Object value = metadata.get( key ); return value != null ? value : defaultMetadata.get( key ); } /** Resolves the given key against the given metadata, falling back to the provided object. * * @param key a key. * @param metadata a metadata map. * @param o a default object. * @return the value returned by metadata for key, or o if the * former is null. 
*/ protected Object resolve( final Enum key, final Reference2ObjectMap,Object> metadata, final Object o ) { Object value = metadata.get( key ); return value != null ? value : o; } /** Resolves the given key against the given metadata, falling back to the default metadata * and guaranteeing a non-null result. * * @param key a key. * @param metadata a metadata map. * @return the value returned by metadata for key, or the value * returned by {@link #defaultMetadata} for key if the former is null; if the * latter is null, too, a {@link NoSuchElementException} will be thrown. */ protected Object resolveNotNull( final Enum key, final Reference2ObjectMap,Object> metadata ) { final Object value = resolve( key, metadata ); if ( value == null ) throw new NoSuchElementException( "The key " + key + " cannot be resolved" ); return value; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy