All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.mg4j.document.tika.TextDocumentFactory Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java.

The newest version!
package it.unimi.dsi.mg4j.document.tika;

/*		 
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2011 Paolo Boldi and Sebastiano Vigna  
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.util.Properties;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.txt.TXTParser;


/**
 * A document factory for the text format; the character set will be autodetected.
 * 
 * 

This factory has no metadata. * * @author Salvatore Insalaco */ public class TextDocumentFactory extends AbstractSimpleTikaDocumentFactory { private static final TXTParser TXT_PARSER = new TXTParser(); private static final long serialVersionUID = 1L; public TextDocumentFactory() {} public TextDocumentFactory(Reference2ObjectMap, Object> defaultMetadata) { super(defaultMetadata); } public TextDocumentFactory(Properties properties) throws ConfigurationException { super(properties); } public TextDocumentFactory(String[] property) throws ConfigurationException { super(property); } @Override protected Parser getParser() { return TXT_PARSER; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy