![JAR search and dependency download from the Maven repository](/logo.png)
src.it.unimi.dsi.big.mg4j.document.tika.TikaField Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mg4j-big Show documentation
Show all versions of mg4j-big Show documentation
MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.
The newest version!
package it.unimi.dsi.big.mg4j.document.tika;
/*
* MG4J: Managing Gigabytes for Java (big)
*
* Copyright (C) 2011 Paolo Boldi and Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.big.mg4j.document.DocumentFactory;
import java.io.Serializable;
import org.apache.tika.metadata.Metadata;
/**
 * Describes a single Tika field as seen by MG4J: a pair made of an MG4J field name
 * and a Tika field name.
 *
 * <p>The Tika content is modelled as a field whose Tika name is {@code null}.
 */
public class TikaField implements Serializable {
    private static final long serialVersionUID = 1L;

    /** MG4J name assigned to the field that stands for the Tika content. */
    private static final String CONTENT_MG4J_NAME = "text";

    /** Name under which this field is known to MG4J. */
    private final String mg4jName;

    /** Name under which this field is known to Tika; {@code null} denotes the Tika content. */
    private final String tikaName;

    /**
     * Builds the field that stands for the Tika content: its MG4J name is
     * <code>text</code> and its Tika name is {@code null}.
     */
    public TikaField() {
        // A null Tika name is what marks the Tika content.
        this( CONTENT_MG4J_NAME, null );
    }

    /**
     * Builds a field whose MG4J name coincides with its Tika name.
     *
     * @param tikaName the Tika name of the field, reused as its MG4J name.
     */
    public TikaField( String tikaName ) {
        this( tikaName, tikaName );
    }

    /**
     * Builds a field with independently chosen MG4J and Tika names.
     *
     * @param mg4jName the MG4J name of the field.
     * @param tikaName the Tika name of the field.
     */
    public TikaField( String mg4jName, String tikaName ) {
        this.tikaName = tikaName;
        this.mg4jName = mg4jName;
    }

    /**
     * Returns the MG4J name of this field.
     *
     * @return the MG4J name.
     */
    public String mg4jName() {
        return this.mg4jName;
    }

    /**
     * Returns the Tika name of this field.
     *
     * @return the Tika name, or {@code null} for the Tika content.
     */
    public String tikaName() {
        return this.tikaName;
    }

    /**
     * Looks up the content of this field in the given metadata, using the value
     * returned by {@link #tikaName()} as the lookup key.
     *
     * @param metadata the metadata to query.
     * @return whatever <code>metadata</code> returns for the Tika name of this field.
     */
    public String contentFromMetadata( Metadata metadata ) {
        final String key = tikaName();
        return metadata.get( key );
    }

    /**
     * Returns the type of this field, which is always {@link DocumentFactory.FieldType#TEXT}.
     *
     * @return {@link DocumentFactory.FieldType#TEXT}.
     */
    public DocumentFactory.FieldType getType() {
        return DocumentFactory.FieldType.TEXT;
    }

    /**
     * Tells whether this field stands for the Tika content, that is, whether its
     * Tika name is {@code null}.
     *
     * @return {@code true} iff this field is the Tika content.
     */
    public boolean isBody() {
        return null == tikaName;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy