All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.batch.fs.FSFileResource Maven / Gradle / Ivy

There is a newer version: 1.0.18
Show newest version
package org.apache.tika.batch.fs;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Locale;

import org.apache.tika.batch.FileResource;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;

/**
 * FileSystem(FS)Resource wraps a file name.
 * 

* This class automatically sets the following keys in Metadata: *

    *
  • Metadata.RESOURCE_NAME_KEY (file name)
  • *
  • Metadata.CONTENT_LENGTH
  • *
  • FSProperties.FS_REL_PATH
  • *
  • FileResource.FILE_EXTENSION
  • *
, */ public class FSFileResource implements FileResource { private final Path fullPath; private final String relativePath; private final Metadata metadata; /** * * @param inputRoot * @param fullPath * @see FSFileResource#FSFileResource(Path, Path) * @deprecated to be removed in Tika 2.0 */ @Deprecated public FSFileResource(File inputRoot, File fullPath) { this(Paths.get(inputRoot.getAbsolutePath()), Paths.get(fullPath.getAbsolutePath())); } /** * Constructor * * @param inputRoot the input root for the file * @param fullPath the full path to the file * @throws IllegalArgumentException if the fullPath is not * a child of inputRoot */ public FSFileResource(Path inputRoot, Path fullPath) { this.fullPath = fullPath; this.metadata = new Metadata(); //child path must actually be a child assert(fullPath.toAbsolutePath().startsWith(inputRoot.toAbsolutePath())); this.relativePath = inputRoot.relativize(fullPath).toString(); //need to set these now so that the filter can determine //whether or not to crawl this file metadata.set(Metadata.RESOURCE_NAME_KEY, fullPath.getFileName().toString()); long sz = -1; try { sz = Files.size(fullPath); } catch (IOException e) { //swallow //not existent file will be handled downstream } metadata.set(Metadata.CONTENT_LENGTH, Long.toString(sz)); metadata.set(FSProperties.FS_REL_PATH, relativePath); metadata.set(FileResource.FILE_EXTENSION, getExtension(fullPath)); } /** * Simple extension extractor that takes whatever comes after the * last period in the path. It returns a lowercased version of the "extension." *

* If there is no period, it returns an empty string. * * @param fullPath full path from which to try to find an extension * @return the lowercased extension or an empty string */ private String getExtension(Path fullPath) { String p = fullPath.getFileName().toString(); int i = p.lastIndexOf("."); if (i > -1) { return p.substring(i + 1).toLowerCase(Locale.ROOT); } return ""; } /** * * @return file's relativePath */ @Override public String getResourceId() { return relativePath; } @Override public Metadata getMetadata() { return metadata; } @Override public InputStream openInputStream() throws IOException { //no need to include Metadata because we already set the //same information in the initializer return TikaInputStream.get(fullPath); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy