All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.mahout.text.ReadOnlyFileSystemDirectory Maven / Gradle / Ivy

Go to download

Optional components of Mahout that generally support interaction with third-party systems, formats, APIs, etc.

There is a newer version: 0.13.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.text;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.store.BaseDirectory;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.BufferedIndexOutput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collection;

//TODO: is there a better way of doing this in Lucene 4.x?

/**
 * This class implements a read-only Lucene Directory on top of a general FileSystem.
 * Currently it does not support locking.
 * 

* // TODO: Rename to FileSystemReadOnlyDirectory */ public class ReadOnlyFileSystemDirectory extends BaseDirectory { private final FileSystem fs; private final Path directory; private final int ioFileBufferSize; private static final Logger log = LoggerFactory.getLogger(ReadOnlyFileSystemDirectory.class); /** * Constructor * * @param fs - filesystem * @param directory - directory path * @param create - if true create the directory * @param conf - MR Job Configuration * @throws IOException */ public ReadOnlyFileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf) throws IOException { this.fs = fs; this.directory = directory; this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096); if (create) { create(); } boolean isDir = false; try { FileStatus status = fs.getFileStatus(directory); if (status != null) { isDir = status.isDir(); } } catch (IOException e) { log.error(e.getMessage(), e); } if (!isDir) { throw new IOException(directory + " is not a directory"); } } private void create() throws IOException { if (!fs.exists(directory)) { fs.mkdirs(directory); } boolean isDir = false; try { FileStatus status = fs.getFileStatus(directory); if (status != null) { isDir = status.isDir(); } } catch (IOException e) { log.error(e.getMessage(), e); } if (!isDir) { throw new IOException(directory + " is not a directory"); } // clear old index files FileStatus[] fileStatus = fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter()); for (FileStatus status : fileStatus) { if (!fs.delete(status.getPath(), true)) { throw new IOException("Cannot delete index file " + status.getPath()); } } } public String[] list() throws IOException { FileStatus[] fileStatus = fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter()); String[] result = new String[fileStatus.length]; for (int i = 0; i < fileStatus.length; i++) { result[i] = fileStatus[i].getPath().getName(); } return result; } @Override public String[] listAll() throws IOException { 
return list(); } @Override public boolean fileExists(String name) throws IOException { return fs.exists(new Path(directory, name)); } @Override public long fileLength(String name) throws IOException { return fs.getFileStatus(new Path(directory, name)).getLen(); } @Override public void deleteFile(String name) throws IOException { if (!fs.delete(new Path(directory, name), true)) { throw new IOException("Cannot delete index file " + name); } } @Override public IndexOutput createOutput(String name, IOContext context) throws IOException { //TODO: What should we be doing with the IOContext here, if anything? Path file = new Path(directory, name); if (fs.exists(file) && !fs.delete(file, true)) { // delete the existing one if applicable throw new IOException("Cannot overwrite index file " + file); } return new FileSystemIndexOutput(file, ioFileBufferSize); } @Override public void sync(Collection names) throws IOException { // do nothing, as this is read-only } @Override public IndexInput openInput(String name, IOContext context) throws IOException { return new FileSystemIndexInput(new Path(directory, name), ioFileBufferSize); } @Override public Lock makeLock(final String name) { return new Lock() { public boolean obtain() { return true; } public void release() { } public boolean isLocked() { throw new UnsupportedOperationException(); } public String toString() { return "Lock@" + new Path(directory, name); } }; } @Override public void clearLock(String name) throws IOException { // do nothing } @Override public void close() throws IOException { // do not close the file system } @Override public void setLockFactory(LockFactory lockFactory) throws IOException { // do nothing } @Override public LockFactory getLockFactory() { return null; } @Override public String toString() { return this.getClass().getName() + "@" + directory; } private class FileSystemIndexInput extends BufferedIndexInput implements Cloneable { // shared by clones private class Descriptor { public final 
FSDataInputStream in; public long position; // cache of in.getPos() public Descriptor(Path file, int ioFileBufferSize) throws IOException { this.in = fs.open(file, ioFileBufferSize); } } private final Path filePath; // for debugging private final Descriptor descriptor; private final long length; private boolean isOpen; private boolean isClone; public FileSystemIndexInput(Path path, int ioFileBufferSize) throws IOException { super("FSII_" + path.getName(), ioFileBufferSize); filePath = path; descriptor = new Descriptor(path, ioFileBufferSize); length = fs.getFileStatus(path).getLen(); isOpen = true; } @Override protected void readInternal(byte[] b, int offset, int len) throws IOException { long position = getFilePointer(); if (position != descriptor.position) { descriptor.in.seek(position); descriptor.position = position; } int total = 0; do { int i = descriptor.in.read(b, offset + total, len - total); if (i == -1) { throw new IOException("Read past EOF"); } descriptor.position += i; total += i; } while (total < len); } @Override public void close() throws IOException { if (!isClone) { if (isOpen) { descriptor.in.close(); isOpen = false; } else { throw new IOException("Index file " + filePath + " already closed"); } } } @Override protected void seekInternal(long position) { // handled in readInternal() } @Override public long length() { return length; } @Override protected void finalize() throws Throwable { super.finalize(); if (!isClone && isOpen) { close(); // close the file } } @Override public BufferedIndexInput clone() { FileSystemIndexInput clone = (FileSystemIndexInput) super.clone(); clone.isClone = true; return clone; } } private class FileSystemIndexOutput extends BufferedIndexOutput { private final Path filePath; // for debugging private final FSDataOutputStream out; private boolean isOpen; public FileSystemIndexOutput(Path path, int ioFileBufferSize) throws IOException { filePath = path; // overwrite is true by default out = fs.create(path, true, 
ioFileBufferSize); isOpen = true; } @Override public void flushBuffer(byte[] b, int offset, int size) throws IOException { out.write(b, offset, size); } @Override public void close() throws IOException { if (isOpen) { super.close(); out.close(); isOpen = false; } else { throw new IOException("Index file " + filePath + " already closed"); } } @Override public void seek(long pos) throws IOException { throw new UnsupportedOperationException(); } @Override public long length() throws IOException { return out.getPos(); } @Override protected void finalize() throws Throwable { super.finalize(); if (isOpen) { close(); // close the file } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy