All downloads are free. Search and download functionality uses the official Maven repository.

com.metamx.common.io.smoosh.FileSmoosher Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2011,2012 Metamarkets Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.metamx.common.io.smoosh;

import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import com.google.common.primitives.Ints;
import com.metamx.common.IAE;
import com.metamx.common.ISE;

import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * A class that concatenates files together into configurable sized chunks, works in conjunction
 * with the SmooshedFileMapper to provide access to the individual files.
 * 

* It does not split input files among separate output files, instead the various "chunk" files will * be varying sizes and it is not possible to add a file of size greater than Integer.MAX_VALUE */ public class FileSmoosher implements Closeable { private static final String FILE_EXTENSION = "smoosh"; private static final Joiner joiner = Joiner.on(","); private final File baseDir; private final int maxChunkSize; private final List outFiles = Lists.newArrayList(); private final Map internalFiles = Maps.newTreeMap(); private Outer currOut = null; public FileSmoosher( File baseDir ) { this(baseDir, Integer.MAX_VALUE); } public FileSmoosher( File baseDir, int maxChunkSize ) { this.baseDir = baseDir; this.maxChunkSize = maxChunkSize; Preconditions.checkArgument(maxChunkSize > 0, "maxChunkSize must be a positive value."); } private FileSmoosher( File baseDir, int maxChunkSize, List outFiles, Map internalFiles ) { this.baseDir = baseDir; this.maxChunkSize = maxChunkSize; this.outFiles.addAll(outFiles); this.internalFiles.putAll(internalFiles); } public Set getInternalFilenames() { return internalFiles.keySet(); } public void add(File fileToAdd) throws IOException { add(fileToAdd.getName(), Files.map(fileToAdd)); } public void add(String name, File fileToAdd) throws IOException { add(name, Files.map(fileToAdd)); } public void add(String name, ByteBuffer bufferToAdd) throws IOException { add(name, Arrays.asList(bufferToAdd)); } public void add(String name, List bufferToAdd) throws IOException { if (name.contains(",")) { throw new IAE("Cannot have a comma in the name of a file, got[%s].", name); } if (internalFiles.get(name) != null) { throw new IAE("Cannot add files of the same name, already have [%s]", name); } long size = 0; for (ByteBuffer buffer : bufferToAdd) { size += buffer.remaining(); } SmooshedWriter out = addWithSmooshedWriter(name, size); try { for (ByteBuffer buffer : bufferToAdd) { out.write(buffer); } } finally { Closeables.closeQuietly(out); } } public 
SmooshedWriter addWithSmooshedWriter(final String name, final long size) throws IOException { if (size > maxChunkSize) { throw new IAE("Asked to add buffers[%,d] larger than configured max[%,d]", size, maxChunkSize); } if (currOut == null) { currOut = getNewCurrOut(); } if (currOut.bytesLeft() < size) { Closeables.close(currOut, false); currOut = getNewCurrOut(); } final int startOffset = currOut.getCurrOffset(); return new SmooshedWriter() { private boolean open = true; private long bytesWritten = 0; @Override public int write(InputStream in) throws IOException { return verifySize(currOut.write(in)); } @Override public int write(ByteBuffer in) throws IOException { return verifySize(currOut.write(in)); } private int verifySize(int bytesWrittenInChunk) throws IOException { bytesWritten += bytesWrittenInChunk; if (bytesWritten != currOut.getCurrOffset() - startOffset) { throw new ISE("WTF? Perhaps there is some concurrent modification going on?"); } if (bytesWritten > size) { throw new ISE("Wrote[%,d] bytes for something of size[%,d]. Liar!!!", bytesWritten, size); } return bytesWrittenInChunk; } @Override public boolean isOpen() { return open; } @Override public void close() throws IOException { open = false; internalFiles.put(name, new Metadata(currOut.getFileNum(), startOffset, currOut.getCurrOffset())); if (bytesWritten != currOut.getCurrOffset() - startOffset) { throw new ISE("WTF? 
Perhaps there is some concurrent modification going on?"); } if (bytesWritten != size) { throw new IOException( String.format("Expected [%,d] bytes, only saw [%,d], potential corruption?", size, bytesWritten) ); } } }; } @Override public void close() throws IOException { Closeables.close(currOut, false); File metaFile = metaFile(baseDir); Writer out = null; try { out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(metaFile), Charsets.UTF_8)); out.write(String.format("v1,%d,%d", maxChunkSize, outFiles.size())); out.write("\n"); for (Map.Entry entry : internalFiles.entrySet()) { final Metadata metadata = entry.getValue(); out.write( joiner.join( entry.getKey(), metadata.getFileNum(), metadata.getStartOffset(), metadata.getEndOffset() ) ); out.write("\n"); } } finally { Closeables.close(out, false); } } private Outer getNewCurrOut() throws FileNotFoundException { final int fileNum = outFiles.size(); File outFile = makeChunkFile(baseDir, fileNum); outFiles.add(outFile); return new Outer(fileNum, new BufferedOutputStream(new FileOutputStream(outFile)), maxChunkSize); } static File metaFile(File baseDir) { return new File(baseDir, String.format("meta.%s", FILE_EXTENSION)); } static File makeChunkFile(File baseDir, int i) { return new File(baseDir, String.format("%05d.%s", i, FILE_EXTENSION)); } public static class Outer implements SmooshedWriter { private final int fileNum; private final OutputStream out; private final int maxLength; private boolean open = true; private int currOffset = 0; Outer(int fileNum, OutputStream out, int maxLength) { this.fileNum = fileNum; this.out = out; this.maxLength = maxLength; } public int getFileNum() { return fileNum; } public int getCurrOffset() { return currOffset; } public int bytesLeft() { return maxLength - currOffset; } @Override public int write(ByteBuffer buffer) throws IOException { WritableByteChannel channel = Channels.newChannel(out); return addToOffset(channel.write(buffer)); } @Override public int 
write(InputStream in) throws IOException { return addToOffset(Ints.checkedCast(ByteStreams.copy(in, out))); } public int addToOffset(int numBytesWritten) { if (numBytesWritten > bytesLeft()) { throw new ISE("Wrote more bytes[%,d] than available[%,d]. Don't do that.", numBytesWritten, bytesLeft()); } currOffset += numBytesWritten; return numBytesWritten; } @Override public boolean isOpen() { return open; } @Override public void close() throws IOException { open = false; out.close(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy