org.apache.avro.tool.DataFileRepairTool Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of avro-tools Show documentation
Show all versions of avro-tools Show documentation
Avro command line tools and utilities
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.tool;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
/** Recovers data from a corrupt Avro data file. */
public class DataFileRepairTool implements Tool {
/**
 * Returns the name of this tool.
 *
 * @return the constant string {@code "repair"}
 */
@Override
public String getName() {
  return "repair";
}
/**
 * Returns a one-line, human-readable summary of what this tool does.
 *
 * @return the fixed description string for the repair tool
 */
@Override
public String getShortDescription() {
  return "Recovers data from a corrupt Avro Data file";
}
/**
 * Writes the usage message for this tool to the given stream.
 *
 * The message lists the expected arguments and describes each of the
 * supported options ({@code ALL}, {@code PRIOR}, {@code AFTER}, {@code REPORT}).
 *
 * @param output stream that receives the usage text
 */
private void printInfo(PrintStream output) {
  // One append chain per line of the message. Lines are separated by an
  // explicit "\n" and the whole text is emitted with a single println call.
  StringBuilder usage = new StringBuilder();
  usage.append("Insufficient arguments. Arguments: [-o option] input_file output_file \n");
  usage.append(" Where option is one of the following: \n");
  usage.append(" ").append(ALL).append(" (default) recover as many records as possible.\n");
  usage.append(" ").append(PRIOR).append(" recover only records prior to the first instance of corruption \n");
  usage.append(" ").append(AFTER).append(" recover only records after the first instance of corruption.\n");
  usage.append(" ").append(REPORT).append(" print the corruption report only, reporting the\n");
  usage.append(" number of valid and corrupted blocks and records\n");
  usage.append(" input_file is the file to read from. output_file is the file to\n");
  usage.append(" create and write recovered data to. output_file is ignored if\n");
  usage.append(" using the report option.");
  output.println(usage.toString());
}
/**
 * Names accepted as the value of the {@code -o} flag. Populated once by the
 * static initializer below.
 */
private static final Set<String> OPTIONS = new HashSet<>();
/** Option name: recover as many records as possible (the default). */
private static final String ALL = "all";
/** Option name: recover only records prior to the first instance of corruption. */
private static final String PRIOR = "prior";
/** Option name: recover only records after the first instance of corruption. */
private static final String AFTER = "after";
/** Option name: print the corruption report only; no output file is written. */
private static final String REPORT = "report";

static {
  OPTIONS.add(ALL);
  OPTIONS.add(PRIOR);
  OPTIONS.add(AFTER);
  OPTIONS.add(REPORT);
}
/**
 * Parses the command-line arguments and dispatches to the selected operation.
 *
 * Accepted argument forms are {@code input_file output_file} and
 * {@code -o option input_file [output_file]}, where {@code option} must be one
 * of the names in {@code OPTIONS}. Without {@code -o} the option defaults to
 * {@code ALL}. The output file is required for every option except
 * {@code REPORT}.
 *
 * @param stdin not read by this method
 * @param out   stream handed to the selected operation
 * @param err   stream that receives the usage message when the arguments are
 *              invalid; also handed to the selected operation
 * @param args  command-line arguments
 * @return {@code 1} when the arguments are invalid, otherwise the value
 *         returned by the selected operation
 * @throws Exception propagated from the selected operation
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
  if (args.size() < 2) {
    printInfo(err);
    return 1;
  }

  int index = 0;
  String option = ALL;
  // An explicit option occupies the first two arguments: "-o" and its value.
  // The size check above guarantees that index 1 exists.
  if ("-o".equals(args.get(0))) {
    option = args.get(1);
    index = 2;
  }

  // The option must be known and at least the input file must follow it.
  if (!OPTIONS.contains(option) || args.size() - index < 1) {
    printInfo(err);
    return 1;
  }
  String input = args.get(index++);

  // Report mode needs no output file, so it is handled before the check below.
  if (REPORT.equals(option)) {
    return reportOnly(input, out, err);
  }

  // Every remaining mode writes recovered data and therefore needs output_file.
  if (args.size() - index < 1) {
    printInfo(err);
    return 1;
  }
  String output = args.get(index);

  if (ALL.equals(option)) {
    return recoverAll(input, output, out, err);
  }
  if (PRIOR.equals(option)) {
    return recoverPrior(input, output, out, err);
  }
  if (AFTER.equals(option)) {
    return recoverAfter(input, output, out, err);
  }
  // Defensive default: not expected to be reached while OPTIONS holds only the
  // four names handled above.
  return 1;
}
private int recover(String input, String output, PrintStream out, PrintStream err, boolean recoverPrior,
boolean recoverAfter) throws IOException {
File infile = new File(input);
if (!infile.canRead()) {
err.println("cannot read file: " + input);
return 1;
}
out.println("Recovering file: " + input);
GenericDatumReader