org.databene.benerator.util.LineShuffler Maven / Gradle / Ivy
Go to download
benerator is a framework for creating realistic and valid high-volume test data, used for
testing (unit/integration/load) and showcase setup.
Metadata constraints are imported from systems and/or configuration files. Data can be imported from
and exported to files and systems, anonymized or generated from scratch. Domain packages provide
reusable generators for creating domain-specific data such as names and addresses, internationalizable
in language and region. It is strongly customizable with plugins and configuration options.
/*
* (c) Copyright 2007-2009 by Volker Bergmann. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted under the terms of the
* GNU General Public License.
*
* For redistributing this software or a derivative work under a license other
* than the GPL-compatible Free Software License as defined by the Free
* Software Foundation or approved by OSI, you must first obtain a commercial
* license to this software product from Volker Bergmann.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
* REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
* HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.databene.benerator.util;
import org.databene.commons.ReaderLineIterator;
import org.databene.commons.StringUtil;
import org.databene.commons.IOUtil;
import org.databene.commons.ui.ConsoleInfoPrinter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.util.List;
import java.util.ArrayList;
/**
 * Reads a text file, shuffles its lines and writes the result to another file.
 * <p>
 * Only a bounded number of lines (the "buffer size") is held in memory; lines beyond that limit
 * are not read, and lines for which <code>StringUtil.isEmpty()</code> is true are skipped.
 *
 * Created: 16.07.2007 20:29:10
 */
public class LineShuffler {

    public static final Logger logger = LoggerFactory.getLogger(LineShuffler.class);

    /** Maximum number of lines read when no buffer size is given on the command line. */
    private static final int DEFAULT_BUFFER_SIZE = 100000;

    /** Number of lines between two progress messages; also the cap for presizing the line list. */
    private static final int PROGRESS_INTERVAL = 100000;

    /**
     * Command line entry point.
     *
     * @param args <code>inFile outFile [buffer size]</code>; if fewer than two arguments are
     *             given, a help text is printed and the JVM exits with status -1
     * @throws IOException if reading the input or writing the output fails
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            printHelp();
            System.exit(-1);
        }
        String inFilename = args[0];
        String outFilename = args[1];
        int bufferSize = (args.length > 2 ? Integer.parseInt(args[2]) : DEFAULT_BUFFER_SIZE);
        shuffle(inFilename, outFilename, bufferSize);
    }

    /**
     * Reads up to <code>bufferSize</code> lines from the input, shuffles them and writes them
     * to the output file.
     *
     * @param inFilename  URI of the input, resolved by <code>IOUtil.getReaderForURI()</code>
     * @param outFilename name of the file to write
     * @param bufferSize  maximum number of lines to read and shuffle
     * @throws IOException if reading the input or writing the output fails
     */
    public static void shuffle(String inFilename, String outFilename, int bufferSize) throws IOException {
        logger.info("shuffling " + inFilename + " and writing to " + outFilename + " (max. " + bufferSize + " lines)");
        BufferedReader reader = new BufferedReader(IOUtil.getReaderForURI(inFilename));
        List<String> lines;
        try {
            lines = read(bufferSize, new ReaderLineIterator(reader));
        } finally {
            // The line limit may stop reading before the end of the input,
            // so the reader has to be released explicitly.
            IOUtil.close(reader);
        }
        shuffle(lines);
        save(lines, outFilename);
    }

    /**
     * Shuffles the list in place with a Fisher-Yates pass, so that each permutation is equally
     * likely given a uniform random source. Lists with fewer than two elements are left untouched.
     *
     * @param lines the list to shuffle; it is modified
     */
    public static void shuffle(List<String> lines) {
        for (int i = lines.size() - 1; i > 0; i--) {
            // NOTE(review): assumes RandomUtil.randomInt(min, max) treats both bounds as
            // inclusive, as the previous call randomInt(0, size - 1) implied - confirm.
            int j = RandomUtil.randomInt(0, i);
            if (j != i) {
                String tmp = lines.get(i);
                lines.set(i, lines.get(j));
                lines.set(j, tmp);
            }
        }
    }

    // private helpers -------------------------------------------------------------------------------------------------

    /**
     * Collects lines from the iterator until it is exhausted or <code>bufferSize</code> lines
     * have been accepted. Lines rejected by <code>StringUtil.isEmpty()</code> do not count.
     */
    private static List<String> read(int bufferSize, ReaderLineIterator iterator) {
        // Presize for the expected line count, but never beyond PROGRESS_INTERVAL entries:
        // a very large limit must not trigger a huge up-front allocation.
        int initialCapacity = Math.max(0, Math.min(PROGRESS_INTERVAL, bufferSize));
        List<String> lines = new ArrayList<String>(initialCapacity);
        while (lines.size() < bufferSize && iterator.hasNext()) {
            String line = iterator.next();
            if (!StringUtil.isEmpty(line)) {
                lines.add(line);
                if (lines.size() % PROGRESS_INTERVAL == 0) {
                    logger.info("parsed " + lines.size() + " lines");
                }
            }
        }
        return lines;
    }

    /**
     * Writes each line, followed by the platform line separator, to the given file.
     *
     * @throws IOException if the file cannot be opened or a write error occurred
     */
    private static void save(List<String> lines, String outputFilename) throws IOException {
        logger.info("saving " + outputFilename + "...");
        PrintWriter printer = new PrintWriter(new BufferedWriter(new FileWriter(outputFilename)));
        try {
            for (String line : lines) {
                printer.println(line);
            }
            // PrintWriter swallows IOExceptions; checkError() flushes and reports them.
            if (printer.checkError()) {
                throw new IOException("Error writing to " + outputFilename);
            }
        } finally {
            IOUtil.close(printer);
        }
    }

    /** Prints the command line usage. */
    private static void printHelp() {
        ConsoleInfoPrinter.printHelp("Parameters: inFile outFile [buffer size]");
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy