All downloads are free. Search and download functionality uses the official Maven repository.

com.cobber.fta.driver.faker.Faker Maven / Gradle / Ivy

Go to download

Analyze Text data to determine simple type and Semantic type information as well as other key metrics associated with a text stream.

There is a newer version: 15.10.0
Show newest version
/*
 * Copyright 2017-2024 Tim Segall
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cobber.fta.driver.faker;

import java.io.PrintStream;
import java.util.Collection;
import java.util.Random;

import com.cobber.fta.LogicalType;
import com.cobber.fta.LogicalTypeFactory;
import com.cobber.fta.LogicalTypeRegExp;
import com.cobber.fta.PluginDefinition;
import com.cobber.fta.TextAnalyzer;
import com.cobber.fta.core.FTAPluginException;
import com.cobber.fta.driver.DriverOptions;

/**
 * Writes synthetic CSV data to a stream, one column per definition supplied in
 * {@code options.faker}.
 *
 * <p>The faker option is split on commas and each piece must have the form
 * {@code name[parameters]}: {@code name} becomes the column header and the text between
 * the brackets is handed to {@link FakerParameters}. The type named by the parameters is
 * looked up among the Semantic Types registered with the default analyzer; if it is not
 * found there it must be one of the Base Types DOUBLE, LONG, LOCALDATE, LOCALDATETIME or ENUM.
 *
 * <p>Any problem with a definition is reported on the error stream and terminates the JVM
 * via {@code System.exit(1)}.
 */
public class Faker {
	private final DriverOptions options;
	private final PrintStream output;
	private final PrintStream error;

	/**
	 * Create a Faker.
	 *
	 * @param options The driver options; {@code locale}, {@code recordsToProcess} and {@code faker} are read.
	 * @param output The stream that receives the generated header and records.
	 * @param error The stream that receives diagnostics.
	 */
	public Faker(final DriverOptions options, final PrintStream output, final PrintStream error) {
		this.options = options;
		this.output = output;
		this.error = error;
	}

	/**
	 * Resolve every column definition and then emit a header line followed by the records.
	 *
	 * <p>The number of records is {@code options.recordsToProcess}, or 20 when that option is -1.
	 * For each field the null percentage is tested first, then the blank percentage, and only
	 * if neither fires is a value requested from the column's type.
	 */
	public void fake() {
		final TextAnalyzer analyzer = TextAnalyzer.getDefaultAnalysis(options.locale);
		// Constant seed so the null/blank decisions made here are repeatable from run to run
		final Random random = new Random(31415926);
		final long outputRecords = options.recordsToProcess == -1 ? 20 : options.recordsToProcess;
		final Collection<LogicalType> registered = analyzer.getPlugins().getRegisteredSemanticTypes();
		final String[] pluginDefinitions = options.faker.split(",");
		final LogicalType[] logicals = new LogicalType[pluginDefinitions.length];
		final FakerParameters[] parameters = new FakerParameters[pluginDefinitions.length];
		final String[] pluginNames = new String[pluginDefinitions.length];

		for (int i = 0; i < pluginDefinitions.length; i++) {
			final String definition = pluginDefinitions[i];
			final int parameterIndex = definition.indexOf('[');
			if (parameterIndex == -1) {
				error.printf("ERROR: Failed to retrieve parameters for '%s', missing open ('[')?, use --help%n", definition);
				System.exit(1);
			}
			if (definition.charAt(definition.length() - 1) != ']') {
				error.printf("ERROR: Missing close (']') for parameter definition for '%s', use --help%n", definition);
				System.exit(1);
			}
			parameters[i] = new FakerParameters(definition.substring(parameterIndex + 1, definition.length() - 1));
			pluginNames[i] = definition.substring(0, parameterIndex);

			// First choice is a registered Semantic Type with a matching name
			for (final LogicalType logical : registered) {
				if (logical.getSemanticType().equals(parameters[i].type)) {
					logicals[i] = logical;
					break;
				}
			}

			// If we did not find the Semantic Type it must be one of the Base Types
			if (logicals[i] == null) {
				final String baseType = parameters[i].type;
				if (!isBaseType(baseType)) {
					error.printf("ERROR: Unknown type '%s', use --help%n", baseType);
					System.exit(1);
				}
				final PluginDefinition plugin = new PluginDefinition(parameters[i].type, parameters[i].clazz);
				try {
					logicals[i] = LogicalTypeFactory.newInstance(plugin, analyzer.getConfig());
					((FakerLT)logicals[i]).setControl(parameters[i]);
				} catch (FTAPluginException e) {
					error.printf("ERROR: Failed to locate plugin named '%s', use --help%n", definition);
					System.exit(1);
				}
			}
		}

		final StringBuilder line = new StringBuilder();

		// Build the Header
		for (int i = 0; i < logicals.length; i++) {
			if (i != 0)
				line.append(',');
			line.append(quoteIfNeeded(pluginNames[i]));
			if (logicals[i] instanceof LogicalTypeRegExp && !((LogicalTypeRegExp)logicals[i]).isRegExpComplete())
				error.printf("ERROR: Semantic Type (%s) does implement LTRandom interface - however samples may not be useful.%n", logicals[i].getSemanticType());
		}
		output.println(line);

		// Build the records
		for (long l = 0; l < outputRecords; l++) {
			line.setLength(0);
			for (int i = 0; i < logicals.length; i++) {
				if (i != 0)
					line.append(',');

				final FakerParameters control = parameters[i];

				// A null is represented by an empty field, so there is nothing to append.
				// The random draw only happens when a percentage is configured, which keeps
				// the sequence of draws identical for a given set of definitions.
				if (control.nullPercent != 0 && random.nextDouble() <= control.nullPercent)
					continue;

				if (control.blankPercent != 0 && random.nextDouble() <= control.blankPercent) {
					final int blankLength = control.blankLength == -1 ? random.nextInt(6) : control.blankLength;
					appendBlank(line, blankLength);
					continue;
				}

				line.append(quoteIfNeeded(logicals[i].nextRandom()));
			}
			output.println(line);
		}
	}

	/**
	 * Test whether the supplied name is one of the Base Types accepted by the faker.
	 *
	 * @param type The type name taken from the column parameters (may be null).
	 * @return True for DOUBLE, LONG, LOCALDATE, LOCALDATETIME or ENUM.
	 */
	private boolean isBaseType(final String type) {
		return "DOUBLE".equals(type) || "LONG".equals(type) || "LOCALDATE".equals(type)
				|| "LOCALDATETIME".equals(type) || "ENUM".equals(type);
	}

	/**
	 * Append a quoted field consisting solely of spaces.
	 *
	 * @param line The record being built.
	 * @param length The number of spaces to place between the quotes.
	 */
	private void appendBlank(final StringBuilder line, final int length) {
		line.append('"');
		for (int b = 0; b < length; b++)
			line.append(' ');
		line.append('"');
	}

	/**
	 * Prepare a value for use as a CSV field.
	 *
	 * <p>The value is wrapped in double quotes when it contains a comma, a space, a double quote,
	 * or a line break; any embedded double quote is doubled so the field can be parsed back.
	 * A null value yields an empty field, which is how a null is written elsewhere in this class.
	 *
	 * @param input The raw value (may be null).
	 * @return The value, quoted and escaped if required.
	 */
	private String quoteIfNeeded(final String input) {
		if (input == null)
			return "";

		final boolean hasQuote = input.indexOf('"') != -1;
		if (!hasQuote && input.indexOf(',') == -1 && input.indexOf(' ') == -1
				&& input.indexOf('\n') == -1 && input.indexOf('\r') == -1)
			return input;

		final String escaped = hasQuote ? input.replace("\"", "\"\"") : input;
		return "\"" + escaped + "\"";
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy