All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.dbselection.DatabaseSelector Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.dbselection;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.UnknownHostException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.Set;

import javax.sound.sampled.UnsupportedAudioFileException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.TreeWalker;
import org.xml.sax.SAXException;

import marytts.client.MaryClient;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.features.FeatureDefinition;
import marytts.server.Mary;
import marytts.util.Pair;
import marytts.util.dom.MaryDomUtils;
import marytts.util.http.Address;
import org.apache.commons.io.FileUtils;

/**
 * Main class to be run over a database for selection
 * 
 * @author Anna Hunecke
 *
 */
public class DatabaseSelector {

	// locale given with -locale; passed to the DBHandler constructor and used to
	// build the default [locale]_featureDefinition.txt file name
	private static String locale;
	// the feature definition for the feature vectors (loaded in main2 from featDefFileName)
	public static FeatureDefinition featDef;
	// the file containing the feature definition
	private static String featDefFileName;
	// the file containing the coverage data needed to initialise the algorithm;
	// if it does not exist yet, main2 initialises the coverage and writes it here
	private static String initFileName;
	// the directory to print the selection results to (kept with a trailing slash,
	// because main2 builds paths by plain string concatenation)
	private static String selectionDirName;
	// the config file for the coverage definition
	private static String covDefConfigFileName;
	// the stop criterion (as string)
	private static String stopCriterion;
	// the log file the overall result of every run is appended to
	private static String overallLogFile;
	// if true, feature vectors are kept in memory (default; cleared by -vectorsOnDisk)
	private static boolean holdVectorsInMemory;
	// if true, print more information to command line
	private static boolean verbose;
	// if true, print a table containing the coverage
	// development over time
	private static boolean logCovDevelopment;
	// ids of the sentences selected so far; addSelectedSents adds Integer ids.
	// NOTE(review): declared as a raw Set here - the element type looks like Integer, confirm.
	private static Set selectedIdSents;
	// ids of sentences marked as unwanted; handed to SelectionFunction.select together
	// with selectedIdSents. NOTE(review): raw Set, presumably Integer ids as well - confirm.
	private static Set unwantedIdSents;

	// name given with -tableName for the table that holds the selected sentences
	private static String selectedSentencesTableName;
	// free text description of that table (-tableDescription, default: empty string)
	private static String tableDescription;
	// handler for the mySql database
	protected static DBHandler wikiToDB;
	// connection parameters for the mySql database (all four are mandatory arguments)
	private static String mysqlHost;
	private static String mysqlDB;
	private static String mysqlUser;
	private static String mysqlPasswd;
	// if true, the sentence queries are restricted with the condition "reliable=true".
	// readArgs always sets this to true; the -reliableOnly switch is commented out there.
	private static boolean considerOnlyReliableSentences;

	/**
	 * Command line entry point. Does nothing but delegate to {@link #main2(String[])}.
	 * <p>
	 * NOTE(review): the previous comment said this expects already computed unit features in a directory
	 * "unitfeatures"; the code visible in main2 reads ids and feature vectors through the DBHandler instead -
	 * confirm whether that directory is still needed.
	 * 
	 * @param args
	 *            the command line args (see printUsage for details)
	 * @throws Exception
	 *             any exception raised by main2 is passed on unchanged
	 */
	public static void main(String[] args) throws Exception {
		main2(args);
	}

	/**
	 * Runs one complete selection pass:
	 * <ol>
	 * <li>parses the arguments and validates the stop criterion,</li>
	 * <li>creates the selection directory, a per-day sub directory and the log file of this run,</li>
	 * <li>connects to the database and loads the feature definition,</li>
	 * <li>initialises the coverage (computed and written to the init file, or read back from it),</li>
	 * <li>adds the already selected sentences, removes the unwanted ones and runs the selection function,</li>
	 * <li>writes the result list, the distribution files and the overall log.</li>
	 * </ol>
	 * The log file, every other writer opened here and the database connection are released even when a step
	 * fails.
	 * 
	 * @param args
	 *            the command line args (see printUsage for details)
	 * @throws Exception
	 *             if the arguments or the stop criterion are invalid, the log file or the feature definition
	 *             cannot be opened, the database connection fails, or printing the statistics fails; where an
	 *             underlying exception exists it is attached as the cause
	 */
	public static void main2(String[] args) throws Exception {
		/* Sort out the filenames and dirs for the logfiles */
		System.out.println("Starting Database Selection...");

		long time = System.currentTimeMillis();

		DateFormat fullDate = new SimpleDateFormat("dd_MM_yyyy_HH_mm_ss");
		DateFormat day = new SimpleDateFormat("dd_MM_yyyy");
		Date date = new Date();
		String dateString = fullDate.format(date);
		String dateDir = day.format(date);

		System.out.println("Reading arguments ...");
		StringBuffer logBuf = new StringBuffer();
		if (!readArgs(args, logBuf)) {
			throw new Exception("Something wrong with the arguments.");
		}

		// make sure the stop criterion is allright
		SelectionFunction selFunc = new SelectionFunction();
		if (!selFunc.stopIsOkay(stopCriterion)) {
			System.out.println("Stop criterion format is wrong: " + stopCriterion);
			printUsage();
			throw new Exception("Stop criterion format is wrong: " + stopCriterion);
		}

		// make various dirs; mkdirs also creates missing parents of a user supplied selection dir
		File selectionDir = new File(selectionDirName);
		if (!selectionDir.exists())
			selectionDir.mkdirs();
		File dateDirFile = new File(selectionDirName + dateDir);
		if (!dateDirFile.exists())
			dateDirFile.mkdirs();

		// open log file
		String filename = selectionDirName + dateDir + "/selectionLog_" + dateString + ".txt";
		PrintWriter logOut;
		try {
			logOut = new PrintWriter(new BufferedWriter(new FileWriter(new File(filename))), true);
		} catch (Exception e) {
			e.printStackTrace();
			throw new Exception("Error opening logfile", e);
		}

		try {
			// print date and arguments to log file
			logOut.println("Date: " + dateString);
			logOut.println(logBuf.toString());

			wikiToDB = new DBHandler(locale);

			// Check if name of selectedSentencesTable has to be changed
			if (selectedSentencesTableName != null)
				wikiToDB.setSelectedSentencesTableName(selectedSentencesTableName);
			else
				// report the name the handler is using; the local variable is known to be null here
				System.out.println("Current selected sentences table name = " + wikiToDB.getSelectedSentencesTableName());

			if (!wikiToDB.createDBConnection(mysqlHost, mysqlDB, mysqlUser, mysqlPasswd)) {
				// connection did not succeed
				System.out.println("\nERROR: Problems with connection to the DB, please check the mysql parameters.");
				throw new Exception("ERROR: Problems with connection to the DB, please check the mysql parameters.");
			}

			try {
				/* Read in the feature definition */
				System.out.println("\nLoading feature definition...");
				BufferedReader uttFeats = null;
				try {
					uttFeats = new BufferedReader(new InputStreamReader(new FileInputStream(new File(featDefFileName)),
							"UTF-8"));
					featDef = new FeatureDefinition(uttFeats, false);
					System.out.println("TARGETFEATURES:" + featDef.getNumberOfFeatures() + " =  "
							+ featDef.getFeatureNames());
				} catch (Exception e) {
					e.printStackTrace();
					throw new Exception("Error opening featureDefinition file", e);
				} finally {
					if (uttFeats != null) {
						try {
							uttFeats.close();
						} catch (IOException ignored) {
							// nothing useful can be done if closing a reader fails
						}
					}
				}

				System.out.println("Getting a list of ids for all the sentences in the DB...");
				System.out.println("(if the number of sentences is large, this can take a while)");
				System.out.println();
				// the same restriction is used for every sentence query below
				String condition = null;
				if (considerOnlyReliableSentences) {
					condition = "reliable=true";
				}
				CoverageFeatureProvider cfp;
				if (holdVectorsInMemory) {
					/* Load the feature vectors from the database */
					System.out.println("Will also load feature vectors into memory (increase memory if this fails)");
					Pair<int[], byte[][]> pair = wikiToDB.getIdsAndFeatureVectors("dbselection", condition);
					int[] sentenceIDs = pair.getFirst();
					byte[][] vectorArray = pair.getSecond();
					cfp = new InMemoryCFProvider(vectorArray, sentenceIDs);
				} else {
					cfp = new DatabaseCFProvider(wikiToDB, condition);
				}

				/* Initialise the coverage definition */
				System.out.println("\nInitiating coverage...");
				CoverageDefinition covDef = new CoverageDefinition(featDef, cfp, covDefConfigFileName);

				// If the selectedSentencesTable is new, (does not exist) then a new table
				// will be created, the selected field in the dbselection table will be initialised to selected=false.
				// The sentences already marked in this db as unwanted=true will be kept.
				wikiToDB.createSelectedSentencesTable(stopCriterion, featDefFileName, covDefConfigFileName);
				// With the information provided by the user
				wikiToDB.setTableDescription(wikiToDB.getSelectedSentencesTableName(), tableDescription, stopCriterion,
						featDefFileName, covDefConfigFileName);

				long startTime = System.currentTimeMillis();
				File covSetFile = new File(initFileName);
				boolean readCovFromFile = covSetFile.exists();
				if (!readCovFromFile) {
					// coverage has to be initialised
					covDef.initialiseCoverage();
					System.out.println("\nWriting coverage to file " + initFileName);
					covDef.writeCoverageBin(initFileName);
				} else {
					int[] idSentenceList = wikiToDB.getIdListOfType("dbselection", condition);
					covDef.readCoverageBin(initFileName, idSentenceList);
				}

				/* add already selected sentences to cover */
				System.out.println("\nAdd to cover already selected sentences marked as unwanted=false.");
				selectedIdSents = new LinkedHashSet<Integer>();
				addSelectedSents(selectedSentencesTableName, covDef);

				/* remove unwanted sentences from basename list */
				System.out.println("\nRemoving selected sentences marked as unwanted=true.");
				unwantedIdSents = new LinkedHashSet<Integer>();
				removeUnwantedSentences(selectedSentencesTableName);

				long startDuration = System.currentTimeMillis() - startTime;
				if (verbose)
					System.out.println("Startup took " + startDuration + " milliseconds");
				logOut.println("Startup took " + startDuration + " milliseconds");

				/* print text corpus statistics */
				if (!readCovFromFile) {
					// only print if we did not read from file
					filename = selectionDirName + "textcorpus_distribution.txt";
					System.out.println("Printing text corpus statistics to " + filename + "...");
					PrintWriter out = null;
					try {
						out = new PrintWriter(new FileWriter(new File(filename)), true);
						covDef.printTextCorpusStatistics(out);
					} catch (Exception e) {
						e.printStackTrace();
						throw new Exception("Error printing statistics", e);
					} finally {
						// out is still null when the file could not be opened
						if (out != null)
							out.close();
					}
				}

				// print settings of the coverage definition to log file
				covDef.printSettings(logOut);

				/* Start the algorithm */
				System.out.println("\nSelecting sentences...");

				// If it is not already running (could happen when SynthesisScriptGUI is used)
				// Start builtin MARY TTS in order to get and save the transcription
				// of the selected sentences (selected_text_transcription.log)
				if (Mary.currentState() == Mary.STATE_OFF) {
					System.out.print("Starting builtin MARY TTS...");
					Mary.startup();
					System.out.println(" MARY TTS started.");
				}

				selFunc.select(selectedIdSents, unwantedIdSents, covDef, logOut, cfp, verbose, wikiToDB);

				/* Store list of selected files */
				filename = selectionDirName + dateDir + "/selectionResult_" + dateString + ".txt";
				storeResult(filename, selectedIdSents);

				/* print statistics */
				System.out.println("Printing selection distribution and table...");
				String disFile = selectionDirName + dateDir + "/selectionDistribution_" + dateString + ".txt";
				String devFile = selectionDirName + dateDir + "/selectionDevelopment_" + dateString + ".txt";
				try {
					covDef.printSelectionDistribution(disFile, devFile, logCovDevelopment);
				} catch (Exception e) {
					e.printStackTrace();
					throw new Exception("Error printing statistics", e);
				}

				if (overallLogFile != null) {
					// append results to end of overall log file
					PrintWriter overallLogOut = new PrintWriter(new OutputStreamWriter(new FileOutputStream(new File(
							overallLogFile), true), "UTF-8"), true);
					try {
						overallLogOut.println("*******************************\n" + "Results for " + dateString + ":");
						overallLogOut.println("number of basenames " + cfp.getNumSentences());
						overallLogOut.println("Stop criterion " + stopCriterion);
						covDef.printResultToLog(overallLogOut);
					} finally {
						overallLogOut.close();
					}
				}

				// print timing information
				long elapsedTime = System.currentTimeMillis() - time;
				double minutes = (double) elapsedTime / (double) 1000 / (double) 60;
				System.out.println("Selection took " + minutes + " minutes(" + elapsedTime + " milliseconds)");
				logOut.println("Selection took " + minutes + " minutes (" + elapsedTime + " milliseconds)");
				logOut.flush();
			} finally {
				// release the connection on success and on failure alike
				wikiToDB.closeDBConnection();
			}
			System.out.println("All done!");
		} finally {
			logOut.close();
		}

	}

	/**
	 * Read and check the command line arguments and store them in the static fields of this class.
	 * <p>
	 * All fields are first reset to their defaults. Options that take a value consume the following token
	 * whatever it looks like; <code>-stop</code> consumes every following token up to the next one starting
	 * with "-". Tokens that are not recognised are skipped silently. The mysql password is masked in the log
	 * buffer and on the console.
	 * <p>
	 * After parsing, defaults are filled in for the stop criterion, the selection directory, the init file, the
	 * overall log, the feature definition ([locale]_featureDefinition.txt in the current directory) and the
	 * coverage config (covDef.config in the current directory, copied from the class path if absent).
	 * 
	 * @param args
	 *            the arguments
	 * @param log
	 *            a StringBuffer for logging; one line is appended per recognised option
	 * @throws Exception
	 *             if the default covDef.config cannot be found on the class path or cannot be copied
	 * @return true if args can be parsed and all essential args are there, false otherwise (a message and the
	 *         usage have been printed in that case)
	 */
	private static boolean readArgs(String[] args, StringBuffer log) throws Exception {
		// initialise default values
		String currentDir = System.getProperty("user.dir");
		String maryBaseDir = System.getenv("MARY_BASE");
		System.out.println("Current directory: " + currentDir + "  MARY_BASE=" + maryBaseDir);

		locale = null;
		selectionDirName = null;
		initFileName = null;
		covDefConfigFileName = null;
		featDefFileName = null;
		overallLogFile = null;
		holdVectorsInMemory = true;
		verbose = false;
		logCovDevelopment = false;
		mysqlHost = null;
		mysqlDB = null;
		mysqlUser = null;
		mysqlPasswd = null;
		selectedSentencesTableName = null;
		tableDescription = "";
		// It is currently not possible to use unreliable sentences, so there is no -reliableOnly switch;
		// according to the original note the place where this can be influenced is the FeatureMaker.
		considerOnlyReliableSentences = true;
		stopCriterion = null;

		int i = 0;

		// loop over args; i always points at the option currently being handled
		while (args.length > i) {
			String option = args[i];
			if (option.equals("-locale")) {
				if (!hasValue(args, i, "No locale."))
					return false;
				locale = args[++i];
				recordOption(log, "locale : ", "  locale : ", locale);
			} else if (option.equals("-mysqlHost")) {
				if (!hasValue(args, i, "No mysqlHost."))
					return false;
				mysqlHost = args[++i];
				recordOption(log, "mysqlHost : ", "  mysqlHost : ", mysqlHost);
			} else if (option.equals("-mysqlDB")) {
				if (!hasValue(args, i, "No mysqlDB."))
					return false;
				mysqlDB = args[++i];
				recordOption(log, "mysqlDB : ", "  mysqlDB : ", mysqlDB);
			} else if (option.equals("-mysqlUser")) {
				if (!hasValue(args, i, "No mysqlUser."))
					return false;
				mysqlUser = args[++i];
				recordOption(log, "mysqlUser : ", "  mysqlUser : ", mysqlUser);
			} else if (option.equals("-mysqlPasswd")) {
				if (!hasValue(args, i, "No mysqlPasswd."))
					return false;
				mysqlPasswd = args[++i];
				// never write the password itself to the console or to the log file
				recordOption(log, "mysqlPasswd : ", "  mysqlPasswd : ", "********");
			} else if (option.equals("-featDef")) {
				if (!hasValue(args, i, "No featDef file"))
					return false;
				featDefFileName = args[++i];
				recordOption(log, "FeatDefFileName : ", "  FeatDefFileName : ", featDefFileName);
			} else if (option.equals("-initFile")) {
				if (!hasValue(args, i, "No initFile"))
					return false;
				initFileName = args[++i];
				recordOption(log, "initFile : ", "  initFile : ", initFileName);
			} else if (option.equals("-tableName")) {
				if (!hasValue(args, i, "No selectedSentencesTable name"))
					return false;
				selectedSentencesTableName = args[++i];
				recordOption(log, "selectedSentencesTable name : ", "  selectedSentencesTable name: ",
						selectedSentencesTableName);
			} else if (option.equals("-tableDescription")) {
				if (!hasValue(args, i, "No tableDescription"))
					return false;
				tableDescription = args[++i];
				recordOption(log, "tableDescription : ", "  tableDescription: ", tableDescription);
			} else if (option.equals("-vectorsOnDisk")) {
				holdVectorsInMemory = false;
				log.append("vectorsOnDisk\n");
				System.out.println("  vectorsOnDisk");
			} else if (option.equals("-verbose")) {
				verbose = true;
				log.append("verbose\n");
				System.out.println("  verbose");
			} else if (option.equals("-logCoverageDevelopment")) {
				logCovDevelopment = true;
				log.append("logCoverageDevelopment\n");
				System.out.println("  logCoverageDevelopment");
			} else if (option.equals("-selectionDir")) {
				if (!hasValue(args, i, "No selectionDir"))
					return false;
				String dir = args[++i];
				if (dir.length() == 0) {
					// an empty name is as good as none
					System.out.println("No selectionDir");
					printUsage();
					return false;
				}
				// make sure we have a slash at the end, whatever the last character of the name is
				if (dir.endsWith("/") || dir.endsWith(File.separator)) {
					selectionDirName = dir;
				} else {
					selectionDirName = dir + "/";
				}
				recordOption(log, "selectionDir : ", "  selectionDir : ", dir);
			} else if (option.equals("-coverageConfig")) {
				if (!hasValue(args, i, "No coverageConfig"))
					return false;
				covDefConfigFileName = args[++i];
				recordOption(log, "coverageConfig : ", "  coverageConfig : ", covDefConfigFileName);
			} else if (option.equals("-stop")) {
				// the stop criterion is made of all tokens up to the next option
				StringBuilder tmp = new StringBuilder();
				i++;
				while (args.length > i) {
					if (args[i].startsWith("-"))
						break;
					tmp.append(args[i] + " ");
					i++;
				}
				stopCriterion = tmp.toString();
				recordOption(log, "stop criterion : ", "  stop criterion : ", stopCriterion);
				// i already points at the next option
				continue;
			} else if (option.equals("-overallLog")) {
				if (!hasValue(args, i, "No overall log file"))
					return false;
				overallLogFile = args[++i];
				recordOption(log, "overallLogFile : ", "  overallLogFile : ", overallLogFile);
			}
			// anything else is skipped
			i++;
		}
		System.out.println();

		// check the essential values themselves: counting occurrences would let a repeated option
		// hide a missing one
		if (locale == null || mysqlHost == null || mysqlDB == null || mysqlUser == null || mysqlPasswd == null
				|| selectedSentencesTableName == null) {
			// not all essential arguments were given
			System.out.println("You must at least specify locale, mysql (host,user,paswd,DB), selectedSentencesTableName");
			printUsage();
			return false;
		}

		if (stopCriterion == null) {
			stopCriterion = "numSentences 90 simpleDiphones simpleProsody";
		}

		if (selectionDirName == null) {
			selectionDirName = currentDir + "/selection/";
		}
		if (initFileName == null) {
			initFileName = currentDir + "/init.bin";
		}
		if (overallLogFile == null) {
			overallLogFile = currentDir + "/overallLog.txt";
		}

		if (featDefFileName == null) {
			// check first if there exists one in the current directory
			// if not ask the user to provide one, it should have been automatically generated by the FeatureMaker in previous
			// step
			// See: http://mary.opendfki.de/wiki/NewLanguageSupport step 5
			System.out.println("Checking if there is [locale]_featureDefinition.txt in the current directory");
			String candidate = currentDir + "/" + locale + "_featureDefinition.txt";
			if (new File(candidate).exists()) {
				System.out.println("Using " + locale + "_featureDefinition.txt in current directory.");
				featDefFileName = candidate;
			} else {
				System.out
						.println("Please provide a [locale]_featureDefinition.txt, it should have been generated by the FeatureMaker. \n"
								+ " See: http://mary.opendfki.de/wiki/NewLanguageSupport step 5.");
				// without a feature definition the selection cannot run
				return false;
			}
		}

		if (covDefConfigFileName == null) {
			// check if there is already a covDef.config file in the current directory
			// if not then copy the default covDef.config from jar archive resource (marytts/tools/dbselection/covDef.config)
			System.out.println("\nChecking if there is already a covDef.config in the current directory");
			File covDef = new File(currentDir + "/covDef.config");
			if (covDef.exists())
				System.out.println("Using covDef.config in current directory.");
			else {
				System.out.println("Copying default covDef.config file from archive");
				java.io.InputStream defaultConfig = DatabaseSelector.class.getResourceAsStream("covDef.config");
				if (defaultConfig == null) {
					// getResourceAsStream returns null for a missing resource
					throw new IOException("Default covDef.config not found on the class path; "
							+ "please provide one with -coverageConfig");
				}
				FileUtils.copyInputStreamToFile(defaultConfig, covDef);
			}
			covDefConfigFileName = currentDir + "/covDef.config";
			System.out.println("covDefConfigFileName = " + covDefConfigFileName);

		}

		return true;
	}

	/**
	 * Check that the option at optionIndex is followed by a further token. If it is not, print the given message
	 * and the usage.
	 * 
	 * @param args
	 *            the arguments
	 * @param optionIndex
	 *            index of the option within args
	 * @param missingMessage
	 *            message to print when the value is missing
	 * @return true if args contains a token after optionIndex
	 */
	private static boolean hasValue(String[] args, int optionIndex, String missingMessage) {
		if (args.length > optionIndex + 1) {
			return true;
		}
		System.out.println(missingMessage);
		printUsage();
		return false;
	}

	/**
	 * Append "logLabel + value" as one line to the log buffer and print "consoleLabel + value" to standard out.
	 * 
	 * @param log
	 *            the log buffer
	 * @param logLabel
	 *            text placed before the value in the log buffer
	 * @param consoleLabel
	 *            text placed before the value on the console
	 * @param value
	 *            the value to report
	 */
	private static void recordOption(StringBuffer log, String logLabel, String consoleLabel, String value) {
		log.append(logLabel + value + "\n");
		System.out.println(consoleLabel + value);
	}

	/**
	 * Print usage of main method to standard out.
	 * <p>
	 * Only options that readArgs actually parses are listed.
	 */
	private static void printUsage() {

		System.out
				.println("\nUsage: java DatabaseSelector -locale language -mysqlHost host -mysqlUser user -mysqlPasswd passwd -mysqlDB wikiDB\n"
						+ "-tableName selectedSentencesTableName \n"
						+ " [-stop stopCriterion]\n"
						+ " [-featDef file -coverageConfig file]\n"
						+ " [-initFile file]\n"
						+ " [-tableDescription a brief description of the table ]\n"
						+ " [-vectorsOnDisk -overallLog file -selectionDir dir -logCoverageDevelopment -verbose]\n"
						+ " Arguments:\n"
						+ " -tableName selectedSentencesTableName : The name of a new selection set, change this name when\n"
						+ "     generating several selection sets. FINAL name will be: \"locale_name_selectedSenteces\". \n"
						+ "     where name is the name provided for the selected sentences table.\n"
						+ " -tableDescription : short description of the selected sentences table.\n"
						+ "     Default: empty\n"
						+ " -featDef file : The feature definition for the features\n"
						+ "     Default: [locale]_featureDefinition.txt for example for US English: en_US_featureDefinition.txt\n"
						+ "             this file is automatically created in previous steps by the FeatureMaker.\n"
						+ " -stop stopCriterion : which stop criterion to use. There are five stop criteria. \n"
						+ "     They can be used individually or can be combined:\n"
						+ "     - numSentences n : selection stops after n sentences\n"
						+ "     - simpleDiphones : selection stops when simple diphone coverage has reached maximum\n"
						+ "     - simpleProsody : selection stops when simple prosody coverage has reached maximum\n"
						+ "     Default: \"numSentences 90 simpleDiphones simpleProsody\"\n"
						+ " -coverageConfig file : The config file for the coverage definition. \n"
						+ "     Default: there is a default coverage config file in MARY_BASE/resources/marytts/tools/dbselection/covDef.config\n"
						+ "              this file will be copied to the current directory if no file is provided.\n"
						+ " -initFile file : The file containing the coverage data needed to initialise the algorithm.\n"
						+ "     Default: /current_dir/init.bin\n"
						+ " -overallLog file : Log file for all runs of the program: date, settings and results of the current\n"
						+ "     run are appended to the end of the file. This file is needed if you want to analyse your results \n"
						+ "     with the ResultAnalyser later.\n"
						+ " -selectionDir dir : the directory where all selection data is stored.\n"
						+ "     Default: /current_dir/selection\n"
						+ " -vectorsOnDisk: if this option is given, the feature vectors are not loaded into memory during\n"
						+ "     the run of the program. This notably slows down the run of the program!\n"
						+ "     Default: no vectorsOnDisk\n"
						+ " -logCoverageDevelopment : If this option is given, the coverage development over time is stored.\n"
						+ "     Default: no logCoverageDevelopment\n"
						+ " -verbose : If this option is given, there will be more output on the command line during the run of the program.\n"
						+ "     Default: no verbose\n");

	}

	/**
	 * Manual selection of wanted/unwanted selected sentences.
	 * <p>
	 * Every sentence that is currently selected and not unwanted is shown on the console. Answering "n" marks
	 * the sentence as unwanted in the database and writes it to ./unwanted.log; answering "y" writes it to
	 * ./selected.log and, together with its transcription, to ./selected_text_transcription.log. Any other
	 * answer, or the end of the input, finishes the check.
	 * <p>
	 * The three log files are always closed, so what has been written so far is kept however the check ends.
	 * Errors are reported on standard out and not passed on to the caller.
	 */
	private static void checkSelectedSentences() {
		BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
		PrintWriter selectedLog = null;
		PrintWriter selected_tra_Log = null;
		PrintWriter unwantedLog = null;

		try {
			System.out.println("\nChecking selected sentences whether they are wanted or not.");
			System.out.println(" selected sentences will be saved in ./selected.log");
			selectedLog = new PrintWriter(new FileWriter(new File("./selected.log")));

			System.out.println(" selected sentences and transcriptions will be saved in  ./selected_text_transcription.log");
			selected_tra_Log = new PrintWriter(new FileWriter(new File("./selected_text_transcription.log")));

			System.out.println(" unwanted sentences will be saved in ./unwanted.log");
			unwantedLog = new PrintWriter(new FileWriter(new File("./unwanted.log")));

			int sel[] = wikiToDB.getIdListOfType("dbselection", "selected=true and unwanted=false");

			if (sel != null) {
				// checking selected sentences
				System.out
						.println(" Select \"y\" for marking sentence as \"wanted\" otherwise \"n\" . Press any other key to finish: \n");
				for (int i = 0; i < sel.length; i++) {
					String str = wikiToDB.getSelectedSentence(wikiToDB.getSelectedSentencesTableName(), sel[i]);
					System.out.print("id=" + sel[i] + ":  " + str + "\n  Wanted?(y/n):");

					// readLine returns null at the end of the input; treat that like "finish"
					String s = br.readLine();
					if (s == null) {
						break;
					}
					if (s.contentEquals("n")) {
						wikiToDB.setSentenceRecord(sel[i], "unwanted", true);
						unwantedLog.println(sel[i] + " " + str);
					} else if (s.contentEquals("y")) {
						selectedLog.println(sel[i] + " " + str);

						selected_tra_Log.println(sel[i] + " " + str);
						selected_tra_Log.println(sel[i] + " <" + SelectionFunction.transcribe(str, locale) + ">");
					} else {
						break;
					}
				}
			} else
				System.out.println("There is no selected sentences in the DB.");

		} catch (Exception e) {
			System.out.println(e);
		} finally {
			// the writers are buffered: without closing them the logs would stay empty
			if (unwantedLog != null)
				unwantedLog.close();
			if (selectedLog != null)
				selectedLog.close();
			if (selected_tra_Log != null)
				selected_tra_Log.close();
		}

	}

	/**
	 * Add a list of sentences to the cover Here the already selected sentences are added to the cover and the indexes removed (or
	 * set to -1) in the idSentenceList
	 * 
	 * @param tableName
	 *            tableName
	 * @param covDef
	 *            the cover
	 * @throws Exception
	 *             Exception
	 */
	private static void addSelectedSents(String tableName, CoverageDefinition covDef) throws Exception {

		if (verbose)
			System.out.println("\nAdding previously selected sentences ...");
		int idSentenceListSelected[] = wikiToDB.getIdListOfSelectedSentences(wikiToDB.getSelectedSentencesTableName(),
				"unwanted=false");
		int id;
		byte[] vectorBuf;
		if (idSentenceListSelected != null) {
			for (int i = 0; i < idSentenceListSelected.length; i++) {
				id = idSentenceListSelected[i];
				vectorBuf = wikiToDB.getFeatures(id);

				// fill the cover set with the sentence
				covDef.updateCover(vectorBuf);

				// add the filename to the sentence list
				selectedIdSents.add((Integer) id);

			}

			/*
			 * int numSelectedSents = selectedIdSents.size(); int numRemovedSents = 0;
			 * 
			 * //loop over basename array // No need to mark id negative for (int i=0;i selected) {

		PrintWriter out;
		try {
			out = new PrintWriter(new FileWriter(new File(filename)));
		} catch (Exception e) {
			e.printStackTrace();
			throw new Error("Error storing result");
		}
		for (int sel : selected) {
			out.println(sel);
		}
		out.flush();
		out.close();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy