All Downloads are FREE. The search and download features use the official Maven repository.

org.until.keyword.dictionary.DartsDictionaryStandardization Maven / Gradle / Ivy

There is a newer version: 2.0.3
Show newest version
package org.until.keyword.dictionary;

import org.until.keyword.criterion.CharStandardization;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;


/**
 * @author mega
 */
public class DartsDictionaryStandardization {

	public static void main(String[] args) {
		if (args == null)
			return;
		try {
			Pattern p = Pattern.compile("\\\\u[0-9abcdefABCDEF]{4}");
			for (String filename : args) {
				BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF8"));
				String phrase;
				Set phrases = new TreeSet();
				Set unicodePhrases = new TreeSet();
				Set ignorePhrases = new TreeSet();
				while ((phrase = in.readLine()) != null) {
					phrase = CharStandardization.compositeTextConvert(phrase, false, true, true, false, false, true,
							false);
					if (phrase.length() == 0)
						continue;
					if (phrase.startsWith("#")) {
						ignorePhrases.add(phrase.trim());
					} else if (p.matcher(phrase).matches()) {
						unicodePhrases.add(phrase);
					} else {
						phrases.add(phrase.trim());
					}
				}
				in.close();
				BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF8"));
				for (String s : unicodePhrases) {
					out.write(s);
					out.write("\r\n");
				}
				for (String s : phrases) {
					out.write(s);
					out.write("\r\n");
				}
				for (String s : ignorePhrases) {
					out.write(s);
					out.write("\r\n");
				}
				out.flush();
				out.close();
				System.out.println(filename + " dictionary standardize successful");
			}
		} catch (Exception e) {
			e.printStackTrace();
			System.exit(1);
		}
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy