// com.expleague.ml.cli.modes.impl.CreateDictionary Maven / Gradle / Ivy
package com.expleague.ml.cli.modes.impl;
import com.expleague.commons.io.StreamTools;
import com.expleague.commons.io.codec.seq.DictExpansion;
import com.expleague.commons.io.codec.seq.DynamicDictionary;
import com.expleague.commons.seq.CharSeq;
import com.expleague.commons.seq.CharSeqTools;
import com.expleague.ml.cli.modes.AbstractMode;
import org.apache.commons.cli.CommandLine;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
/**
 * Experts League
 * Created by solar on 11.09.17.
 *
 * <p>CLI mode that reads text from standard input (decoded with {@code StreamTools.UTF}) and
 * feeds it to a {@link DictExpansion}. The first input line seeds the initial
 * {@link DynamicDictionary}; every following line is passed to the expansion. The resulting
 * dictionary is printed to the file given by option {@code o} (default {@code output.dict}).
 *
 * <p>Option {@code n} (default {@code 1000}) is passed as the second {@code DictExpansion}
 * constructor argument (presumably the target dictionary size; confirm against
 * {@code DictExpansion}).
 */
public class CreateDictionary extends AbstractMode {
  /** Number of lines fed to the expansion so far; the first (seed) line is not counted. */
  int linesCount = 0;

  /**
   * Builds the dictionary from standard input and writes it to the output file.
   *
   * @param command parsed command line; options {@code n} and {@code o} are read from it
   * @throws IllegalArgumentException if standard input is empty, so there is no first line
   * @throws Exception on I/O failures or a non-numeric {@code n} option
   */
  @Override
  public void run(CommandLine command) throws Exception {
    final InputStreamReader reader = new InputStreamReader(System.in, StreamTools.UTF);
    final LineNumberReader lnr = new LineNumberReader(reader);
    final String alphaStr = lnr.readLine();
    if (alphaStr == null) {
      throw new IllegalArgumentException(
          "Standard input is empty: the first line must contain the initial alphabet");
    }
    final DictExpansion result = new DictExpansion(
        new DynamicDictionary<>(CharSeq.create(alphaStr)),
        Integer.parseInt(command.getOptionValue('n', "1000"))
    );
    // Keep reading through lnr: it is buffered, so characters it has already pulled from
    // the underlying reader while fetching the first line would be lost if the rest of the
    // input were read from `reader` directly.
    CharSeqTools.lines(lnr, false)
        .forEach(line -> {
          linesCount++;
          result.accept(line);
          if (linesCount % 10000 == 0) {
            // NOTE(review): when option 'o' is given, snapshots are written to the same
            // file as the final output; only the default names differ.
            dumpProgress(result, command.getOptionValue('o', "output.dict.temp"));
          }
        });
    // try-with-resources guarantees the dictionary is flushed to disk and the handle released.
    try (FileWriter writer = new FileWriter(command.getOptionValue('o', "output.dict"))) {
      result.print(writer);
    }
  }

  /**
   * Overwrites {@code fileName} with a progress header and the current dictionary state.
   * I/O failures are rethrown unchecked because this runs inside a stream lambda.
   */
  private void dumpProgress(DictExpansion result, String fileName) {
    try (FileWriter writer = new FileWriter(fileName)) {
      writer.append("After ").append(Integer.toString(linesCount)).append(" lines\n");
      result.print(writer);
    }
    catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy