// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it as often as you want.
package edu.stanford.nlp.sequences;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;
import java.util.StringTokenizer;
import lv.semti.morphology.attributes.AttributeNames;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.optimization.StochasticCalculateMethods;
import edu.stanford.nlp.process.WordShapeClassifier;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.ReflectionLoading;
/**
* Flags for sequence classifiers. Documentation for general flags and flags for
* NER can be found in the Javadoc of
* {@link edu.stanford.nlp.ie.NERFeatureFactory}. Documentation for the flags
* for Chinese word segmentation can be found in the Javadoc of
* {@link edu.stanford.nlp.wordseg.ChineseSegmenterFeatureFactory}.
*
*
* IMPORTANT NOTE IF CHANGING THIS FILE: MAKE SURE TO ONLY ADD NEW
* VARIABLES AT THE END OF THE LIST OF VARIABLES (and not to change existing
* variables)! Otherwise you usually break all currently serialized
* classifiers!!! Search for "ADD VARIABLES ABOVE HERE" below.
*
* Some general flags are described here:
*
* <table border="1">
* <tr>
* <td><b>Property Name</b></td>
* <td><b>Type</b></td>
* <td><b>Default Value</b></td>
* <td><b>Description</b></td>
* </tr>
* <tr>
* <td>useQN</td>
* <td>boolean</td>
* <td>true</td>
* <td>Use Quasi-Newton (L-BFGS) to find minimum. NOTE: Need to set this to
* false if using other minimizers such as SGD.</td>
* </tr>
* <tr>
* <td>QNsize</td>
* <td>int</td>
* <td>25</td>
* <td>Number of previous iterations of Quasi-Newton to store (this increases
* memory use, but speeds convergence by letting the Quasi-Newton optimization
* more effectively approximate the second derivative).</td>
* </tr>
* <tr>
* <td>QNsize2</td>
* <td>int</td>
* <td>25</td>
* <td>Number of previous iterations of Quasi-Newton to store (used when pruning
* features, after the first iteration - the first iteration is with QNsize).</td>
* </tr>
* <tr>
* <td>useInPlaceSGD</td>
* <td>boolean</td>
* <td>false</td>
* <td>Use SGD (tweaking weights in place) to find minimum (more efficient than
* the old SGD, faster to converge than Quasi-Newton if there are a very large
* number of samples). Implemented for CRFClassifier. NOTE: Remember to set
* useQN to false.</td>
* </tr>
* <tr>
* <td>tuneSampleSize</td>
* <td>int</td>
* <td>-1</td>
* <td>If this number is greater than 0, specifies the number of samples to use
* for tuning (default is 1000).</td>
* </tr>
* <tr>
* <td>SGDPasses</td>
* <td>int</td>
* <td>-1</td>
* <td>If this number is greater than 0, specifies the number of SGD passes
* (over the entire training set) to do before giving up (default is 50). Can
* be smaller if sample size is very large.</td>
* </tr>
* <tr>
* <td>useSGD</td>
* <td>boolean</td>
* <td>false</td>
* <td>Use SGD to find minimum (can be slow). NOTE: Remember to set useQN to
* false.</td>
* </tr>
* <tr>
* <td>useSGDtoQN</td>
* <td>boolean</td>
* <td>false</td>
* <td>Use SGD (SGD version selected by useInPlaceSGD or useSGD) for a certain
* number of passes (SGDPasses) and then switches to QN. Gives the quick initial
* convergence of SGD, with the desired convergence criterion of QN (there is
* some rampup time for QN). NOTE: Remember to set useQN to false.</td>
* </tr>
* <tr>
* <td>evaluateIters</td>
* <td>int</td>
* <td>0</td>
* <td>If this number is greater than 0, evaluates on the test set every so
* often while minimizing. Implemented for CRFClassifier.</td>
* </tr>
* <tr>
* <td>evalCmd</td>
* <td>String</td>
* <td></td>
* <td>If specified (and evaluateIters is set), runs the specified cmdline
* command during evaluation (instead of default CONLL-like NER evaluation).</td>
* </tr>
* <tr>
* <td>evaluateTrain</td>
* <td>boolean</td>
* <td>false</td>
* <td>If specified (and evaluateIters is set), also evaluate on training set
* (can be expensive).</td>
* </tr>
* <tr>
* <td>tokenizerOptions</td>
* <td>String</td>
* <td>(null)</td>
* <td>Extra options to supply to the tokenizer when creating it.</td>
* </tr>
* </table>
*
* @author Jenny Finkel
*/
public class SeqClassifierFlags implements Serializable {
private static final long serialVersionUID = -7076671761070232567L;
public static final String DEFAULT_BACKGROUND_SYMBOL = "O";
private String stringRep = "";
/**
* AZ
*/
public boolean useMorphologyFeatures = false;
public boolean useMorphoCase = false;
public boolean useMorphoPOS = false;
public boolean useMorphoLetaLemma = false;
public boolean useMorphoNumber = false;
public boolean useNGrams = false;
public boolean conjoinShapeNGrams = false;
public boolean lowercaseNGrams = false;
public boolean dehyphenateNGrams = false;
public boolean usePrev = false;
public boolean useNext = false;
public boolean useTags = false;
public boolean useWordPairs = false;
public boolean useGazettes = false;
public boolean useSequences = true;
public boolean usePrevSequences = false;
public boolean useNextSequences = false;
public boolean useLongSequences = false;
public boolean useBoundarySequences = false;
public boolean useTaggySequences = false;
public boolean useExtraTaggySequences = false;
public boolean dontExtendTaggy = false;
public boolean useTaggySequencesShapeInteraction = false;
public boolean strictlyZeroethOrder = false;
public boolean strictlyFirstOrder = false;
public boolean strictlySecondOrder = false;
public boolean strictlyThirdOrder = false;
public String entitySubclassification = "IO";
public boolean retainEntitySubclassification = false;
public boolean useGazettePhrases = false;
public boolean makeConsistent = false;
public boolean useWordLabelCounts = false;
// boolean usePrevInstanceLabel = false;
// boolean useNextInstanceLabel = false;
public boolean useViterbi = true;
public int[] binnedLengths = null;
public boolean verboseMode = false;
public boolean useSum = false;
public double tolerance = 1e-4;
// Turned on if non-null. Becomes part of the filename features are printed to
public String printFeatures = null;
public boolean useSymTags = false;
/**
* useSymWordPairs Has a small negative effect.
*/
public boolean useSymWordPairs = false;
public String printClassifier = "WeightHistogram";
public int printClassifierParam = 100;
public boolean intern = false;
public boolean intern2 = false;
public boolean selfTest = false;
public boolean sloppyGazette = false;
public boolean cleanGazette = false;
public boolean noMidNGrams = false;
public int maxNGramLeng = -1;
public boolean useReverse = false;
public boolean greekifyNGrams = false;
public boolean useParenMatching = false;
public boolean useLemmas = false;
public boolean usePrevNextLemmas = false;
public boolean normalizeTerms = false;
public boolean normalizeTimex = false;
public boolean useNB = false;
public boolean useQN = true;
public boolean useFloat = false;
public int QNsize = 25;
public int QNsize2 = 25;
public int maxIterations = -1;
public int wordShape = WordShapeClassifier.NOWORDSHAPE;
public boolean useShapeStrings = false;
public boolean useTypeSeqs = false;
public boolean useTypeSeqs2 = false;
public boolean useTypeSeqs3 = false;
public boolean useDisjunctive = false;
public int disjunctionWidth = 4;
public boolean useDisjunctiveShapeInteraction = false;
public boolean useDisjShape = false;
public boolean useWord = true; // ON by default
public boolean useClassFeature = false;
public boolean useShapeConjunctions = false;
public boolean useWordTag = false;
public boolean useNPHead = false;
public boolean useNPGovernor = false;
public boolean useHeadGov = false;
public boolean useLastRealWord = false;
public boolean useNextRealWord = false;
public boolean useOccurrencePatterns = false;
public boolean useTypeySequences = false;
public boolean justify = false;
public boolean normalize = false;
public String priorType = "QUADRATIC";
public double sigma = 1.0;
public double epsilon = 0.01;
public int beamSize = 30;
public int maxLeft = 2;
public int maxRight = 0;
public boolean usePosition = false;
public boolean useBeginSent = false;
public boolean useGazFeatures = false;
public boolean useMoreGazFeatures = false;
public boolean useAbbr = false;
public boolean useMinimalAbbr = false;
public boolean useAbbr1 = false;
public boolean useMinimalAbbr1 = false;
public boolean useMoreAbbr = false;
public boolean deleteBlankLines = false;
public boolean useGENIA = false;
public boolean useTOK = false;
public boolean useABSTR = false;
public boolean useABSTRFreqDict = false;
public boolean useABSTRFreq = false;
public boolean useFREQ = false;
public boolean useABGENE = false;
public boolean useWEB = false;
public boolean useWEBFreqDict = false;
public boolean useIsURL = false;
public boolean useURLSequences = false;
public boolean useIsDateRange = false;
public boolean useEntityTypes = false;
public boolean useEntityTypeSequences = false;
public boolean useEntityRule = false;
public boolean useOrdinal = false;
public boolean useACR = false;
public boolean useANTE = false;
public boolean useMoreTags = false;
public boolean useChunks = false;
public boolean useChunkySequences = false;
public boolean usePrevVB = false;
public boolean useNextVB = false;
public boolean useVB = false;
public boolean subCWGaz = false;
public String documentReader = "ColumnDocumentReader"; // TODO OBSOLETE:
// delete when breaking
// serialization
// sometime.
// public String trainMap = "word=0,tag=1,answer=2";
// public String testMap = "word=0,tag=1,answer=2";
public String map = "word=0,tag=1,answer=2";
public boolean useWideDisjunctive = false;
public int wideDisjunctionWidth = 10;
// chinese word-segmenter features
public boolean useRadical = false;
public boolean useBigramInTwoClique = false;
public String morphFeatureFile = null;
public boolean useReverseAffix = false;
public int charHalfWindow = 3;
public boolean useWord1 = false;
public boolean useWord2 = false;
public boolean useWord3 = false;
public boolean useWord4 = false;
public boolean useRad1 = false;
public boolean useRad2 = false;
public boolean useWordn = false;
public boolean useCTBPre1 = false;
public boolean useCTBSuf1 = false;
public boolean useASBCPre1 = false;
public boolean useASBCSuf1 = false;
public boolean usePKPre1 = false;
public boolean usePKSuf1 = false;
public boolean useHKPre1 = false;
public boolean useHKSuf1 = false;
public boolean useCTBChar2 = false;
public boolean useASBCChar2 = false;
public boolean useHKChar2 = false;
public boolean usePKChar2 = false;
public boolean useRule2 = false;
public boolean useDict2 = false;
public boolean useOutDict2 = false;
public String outDict2 = "/u/htseng/scr/chunking/segmentation/out.lexicon";
public boolean useDictleng = false;
public boolean useDictCTB2 = false;
public boolean useDictASBC2 = false;
public boolean useDictPK2 = false;
public boolean useDictHK2 = false;
public boolean useBig5 = false;
public boolean useNegDict2 = false;
public boolean useNegDict3 = false;
public boolean useNegDict4 = false;
public boolean useNegCTBDict2 = false;
public boolean useNegCTBDict3 = false;
public boolean useNegCTBDict4 = false;
public boolean useNegASBCDict2 = false;
public boolean useNegASBCDict3 = false;
public boolean useNegASBCDict4 = false;
public boolean useNegHKDict2 = false;
public boolean useNegHKDict3 = false;
public boolean useNegHKDict4 = false;
public boolean useNegPKDict2 = false;
public boolean useNegPKDict3 = false;
public boolean useNegPKDict4 = false;
public boolean usePre = false;
public boolean useSuf = false;
public boolean useRule = false;
public boolean useHk = false;
public boolean useMsr = false;
public boolean useMSRChar2 = false;
public boolean usePk = false;
public boolean useAs = false;
public boolean useFilter = false; // TODO this flag is used for nothing;
// delete when breaking serialization
public boolean largeChSegFile = false; // TODO this flag is used for nothing;
// delete when breaking serialization
public boolean useRad2b = false;
/**
* Keep the whitespace between English words in testFile when printing out
* answers. Doesn't really change the content of the CoreLabels. (For Chinese
* segmentation.)
*/
public boolean keepEnglishWhitespaces = false;
/**
* Keep all the whitespace words in testFile when printing out answers.
* Doesn't really change the content of the CoreLabels. (For Chinese
* segmentation.)
*/
public boolean keepAllWhitespaces = false;
public boolean sighanPostProcessing = false;
/**
* use POS information (an "open" feature for Chinese segmentation)
*/
public boolean useChPos = false;
// CTBSegDocumentReader normalization table
// A value of null means that a default algorithmic normalization
// is done in which ASCII characters get mapped to their fullwidth
// equivalents in the Unihan range
public String normalizationTable; // = null;
public String dictionary; // = null;
public String serializedDictionary; // = null;
public String dictionary2; // = null;
public String normTableEncoding = "GB18030";
/**
* For the Sighan bakeoff 2005: the path to the dictionary of bigrams that
* appeared in the corpus.
*/
public String sighanCorporaDict = "/u/nlp/data/chinese-segmenter/";
// end Sighan 2005 chinese word-segmenter features/properties
public boolean useWordShapeGaz = false;
public String wordShapeGaz = null;
// TODO: This should maybe be removed in favor of suppressing splitting when
// maxDocLength <= 0, when next breaking serialization
public boolean splitDocuments = true;
public boolean printXML = false;
public boolean useSeenFeaturesOnly = false;
public String lastNameList = "/u/nlp/data/dist.all.last";
public String maleNameList = "/u/nlp/data/dist.male.first";
public String femaleNameList = "/u/nlp/data/dist.female.first";
// don't want these serialized
public transient String trainFile = null;
/** NER adaptation (Gaussian prior) parameters. */
public transient String adaptFile = null;
public transient String devFile = null;
public transient String testFile = null;
public transient String textFile = null;
public transient boolean readStdin = false;
public transient String outputFile = null;
public transient String loadClassifier = null;
public transient String loadTextClassifier = null;
public transient String loadJarClassifier = null;
public transient String loadAuxClassifier = null;
public transient String serializeTo = null;
public transient String serializeToText = null;
public transient int interimOutputFreq = 0;
public transient String initialWeights = null;
public transient List gazettes = new ArrayList();
public transient String selfTrainFile = null;
public String inputEncoding = "UTF-8"; // used for CTBSegDocumentReader as
// well
public boolean bioSubmitOutput = false;
public int numRuns = 1;
public String answerFile = null;
public String altAnswerFile = null;
public String dropGaz;
public String printGazFeatures = null;
public int numStartLayers = 1;
public boolean dump = false;
public boolean mergeTags; // whether to merge B- and I- tags
public boolean splitOnHead;
// threshold
public int featureCountThreshold = 0;
public double featureWeightThreshold = 0.0;
// feature factory
public String featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory";
public Object[] featureFactoryArgs = new Object[0];
public String backgroundSymbol = DEFAULT_BACKGROUND_SYMBOL;
// use
public boolean useObservedSequencesOnly = false;
public int maxDocSize = 0;
public boolean printProbs = false;
public boolean printFirstOrderProbs = false;
public boolean saveFeatureIndexToDisk = false;
public boolean removeBackgroundSingletonFeatures = false;
public boolean doGibbs = false;
public int numSamples = 100;
public boolean useNERPrior = false;
public boolean useAcqPrior = false;
/**
* If true and doGibbs also true, will do generic Gibbs inference without any
* priors
*/
public boolean useUniformPrior = false;
public boolean useMUCFeatures = false;
public double annealingRate = 0.0;
public String annealingType = null;
public String loadProcessedData = null;
public boolean initViterbi = true;
public boolean useUnknown = false;
public boolean checkNameList = false;
public boolean useSemPrior = false;
public boolean useFirstWord = false;
public boolean useNumberFeature = false;
public int ocrFold = 0;
public transient boolean ocrTrain = false;
public String classifierType = "MaxEnt";
public String svmModelFile = null;
public String inferenceType = "Viterbi";
public boolean useLemmaAsWord = false;
public String type = "cmm";
public String readerAndWriter = "edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter";
public List comboProps = new ArrayList();
public boolean usePrediction = false;
public boolean useAltGazFeatures = false;
public String gazFilesFile = null;
public boolean usePrediction2 = false;
public String baseTrainDir = ".";
public String baseTestDir = ".";
public String trainFiles = null;
public String trainFileList = null;
public String testFiles = null;
public String trainDirs = null; // cdm 2009: this is currently unsupported,
// but one user wanted something like this....
public String testDirs = null;
public boolean useOnlySeenWeights = false;
public String predProp = null;
public CoreLabel pad = new CoreLabel();
public boolean useObservedFeaturesOnly = false;
public String distSimLexicon = null;
public boolean useDistSim = false;
public int removeTopN = 0;
public int numTimesRemoveTopN = 1;
public double randomizedRatio = 1.0;
public double removeTopNPercent = 0.0;
public int purgeFeatures = -1;
public boolean booleanFeatures = false;
public boolean iobWrapper = false;
public boolean iobTags = false;
public boolean useSegmentation = false; /*
* binary segmentation feature for
* character-based Chinese NER
*/
public boolean memoryThrift = false;
public boolean timitDatum = false;
public String serializeDatasetsDir = null;
public String loadDatasetsDir = null;
public String pushDir = null;
public boolean purgeDatasets = false;
public boolean keepOBInMemory = true;
public boolean fakeDataset = false;
public boolean restrictTransitionsTimit = false;
public int numDatasetsPerFile = 1;
public boolean useTitle = false;
// these are for the old stuff
public boolean lowerNewgeneThreshold = false;
public boolean useEitherSideWord = false;
public boolean useEitherSideDisjunctive = false;
public boolean twoStage = false;
public String crfType = "MaxEnt";
public int featureThreshold = 1;
public String featThreshFile = null;
public double featureDiffThresh = 0.0;
public int numTimesPruneFeatures = 0;
public double newgeneThreshold = 0.0;
public boolean doAdaptation = false;
public boolean useInternal = true;
public boolean useExternal = true;
public double selfTrainConfidenceThreshold = 0.9;
public int selfTrainIterations = 1;
public int selfTrainWindowSize = 1; // Unigram
public boolean useHuber = false;
public boolean useQuartic = false;
public double adaptSigma = 1.0;
public int numFolds = 1;
public int startFold = 1;
public int endFold = 1;
public boolean cacheNGrams = false;
public String outputFormat;
public boolean useSMD = false;
public boolean useSGDtoQN = false;
public boolean useStochasticQN = false;
public boolean useScaledSGD = false;
public int scaledSGDMethod = 0;
public int SGDPasses = -1;
public int QNPasses = -1;
public boolean tuneSGD = false;
public StochasticCalculateMethods stochasticMethod = StochasticCalculateMethods.NoneSpecified;
public double initialGain = 0.1;
public int stochasticBatchSize = 15;
public boolean useSGD = false;
public double gainSGD = 0.1;
public boolean useHybrid = false;
public int hybridCutoffIteration = 0;
public boolean outputIterationsToFile = false;
public boolean testObjFunction = false;
public boolean testVariance = false;
public int SGD2QNhessSamples = 50;
public boolean testHessSamples = false;
public int CRForder = 1;
public int CRFwindow = 2;
public boolean estimateInitial = false;
public transient String biasedTrainFile = null;
public transient String confusionMatrix = null;
public String outputEncoding = null;
public boolean useKBest = false;
public String searchGraphPrefix = null;
public double searchGraphPrune = Double.POSITIVE_INFINITY;
public int kBest = 1;
// more chinese segmenter features for GALE 2007
public boolean useFeaturesC4gram;
public boolean useFeaturesC5gram;
public boolean useFeaturesC6gram;
public boolean useFeaturesCpC4gram;
public boolean useFeaturesCpC5gram;
public boolean useFeaturesCpC6gram;
public boolean useUnicodeType;
public boolean useUnicodeType4gram;
public boolean useUnicodeType5gram;
public boolean use4Clique;
public boolean useUnicodeBlock;
public boolean useShapeStrings1;
public boolean useShapeStrings3;
public boolean useShapeStrings4;
public boolean useShapeStrings5;
public boolean useGoodForNamesCpC;
public boolean useDictionaryConjunctions;
public boolean expandMidDot;
public int printFeaturesUpto; // = 0;
public boolean useDictionaryConjunctions3;
public boolean useWordUTypeConjunctions2;
public boolean useWordUTypeConjunctions3;
public boolean useWordShapeConjunctions2;
public boolean useWordShapeConjunctions3;
public boolean useMidDotShape;
public boolean augmentedDateChars;
public boolean suppressMidDotPostprocessing;
public boolean printNR; // a flag for WordAndTagDocumentReaderAndWriter
public String classBias = null;
public boolean printLabelValue; // Old printErrorStuff
public boolean useRobustQN = false;
public boolean combo = false;
public boolean useGenericFeatures = false;
public boolean verboseForTrueCasing = false;
public String trainHierarchical = null;
public String domain = null;
public boolean baseline = false;
public String transferSigmas = null;
public boolean doFE = false;
public boolean restrictLabels = true;
public boolean announceObjectBankEntries = false; // whether to print a line
// giving each ObjectBank
// entry (usually a
// filename)
// Arabic Subject Detector flags
public boolean usePos = false;
public boolean useAgreement = false;
public boolean useAccCase = false;
public boolean useInna = false;
public boolean useConcord = false;
public boolean useFirstNgram = false;
public boolean useLastNgram = false;
public boolean collapseNN = false;
public boolean useConjBreak = false;
public boolean useAuxPairs = false;
public boolean usePPVBPairs = false;
public boolean useAnnexing = false;
public boolean useTemporalNN = false;
public boolean usePath = false;
public boolean innaPPAttach = false;
public boolean markProperNN = false;
public boolean markMasdar = false;
public boolean useSVO = false;
public int numTags = 3;
public boolean useTagsCpC = false;
public boolean useTagsCpCp2C = false;
public boolean useTagsCpCp2Cp3C = false;
public boolean useTagsCpCp2Cp3Cp4C = false;
public double l1reg = 0.0;
// truecaser flags:
public String mixedCaseMapFile = "";
public String auxTrueCaseModels = "";
// more flags inspired by Zhang and Johnson 2003
public boolean use2W = false;
public boolean useLC = false;
public boolean useYetMoreCpCShapes = false;
// added for the NFL domain
public boolean useIfInteger = false;
public String exportFeatures = null;
public boolean useInPlaceSGD = false;
public boolean useTopics = false;
// Number of iterations before evaluating weights (0 = don't evaluate)
public int evaluateIters = 0;
// Command to use for evaluation
public String evalCmd = "";
// Evaluate on training set or not
public boolean evaluateTrain = false;
// evaluate based on B- and I- begin & inside prefixes
// TODO: it would be useful to serialize the evaluation method
// TODO: what does this have to do with -iobTags
public transient boolean evaluateIOB = false;
public int tuneSampleSize = -1;
public boolean usePhraseFeatures = false;
public boolean usePhraseWords = false;
public boolean usePhraseWordTags = false;
public boolean usePhraseWordSpecialTags = false;
public boolean useCommonWordsFeature = false;
public boolean useProtoFeatures = false;
public boolean useWordnetFeatures = false;
public String tokenFactory = "edu.stanford.nlp.process.CoreLabelTokenFactory";
public Object[] tokenFactoryArgs = new Object[0];
public String tokensAnnotationClassName = "edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation";
public transient String tokenizerOptions = null;
public boolean useCorefFeatures = false;
public String wikiFeatureDbFile = null;
// for combining 2 CRFs - one trained from noisy data and another trained from
// non-noisy
public boolean useNoisyNonNoisyFeature = false;
// year annotation of the document
public boolean useYear = false;
public boolean useSentenceNumber = false;
// to know source of the label. Currently, used to know which pattern is used
// to label the token
public boolean useLabelSource = false;
/**
* Whether to (not) lowercase tokens before looking them up in distsim
* lexicon. By default lowercasing was done, but now it doesn't have to be
* true :-).
*/
public boolean casedDistSim = false;
/**
* The format of the distsim file. Known values are: alexClark = TSV file.
* word TAB clusterNumber [optional other content] terryKoo = TSV file.
* clusterBitString TAB word TAB frequency
*/
public String distSimFileFormat = "alexClark";
/**
* If this number is greater than 0, the distSim class is assumed to be a bit
* string and is truncated at this many characters. Normal distSim features
* will then use this amount of resolution. Extra, special distsim features
* may work at a coarser level of resolution. Since the lexicon only stores
* this length of bit string, there is then no way to have finer-grained
* clusters.
*/
public int distSimMaxBits = 8;
/**
* If this is set to true, all digit characters get mapped to '9' in a distsim
* lexicon and for lookup. This is a simple word shaping that can shrink
* distsim lexicons and improve their performance.
*/
public boolean numberEquivalenceDistSim = false;
/**
* What class to assign to words not found in the dist sim lexicon. You might
* want to make it a known class, if one is the "default" class.
*/
public String unknownWordDistSimClass = "null";
/**
* Use prefixes and suffixes from the previous and next word.
*/
public boolean useNeighborNGrams = false;
/**
* This function maps words in the training or test data to new
* words. They are used at the feature extractor level, ie in the
* FeatureFactory. For now, only the NERFeatureFactory uses this.
*/
public Function wordFunction = null;
public static final String DEFAULT_PLAIN_TEXT_READER = "edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter";
public String plainTextDocumentReaderAndWriter = DEFAULT_PLAIN_TEXT_READER;
/**
* Add features from the automated Latvian morphology analysis
* @author Pēteris Paikens
*/
public boolean useLVMorphoAnalyzer = false;
public String lvMorphoAnalyzerTag = AttributeNames.i_PartOfSpeech;
public boolean useLVMorphoAnalyzerPOS = false;
public boolean useLVMorphoAnalyzerTag = false;
public boolean useLVMorphoAnalyzerNext = false;
public boolean useLVMorphoAnalyzerPrev = false;
public boolean useLVMorphoAnalyzerItemIDs = false;
// "ADD VARIABLES ABOVE HERE"
public transient List phraseGazettes = null;
public transient Properties props = null;
/**
 * Create a new SeqClassifierFlags object without reading any properties.
 * Every flag keeps the value assigned by its field initializer.
 */
public SeqClassifierFlags() {
  super();
}
/**
* Create a new SeqClassifierFlags object and initialize it using values in
* the Properties object. The properties are printed to stderr as it works.
*
* @param props
* The properties object used for initialization
*/
public SeqClassifierFlags(Properties props) {
setProperties(props, true);
}
/**
 * Convenience overload of {@link #setProperties(Properties, boolean)} that
 * loads the flags from the given Properties object with printing to stderr
 * switched on.
 *
 * @param props
 *          The properties object used for initialization
 */
public final void setProperties(Properties props) {
  final boolean printProps = true;
  this.setProperties(props, printProps);
}
/**
* Initialize this object using values in a Properties object.
*
* @param props
* The properties object used for initialization
* @param printProps
* Whether to print the properties to stderr as it works.
*/
public void setProperties(Properties props, boolean printProps) {
this.props = props;
StringBuilder sb = new StringBuilder(stringRep);
for (Enumeration e = props.propertyNames(); e.hasMoreElements();) {
String key = (String) e.nextElement();
String val = props.getProperty(key);
if (!(key.length() == 0 && val.length() == 0)) {
if (printProps) {
System.err.println(key + '=' + val);
}
sb.append(key).append('=').append(val).append('\n');
}
if (key.equalsIgnoreCase("macro")) {
if (Boolean.parseBoolean(val)) {
useObservedSequencesOnly = true;
readerAndWriter = "edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter";
// useClassFeature = true;
// submit
useLongSequences = true;
useTaggySequences = true;
useNGrams = true;
usePrev = true;
useNext = true;
useTags = true;
useWordPairs = true;
useSequences = true;
usePrevSequences = true;
// noMidNGrams
noMidNGrams = true;
// reverse
useReverse = true;
// typeseqs3
useTypeSeqs = true;
useTypeSeqs2 = true;
useTypeySequences = true;
// wordtypes2 && known
wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC;
// occurrence
useOccurrencePatterns = true;
// realword
useLastRealWord = true;
useNextRealWord = true;
// smooth
sigma = 3.0;
// normalize
normalize = true;
normalizeTimex = true;
}
} else if (key.equalsIgnoreCase("goodCoNLL")) {
if (Boolean.parseBoolean(val)) {
// featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory";
readerAndWriter = "edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter";
useObservedSequencesOnly = true;
// useClassFeature = true;
useLongSequences = true;
useTaggySequences = true;
useNGrams = true;
usePrev = true;
useNext = true;
useTags = true;
useWordPairs = true;
useSequences = true;
usePrevSequences = true;
// noMidNGrams
noMidNGrams = true;
// should this be set?? maxNGramLeng = 6; No (to get best score).
// reverse
useReverse = false;
// typeseqs3
useTypeSeqs = true;
useTypeSeqs2 = true;
useTypeySequences = true;
// wordtypes2 && known
wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC;
// occurrence
useOccurrencePatterns = true;
// realword
useLastRealWord = true;
useNextRealWord = true;
// smooth
sigma = 50.0; // increased Aug 2006 from 20; helpful with less feats
// normalize
normalize = true;
normalizeTimex = true;
maxLeft = 2;
useDisjunctive = true;
disjunctionWidth = 4; // clearly optimal for CoNLL
useBoundarySequences = true;
useLemmas = true; // no-op except for German
usePrevNextLemmas = true; // no-op except for German
inputEncoding = "iso-8859-1"; // needed for CoNLL German files
// opt
useQN = true;
QNsize = 15;
}
} else if (key.equalsIgnoreCase("conllNoTags")) {
if (Boolean.parseBoolean(val)) {
readerAndWriter = "edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter";
// trainMap=testMap="word=0,answer=1";
map = "word=0,answer=1";
useObservedSequencesOnly = true;
// useClassFeature = true;
useLongSequences = true;
// useTaggySequences = true;
useNGrams = true;
usePrev = true;
useNext = true;
// useTags = true;
useWordPairs = true;
useSequences = true;
usePrevSequences = true;
// noMidNGrams
noMidNGrams = true;
// reverse
useReverse = false;
// typeseqs3
useTypeSeqs = true;
useTypeSeqs2 = true;
useTypeySequences = true;
// wordtypes2 && known
wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC;
// occurrence
// useOccurrencePatterns = true;
// realword
useLastRealWord = true;
useNextRealWord = true;
// smooth
sigma = 20.0;
adaptSigma = 20.0;
// normalize
normalize = true;
normalizeTimex = true;
maxLeft = 2;
useDisjunctive = true;
disjunctionWidth = 4;
useBoundarySequences = true;
// useLemmas = true; // no-op except for German
// usePrevNextLemmas = true; // no-op except for German
inputEncoding = "iso-8859-1";
// opt
useQN = true;
QNsize = 15;
}
} else if (key.equalsIgnoreCase("notags")) {
if (Boolean.parseBoolean(val)) {
// turn off all features that use POS tags
// this is slightly crude: it also turns off a few things that
// don't use tags in e.g., useTaggySequences
useTags = false;
useSymTags = false;
useTaggySequences = false;
useOccurrencePatterns = false;
}
} else if (key.equalsIgnoreCase("submit")) {
if (Boolean.parseBoolean(val)) {
useLongSequences = true;
useTaggySequences = true;
useNGrams = true;
usePrev = true;
useNext = true;
useTags = true;
useWordPairs = true;
wordShape = WordShapeClassifier.WORDSHAPEDAN1;
useSequences = true;
usePrevSequences = true;
}
} else if (key.equalsIgnoreCase("binnedLengths")) {
if (val != null) {
String[] binnedLengthStrs = val.split("[, ]+");
binnedLengths = new int[binnedLengthStrs.length];
for (int i = 0; i < binnedLengths.length; i++) {
binnedLengths[i] = Integer.parseInt(binnedLengthStrs[i]);
}
}
} else if (key.equalsIgnoreCase("makeConsistent")) {
makeConsistent = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("dump")) {
dump = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNGrams")) {
useNGrams = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNeighborNGrams")) {
useNeighborNGrams = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("wordFunction")) {
wordFunction = ReflectionLoading.loadByReflection(val);
} else if (key.equalsIgnoreCase("conjoinShapeNGrams")) {
conjoinShapeNGrams = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("lowercaseNGrams")) {
lowercaseNGrams = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useIsURL")) {
useIsURL = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useURLSequences")) {
useURLSequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useEntityTypes")) {
useEntityTypes = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useEntityRule")) {
useEntityRule = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useOrdinal")) {
useOrdinal = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useEntityTypeSequences")) {
useEntityTypeSequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useIsDateRange")) {
useIsDateRange = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("dehyphenateNGrams")) {
dehyphenateNGrams = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("lowerNewgeneThreshold")) {
lowerNewgeneThreshold = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePrev")) {
usePrev = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNext")) {
useNext = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTags")) {
useTags = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWordPairs")) {
useWordPairs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useGazettes")) {
useGazettes = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("wordShape")) {
wordShape = WordShapeClassifier.lookupShaper(val);
} else if (key.equalsIgnoreCase("useShapeStrings")) {
useShapeStrings = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useGoodForNamesCpC")) {
useGoodForNamesCpC = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useDictionaryConjunctions")) {
useDictionaryConjunctions = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useDictionaryConjunctions3")) {
useDictionaryConjunctions3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("expandMidDot")) {
expandMidDot = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useSequences")) {
useSequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePrevSequences")) {
usePrevSequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNextSequences")) {
useNextSequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLongSequences")) {
useLongSequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useBoundarySequences")) {
useBoundarySequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTaggySequences")) {
useTaggySequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useExtraTaggySequences")) {
useExtraTaggySequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTaggySequencesShapeInteraction")) {
useTaggySequencesShapeInteraction = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("strictlyZeroethOrder")) {
strictlyZeroethOrder = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("strictlyFirstOrder")) {
strictlyFirstOrder = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("strictlySecondOrder")) {
strictlySecondOrder = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("strictlyThirdOrder")) {
strictlyThirdOrder = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("dontExtendTaggy")) {
dontExtendTaggy = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("entitySubclassification")) {
entitySubclassification = val;
} else if (key.equalsIgnoreCase("useGazettePhrases")) {
useGazettePhrases = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("phraseGazettes")) {
StringTokenizer st = new StringTokenizer(val, " ,;\t");
if (phraseGazettes == null) {
phraseGazettes = new ArrayList();
}
while (st.hasMoreTokens()) {
phraseGazettes.add(st.nextToken());
}
} else if (key.equalsIgnoreCase("useSum")) {
useSum = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("verboseMode")) {
verboseMode = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("tolerance")) {
tolerance = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("maxIterations")) {
maxIterations = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("exportFeatures")) {
exportFeatures = val;
} else if (key.equalsIgnoreCase("printFeatures")) {
printFeatures = val;
} else if (key.equalsIgnoreCase("printFeaturesUpto")) {
printFeaturesUpto = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("lastNameList")) {
lastNameList = val;
} else if (key.equalsIgnoreCase("maleNameList")) {
maleNameList = val;
} else if (key.equalsIgnoreCase("femaleNameList")) {
femaleNameList = val;
} else if (key.equalsIgnoreCase("useSymTags")) {
useSymTags = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useSymWordPairs")) {
useSymWordPairs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("printClassifier")) {
printClassifier = val;
} else if (key.equalsIgnoreCase("printClassifierParam")) {
printClassifierParam = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("intern")) {
intern = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("mergetags")) {
mergeTags = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("iobtags")) {
iobTags = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useViterbi")) {
useViterbi = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("intern2")) {
intern2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("selfTest")) {
selfTest = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("sloppyGazette")) {
sloppyGazette = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("cleanGazette")) {
cleanGazette = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("noMidNGrams")) {
noMidNGrams = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useReverse")) {
useReverse = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("retainEntitySubclassification")) {
retainEntitySubclassification = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLemmas")) {
useLemmas = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePrevNextLemmas")) {
usePrevNextLemmas = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("normalizeTerms")) {
normalizeTerms = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("normalizeTimex")) {
normalizeTimex = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNB")) {
useNB = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useParenMatching")) {
useParenMatching = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTypeSeqs")) {
useTypeSeqs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTypeSeqs2")) {
useTypeSeqs2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTypeSeqs3")) {
useTypeSeqs3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useDisjunctive")) {
useDisjunctive = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("disjunctionWidth")) {
disjunctionWidth = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useDisjunctiveShapeInteraction")) {
useDisjunctiveShapeInteraction = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWideDisjunctive")) {
useWideDisjunctive = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("wideDisjunctionWidth")) {
wideDisjunctionWidth = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useDisjShape")) {
useDisjShape = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTitle")) {
useTitle = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("booleanFeatures")) {
booleanFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useClassFeature")) {
useClassFeature = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useShapeConjunctions")) {
useShapeConjunctions = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWordTag")) {
useWordTag = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNPHead")) {
useNPHead = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNPGovernor")) {
useNPGovernor = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useHeadGov")) {
useHeadGov = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLastRealWord")) {
useLastRealWord = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNextRealWord")) {
useNextRealWord = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useOccurrencePatterns")) {
useOccurrencePatterns = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTypeySequences")) {
useTypeySequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("justify")) {
justify = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("normalize")) {
normalize = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("priorType")) {
priorType = val;
} else if (key.equalsIgnoreCase("sigma")) {
sigma = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("epsilon")) {
epsilon = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("beamSize")) {
beamSize = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("removeTopN")) {
removeTopN = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("removeTopNPercent")) {
removeTopNPercent = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("randomizedRatio")) {
randomizedRatio = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("numTimesRemoveTopN")) {
numTimesRemoveTopN = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("maxLeft")) {
maxLeft = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("maxRight")) {
maxRight = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("maxNGramLeng")) {
maxNGramLeng = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useGazFeatures")) {
useGazFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAltGazFeatures")) {
useAltGazFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMoreGazFeatures")) {
useMoreGazFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAbbr")) {
useAbbr = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMinimalAbbr")) {
useMinimalAbbr = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAbbr1")) {
useAbbr1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMinimalAbbr1")) {
useMinimalAbbr1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("documentReader")) {
System.err.println("You are using an outdated flag: -documentReader " + val);
System.err.println("Please use -readerAndWriter instead.");
} else if (key.equalsIgnoreCase("deleteBlankLines")) {
deleteBlankLines = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("answerFile")) {
answerFile = val;
} else if (key.equalsIgnoreCase("altAnswerFile")) {
altAnswerFile = val;
} else if (key.equalsIgnoreCase("loadClassifier")) {
loadClassifier = val;
} else if (key.equalsIgnoreCase("loadTextClassifier")) {
loadTextClassifier = val;
} else if (key.equalsIgnoreCase("loadJarClassifier")) {
loadJarClassifier = val;
} else if (key.equalsIgnoreCase("loadAuxClassifier")) {
loadAuxClassifier = val;
} else if (key.equalsIgnoreCase("serializeTo")) {
serializeTo = val;
} else if (key.equalsIgnoreCase("serializeToText")) {
serializeToText = val;
} else if (key.equalsIgnoreCase("serializeDatasetsDir")) {
serializeDatasetsDir = val;
} else if (key.equalsIgnoreCase("loadDatasetsDir")) {
loadDatasetsDir = val;
} else if (key.equalsIgnoreCase("pushDir")) {
pushDir = val;
} else if (key.equalsIgnoreCase("purgeDatasets")) {
purgeDatasets = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("keepOBInMemory")) {
keepOBInMemory = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("fakeDataset")) {
fakeDataset = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("numDatasetsPerFile")) {
numDatasetsPerFile = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("trainFile")) {
trainFile = val;
} else if (key.equalsIgnoreCase("biasedTrainFile")) {
biasedTrainFile = val;
} else if (key.equalsIgnoreCase("classBias")) {
classBias = val;
} else if (key.equalsIgnoreCase("confusionMatrix")) {
confusionMatrix = val;
} else if (key.equalsIgnoreCase("adaptFile")) {
adaptFile = val;
} else if (key.equalsIgnoreCase("devFile")) {
devFile = val;
} else if (key.equalsIgnoreCase("testFile")) {
testFile = val;
} else if (key.equalsIgnoreCase("outputFile")) {
outputFile = val;
} else if (key.equalsIgnoreCase("textFile")) {
textFile = val;
} else if (key.equalsIgnoreCase("readStdin")) {
readStdin = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("initialWeights")) {
initialWeights = val;
} else if (key.equalsIgnoreCase("interimOutputFreq")) {
interimOutputFreq = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("inputEncoding")) {
inputEncoding = val;
} else if (key.equalsIgnoreCase("outputEncoding")) {
outputEncoding = val;
} else if (key.equalsIgnoreCase("gazette")) {
useGazettes = true;
StringTokenizer st = new StringTokenizer(val, " ,;\t");
if (gazettes == null) {
gazettes = new ArrayList();
} // for after deserialization, as gazettes is transient
while (st.hasMoreTokens()) {
gazettes.add(st.nextToken());
}
} else if (key.equalsIgnoreCase("useQN")) {
useQN = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("QNsize")) {
QNsize = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("QNsize2")) {
QNsize2 = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("l1reg")) {
useQN = false;
l1reg = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("useFloat")) {
useFloat = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("trainMap")) {
System.err.println("trainMap and testMap are no longer valid options - please use map instead.");
throw new RuntimeException();
} else if (key.equalsIgnoreCase("testMap")) {
System.err.println("trainMap and testMap are no longer valid options - please use map instead.");
throw new RuntimeException();
} else if (key.equalsIgnoreCase("map")) {
map = val;
} else if (key.equalsIgnoreCase("useMoreAbbr")) {
useMoreAbbr = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePrevVB")) {
usePrevVB = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNextVB")) {
useNextVB = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useVB")) {
if (Boolean.parseBoolean(val)) {
useVB = true;
usePrevVB = true;
useNextVB = true;
}
} else if (key.equalsIgnoreCase("useChunks")) {
useChunks = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useChunkySequences")) {
useChunkySequences = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("greekifyNGrams")) {
greekifyNGrams = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("restrictTransitionsTimit")) {
restrictTransitionsTimit = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMoreTags")) {
useMoreTags = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useBeginSent")) {
useBeginSent = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePosition")) {
usePosition = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useGenia")) {
useGENIA = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAbstr")) {
useABSTR = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWeb")) {
useWEB = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAnte")) {
useANTE = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAcr")) {
useACR = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTok")) {
useTOK = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAbgene")) {
useABGENE = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAbstrFreqDict")) {
useABSTRFreqDict = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAbstrFreq")) {
useABSTRFreq = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFreq")) {
useFREQ = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usewebfreqdict")) {
useWEBFreqDict = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("bioSubmitOutput")) {
bioSubmitOutput = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("subCWGaz")) {
subCWGaz = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("splitOnHead")) {
splitOnHead = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("featureCountThreshold")) {
featureCountThreshold = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useWord")) {
useWord = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("memoryThrift")) {
memoryThrift = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("timitDatum")) {
timitDatum = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("splitDocuments")) {
splitDocuments = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("featureWeightThreshold")) {
featureWeightThreshold = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("backgroundSymbol")) {
backgroundSymbol = val;
} else if (key.equalsIgnoreCase("featureFactory")) {
featureFactory = val;
if (featureFactory.equalsIgnoreCase("SuperSimpleFeatureFactory")) {
featureFactory = "edu.stanford.nlp.sequences.SuperSimpleFeatureFactory";
} else if (featureFactory.equalsIgnoreCase("NERFeatureFactory")) {
featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory";
} else if (featureFactory.equalsIgnoreCase("GazNERFeatureFactory")) {
featureFactory = "edu.stanford.nlp.sequences.GazNERFeatureFactory";
} else if (featureFactory.equalsIgnoreCase("IncludeAllFeatureFactory")) {
featureFactory = "edu.stanford.nlp.sequences.IncludeAllFeatureFactory";
} else if (featureFactory.equalsIgnoreCase("PhraseFeatureFactory")) {
featureFactory = "edu.stanford.nlp.article.extraction.PhraseFeatureFactory";
}
} else if (key.equalsIgnoreCase("printXML")) {
printXML = Boolean.parseBoolean(val); // todo: This appears unused now.
// Was it replaced by
// outputFormat?
} else if (key.equalsIgnoreCase("useSeenFeaturesOnly")) {
useSeenFeaturesOnly = Boolean.parseBoolean(val);
// chinese word-segmenter features
} else if (key.equalsIgnoreCase("useRadical")) {
useRadical = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useBigramInTwoClique")) {
useBigramInTwoClique = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useReverseAffix")) {
useReverseAffix = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("charHalfWindow")) {
charHalfWindow = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("purgeFeatures")) {
purgeFeatures = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("ocrFold")) {
ocrFold = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("morphFeatureFile")) {
morphFeatureFile = val;
} else if (key.equalsIgnoreCase("svmModelFile")) {
svmModelFile = val;
/* Dictionary */
} else if (key.equalsIgnoreCase("useDictleng")) {
useDictleng = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useDict2")) {
useDict2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useOutDict2")) {
useOutDict2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("outDict2")) {
outDict2 = val;
} else if (key.equalsIgnoreCase("useDictCTB2")) {
useDictCTB2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useDictASBC2")) {
useDictASBC2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useDictPK2")) {
useDictPK2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useDictHK2")) {
useDictHK2 = Boolean.parseBoolean(val);
/* N-gram flags */
} else if (key.equalsIgnoreCase("useWord1")) {
useWord1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWord2")) {
useWord2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWord3")) {
useWord3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWord4")) {
useWord4 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useRad1")) {
useRad1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useRad2")) {
useRad2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useRad2b")) {
useRad2b = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWordn")) {
useWordn = Boolean.parseBoolean(val);
/* affix flags */
} else if (key.equalsIgnoreCase("useCTBPre1")) {
useCTBPre1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useCTBSuf1")) {
useCTBSuf1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useASBCPre1")) {
useASBCPre1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useASBCSuf1")) {
useASBCSuf1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useHKPre1")) {
useHKPre1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useHKSuf1")) {
useHKSuf1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePKPre1")) {
usePKPre1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePKSuf1")) {
usePKSuf1 = Boolean.parseBoolean(val);
/* POS flags */
} else if (key.equalsIgnoreCase("useCTBChar2")) {
useCTBChar2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePrediction")) {
usePrediction = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useASBCChar2")) {
useASBCChar2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useHKChar2")) {
useHKChar2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePKChar2")) {
usePKChar2 = Boolean.parseBoolean(val);
/* Rule flag */
} else if (key.equalsIgnoreCase("useRule2")) {
useRule2 = Boolean.parseBoolean(val);
/* ASBC and HK */
} else if (key.equalsIgnoreCase("useBig5")) {
useBig5 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegDict2")) {
useNegDict2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegDict3")) {
useNegDict3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegDict4")) {
useNegDict4 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegCTBDict2")) {
useNegCTBDict2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegCTBDict3")) {
useNegCTBDict3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegCTBDict4")) {
useNegCTBDict4 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegASBCDict2")) {
useNegASBCDict2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegASBCDict3")) {
useNegASBCDict3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegASBCDict4")) {
useNegASBCDict4 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegPKDict2")) {
useNegPKDict2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegPKDict3")) {
useNegPKDict3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegPKDict4")) {
useNegPKDict4 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegHKDict2")) {
useNegHKDict2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegHKDict3")) {
useNegHKDict3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNegHKDict4")) {
useNegHKDict4 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePre")) {
usePre = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useSuf")) {
useSuf = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useRule")) {
useRule = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAs")) {
useAs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePk")) {
usePk = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useHk")) {
useHk = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMsr")) {
useMsr = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMSRChar2")) {
useMSRChar2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFeaturesC4gram")) {
useFeaturesC4gram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFeaturesC5gram")) {
useFeaturesC5gram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFeaturesC6gram")) {
useFeaturesC6gram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFeaturesCpC4gram")) {
useFeaturesCpC4gram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFeaturesCpC5gram")) {
useFeaturesCpC5gram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFeaturesCpC6gram")) {
useFeaturesCpC6gram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useUnicodeType")) {
useUnicodeType = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useUnicodeBlock")) {
useUnicodeBlock = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useUnicodeType4gram")) {
useUnicodeType4gram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useUnicodeType5gram")) {
useUnicodeType5gram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useShapeStrings1")) {
useShapeStrings1 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useShapeStrings3")) {
useShapeStrings3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useShapeStrings4")) {
useShapeStrings4 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useShapeStrings5")) {
useShapeStrings5 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWordUTypeConjunctions2")) {
useWordUTypeConjunctions2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWordUTypeConjunctions3")) {
useWordUTypeConjunctions3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWordShapeConjunctions2")) {
useWordShapeConjunctions2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWordShapeConjunctions3")) {
useWordShapeConjunctions3 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMidDotShape")) {
useMidDotShape = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("augmentedDateChars")) {
augmentedDateChars = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("suppressMidDotPostprocessing")) {
suppressMidDotPostprocessing = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("printNR")) {
printNR = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("use4Clique")) {
use4Clique = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFilter")) {
useFilter = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("largeChSegFile")) {
largeChSegFile = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("keepEnglishWhitespaces")) {
keepEnglishWhitespaces = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("keepAllWhitespaces")) {
keepAllWhitespaces = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("sighanPostProcessing")) {
sighanPostProcessing = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useChPos")) {
useChPos = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("sighanCorporaDict")) {
sighanCorporaDict = val;
// end chinese word-segmenter features
} else if (key.equalsIgnoreCase("useObservedSequencesOnly")) {
useObservedSequencesOnly = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("maxDocSize")) {
maxDocSize = Integer.parseInt(val);
splitDocuments = true;
} else if (key.equalsIgnoreCase("printProbs")) {
printProbs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("printFirstOrderProbs")) {
printFirstOrderProbs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("saveFeatureIndexToDisk")) {
saveFeatureIndexToDisk = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("removeBackgroundSingletonFeatures")) {
removeBackgroundSingletonFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("doGibbs")) {
doGibbs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNERPrior")) {
useNERPrior = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAcqPrior")) {
useAcqPrior = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useSemPrior")) {
useSemPrior = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMUCFeatures")) {
useMUCFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("initViterbi")) {
initViterbi = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("checkNameList")) {
checkNameList = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFirstWord")) {
useFirstWord = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useUnknown")) {
useUnknown = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("cacheNGrams")) {
cacheNGrams = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useNumberFeature")) {
useNumberFeature = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("annealingRate")) {
annealingRate = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("annealingType")) {
if (val.equalsIgnoreCase("linear") || val.equalsIgnoreCase("exp") || val.equalsIgnoreCase("exponential")) {
annealingType = val;
} else {
System.err.println("unknown annealingType: " + val + ". Please use linear|exp|exponential");
}
} else if (key.equalsIgnoreCase("numSamples")) {
numSamples = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("inferenceType")) {
inferenceType = val;
} else if (key.equalsIgnoreCase("loadProcessedData")) {
loadProcessedData = val;
} else if (key.equalsIgnoreCase("normalizationTable")) {
normalizationTable = val;
} else if (key.equalsIgnoreCase("dictionary")) {
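// Don't set if the value is empty, only spaces, "true" or "null": revert it to null.
// Special case so that the dictionary list can be emptied out on the command line!
// NOTE(review): the "false" test below compares against the literal "val" rather than
// the variable val, so it never matches and "false" is kept as a dictionary name -- confirm intent.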
val = val.trim();
if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals("val")) {
dictionary = val;
} else {
dictionary = null;
}
} else if (key.equalsIgnoreCase("serDictionary")) {
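// Don't set if the value is empty, only spaces, "true" or "null": revert it to null.
// Special case so that the serialized dictionary can be emptied out on the command line!
// NOTE(review): the "false" test below compares against the literal "val" rather than
// the variable val, so it never matches and "false" is kept as a dictionary name -- confirm intent.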
val = val.trim();
if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals("val")) {
serializedDictionary = val;
} else {
serializedDictionary = null;
}
} else if (key.equalsIgnoreCase("dictionary2")) {
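// Don't set if the value is empty, only spaces, "true" or "null": revert it to null.
// Special case so that the second dictionary list can be emptied out on the command line!
// NOTE(review): the "false" test below compares against the literal "val" rather than
// the variable val, so it never matches and "false" is kept as a dictionary name -- confirm intent.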
val = val.trim();
if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals("val")) {
dictionary2 = val;
} else {
dictionary2 = null;
}
} else if (key.equalsIgnoreCase("normTableEncoding")) {
normTableEncoding = val;
} else if (key.equalsIgnoreCase("useLemmaAsWord")) {
useLemmaAsWord = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("type")) {
type = val;
} else if (key.equalsIgnoreCase("readerAndWriter")) {
readerAndWriter = val;
} else if (key.equalsIgnoreCase("plainTextDocumentReaderAndWriter")) {
plainTextDocumentReaderAndWriter = val;
} else if (key.equalsIgnoreCase("gazFilesFile")) {
gazFilesFile = val;
} else if (key.equalsIgnoreCase("baseTrainDir")) {
baseTrainDir = val;
} else if (key.equalsIgnoreCase("baseTestDir")) {
baseTestDir = val;
} else if (key.equalsIgnoreCase("trainFiles")) {
trainFiles = val;
} else if (key.equalsIgnoreCase("trainFileList")) {
trainFileList = val;
} else if (key.equalsIgnoreCase("trainDirs")) {
trainDirs = val;
} else if (key.equalsIgnoreCase("testDirs")) {
testDirs = val;
} else if (key.equalsIgnoreCase("testFiles")) {
testFiles = val;
} else if (key.equalsIgnoreCase("usePrediction2")) {
usePrediction2 = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useObservedFeaturesOnly")) {
useObservedFeaturesOnly = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("iobWrapper")) {
iobWrapper = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useDistSim")) {
useDistSim = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("casedDistSim")) {
casedDistSim = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("distSimFileFormat")) {
distSimFileFormat = val;
} else if (key.equalsIgnoreCase("distSimMaxBits")) {
distSimMaxBits = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("numberEquivalenceDistSim")) {
numberEquivalenceDistSim = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("unknownWordDistSimClass")) {
unknownWordDistSimClass = val;
} else if (key.equalsIgnoreCase("useOnlySeenWeights")) {
useOnlySeenWeights = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("predProp")) {
predProp = val;
} else if (key.equalsIgnoreCase("distSimLexicon")) {
distSimLexicon = val;
} else if (key.equalsIgnoreCase("useSegmentation")) {
useSegmentation = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useInternal")) {
useInternal = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useExternal")) {
useExternal = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useEitherSideWord")) {
useEitherSideWord = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useEitherSideDisjunctive")) {
useEitherSideDisjunctive = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("featureDiffThresh")) {
featureDiffThresh = Double.parseDouble(val);
if (props.getProperty("numTimesPruneFeatures") == null) {
numTimesPruneFeatures = 1;
}
} else if (key.equalsIgnoreCase("numTimesPruneFeatures")) {
numTimesPruneFeatures = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("newgeneThreshold")) {
newgeneThreshold = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("adaptFile")) {
adaptFile = val;
} else if (key.equalsIgnoreCase("doAdaptation")) {
doAdaptation = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("selfTrainFile")) {
selfTrainFile = val;
} else if (key.equalsIgnoreCase("selfTrainIterations")) {
selfTrainIterations = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("selfTrainWindowSize")) {
selfTrainWindowSize = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("selfTrainConfidenceThreshold")) {
selfTrainConfidenceThreshold = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("numFolds")) {
numFolds = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("startFold")) {
startFold = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("endFold")) {
endFold = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("adaptSigma")) {
adaptSigma = Double.parseDouble(val);
} else if (key.startsWith("prop") && !key.equals("prop")) {
comboProps.add(val);
} else if (key.equalsIgnoreCase("outputFormat")) {
outputFormat = val;
} else if (key.equalsIgnoreCase("useSMD")) {
useSMD = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useScaledSGD")) {
useScaledSGD = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("scaledSGDMethod")) {
scaledSGDMethod = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("tuneSGD")) {
tuneSGD = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("StochasticCalculateMethod")) {
if (val.equalsIgnoreCase("AlgorithmicDifferentiation")) {
stochasticMethod = StochasticCalculateMethods.AlgorithmicDifferentiation;
} else if (val.equalsIgnoreCase("IncorporatedFiniteDifference")) {
stochasticMethod = StochasticCalculateMethods.IncorporatedFiniteDifference;
} else if (val.equalsIgnoreCase("ExternalFinitedifference")) {
stochasticMethod = StochasticCalculateMethods.ExternalFiniteDifference;
}
} else if (key.equalsIgnoreCase("initialGain")) {
initialGain = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("stochasticBatchSize")) {
stochasticBatchSize = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("SGD2QNhessSamples")) {
SGD2QNhessSamples = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useSGD")) {
useSGD = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useInPlaceSGD")) {
useInPlaceSGD = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useSGDtoQN")) {
useSGDtoQN = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("SGDPasses")) {
SGDPasses = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("QNPasses")) {
QNPasses = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("gainSGD")) {
gainSGD = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("useHybrid")) {
useHybrid = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("hybridCutoffIteration")) {
hybridCutoffIteration = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useStochasticQN")) {
useStochasticQN = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("outputIterationsToFile")) {
outputIterationsToFile = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("testObjFunction")) {
testObjFunction = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("testVariance")) {
testVariance = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("CRForder")) {
CRForder = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("CRFwindow")) {
CRFwindow = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("testHessSamples")) {
testHessSamples = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("estimateInitial")) {
estimateInitial = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("printLabelValue")) {
printLabelValue = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("searchGraphPrefix")) {
searchGraphPrefix = val;
} else if (key.equalsIgnoreCase("searchGraphPrune")) {
searchGraphPrune = Double.parseDouble(val);
} else if (key.equalsIgnoreCase("kBest")) {
useKBest = true;
kBest = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useRobustQN")) {
useRobustQN = true;
} else if (key.equalsIgnoreCase("combo")) {
combo = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("verboseForTrueCasing")) {
verboseForTrueCasing = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("trainHierarchical")) {
trainHierarchical = val;
} else if (key.equalsIgnoreCase("domain")) {
domain = val;
} else if (key.equalsIgnoreCase("baseline")) {
baseline = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("doFE")) {
doFE = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("restrictLabels")) {
restrictLabels = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("transferSigmas")) {
transferSigmas = val;
} else if (key.equalsIgnoreCase("announceObjectBankEntries")) {
announceObjectBankEntries = true;
} else if (key.equalsIgnoreCase("usePos")) {
usePos = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAgreement")) {
useAgreement = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAccCase")) {
useAccCase = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useInna")) {
useInna = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useConcord")) {
useConcord = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useFirstNgram")) {
useFirstNgram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLastNgram")) {
useLastNgram = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("collapseNN")) {
collapseNN = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTagsCpC")) {
useTagsCpC = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTagsCpCp2C")) {
useTagsCpCp2C = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTagsCpCp2Cp3C")) {
useTagsCpCp2Cp3C = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTagsCpCp2Cp3Cp4C")) {
useTagsCpCp2Cp3Cp4C = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("numTags")) {
numTags = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useConjBreak")) {
useConjBreak = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAuxPairs")) {
useAuxPairs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePPVBPairs")) {
usePPVBPairs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useAnnexing")) {
useAnnexing = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useTemporalNN")) {
useTemporalNN = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("markProperNN")) {
markProperNN = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePath")) {
usePath = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("markMasdar")) {
markMasdar = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("innaPPAttach")) {
innaPPAttach = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useSVO")) {
useSVO = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("mixedCaseMapFile")) {
mixedCaseMapFile = val;
} else if (key.equalsIgnoreCase("auxTrueCaseModels")) {
auxTrueCaseModels = val;
} else if (key.equalsIgnoreCase("use2W")) {
use2W = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLC")) {
useLC = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useYetMoreCpCShapes")) {
useYetMoreCpCShapes = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useIfInteger")) {
useIfInteger = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("twoStage")) {
twoStage = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("evaluateIters")) {
evaluateIters = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("evalCmd")) {
evalCmd = val;
} else if (key.equalsIgnoreCase("evaluateTrain")) {
evaluateTrain = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("evaluateIOB")) {
evaluateIOB = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("tuneSampleSize")) {
tuneSampleSize = Integer.parseInt(val);
} else if (key.equalsIgnoreCase("useTopics")) {
useTopics = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePhraseFeatures")) {
usePhraseFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePhraseWords")) {
usePhraseWords = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePhraseWordTags")) {
usePhraseWordTags = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("usePhraseWordSpecialTags")) {
usePhraseWordSpecialTags = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useProtoFeatures")) {
useProtoFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useWordnetFeatures")) {
useWordnetFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("wikiFeatureDbFile")) {
wikiFeatureDbFile = val;
} else if (key.equalsIgnoreCase("tokenizerOptions")) {
tokenizerOptions = val;
} else if (key.equalsIgnoreCase("useCommonWordsFeature")) {
useCommonWordsFeature = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useYear")) {
useYear = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useSentenceNumber")) {
useSentenceNumber = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLabelSource")) {
useLabelSource = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("tokenFactory")) {
tokenFactory = val;
} else if (key.equalsIgnoreCase("tokensAnnotationClassName")) {
tokensAnnotationClassName = val;
} else if (key.equalsIgnoreCase("useLVMorphoAnalyzer")) {
useLVMorphoAnalyzer = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("LVMorphoAnalyzerTag")) {
lvMorphoAnalyzerTag = val;
} else if (key.equalsIgnoreCase("useLVMorphoAnalyzerPOS")) {
useLVMorphoAnalyzerPOS = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLVMorphoAnalyzerTag")) {
useLVMorphoAnalyzerTag = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLVMorphoAnalyzerPrev")) {
useLVMorphoAnalyzerPrev = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLVMorphoAnalyzerNext")) {
useLVMorphoAnalyzerNext = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useLVMorphoAnalyzerItemIDs")) {
useLVMorphoAnalyzerItemIDs = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMorphologyFeatures")) {
useMorphologyFeatures = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMorphoCase")) {
useMorphoCase = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMorphoPOS")) {
useMorphoPOS = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMorphoLetaLemma")) {
useMorphoLetaLemma = Boolean.parseBoolean(val);
} else if (key.equalsIgnoreCase("useMorphoNumber")) {
useMorphoNumber = Boolean.parseBoolean(val);
// ADD VALUE ABOVE HERE
} else if (key.length() > 0 && !key.equals("prop")) {
System.err.println("Unknown property: |" + key + '|');
}
}
if (startFold > numFolds) {
System.err.println("startFold > numFolds -> setting startFold to 1");
startFold = 1;
}
if (endFold > numFolds) {
System.err.println("endFold > numFolds -> setting to numFolds");
endFold = numFolds;
}
if (combo) {
splitDocuments = false;
}
stringRep = sb.toString();
} // end setProperties()
/**
 * Returns the cached textual form of these flags.
 * <p>
 * The text is not recomputed here; it is whatever was last stored in
 * {@code stringRep} (assigned at the end of {@code setProperties}).
 *
 * @return The cached String describing the properties held by this object.
 */
@Override
public String toString() {
  return this.stringRep;
}
/**
 * Builds a {@code name=value} listing of this object's public fields, as
 * returned by {@link Class#getFields()}.
 * <p>
 * Fields are selected by their declared type:
 * <ul>
 * <li>{@code boolean} / {@code Boolean}: listed only when the value is true;</li>
 * <li>{@code String}: listed only when the value is not null;</li>
 * <li>{@code double}, {@code int}, {@code float}, {@code byte}, {@code long}
 * (primitive or boxed) and primitive {@code char}: always listed (a null
 * boxed value prints as {@code null});</li>
 * <li>every other type (arrays, lists, enums, ...): not listed.</li>
 * </ul>
 * Each entry is preceded by a newline, so a non-empty result starts with
 * {@code "\n"}.
 *
 * @return The listing (empty if no field qualifies), or {@code null} if a
 *         field could not be read reflectively.
 */
public String getNotNullTrueStringRep() {
  StringBuilder rep = new StringBuilder();
  String joiner = "\n";
  try {
    for (Field ff : this.getClass().getFields()) {
      Class<?> type = ff.getType();
      Object val;
      if (type.equals(boolean.class) || type.equals(Boolean.class)) {
        // Field.get boxes a primitive boolean, so one call serves both types.
        // Field.getBoolean must not be used here: it throws
        // IllegalArgumentException for a Boolean (wrapper) field.
        val = ff.get(this);
        if (!Boolean.TRUE.equals(val)) {
          continue; // false or null: not reported
        }
      } else if (type.equals(String.class)) {
        val = ff.get(this);
        if (val == null) {
          continue; // unset strings are not reported
        }
      } else if (isAlwaysListedType(type)) {
        // Boxing via Field.get prints the same text as the primitive getters.
        val = ff.get(this);
      } else {
        continue; // arrays, lists, enums and any other type are skipped
      }
      rep.append(joiner).append(ff.getName()).append('=').append(val);
    }
  } catch (IllegalAccessException e) {
    // Best effort, as before: report the problem and signal failure with null.
    e.printStackTrace();
    return null;
  }
  return rep.toString();
}

/** Tells whether fields of the given type are listed regardless of their value. */
private static boolean isAlwaysListedType(Class<?> type) {
  return type.equals(Double.class) || type.equals(double.class)
      || type.equals(Integer.class) || type.equals(int.class)
      || type.equals(Float.class) || type.equals(float.class)
      || type.equals(Byte.class) || type.equals(byte.class)
      || type.equals(char.class)
      || type.equals(Long.class) || type.equals(long.class);
}
} // end class SeqClassifierFlags