All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.sequences.SeqClassifierFlags Maven / Gradle / Ivy

package edu.stanford.nlp.sequences;

import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;
import java.util.StringTokenizer;

import lv.semti.morphology.attributes.AttributeNames;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.optimization.StochasticCalculateMethods;
import edu.stanford.nlp.process.WordShapeClassifier;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.ReflectionLoading;

/**
 * Flags for sequence classifiers. Documentation for general flags and flags for
 * NER can be found in the Javadoc of
 * {@link edu.stanford.nlp.ie.NERFeatureFactory}. Documentation for the flags
 * for Chinese word segmentation can be found in the Javadoc of
 * {@link edu.stanford.nlp.wordseg.ChineseSegmenterFeatureFactory}.
 * 

 * <p>
 * <i>IMPORTANT NOTE IF CHANGING THIS FILE:</i> <b>MAKE SURE TO ONLY ADD NEW
 * VARIABLES AT THE END OF THE LIST OF VARIABLES (and not to change existing
 * variables)! Otherwise you usually break all currently serialized
 * classifiers!!! Search for "ADD VARIABLES ABOVE HERE" below.</b>
 * <p>
 * Some general flags are described here:
 * <table border="1">
 * <tr>
 * <th>Property Name</th><th>Type</th><th>Default Value</th><th>Description</th>
 * </tr>
 * <tr>
 * <td>useQN</td><td>boolean</td><td>true</td>
 * <td>Use Quasi-Newton (L-BFGS) to find minimum. NOTE: Need to set this to
 * false if using other minimizers such as SGD.</td>
 * </tr>
 * <tr>
 * <td>QNsize</td><td>int</td><td>25</td>
 * <td>Number of previous iterations of Quasi-Newton to store (this increases
 * memory use, but speeds convergence by letting the Quasi-Newton optimization
 * more effectively approximate the second derivative).</td>
 * </tr>
 * <tr>
 * <td>QNsize2</td><td>int</td><td>25</td>
 * <td>Number of previous iterations of Quasi-Newton to store (used when pruning
 * features, after the first iteration - the first iteration is with QNSize).</td>
 * </tr>
 * <tr>
 * <td>useInPlaceSGD</td><td>boolean</td><td>false</td>
 * <td>Use SGD (tweaking weights in place) to find minimum (more efficient than
 * the old SGD, faster to converge than Quasi-Newton if there is a very large
 * number of samples). Implemented for CRFClassifier. NOTE: Remember to set
 * useQN to false.</td>
 * </tr>
 * <tr>
 * <td>tuneSampleSize</td><td>int</td><td>-1</td>
 * <td>If this number is greater than 0, specifies the number of samples to use
 * for tuning (default is 1000).</td>
 * </tr>
 * <tr>
 * <td>SGDPasses</td><td>int</td><td>-1</td>
 * <td>If this number is greater than 0, specifies the number of SGD passes
 * (over the entire training set) to do before giving up (default is 50). Can
 * be smaller if sample size is very large.</td>
 * </tr>
 * <tr>
 * <td>useSGD</td><td>boolean</td><td>false</td>
 * <td>Use SGD to find minimum (can be slow). NOTE: Remember to set useQN to
 * false.</td>
 * </tr>
 * <tr>
 * <td>useSGDtoQN</td><td>boolean</td><td>false</td>
 * <td>Use SGD (SGD version selected by useInPlaceSGD or useSGD) for a certain
 * number of passes (SGDPasses) and then switch to QN. Gives the quick initial
 * convergence of SGD, with the desired convergence criterion of QN (there is
 * some rampup time for QN). NOTE: Remember to set useQN to false.</td>
 * </tr>
 * <tr>
 * <td>evaluateIters</td><td>int</td><td>0</td>
 * <td>If this number is greater than 0, evaluates on the test set every so
 * often while minimizing. Implemented for CRFClassifier.</td>
 * </tr>
 * <tr>
 * <td>evalCmd</td><td>String</td><td></td>
 * <td>If specified (and evaluateIters is set), runs the specified cmdline
 * command during evaluation (instead of default CONLL-like NER evaluation).</td>
 * </tr>
 * <tr>
 * <td>evaluateTrain</td><td>boolean</td><td>false</td>
 * <td>If specified (and evaluateIters is set), also evaluate on training set
 * (can be expensive).</td>
 * </tr>
 * <tr>
 * <td>tokenizerOptions</td><td>String</td><td>(null)</td>
 * <td>Extra options to supply to the tokenizer when creating it.</td>
 * </tr>
 * </table>
* * @author Jenny Finkel */ public class SeqClassifierFlags implements Serializable { private static final long serialVersionUID = -7076671761070232567L; public static final String DEFAULT_BACKGROUND_SYMBOL = "O"; private String stringRep = ""; /** * AZ */ public boolean useMorphologyFeatures = false; public boolean useMorphoCase = false; public boolean useMorphoPOS = false; public boolean useMorphoLetaLemma = false; public boolean useMorphoNumber = false; public boolean useNGrams = false; public boolean conjoinShapeNGrams = false; public boolean lowercaseNGrams = false; public boolean dehyphenateNGrams = false; public boolean usePrev = false; public boolean useNext = false; public boolean useTags = false; public boolean useWordPairs = false; public boolean useGazettes = false; public boolean useSequences = true; public boolean usePrevSequences = false; public boolean useNextSequences = false; public boolean useLongSequences = false; public boolean useBoundarySequences = false; public boolean useTaggySequences = false; public boolean useExtraTaggySequences = false; public boolean dontExtendTaggy = false; public boolean useTaggySequencesShapeInteraction = false; public boolean strictlyZeroethOrder = false; public boolean strictlyFirstOrder = false; public boolean strictlySecondOrder = false; public boolean strictlyThirdOrder = false; public String entitySubclassification = "IO"; public boolean retainEntitySubclassification = false; public boolean useGazettePhrases = false; public boolean makeConsistent = false; public boolean useWordLabelCounts = false; // boolean usePrevInstanceLabel = false; // boolean useNextInstanceLabel = false; public boolean useViterbi = true; public int[] binnedLengths = null; public boolean verboseMode = false; public boolean useSum = false; public double tolerance = 1e-4; // Turned on if non-null. 
Becomes part of the filename features are printed to public String printFeatures = null; public boolean useSymTags = false; /** * useSymWordPairs Has a small negative effect. */ public boolean useSymWordPairs = false; public String printClassifier = "WeightHistogram"; public int printClassifierParam = 100; public boolean intern = false; public boolean intern2 = false; public boolean selfTest = false; public boolean sloppyGazette = false; public boolean cleanGazette = false; public boolean noMidNGrams = false; public int maxNGramLeng = -1; public boolean useReverse = false; public boolean greekifyNGrams = false; public boolean useParenMatching = false; public boolean useLemmas = false; public boolean usePrevNextLemmas = false; public boolean normalizeTerms = false; public boolean normalizeTimex = false; public boolean useNB = false; public boolean useQN = true; public boolean useFloat = false; public int QNsize = 25; public int QNsize2 = 25; public int maxIterations = -1; public int wordShape = WordShapeClassifier.NOWORDSHAPE; public boolean useShapeStrings = false; public boolean useTypeSeqs = false; public boolean useTypeSeqs2 = false; public boolean useTypeSeqs3 = false; public boolean useDisjunctive = false; public int disjunctionWidth = 4; public boolean useDisjunctiveShapeInteraction = false; public boolean useDisjShape = false; public boolean useWord = true; // ON by default public boolean useClassFeature = false; public boolean useShapeConjunctions = false; public boolean useWordTag = false; public boolean useNPHead = false; public boolean useNPGovernor = false; public boolean useHeadGov = false; public boolean useLastRealWord = false; public boolean useNextRealWord = false; public boolean useOccurrencePatterns = false; public boolean useTypeySequences = false; public boolean justify = false; public boolean normalize = false; public String priorType = "QUADRATIC"; public double sigma = 1.0; public double epsilon = 0.01; public int beamSize = 30; public int 
maxLeft = 2; public int maxRight = 0; public boolean usePosition = false; public boolean useBeginSent = false; public boolean useGazFeatures = false; public boolean useMoreGazFeatures = false; public boolean useAbbr = false; public boolean useMinimalAbbr = false; public boolean useAbbr1 = false; public boolean useMinimalAbbr1 = false; public boolean useMoreAbbr = false; public boolean deleteBlankLines = false; public boolean useGENIA = false; public boolean useTOK = false; public boolean useABSTR = false; public boolean useABSTRFreqDict = false; public boolean useABSTRFreq = false; public boolean useFREQ = false; public boolean useABGENE = false; public boolean useWEB = false; public boolean useWEBFreqDict = false; public boolean useIsURL = false; public boolean useURLSequences = false; public boolean useIsDateRange = false; public boolean useEntityTypes = false; public boolean useEntityTypeSequences = false; public boolean useEntityRule = false; public boolean useOrdinal = false; public boolean useACR = false; public boolean useANTE = false; public boolean useMoreTags = false; public boolean useChunks = false; public boolean useChunkySequences = false; public boolean usePrevVB = false; public boolean useNextVB = false; public boolean useVB = false; public boolean subCWGaz = false; public String documentReader = "ColumnDocumentReader"; // TODO OBSOLETE: // delete when breaking // serialization // sometime. 
// public String trainMap = "word=0,tag=1,answer=2"; // public String testMap = "word=0,tag=1,answer=2"; public String map = "word=0,tag=1,answer=2"; public boolean useWideDisjunctive = false; public int wideDisjunctionWidth = 10; // chinese word-segmenter features public boolean useRadical = false; public boolean useBigramInTwoClique = false; public String morphFeatureFile = null; public boolean useReverseAffix = false; public int charHalfWindow = 3; public boolean useWord1 = false; public boolean useWord2 = false; public boolean useWord3 = false; public boolean useWord4 = false; public boolean useRad1 = false; public boolean useRad2 = false; public boolean useWordn = false; public boolean useCTBPre1 = false; public boolean useCTBSuf1 = false; public boolean useASBCPre1 = false; public boolean useASBCSuf1 = false; public boolean usePKPre1 = false; public boolean usePKSuf1 = false; public boolean useHKPre1 = false; public boolean useHKSuf1 = false; public boolean useCTBChar2 = false; public boolean useASBCChar2 = false; public boolean useHKChar2 = false; public boolean usePKChar2 = false; public boolean useRule2 = false; public boolean useDict2 = false; public boolean useOutDict2 = false; public String outDict2 = "/u/htseng/scr/chunking/segmentation/out.lexicon"; public boolean useDictleng = false; public boolean useDictCTB2 = false; public boolean useDictASBC2 = false; public boolean useDictPK2 = false; public boolean useDictHK2 = false; public boolean useBig5 = false; public boolean useNegDict2 = false; public boolean useNegDict3 = false; public boolean useNegDict4 = false; public boolean useNegCTBDict2 = false; public boolean useNegCTBDict3 = false; public boolean useNegCTBDict4 = false; public boolean useNegASBCDict2 = false; public boolean useNegASBCDict3 = false; public boolean useNegASBCDict4 = false; public boolean useNegHKDict2 = false; public boolean useNegHKDict3 = false; public boolean useNegHKDict4 = false; public boolean useNegPKDict2 = false; public 
boolean useNegPKDict3 = false; public boolean useNegPKDict4 = false; public boolean usePre = false; public boolean useSuf = false; public boolean useRule = false; public boolean useHk = false; public boolean useMsr = false; public boolean useMSRChar2 = false; public boolean usePk = false; public boolean useAs = false; public boolean useFilter = false; // TODO this flag is used for nothing; // delete when breaking serialization public boolean largeChSegFile = false; // TODO this flag is used for nothing; // delete when breaking serialization public boolean useRad2b = false; /** * Keep the whitespace between English words in testFile when printing out * answers. Doesn't really change the content of the CoreLabels. (For Chinese * segmentation.) */ public boolean keepEnglishWhitespaces = false; /** * Keep all the whitespace words in testFile when printing out answers. * Doesn't really change the content of the CoreLabels. (For Chinese * segmentation.) */ public boolean keepAllWhitespaces = false; public boolean sighanPostProcessing = false; /** * use POS information (an "open" feature for Chinese segmentation) */ public boolean useChPos = false; // CTBSegDocumentReader normalization table // A value of null means that a default algorithmic normalization // is done in which ASCII characters get mapped to their fullwidth // equivalents in the Unihan range public String normalizationTable; // = null; public String dictionary; // = null; public String serializedDictionary; // = null; public String dictionary2; // = null; public String normTableEncoding = "GB18030"; /** * for Sighan bakeoff 2005, the path to the dictionary of bigrams appeared in * corpus */ public String sighanCorporaDict = "/u/nlp/data/chinese-segmenter/"; // end Sighan 20005 chinese word-segmenter features/properties public boolean useWordShapeGaz = false; public String wordShapeGaz = null; // TODO: This should maybe be removed in favor of suppressing splitting when // maxDocLength <= 0, when next 
breaking serialization public boolean splitDocuments = true; public boolean printXML = false; public boolean useSeenFeaturesOnly = false; public String lastNameList = "/u/nlp/data/dist.all.last"; public String maleNameList = "/u/nlp/data/dist.male.first"; public String femaleNameList = "/u/nlp/data/dist.female.first"; // don't want these serialized public transient String trainFile = null; /** NER adaptation (Gaussian prior) parameters. */ public transient String adaptFile = null; public transient String devFile = null; public transient String testFile = null; public transient String textFile = null; public transient boolean readStdin = false; public transient String outputFile = null; public transient String loadClassifier = null; public transient String loadTextClassifier = null; public transient String loadJarClassifier = null; public transient String loadAuxClassifier = null; public transient String serializeTo = null; public transient String serializeToText = null; public transient int interimOutputFreq = 0; public transient String initialWeights = null; public transient List gazettes = new ArrayList(); public transient String selfTrainFile = null; public String inputEncoding = "UTF-8"; // used for CTBSegDocumentReader as // well public boolean bioSubmitOutput = false; public int numRuns = 1; public String answerFile = null; public String altAnswerFile = null; public String dropGaz; public String printGazFeatures = null; public int numStartLayers = 1; public boolean dump = false; public boolean mergeTags; // whether to merge B- and I- tags public boolean splitOnHead; // threshold public int featureCountThreshold = 0; public double featureWeightThreshold = 0.0; // feature factory public String featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory"; public Object[] featureFactoryArgs = new Object[0]; public String backgroundSymbol = DEFAULT_BACKGROUND_SYMBOL; // use public boolean useObservedSequencesOnly = false; public int maxDocSize = 0; public boolean 
printProbs = false; public boolean printFirstOrderProbs = false; public boolean saveFeatureIndexToDisk = false; public boolean removeBackgroundSingletonFeatures = false; public boolean doGibbs = false; public int numSamples = 100; public boolean useNERPrior = false; public boolean useAcqPrior = false; /** * If true and doGibbs also true, will do generic Gibbs inference without any * priors */ public boolean useUniformPrior = false; public boolean useMUCFeatures = false; public double annealingRate = 0.0; public String annealingType = null; public String loadProcessedData = null; public boolean initViterbi = true; public boolean useUnknown = false; public boolean checkNameList = false; public boolean useSemPrior = false; public boolean useFirstWord = false; public boolean useNumberFeature = false; public int ocrFold = 0; public transient boolean ocrTrain = false; public String classifierType = "MaxEnt"; public String svmModelFile = null; public String inferenceType = "Viterbi"; public boolean useLemmaAsWord = false; public String type = "cmm"; public String readerAndWriter = "edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter"; public List comboProps = new ArrayList(); public boolean usePrediction = false; public boolean useAltGazFeatures = false; public String gazFilesFile = null; public boolean usePrediction2 = false; public String baseTrainDir = "."; public String baseTestDir = "."; public String trainFiles = null; public String trainFileList = null; public String testFiles = null; public String trainDirs = null; // cdm 2009: this is currently unsupported, // but one user wanted something like this.... 
public String testDirs = null; public boolean useOnlySeenWeights = false; public String predProp = null; public CoreLabel pad = new CoreLabel(); public boolean useObservedFeaturesOnly = false; public String distSimLexicon = null; public boolean useDistSim = false; public int removeTopN = 0; public int numTimesRemoveTopN = 1; public double randomizedRatio = 1.0; public double removeTopNPercent = 0.0; public int purgeFeatures = -1; public boolean booleanFeatures = false; public boolean iobWrapper = false; public boolean iobTags = false; public boolean useSegmentation = false; /* * binary segmentation feature for * character-based Chinese NER */ public boolean memoryThrift = false; public boolean timitDatum = false; public String serializeDatasetsDir = null; public String loadDatasetsDir = null; public String pushDir = null; public boolean purgeDatasets = false; public boolean keepOBInMemory = true; public boolean fakeDataset = false; public boolean restrictTransitionsTimit = false; public int numDatasetsPerFile = 1; public boolean useTitle = false; // these are for the old stuff public boolean lowerNewgeneThreshold = false; public boolean useEitherSideWord = false; public boolean useEitherSideDisjunctive = false; public boolean twoStage = false; public String crfType = "MaxEnt"; public int featureThreshold = 1; public String featThreshFile = null; public double featureDiffThresh = 0.0; public int numTimesPruneFeatures = 0; public double newgeneThreshold = 0.0; public boolean doAdaptation = false; public boolean useInternal = true; public boolean useExternal = true; public double selfTrainConfidenceThreshold = 0.9; public int selfTrainIterations = 1; public int selfTrainWindowSize = 1; // Unigram public boolean useHuber = false; public boolean useQuartic = false; public double adaptSigma = 1.0; public int numFolds = 1; public int startFold = 1; public int endFold = 1; public boolean cacheNGrams = false; public String outputFormat; public boolean useSMD = false; public 
boolean useSGDtoQN = false; public boolean useStochasticQN = false; public boolean useScaledSGD = false; public int scaledSGDMethod = 0; public int SGDPasses = -1; public int QNPasses = -1; public boolean tuneSGD = false; public StochasticCalculateMethods stochasticMethod = StochasticCalculateMethods.NoneSpecified; public double initialGain = 0.1; public int stochasticBatchSize = 15; public boolean useSGD = false; public double gainSGD = 0.1; public boolean useHybrid = false; public int hybridCutoffIteration = 0; public boolean outputIterationsToFile = false; public boolean testObjFunction = false; public boolean testVariance = false; public int SGD2QNhessSamples = 50; public boolean testHessSamples = false; public int CRForder = 1; public int CRFwindow = 2; public boolean estimateInitial = false; public transient String biasedTrainFile = null; public transient String confusionMatrix = null; public String outputEncoding = null; public boolean useKBest = false; public String searchGraphPrefix = null; public double searchGraphPrune = Double.POSITIVE_INFINITY; public int kBest = 1; // more chinese segmenter features for GALE 2007 public boolean useFeaturesC4gram; public boolean useFeaturesC5gram; public boolean useFeaturesC6gram; public boolean useFeaturesCpC4gram; public boolean useFeaturesCpC5gram; public boolean useFeaturesCpC6gram; public boolean useUnicodeType; public boolean useUnicodeType4gram; public boolean useUnicodeType5gram; public boolean use4Clique; public boolean useUnicodeBlock; public boolean useShapeStrings1; public boolean useShapeStrings3; public boolean useShapeStrings4; public boolean useShapeStrings5; public boolean useGoodForNamesCpC; public boolean useDictionaryConjunctions; public boolean expandMidDot; public int printFeaturesUpto; // = 0; public boolean useDictionaryConjunctions3; public boolean useWordUTypeConjunctions2; public boolean useWordUTypeConjunctions3; public boolean useWordShapeConjunctions2; public boolean 
useWordShapeConjunctions3; public boolean useMidDotShape; public boolean augmentedDateChars; public boolean suppressMidDotPostprocessing; public boolean printNR; // a flag for WordAndTagDocumentReaderAndWriter public String classBias = null; public boolean printLabelValue; // Old printErrorStuff public boolean useRobustQN = false; public boolean combo = false; public boolean useGenericFeatures = false; public boolean verboseForTrueCasing = false; public String trainHierarchical = null; public String domain = null; public boolean baseline = false; public String transferSigmas = null; public boolean doFE = false; public boolean restrictLabels = true; public boolean announceObjectBankEntries = false; // whether to print a line // giving each ObjectBank // entry (usually a // filename) // Arabic Subject Detector flags public boolean usePos = false; public boolean useAgreement = false; public boolean useAccCase = false; public boolean useInna = false; public boolean useConcord = false; public boolean useFirstNgram = false; public boolean useLastNgram = false; public boolean collapseNN = false; public boolean useConjBreak = false; public boolean useAuxPairs = false; public boolean usePPVBPairs = false; public boolean useAnnexing = false; public boolean useTemporalNN = false; public boolean usePath = false; public boolean innaPPAttach = false; public boolean markProperNN = false; public boolean markMasdar = false; public boolean useSVO = false; public int numTags = 3; public boolean useTagsCpC = false; public boolean useTagsCpCp2C = false; public boolean useTagsCpCp2Cp3C = false; public boolean useTagsCpCp2Cp3Cp4C = false; public double l1reg = 0.0; // truecaser flags: public String mixedCaseMapFile = ""; public String auxTrueCaseModels = ""; // more flags inspired by Zhang and Johnson 2003 public boolean use2W = false; public boolean useLC = false; public boolean useYetMoreCpCShapes = false; // added for the NFL domain public boolean useIfInteger = false; public String 
exportFeatures = null; public boolean useInPlaceSGD = false; public boolean useTopics = false; // Number of iterations before evaluating weights (0 = don't evaluate) public int evaluateIters = 0; // Command to use for evaluation public String evalCmd = ""; // Evaluate on training set or not public boolean evaluateTrain = false; // evaluate based on B- and I- begin & inside prefixes // TODO: it would be useful to serialize the evaluation method // TODO: what does this have to do with -iobTags public transient boolean evaluateIOB = false; public int tuneSampleSize = -1; public boolean usePhraseFeatures = false; public boolean usePhraseWords = false; public boolean usePhraseWordTags = false; public boolean usePhraseWordSpecialTags = false; public boolean useCommonWordsFeature = false; public boolean useProtoFeatures = false; public boolean useWordnetFeatures = false; public String tokenFactory = "edu.stanford.nlp.process.CoreLabelTokenFactory"; public Object[] tokenFactoryArgs = new Object[0]; public String tokensAnnotationClassName = "edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation"; public transient String tokenizerOptions = null; public boolean useCorefFeatures = false; public String wikiFeatureDbFile = null; // for combining 2 CRFs - one trained from noisy data and another trained from // non-noisy public boolean useNoisyNonNoisyFeature = false; // year annotation of the document public boolean useYear = false; public boolean useSentenceNumber = false; // to know source of the label. Currently, used to know which pattern is used // to label the token public boolean useLabelSource = false; /** * Whether to (not) lowercase tokens before looking them up in distsim * lexicon. By default lowercasing was done, but now it doesn't have to be * true :-). */ public boolean casedDistSim = false; /** * The format of the distsim file. Known values are: alexClark = TSV file. * word TAB clusterNumber [optional other content] terryKoo = TSV file. 
* clusterBitString TAB word TAB frequency */
  public String distSimFileFormat = "alexClark";

  /**
   * If this number is greater than 0, the distSim class is assumed to be a bit
   * string and is truncated at this many characters. Normal distSim features
   * will then use this amount of resolution. Extra, special distsim features
   * may work at a coarser level of resolution. Since the lexicon only stores
   * this length of bit string, there is then no way to have finer-grained
   * clusters.
   */
  public int distSimMaxBits = 8;

  /**
   * If this is set to true, all digit characters get mapped to '9' in a distsim
   * lexicon and for lookup. This is a simple word shaping that can shrink
   * distsim lexicons and improve their performance.
   */
  public boolean numberEquivalenceDistSim = false;

  /**
   * What class to assign to words not found in the dist sim lexicon. You might
   * want to make it a known class, if one is the "default" class.
   * Note that the default value is the four-character string "null", not a
   * null reference.
   */
  public String unknownWordDistSimClass = "null";

  /**
   * Use prefixes and suffixes from the previous and next word.
   */
  public boolean useNeighborNGrams = false;

  /**
   * This function maps words in the training or test data to new
   * words. They are used at the feature extractor level, ie in the
   * FeatureFactory. For now, only the NERFeatureFactory uses this.
*/ public Function wordFunction = null; public static final String DEFAULT_PLAIN_TEXT_READER = "edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter"; public String plainTextDocumentReaderAndWriter = DEFAULT_PLAIN_TEXT_READER; /** * Add features from the automated Latvian morphology analysis * @author Pēteris Paikens */ public boolean useLVMorphoAnalyzer = false; public String lvMorphoAnalyzerTag = AttributeNames.i_PartOfSpeech; public boolean useLVMorphoAnalyzerPOS = false; public boolean useLVMorphoAnalyzerTag = false; public boolean useLVMorphoAnalyzerNext = false; public boolean useLVMorphoAnalyzerPrev = false; public boolean useLVMorphoAnalyzerItemIDs = false; // "ADD VARIABLES ABOVE HERE" public transient List phraseGazettes = null; public transient Properties props = null; public SeqClassifierFlags() { } /** * Create a new SeqClassifierFlags object and initialize it using values in * the Properties object. The properties are printed to stderr as it works. * * @param props * The properties object used for initialization */ public SeqClassifierFlags(Properties props) { setProperties(props, true); } /** * Initialize this object using values in Properties object. The properties * are printed to stderr as it works. * * @param props * The properties object used for initialization */ public final void setProperties(Properties props) { setProperties(props, true); } /** * Initialize using values in Properties file. * * @param props * The properties object used for initialization * @param printProps * Whether to print the properties to stderr as it works. 
*/ public void setProperties(Properties props, boolean printProps) { this.props = props; StringBuilder sb = new StringBuilder(stringRep); for (Enumeration e = props.propertyNames(); e.hasMoreElements();) { String key = (String) e.nextElement(); String val = props.getProperty(key); if (!(key.length() == 0 && val.length() == 0)) { if (printProps) { System.err.println(key + '=' + val); } sb.append(key).append('=').append(val).append('\n'); } if (key.equalsIgnoreCase("macro")) { if (Boolean.parseBoolean(val)) { useObservedSequencesOnly = true; readerAndWriter = "edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter"; // useClassFeature = true; // submit useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // reverse useReverse = true; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth sigma = 3.0; // normalize normalize = true; normalizeTimex = true; } } else if (key.equalsIgnoreCase("goodCoNLL")) { if (Boolean.parseBoolean(val)) { // featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory"; readerAndWriter = "edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter"; useObservedSequencesOnly = true; // useClassFeature = true; useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // should this be set?? maxNGramLeng = 6; No (to get best score). 
// reverse useReverse = false; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth sigma = 50.0; // increased Aug 2006 from 20; helpful with less feats // normalize normalize = true; normalizeTimex = true; maxLeft = 2; useDisjunctive = true; disjunctionWidth = 4; // clearly optimal for CoNLL useBoundarySequences = true; useLemmas = true; // no-op except for German usePrevNextLemmas = true; // no-op except for German inputEncoding = "iso-8859-1"; // needed for CoNLL German files // opt useQN = true; QNsize = 15; } } else if (key.equalsIgnoreCase("conllNoTags")) { if (Boolean.parseBoolean(val)) { readerAndWriter = "edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter"; // trainMap=testMap="word=0,answer=1"; map = "word=0,answer=1"; useObservedSequencesOnly = true; // useClassFeature = true; useLongSequences = true; // useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; // useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // reverse useReverse = false; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence // useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth sigma = 20.0; adaptSigma = 20.0; // normalize normalize = true; normalizeTimex = true; maxLeft = 2; useDisjunctive = true; disjunctionWidth = 4; useBoundarySequences = true; // useLemmas = true; // no-op except for German // usePrevNextLemmas = true; // no-op except for German inputEncoding = "iso-8859-1"; // opt useQN = true; QNsize = 15; } } else if (key.equalsIgnoreCase("notags")) { if (Boolean.parseBoolean(val)) { // turn off all features 
that use POS tags // this is slightly crude: it also turns off a few things that // don't use tags in e.g., useTaggySequences useTags = false; useSymTags = false; useTaggySequences = false; useOccurrencePatterns = false; } } else if (key.equalsIgnoreCase("submit")) { if (Boolean.parseBoolean(val)) { useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; wordShape = WordShapeClassifier.WORDSHAPEDAN1; useSequences = true; usePrevSequences = true; } } else if (key.equalsIgnoreCase("binnedLengths")) { if (val != null) { String[] binnedLengthStrs = val.split("[, ]+"); binnedLengths = new int[binnedLengthStrs.length]; for (int i = 0; i < binnedLengths.length; i++) { binnedLengths[i] = Integer.parseInt(binnedLengthStrs[i]); } } } else if (key.equalsIgnoreCase("makeConsistent")) { makeConsistent = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dump")) { dump = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNGrams")) { useNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNeighborNGrams")) { useNeighborNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wordFunction")) { wordFunction = ReflectionLoading.loadByReflection(val); } else if (key.equalsIgnoreCase("conjoinShapeNGrams")) { conjoinShapeNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("lowercaseNGrams")) { lowercaseNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useIsURL")) { useIsURL = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useURLSequences")) { useURLSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEntityTypes")) { useEntityTypes = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEntityRule")) { useEntityRule = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOrdinal")) { useOrdinal = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useEntityTypeSequences")) { useEntityTypeSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useIsDateRange")) { useIsDateRange = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dehyphenateNGrams")) { dehyphenateNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("lowerNewgeneThreshold")) { lowerNewgeneThreshold = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrev")) { usePrev = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNext")) { useNext = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTags")) { useTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordPairs")) { useWordPairs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGazettes")) { useGazettes = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wordShape")) { wordShape = WordShapeClassifier.lookupShaper(val); } else if (key.equalsIgnoreCase("useShapeStrings")) { useShapeStrings = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGoodForNamesCpC")) { useGoodForNamesCpC = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictionaryConjunctions")) { useDictionaryConjunctions = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictionaryConjunctions3")) { useDictionaryConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("expandMidDot")) { expandMidDot = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSequences")) { useSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevSequences")) { usePrevSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextSequences")) { useNextSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLongSequences")) { useLongSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBoundarySequences")) { useBoundarySequences = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useTaggySequences")) { useTaggySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useExtraTaggySequences")) { useExtraTaggySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTaggySequencesShapeInteraction")) { useTaggySequencesShapeInteraction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyZeroethOrder")) { strictlyZeroethOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyFirstOrder")) { strictlyFirstOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlySecondOrder")) { strictlySecondOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyThirdOrder")) { strictlyThirdOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dontExtendTaggy")) { dontExtendTaggy = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("entitySubclassification")) { entitySubclassification = val; } else if (key.equalsIgnoreCase("useGazettePhrases")) { useGazettePhrases = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("phraseGazettes")) { StringTokenizer st = new StringTokenizer(val, " ,;\t"); if (phraseGazettes == null) { phraseGazettes = new ArrayList(); } while (st.hasMoreTokens()) { phraseGazettes.add(st.nextToken()); } } else if (key.equalsIgnoreCase("useSum")) { useSum = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("verboseMode")) { verboseMode = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("tolerance")) { tolerance = Double.parseDouble(val); } else if (key.equalsIgnoreCase("maxIterations")) { maxIterations = Integer.parseInt(val); } else if (key.equalsIgnoreCase("exportFeatures")) { exportFeatures = val; } else if (key.equalsIgnoreCase("printFeatures")) { printFeatures = val; } else if (key.equalsIgnoreCase("printFeaturesUpto")) { printFeaturesUpto = Integer.parseInt(val); } else if (key.equalsIgnoreCase("lastNameList")) { lastNameList = val; } else if 
(key.equalsIgnoreCase("maleNameList")) { maleNameList = val; } else if (key.equalsIgnoreCase("femaleNameList")) { femaleNameList = val; } else if (key.equalsIgnoreCase("useSymTags")) { useSymTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSymWordPairs")) { useSymWordPairs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printClassifier")) { printClassifier = val; } else if (key.equalsIgnoreCase("printClassifierParam")) { printClassifierParam = Integer.parseInt(val); } else if (key.equalsIgnoreCase("intern")) { intern = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("mergetags")) { mergeTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("iobtags")) { iobTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useViterbi")) { useViterbi = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("intern2")) { intern2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("selfTest")) { selfTest = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sloppyGazette")) { sloppyGazette = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("cleanGazette")) { cleanGazette = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("noMidNGrams")) { noMidNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useReverse")) { useReverse = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("retainEntitySubclassification")) { retainEntitySubclassification = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLemmas")) { useLemmas = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevNextLemmas")) { usePrevNextLemmas = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalizeTerms")) { normalizeTerms = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalizeTimex")) { normalizeTimex = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNB")) { useNB = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useParenMatching")) { useParenMatching = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs")) { useTypeSeqs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs2")) { useTypeSeqs2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs3")) { useTypeSeqs3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDisjunctive")) { useDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("disjunctionWidth")) { disjunctionWidth = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useDisjunctiveShapeInteraction")) { useDisjunctiveShapeInteraction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWideDisjunctive")) { useWideDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wideDisjunctionWidth")) { wideDisjunctionWidth = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useDisjShape")) { useDisjShape = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTitle")) { useTitle = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("booleanFeatures")) { booleanFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useClassFeature")) { useClassFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeConjunctions")) { useShapeConjunctions = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordTag")) { useWordTag = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNPHead")) { useNPHead = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNPGovernor")) { useNPGovernor = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHeadGov")) { useHeadGov = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLastRealWord")) { useLastRealWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextRealWord")) { useNextRealWord = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useOccurrencePatterns")) { useOccurrencePatterns = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeySequences")) { useTypeySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("justify")) { justify = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalize")) { normalize = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("priorType")) { priorType = val; } else if (key.equalsIgnoreCase("sigma")) { sigma = Double.parseDouble(val); } else if (key.equalsIgnoreCase("epsilon")) { epsilon = Double.parseDouble(val); } else if (key.equalsIgnoreCase("beamSize")) { beamSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("removeTopN")) { removeTopN = Integer.parseInt(val); } else if (key.equalsIgnoreCase("removeTopNPercent")) { removeTopNPercent = Double.parseDouble(val); } else if (key.equalsIgnoreCase("randomizedRatio")) { randomizedRatio = Double.parseDouble(val); } else if (key.equalsIgnoreCase("numTimesRemoveTopN")) { numTimesRemoveTopN = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxLeft")) { maxLeft = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxRight")) { maxRight = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxNGramLeng")) { maxNGramLeng = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useGazFeatures")) { useGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAltGazFeatures")) { useAltGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMoreGazFeatures")) { useMoreGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbbr")) { useAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMinimalAbbr")) { useMinimalAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbbr1")) { useAbbr1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMinimalAbbr1")) { useMinimalAbbr1 = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("documentReader")) { System.err.println("You are using an outdated flag: -documentReader " + val); System.err.println("Please use -readerAndWriter instead."); } else if (key.equalsIgnoreCase("deleteBlankLines")) { deleteBlankLines = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("answerFile")) { answerFile = val; } else if (key.equalsIgnoreCase("altAnswerFile")) { altAnswerFile = val; } else if (key.equalsIgnoreCase("loadClassifier")) { loadClassifier = val; } else if (key.equalsIgnoreCase("loadTextClassifier")) { loadTextClassifier = val; } else if (key.equalsIgnoreCase("loadJarClassifier")) { loadJarClassifier = val; } else if (key.equalsIgnoreCase("loadAuxClassifier")) { loadAuxClassifier = val; } else if (key.equalsIgnoreCase("serializeTo")) { serializeTo = val; } else if (key.equalsIgnoreCase("serializeToText")) { serializeToText = val; } else if (key.equalsIgnoreCase("serializeDatasetsDir")) { serializeDatasetsDir = val; } else if (key.equalsIgnoreCase("loadDatasetsDir")) { loadDatasetsDir = val; } else if (key.equalsIgnoreCase("pushDir")) { pushDir = val; } else if (key.equalsIgnoreCase("purgeDatasets")) { purgeDatasets = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepOBInMemory")) { keepOBInMemory = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("fakeDataset")) { fakeDataset = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("numDatasetsPerFile")) { numDatasetsPerFile = Integer.parseInt(val); } else if (key.equalsIgnoreCase("trainFile")) { trainFile = val; } else if (key.equalsIgnoreCase("biasedTrainFile")) { biasedTrainFile = val; } else if (key.equalsIgnoreCase("classBias")) { classBias = val; } else if (key.equalsIgnoreCase("confusionMatrix")) { confusionMatrix = val; } else if (key.equalsIgnoreCase("adaptFile")) { adaptFile = val; } else if (key.equalsIgnoreCase("devFile")) { devFile = val; } else if (key.equalsIgnoreCase("testFile")) { testFile = val; } else if 
(key.equalsIgnoreCase("outputFile")) { outputFile = val; } else if (key.equalsIgnoreCase("textFile")) { textFile = val; } else if (key.equalsIgnoreCase("readStdin")) { readStdin = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("initialWeights")) { initialWeights = val; } else if (key.equalsIgnoreCase("interimOutputFreq")) { interimOutputFreq = Integer.parseInt(val); } else if (key.equalsIgnoreCase("inputEncoding")) { inputEncoding = val; } else if (key.equalsIgnoreCase("outputEncoding")) { outputEncoding = val; } else if (key.equalsIgnoreCase("gazette")) { useGazettes = true; StringTokenizer st = new StringTokenizer(val, " ,;\t"); if (gazettes == null) { gazettes = new ArrayList(); } // for after deserialization, as gazettes is transient while (st.hasMoreTokens()) { gazettes.add(st.nextToken()); } } else if (key.equalsIgnoreCase("useQN")) { useQN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("QNsize")) { QNsize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("QNsize2")) { QNsize2 = Integer.parseInt(val); } else if (key.equalsIgnoreCase("l1reg")) { useQN = false; l1reg = Double.parseDouble(val); } else if (key.equalsIgnoreCase("useFloat")) { useFloat = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("trainMap")) { System.err.println("trainMap and testMap are no longer valid options - please use map instead."); throw new RuntimeException(); } else if (key.equalsIgnoreCase("testMap")) { System.err.println("trainMap and testMap are no longer valid options - please use map instead."); throw new RuntimeException(); } else if (key.equalsIgnoreCase("map")) { map = val; } else if (key.equalsIgnoreCase("useMoreAbbr")) { useMoreAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevVB")) { usePrevVB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextVB")) { useNextVB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useVB")) { if (Boolean.parseBoolean(val)) { useVB = true; 
usePrevVB = true; useNextVB = true; } } else if (key.equalsIgnoreCase("useChunks")) { useChunks = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useChunkySequences")) { useChunkySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("greekifyNGrams")) { greekifyNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("restrictTransitionsTimit")) { restrictTransitionsTimit = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMoreTags")) { useMoreTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBeginSent")) { useBeginSent = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePosition")) { usePosition = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGenia")) { useGENIA = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstr")) { useABSTR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWeb")) { useWEB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAnte")) { useANTE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAcr")) { useACR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTok")) { useTOK = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbgene")) { useABGENE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstrFreqDict")) { useABSTRFreqDict = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstrFreq")) { useABSTRFreq = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFreq")) { useFREQ = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usewebfreqdict")) { useWEBFreqDict = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("bioSubmitOutput")) { bioSubmitOutput = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("subCWGaz")) { subCWGaz = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("splitOnHead")) { splitOnHead = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("featureCountThreshold")) { featureCountThreshold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useWord")) { useWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("memoryThrift")) { memoryThrift = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("timitDatum")) { timitDatum = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("splitDocuments")) { splitDocuments = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureWeightThreshold")) { featureWeightThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("backgroundSymbol")) { backgroundSymbol = val; } else if (key.equalsIgnoreCase("featureFactory")) { featureFactory = val; if (featureFactory.equalsIgnoreCase("SuperSimpleFeatureFactory")) { featureFactory = "edu.stanford.nlp.sequences.SuperSimpleFeatureFactory"; } else if (featureFactory.equalsIgnoreCase("NERFeatureFactory")) { featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory"; } else if (featureFactory.equalsIgnoreCase("GazNERFeatureFactory")) { featureFactory = "edu.stanford.nlp.sequences.GazNERFeatureFactory"; } else if (featureFactory.equalsIgnoreCase("IncludeAllFeatureFactory")) { featureFactory = "edu.stanford.nlp.sequences.IncludeAllFeatureFactory"; } else if (featureFactory.equalsIgnoreCase("PhraseFeatureFactory")) { featureFactory = "edu.stanford.nlp.article.extraction.PhraseFeatureFactory"; } } else if (key.equalsIgnoreCase("printXML")) { printXML = Boolean.parseBoolean(val); // todo: This appears unused now. // Was it replaced by // outputFormat? 
} else if (key.equalsIgnoreCase("useSeenFeaturesOnly")) { useSeenFeaturesOnly = Boolean.parseBoolean(val); // chinese word-segmenter features } else if (key.equalsIgnoreCase("useRadical")) { useRadical = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBigramInTwoClique")) { useBigramInTwoClique = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useReverseAffix")) { useReverseAffix = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("charHalfWindow")) { charHalfWindow = Integer.parseInt(val); } else if (key.equalsIgnoreCase("purgeFeatures")) { purgeFeatures = Integer.parseInt(val); } else if (key.equalsIgnoreCase("ocrFold")) { ocrFold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("morphFeatureFile")) { morphFeatureFile = val; } else if (key.equalsIgnoreCase("svmModelFile")) { svmModelFile = val; /* Dictionary */ } else if (key.equalsIgnoreCase("useDictleng")) { useDictleng = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDict2")) { useDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOutDict2")) { useOutDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("outDict2")) { outDict2 = val; } else if (key.equalsIgnoreCase("useDictCTB2")) { useDictCTB2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictASBC2")) { useDictASBC2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictPK2")) { useDictPK2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictHK2")) { useDictHK2 = Boolean.parseBoolean(val); /* N-gram flags */ } else if (key.equalsIgnoreCase("useWord1")) { useWord1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord2")) { useWord2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord3")) { useWord3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord4")) { useWord4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad1")) { useRad1 = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad2")) { useRad2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad2b")) { useRad2b = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordn")) { useWordn = Boolean.parseBoolean(val); /* affix flags */ } else if (key.equalsIgnoreCase("useCTBPre1")) { useCTBPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useCTBSuf1")) { useCTBSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCPre1")) { useASBCPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCSuf1")) { useASBCSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKPre1")) { useHKPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKSuf1")) { useHKSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKPre1")) { usePKPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKSuf1")) { usePKSuf1 = Boolean.parseBoolean(val); /* POS flags */ } else if (key.equalsIgnoreCase("useCTBChar2")) { useCTBChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrediction")) { usePrediction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCChar2")) { useASBCChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKChar2")) { useHKChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKChar2")) { usePKChar2 = Boolean.parseBoolean(val); /* Rule flag */ } else if (key.equalsIgnoreCase("useRule2")) { useRule2 = Boolean.parseBoolean(val); /* ASBC and HK */ } else if (key.equalsIgnoreCase("useBig5")) { useBig5 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict2")) { useNegDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict3")) { useNegDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict4")) { useNegDict4 = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useNegCTBDict2")) { useNegCTBDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegCTBDict3")) { useNegCTBDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegCTBDict4")) { useNegCTBDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict2")) { useNegASBCDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict3")) { useNegASBCDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict4")) { useNegASBCDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict2")) { useNegPKDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict3")) { useNegPKDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict4")) { useNegPKDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict2")) { useNegHKDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict3")) { useNegHKDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict4")) { useNegHKDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePre")) { usePre = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSuf")) { useSuf = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRule")) { useRule = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAs")) { useAs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePk")) { usePk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHk")) { useHk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMsr")) { useMsr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMSRChar2")) { useMSRChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC4gram")) { useFeaturesC4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC5gram")) { useFeaturesC5gram = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC6gram")) { useFeaturesC6gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC4gram")) { useFeaturesCpC4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC5gram")) { useFeaturesCpC5gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC6gram")) { useFeaturesCpC6gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType")) { useUnicodeType = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeBlock")) { useUnicodeBlock = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType4gram")) { useUnicodeType4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType5gram")) { useUnicodeType5gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings1")) { useShapeStrings1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings3")) { useShapeStrings3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings4")) { useShapeStrings4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings5")) { useShapeStrings5 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordUTypeConjunctions2")) { useWordUTypeConjunctions2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordUTypeConjunctions3")) { useWordUTypeConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordShapeConjunctions2")) { useWordShapeConjunctions2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordShapeConjunctions3")) { useWordShapeConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMidDotShape")) { useMidDotShape = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("augmentedDateChars")) { augmentedDateChars = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("suppressMidDotPostprocessing")) { suppressMidDotPostprocessing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printNR")) { printNR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("use4Clique")) { use4Clique = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFilter")) { useFilter = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("largeChSegFile")) { largeChSegFile = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepEnglishWhitespaces")) { keepEnglishWhitespaces = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepAllWhitespaces")) { keepAllWhitespaces = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sighanPostProcessing")) { sighanPostProcessing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useChPos")) { useChPos = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sighanCorporaDict")) { sighanCorporaDict = val; // end chinese word-segmenter features } else if (key.equalsIgnoreCase("useObservedSequencesOnly")) { useObservedSequencesOnly = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("maxDocSize")) { maxDocSize = Integer.parseInt(val); splitDocuments = true; } else if (key.equalsIgnoreCase("printProbs")) { printProbs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printFirstOrderProbs")) { printFirstOrderProbs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("saveFeatureIndexToDisk")) { saveFeatureIndexToDisk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("removeBackgroundSingletonFeatures")) { removeBackgroundSingletonFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("doGibbs")) { doGibbs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNERPrior")) { useNERPrior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAcqPrior")) { useAcqPrior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSemPrior")) { 
useSemPrior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMUCFeatures")) { useMUCFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("initViterbi")) { initViterbi = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("checkNameList")) { checkNameList = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFirstWord")) { useFirstWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnknown")) { useUnknown = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("cacheNGrams")) { cacheNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNumberFeature")) { useNumberFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("annealingRate")) { annealingRate = Double.parseDouble(val); } else if (key.equalsIgnoreCase("annealingType")) { if (val.equalsIgnoreCase("linear") || val.equalsIgnoreCase("exp") || val.equalsIgnoreCase("exponential")) { annealingType = val; } else { System.err.println("unknown annealingType: " + val + ". Please use linear|exp|exponential"); } } else if (key.equalsIgnoreCase("numSamples")) { numSamples = Integer.parseInt(val); } else if (key.equalsIgnoreCase("inferenceType")) { inferenceType = val; } else if (key.equalsIgnoreCase("loadProcessedData")) { loadProcessedData = val; } else if (key.equalsIgnoreCase("normalizationTable")) { normalizationTable = val; } else if (key.equalsIgnoreCase("dictionary")) { // don't set if empty string or spaces or true: revert it to null // special case so can empty out dictionary list on command line! val = val.trim(); if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals("val")) { dictionary = val; } else { dictionary = null; } } else if (key.equalsIgnoreCase("serDictionary")) { // don't set if empty string or spaces or true: revert it to null // special case so can empty out dictionary list on command line! 
val = val.trim(); if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals("val")) { serializedDictionary = val; } else { serializedDictionary = null; } } else if (key.equalsIgnoreCase("dictionary2")) { // don't set if empty string or spaces or true: revert it to null // special case so can empty out dictionary list on command line! val = val.trim(); if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals("val")) { dictionary2 = val; } else { dictionary2 = null; } } else if (key.equalsIgnoreCase("normTableEncoding")) { normTableEncoding = val; } else if (key.equalsIgnoreCase("useLemmaAsWord")) { useLemmaAsWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("type")) { type = val; } else if (key.equalsIgnoreCase("readerAndWriter")) { readerAndWriter = val; } else if (key.equalsIgnoreCase("plainTextDocumentReaderAndWriter")) { plainTextDocumentReaderAndWriter = val; } else if (key.equalsIgnoreCase("gazFilesFile")) { gazFilesFile = val; } else if (key.equalsIgnoreCase("baseTrainDir")) { baseTrainDir = val; } else if (key.equalsIgnoreCase("baseTestDir")) { baseTestDir = val; } else if (key.equalsIgnoreCase("trainFiles")) { trainFiles = val; } else if (key.equalsIgnoreCase("trainFileList")) { trainFileList = val; } else if (key.equalsIgnoreCase("trainDirs")) { trainDirs = val; } else if (key.equalsIgnoreCase("testDirs")) { testDirs = val; } else if (key.equalsIgnoreCase("testFiles")) { testFiles = val; } else if (key.equalsIgnoreCase("usePrediction2")) { usePrediction2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useObservedFeaturesOnly")) { useObservedFeaturesOnly = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("iobWrapper")) { iobWrapper = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDistSim")) { useDistSim = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("casedDistSim")) { casedDistSim = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("distSimFileFormat")) { distSimFileFormat = val; } else if (key.equalsIgnoreCase("distSimMaxBits")) { distSimMaxBits = Integer.parseInt(val); } else if (key.equalsIgnoreCase("numberEquivalenceDistSim")) { numberEquivalenceDistSim = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("unknownWordDistSimClass")) { unknownWordDistSimClass = val; } else if (key.equalsIgnoreCase("useOnlySeenWeights")) { useOnlySeenWeights = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("predProp")) { predProp = val; } else if (key.equalsIgnoreCase("distSimLexicon")) { distSimLexicon = val; } else if (key.equalsIgnoreCase("useSegmentation")) { useSegmentation = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useInternal")) { useInternal = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useExternal")) { useExternal = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEitherSideWord")) { useEitherSideWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEitherSideDisjunctive")) { useEitherSideDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureDiffThresh")) { featureDiffThresh = Double.parseDouble(val); if (props.getProperty("numTimesPruneFeatures") == null) { numTimesPruneFeatures = 1; } } else if (key.equalsIgnoreCase("numTimesPruneFeatures")) { numTimesPruneFeatures = Integer.parseInt(val); } else if (key.equalsIgnoreCase("newgeneThreshold")) { newgeneThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("adaptFile")) { adaptFile = val; } else if (key.equalsIgnoreCase("doAdaptation")) { doAdaptation = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("selfTrainFile")) { selfTrainFile = val; } else if (key.equalsIgnoreCase("selfTrainIterations")) { selfTrainIterations = Integer.parseInt(val); } else if (key.equalsIgnoreCase("selfTrainWindowSize")) { selfTrainWindowSize = Integer.parseInt(val); } else if 
(key.equalsIgnoreCase("selfTrainConfidenceThreshold")) { selfTrainConfidenceThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("numFolds")) { numFolds = Integer.parseInt(val); } else if (key.equalsIgnoreCase("startFold")) { startFold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("endFold")) { endFold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("adaptSigma")) { adaptSigma = Double.parseDouble(val); } else if (key.startsWith("prop") && !key.equals("prop")) { comboProps.add(val); } else if (key.equalsIgnoreCase("outputFormat")) { outputFormat = val; } else if (key.equalsIgnoreCase("useSMD")) { useSMD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useScaledSGD")) { useScaledSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("scaledSGDMethod")) { scaledSGDMethod = Integer.parseInt(val); } else if (key.equalsIgnoreCase("tuneSGD")) { tuneSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("StochasticCalculateMethod")) { if (val.equalsIgnoreCase("AlgorithmicDifferentiation")) { stochasticMethod = StochasticCalculateMethods.AlgorithmicDifferentiation; } else if (val.equalsIgnoreCase("IncorporatedFiniteDifference")) { stochasticMethod = StochasticCalculateMethods.IncorporatedFiniteDifference; } else if (val.equalsIgnoreCase("ExternalFinitedifference")) { stochasticMethod = StochasticCalculateMethods.ExternalFiniteDifference; } } else if (key.equalsIgnoreCase("initialGain")) { initialGain = Double.parseDouble(val); } else if (key.equalsIgnoreCase("stochasticBatchSize")) { stochasticBatchSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("SGD2QNhessSamples")) { SGD2QNhessSamples = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useSGD")) { useSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useInPlaceSGD")) { useInPlaceSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSGDtoQN")) { useSGDtoQN = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("SGDPasses")) { SGDPasses = Integer.parseInt(val); } else if (key.equalsIgnoreCase("QNPasses")) { QNPasses = Integer.parseInt(val); } else if (key.equalsIgnoreCase("gainSGD")) { gainSGD = Double.parseDouble(val); } else if (key.equalsIgnoreCase("useHybrid")) { useHybrid = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("hybridCutoffIteration")) { hybridCutoffIteration = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useStochasticQN")) { useStochasticQN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("outputIterationsToFile")) { outputIterationsToFile = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("testObjFunction")) { testObjFunction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("testVariance")) { testVariance = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("CRForder")) { CRForder = Integer.parseInt(val); } else if (key.equalsIgnoreCase("CRFwindow")) { CRFwindow = Integer.parseInt(val); } else if (key.equalsIgnoreCase("testHessSamples")) { testHessSamples = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("estimateInitial")) { estimateInitial = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printLabelValue")) { printLabelValue = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("searchGraphPrefix")) { searchGraphPrefix = val; } else if (key.equalsIgnoreCase("searchGraphPrune")) { searchGraphPrune = Double.parseDouble(val); } else if (key.equalsIgnoreCase("kBest")) { useKBest = true; kBest = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useRobustQN")) { useRobustQN = true; } else if (key.equalsIgnoreCase("combo")) { combo = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("verboseForTrueCasing")) { verboseForTrueCasing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("trainHierarchical")) { trainHierarchical = val; } else if (key.equalsIgnoreCase("domain")) { domain = val; } else if 
(key.equalsIgnoreCase("baseline")) { baseline = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("doFE")) { doFE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("restrictLabels")) { restrictLabels = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("transferSigmas")) { transferSigmas = val; } else if (key.equalsIgnoreCase("announceObjectBankEntries")) { announceObjectBankEntries = true; } else if (key.equalsIgnoreCase("usePos")) { usePos = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAgreement")) { useAgreement = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAccCase")) { useAccCase = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useInna")) { useInna = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useConcord")) { useConcord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFirstNgram")) { useFirstNgram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLastNgram")) { useLastNgram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("collapseNN")) { collapseNN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTagsCpC")) { useTagsCpC = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTagsCpCp2C")) { useTagsCpCp2C = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTagsCpCp2Cp3C")) { useTagsCpCp2Cp3C = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTagsCpCp2Cp3Cp4C")) { useTagsCpCp2Cp3Cp4C = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("numTags")) { numTags = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useConjBreak")) { useConjBreak = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAuxPairs")) { useAuxPairs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePPVBPairs")) { usePPVBPairs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAnnexing")) { useAnnexing = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useTemporalNN")) { useTemporalNN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("markProperNN")) { markProperNN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePath")) { usePath = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("markMasdar")) { markMasdar = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("innaPPAttach")) { innaPPAttach = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSVO")) { useSVO = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("mixedCaseMapFile")) { mixedCaseMapFile = val; } else if (key.equalsIgnoreCase("auxTrueCaseModels")) { auxTrueCaseModels = val; } else if (key.equalsIgnoreCase("use2W")) { use2W = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLC")) { useLC = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useYetMoreCpCShapes")) { useYetMoreCpCShapes = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useIfInteger")) { useIfInteger = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("twoStage")) { twoStage = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("evaluateIters")) { evaluateIters = Integer.parseInt(val); } else if (key.equalsIgnoreCase("evalCmd")) { evalCmd = val; } else if (key.equalsIgnoreCase("evaluateTrain")) { evaluateTrain = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("evaluateIOB")) { evaluateIOB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("tuneSampleSize")) { tuneSampleSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useTopics")) { useTopics = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePhraseFeatures")) { usePhraseFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePhraseWords")) { usePhraseWords = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePhraseWordTags")) { usePhraseWordTags = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("usePhraseWordSpecialTags")) { usePhraseWordSpecialTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useProtoFeatures")) { useProtoFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordnetFeatures")) { useWordnetFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wikiFeatureDbFile")) { wikiFeatureDbFile = val; } else if (key.equalsIgnoreCase("tokenizerOptions")) { tokenizerOptions = val; } else if (key.equalsIgnoreCase("useCommonWordsFeature")) { useCommonWordsFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useYear")) { useYear = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSentenceNumber")) { useSentenceNumber = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLabelSource")) { useLabelSource = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("tokenFactory")) { tokenFactory = val; } else if (key.equalsIgnoreCase("tokensAnnotationClassName")) { tokensAnnotationClassName = val; } else if (key.equalsIgnoreCase("useLVMorphoAnalyzer")) { useLVMorphoAnalyzer = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("LVMorphoAnalyzerTag")) { lvMorphoAnalyzerTag = val; } else if (key.equalsIgnoreCase("useLVMorphoAnalyzerPOS")) { useLVMorphoAnalyzerPOS = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLVMorphoAnalyzerTag")) { useLVMorphoAnalyzerTag = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLVMorphoAnalyzerPrev")) { useLVMorphoAnalyzerPrev = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLVMorphoAnalyzerNext")) { useLVMorphoAnalyzerNext = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLVMorphoAnalyzerItemIDs")) { useLVMorphoAnalyzerItemIDs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMorphologyFeatures")) { useMorphologyFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMorphoCase")) { useMorphoCase = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMorphoPOS")) { useMorphoPOS = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMorphoLetaLemma")) { useMorphoLetaLemma = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMorphoNumber")) { useMorphoNumber = Boolean.parseBoolean(val); // ADD VALUE ABOVE HERE } else if (key.length() > 0 && !key.equals("prop")) { System.err.println("Unknown property: |" + key + '|'); } } if (startFold > numFolds) { System.err.println("startFold > numFolds -> setting startFold to 1"); startFold = 1; } if (endFold > numFolds) { System.err.println("endFold > numFolds -> setting to numFolds"); endFold = numFolds; } if (combo) { splitDocuments = false; } stringRep = sb.toString(); } // end setProperties() /** * Print the properties specified by this object. * * @return A String describing the properties specified by this object. */ @Override public String toString() { return stringRep; } /** * note that this does *not* return string representation of arrays, lists and * enums * * @return * @throws IllegalAccessException * @throws IllegalArgumentException */ public String getNotNullTrueStringRep() { try { String rep = ""; String joiner = "\n"; Field[] f = this.getClass().getFields(); for (Field ff : f) { String name = ff.getName(); Class type = ff.getType(); if (type.equals(Boolean.class) || type.equals(boolean.class)) { boolean val = ff.getBoolean(this); if (val == true) rep += joiner + name + "=" + val; } else if (type.equals(String.class)) { String val = (String) ff.get(this); if (val != null) rep += joiner + name + "=" + val; } else if (type.equals(Double.class)) { Double val = (Double) ff.get(this); rep += joiner + name + "=" + val; } else if (type.equals(double.class)) { double val = ff.getDouble(this); rep += joiner + name + "=" + val; } else if (type.equals(Integer.class)) { Integer val = (Integer) ff.get(this); rep += joiner + name + "=" + val; } else if (type.equals(int.class)) { int val = 
ff.getInt(this); rep += joiner + name + "=" + val; } else if (type.equals(Float.class)) { Float val = (Float) ff.get(this); rep += joiner + name + "=" + val; } else if (type.equals(float.class)) { float val = ff.getFloat(this); rep += joiner + name + "=" + val; } else if (type.equals(Byte.class)) { Byte val = (Byte) ff.get(this); rep += joiner + name + "=" + val; } else if (type.equals(byte.class)) { byte val = ff.getByte(this); rep += joiner + name + "=" + val; } else if (type.equals(char.class)) { char val = ff.getChar(this); rep += joiner + name + "=" + val; } else if (type.equals(Long.class)) { Long val = (Long) ff.get(this); rep += joiner + name + "=" + val; } else if (type.equals(long.class)) { long val = ff.getLong(this); rep += joiner + name + "=" + val; } } return rep; } catch (Exception e) { e.printStackTrace(); } return null; } } // end class SeqClassifierFlags




© 2015 - 2024 Weber Informatics LLC | Privacy Policy