All downloads are free. Search and download functionality uses the official Maven repository.

sentencepiece.SentencepieceModel Maven / Gradle / Ivy

The newest version!
// Generated by the protocol buffer compiler.  DO NOT EDIT!
// source: sentencepiece_model.proto

// Protobuf Java Version: 3.25.5
package sentencepiece;

public final class SentencepieceModel {
  // Private no-op constructor: prevents code outside this class from instantiating it.
  private SentencepieceModel() {}
  /**
   * Extension-registration hook for the lite registry type.
   *
   * <p>The body is empty, so calling this method leaves {@code registry} untouched.
   *
   * @param registry the registry passed by the caller; not read or modified here.
   */
  public static void registerAllExtensions(
      com.google.protobuf.ExtensionRegistryLite registry) {
  }

  /**
   * Extension-registration hook for the full registry type.
   *
   * <p>Forwards to the {@code ExtensionRegistryLite} overload by viewing
   * {@code registry} through its lite type.
   *
   * @param registry the registry to forward.
   */
  public static void registerAllExtensions(
      com.google.protobuf.ExtensionRegistry registry) {
    // Typing the local as the lite registry selects the Lite overload rather than recursing.
    final com.google.protobuf.ExtensionRegistryLite liteRegistry = registry;
    registerAllExtensions(liteRegistry);
  }
  public interface TrainerSpecOrBuilder extends
      // @@protoc_insertion_point(interface_extends:sentencepiece.TrainerSpec)
      com.google.protobuf.GeneratedMessageV3.
          ExtendableMessageOrBuilder {

    /**
     * 
     */////////////////////////////////////////////////////////////////
     * General parameters
     *
     * Input corpus files.
     *  Trainer accepts the following two formats:
     *  A) Monolingual: plain text, one sentence per line.
     *  B) Bilingual:   TSV, source sentence <tab> target sentence
     *  When bilingual data is passed, shared vocabulary model is built.
     *  Note that the input file must be raw corpus, not a preprocessed corpus.
     *  Trainer only loads the first `input_sentence_size` sentences specified
     *  with this parameter.
     * 
* * repeated string input = 1; * @return A list containing the input. */ java.util.List getInputList(); /** *
     */////////////////////////////////////////////////////////////////
     * General parameters
     *
     * Input corpus files.
     *  Trainer accepts the following two formats:
     *  A) Monolingual: plain text, one sentence per line.
     *  B) Bilingual:   TSV, source sentence <tab> target sentence
     *  When bilingual data is passed, shared vocabulary model is built.
     *  Note that the input file must be raw corpus, not a preprocessed corpus.
     *  Trainer only loads the first `input_sentence_size` sentences specified
     *  with this parameter.
     * 
* * repeated string input = 1; * @return The count of input. */ int getInputCount(); /** *
     */////////////////////////////////////////////////////////////////
     * General parameters
     *
     * Input corpus files.
     *  Trainer accepts the following two formats:
     *  A) Monolingual: plain text, one sentence per line.
     *  B) Bilingual:   TSV, source sentence <tab> target sentence
     *  When bilingual data is passed, shared vocabulary model is built.
     *  Note that the input file must be raw corpus, not a preprocessed corpus.
     *  Trainer only loads the first `input_sentence_size` sentences specified
     *  with this parameter.
     * 
* * repeated string input = 1; * @param index The index of the element to return. * @return The input at the given index. */ java.lang.String getInput(int index); /** *
     */////////////////////////////////////////////////////////////////
     * General parameters
     *
     * Input corpus files.
     *  Trainer accepts the following two formats:
     *  A) Monolingual: plain text, one sentence per line.
     *  B) Bilingual:   TSV, source sentence <tab> target sentence
     *  When bilingual data is passed, shared vocabulary model is built.
     *  Note that the input file must be raw corpus, not a preprocessed corpus.
     *  Trainer only loads the first `input_sentence_size` sentences specified
     *  with this parameter.
     * 
* * repeated string input = 1; * @param index The index of the value to return. * @return The bytes of the input at the given index. */ com.google.protobuf.ByteString getInputBytes(int index); /** *
     * Input corpus format:
     * "text": one-sentence-per-line text format (default)
     * "tsv":  sentence <tab> freq
     * 
* * optional string input_format = 7; * @return Whether the inputFormat field is set. */ boolean hasInputFormat(); /** *
     * Input corpus format:
     * "text": one-sentence-per-line text format (default)
     * "tsv":  sentence <tab> freq
     * 
* * optional string input_format = 7; * @return The inputFormat. */ java.lang.String getInputFormat(); /** *
     * Input corpus format:
     * "text": one-sentence-per-line text format (default)
     * "tsv":  sentence <tab> freq
     * 
* * optional string input_format = 7; * @return The bytes for inputFormat. */ com.google.protobuf.ByteString getInputFormatBytes(); /** *
     * Output model file prefix.
     * <model_prefix>.model and <model_prefix>.vocab are generated.
     * 
* * optional string model_prefix = 2; * @return Whether the modelPrefix field is set. */ boolean hasModelPrefix(); /** *
     * Output model file prefix.
     * <model_prefix>.model and <model_prefix>.vocab are generated.
     * 
* * optional string model_prefix = 2; * @return The modelPrefix. */ java.lang.String getModelPrefix(); /** *
     * Output model file prefix.
     * <model_prefix>.model and <model_prefix>.vocab are generated.
     * 
* * optional string model_prefix = 2; * @return The bytes for modelPrefix. */ com.google.protobuf.ByteString getModelPrefixBytes(); /** * optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; * @return Whether the modelType field is set. */ boolean hasModelType(); /** * optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; * @return The modelType. */ sentencepiece.SentencepieceModel.TrainerSpec.ModelType getModelType(); /** *
     * Vocabulary size. 8k is the default size.
     * 
* * optional int32 vocab_size = 4 [default = 8000]; * @return Whether the vocabSize field is set. */ boolean hasVocabSize(); /** *
     * Vocabulary size. 8k is the default size.
     * 
* * optional int32 vocab_size = 4 [default = 8000]; * @return The vocabSize. */ int getVocabSize(); /** *
     * List of the languages this model can accept.
     * Since the model is language-agnostic, this field is used as a reference.
     * 
* * repeated string accept_language = 5; * @return A list containing the acceptLanguage. */ java.util.List getAcceptLanguageList(); /** *
     * List of the languages this model can accept.
     * Since the model is language-agnostic, this field is used as a reference.
     * 
* * repeated string accept_language = 5; * @return The count of acceptLanguage. */ int getAcceptLanguageCount(); /** *
     * List of the languages this model can accept.
     * Since the model is language-agnostic, this field is used as a reference.
     * 
* * repeated string accept_language = 5; * @param index The index of the element to return. * @return The acceptLanguage at the given index. */ java.lang.String getAcceptLanguage(int index); /** *
     * List of the languages this model can accept.
     * Since the model is language-agnostic, this field is used as a reference.
     * 
* * repeated string accept_language = 5; * @param index The index of the value to return. * @return The bytes of the acceptLanguage at the given index. */ com.google.protobuf.ByteString getAcceptLanguageBytes(int index); /** *
     * Size of self-test samples, which are encoded in the model file.
     * 
* * optional int32 self_test_sample_size = 6 [default = 0]; * @return Whether the selfTestSampleSize field is set. */ boolean hasSelfTestSampleSize(); /** *
     * Size of self-test samples, which are encoded in the model file.
     * 
* * optional int32 self_test_sample_size = 6 [default = 0]; * @return The selfTestSampleSize. */ int getSelfTestSampleSize(); /** *
     */////////////////////////////////////////////////////////////////
     * Training parameters.
     *
     * Uses characters which cover the corpus with the ratio of `chars_coverage`.
     * This parameter determines the set of basic Alphabet of sentence piece.
     * 1.0 - `chars_coverage` characters are treated as UNK.
     * See also required_chars field.
     * 
* * optional float character_coverage = 10 [default = 0.9995]; * @return Whether the characterCoverage field is set. */ boolean hasCharacterCoverage(); /** *
     */////////////////////////////////////////////////////////////////
     * Training parameters.
     *
     * Uses characters which cover the corpus with the ratio of `chars_coverage`.
     * This parameter determines the set of basic Alphabet of sentence piece.
     * 1.0 - `chars_coverage` characters are treated as UNK.
     * See also required_chars field.
     * 
* * optional float character_coverage = 10 [default = 0.9995]; * @return The characterCoverage. */ float getCharacterCoverage(); /** *
     * Maximum size of sentences the trainer loads from `input` parameter.
     * Trainer simply loads the `input` files in sequence.
     * It is better to shuffle the input corpus randomly.
     * 
* * optional uint64 input_sentence_size = 11 [default = 0]; * @return Whether the inputSentenceSize field is set. */ boolean hasInputSentenceSize(); /** *
     * Maximum size of sentences the trainer loads from `input` parameter.
     * Trainer simply loads the `input` files in sequence.
     * It is better to shuffle the input corpus randomly.
     * 
* * optional uint64 input_sentence_size = 11 [default = 0]; * @return The inputSentenceSize. */ long getInputSentenceSize(); /** * optional bool shuffle_input_sentence = 19 [default = true]; * @return Whether the shuffleInputSentence field is set. */ boolean hasShuffleInputSentence(); /** * optional bool shuffle_input_sentence = 19 [default = true]; * @return The shuffleInputSentence. */ boolean getShuffleInputSentence(); /** *
     * Maximum size of sentences to make seed sentence pieces.
     * Extended suffix array is constructed to extract frequent
     * sub-strings from the corpus. This uses 20N working space,
     * where N is the size of corpus.
     * 
* * optional int32 mining_sentence_size = 12 [deprecated = true]; * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated. * See sentencepiece_model.proto;l=83 * @return Whether the miningSentenceSize field is set. */ @java.lang.Deprecated boolean hasMiningSentenceSize(); /** *
     * Maximum size of sentences to make seed sentence pieces.
     * Extended suffix array is constructed to extract frequent
     * sub-strings from the corpus. This uses 20N working space,
     * where N is the size of corpus.
     * 
* * optional int32 mining_sentence_size = 12 [deprecated = true]; * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated. * See sentencepiece_model.proto;l=83 * @return The miningSentenceSize. */ @java.lang.Deprecated int getMiningSentenceSize(); /** *
     * Maximum size of sentences to train sentence pieces.
     * 
* * optional int32 training_sentence_size = 13 [deprecated = true]; * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated. * See sentencepiece_model.proto;l=86 * @return Whether the trainingSentenceSize field is set. */ @java.lang.Deprecated boolean hasTrainingSentenceSize(); /** *
     * Maximum size of sentences to train sentence pieces.
     * 
* * optional int32 training_sentence_size = 13 [deprecated = true]; * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated. * See sentencepiece_model.proto;l=86 * @return The trainingSentenceSize. */ @java.lang.Deprecated int getTrainingSentenceSize(); /** *
     * The size of seed sentencepieces.
     * `seed_sentencepiece_size` must be larger than `vocab_size`.
     * 
* * optional int32 seed_sentencepiece_size = 14 [default = 1000000]; * @return Whether the seedSentencepieceSize field is set. */ boolean hasSeedSentencepieceSize(); /** *
     * The size of seed sentencepieces.
     * `seed_sentencepiece_size` must be larger than `vocab_size`.
     * 
* * optional int32 seed_sentencepiece_size = 14 [default = 1000000]; * @return The seedSentencepieceSize. */ int getSeedSentencepieceSize(); /** *
     * In every EM sub-iterations, keeps top
     * `shrinking_factor` * `current sentencepieces size` with respect to
     * the loss of the sentence piece. This value should be smaller than 1.0.
     * 
* * optional float shrinking_factor = 15 [default = 0.75]; * @return Whether the shrinkingFactor field is set. */ boolean hasShrinkingFactor(); /** *
     * In every EM sub-iterations, keeps top
     * `shrinking_factor` * `current sentencepieces size` with respect to
     * the loss of the sentence piece. This value should be smaller than 1.0.
     * 
* * optional float shrinking_factor = 15 [default = 0.75]; * @return The shrinkingFactor. */ float getShrinkingFactor(); /** *
     * The maximum sentence length in byte. The sentences with the length
     * larger than `max_sentence_length` is simply ignored.
     * Longer input tends to bring the following risks:
     *  * Overflow during EM training (unigram language model only)
     *  * Performance drop because of O(n log n) cost in BPE.
     * 
* * optional int32 max_sentence_length = 18 [default = 4192]; * @return Whether the maxSentenceLength field is set. */ boolean hasMaxSentenceLength(); /** *
     * The maximum sentence length in byte. The sentences with the length
     * larger than `max_sentence_length` is simply ignored.
     * Longer input tends to bring the following risks:
     *  * Overflow during EM training (unigram language model only)
     *  * Performance drop because of O(n log n) cost in BPE.
     * 
* * optional int32 max_sentence_length = 18 [default = 4192]; * @return The maxSentenceLength. */ int getMaxSentenceLength(); /** *
     * Number of threads in the training.
     * 
* * optional int32 num_threads = 16 [default = 16]; * @return Whether the numThreads field is set. */ boolean hasNumThreads(); /** *
     * Number of threads in the training.
     * 
* * optional int32 num_threads = 16 [default = 16]; * @return The numThreads. */ int getNumThreads(); /** *
     * Number of EM sub iterations.
     * 
* * optional int32 num_sub_iterations = 17 [default = 2]; * @return Whether the numSubIterations field is set. */ boolean hasNumSubIterations(); /** *
     * Number of EM sub iterations.
     * 
* * optional int32 num_sub_iterations = 17 [default = 2]; * @return The numSubIterations. */ int getNumSubIterations(); /** *
     */////////////////////////////////////////////////////////////////
     * SentencePiece parameters which control the shapes of sentence piece.
     *
     * Maximum length of sentencepiece.
     * 
* * optional int32 max_sentencepiece_length = 20 [default = 16]; * @return Whether the maxSentencepieceLength field is set. */ boolean hasMaxSentencepieceLength(); /** *
     */////////////////////////////////////////////////////////////////
     * SentencePiece parameters which control the shapes of sentence piece.
     *
     * Maximum length of sentencepiece.
     * 
* * optional int32 max_sentencepiece_length = 20 [default = 16]; * @return The maxSentencepieceLength. */ int getMaxSentencepieceLength(); /** *
     * Uses Unicode script to split sentence pieces.
     * When `split_by_unicode_script` is true, we do not allow sentence piece to
     * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
     * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
     * as one script type, since Japanese word can consist of multiple scripts.
     * This exception is always applied regardless of the accept-language
     * parameter.
     * 
* * optional bool split_by_unicode_script = 21 [default = true]; * @return Whether the splitByUnicodeScript field is set. */ boolean hasSplitByUnicodeScript(); /** *
     * Uses Unicode script to split sentence pieces.
     * When `split_by_unicode_script` is true, we do not allow sentence piece to
     * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
     * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
     * as one script type, since Japanese word can consist of multiple scripts.
     * This exception is always applied regardless of the accept-language
     * parameter.
     * 
* * optional bool split_by_unicode_script = 21 [default = true]; * @return The splitByUnicodeScript. */ boolean getSplitByUnicodeScript(); /** *
     * When `split_by_number` is true, put a boundary between number and
     * non-number transition. If we want to treat "F1" is one token, set this flag
     * to be false.
     * 
* * optional bool split_by_number = 23 [default = true]; * @return Whether the splitByNumber field is set. */ boolean hasSplitByNumber(); /** *
     * When `split_by_number` is true, put a boundary between number and
     * non-number transition. If we want to treat "F1" is one token, set this flag
     * to be false.
     * 
* * optional bool split_by_number = 23 [default = true]; * @return The splitByNumber. */ boolean getSplitByNumber(); /** *
     * Use a white space to split sentence pieces.
     * When `split_by_whitespace` is false, we may have the piece containing
     * a white space in the middle. e.g., "in_the".
     * 
* * optional bool split_by_whitespace = 22 [default = true]; * @return Whether the splitByWhitespace field is set. */ boolean hasSplitByWhitespace(); /** *
     * Use a white space to split sentence pieces.
     * When `split_by_whitespace` is false, we may have the piece containing
     * a white space in the middle. e.g., "in_the".
     * 
* * optional bool split_by_whitespace = 22 [default = true]; * @return The splitByWhitespace. */ boolean getSplitByWhitespace(); /** *
     * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
     * hello_. When `treat_whitespace_as_suffix` is true,
     * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
     * of sentence.
     * 
* * optional bool treat_whitespace_as_suffix = 24 [default = false]; * @return Whether the treatWhitespaceAsSuffix field is set. */ boolean hasTreatWhitespaceAsSuffix(); /** *
     * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
     * hello_. When `treat_whitespace_as_suffix` is true,
     * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
     * of sentence.
     * 
* * optional bool treat_whitespace_as_suffix = 24 [default = false]; * @return The treatWhitespaceAsSuffix. */ boolean getTreatWhitespaceAsSuffix(); /** *
     * Allows pieces that only contain whitespaces instead of appearing only as
     * prefix or suffix of other pieces.
     * 
* * optional bool allow_whitespace_only_pieces = 26 [default = false]; * @return Whether the allowWhitespaceOnlyPieces field is set. */ boolean hasAllowWhitespaceOnlyPieces(); /** *
     * Allows pieces that only contain whitespaces instead of appearing only as
     * prefix or suffix of other pieces.
     * 
* * optional bool allow_whitespace_only_pieces = 26 [default = false]; * @return The allowWhitespaceOnlyPieces. */ boolean getAllowWhitespaceOnlyPieces(); /** *
     * Split all digits (0-9) into separate pieces.
     * 
* * optional bool split_digits = 25 [default = false]; * @return Whether the splitDigits field is set. */ boolean hasSplitDigits(); /** *
     * Split all digits (0-9) into separate pieces.
     * 
* * optional bool split_digits = 25 [default = false]; * @return The splitDigits. */ boolean getSplitDigits(); /** *
     */////////////////////////////////////////////////////////////////
     * Vocabulary management
     *
     * Defines control symbols used as an indicator to
     * change the behavior of the decoder. <s> and </s> are pre-defined.
     * We can use this field to encode various meta information,
     * including language indicator in multilingual model.
     * These symbols are not visible to users, but visible to
     * the decoder. Note that when the input sentence contains control symbols,
     * they are not treated as one token, but segmented into normal pieces.
     * Control symbols must be inserted independently from the segmentation.
     * 
* * repeated string control_symbols = 30; * @return A list containing the controlSymbols. */ java.util.List getControlSymbolsList(); /** *
     */////////////////////////////////////////////////////////////////
     * Vocabulary management
     *
     * Defines control symbols used as an indicator to
     * change the behavior of the decoder. <s> and </s> are pre-defined.
     * We can use this field to encode various meta information,
     * including language indicator in multilingual model.
     * These symbols are not visible to users, but visible to
     * the decoder. Note that when the input sentence contains control symbols,
     * they are not treated as one token, but segmented into normal pieces.
     * Control symbols must be inserted independently from the segmentation.
     * 
* * repeated string control_symbols = 30; * @return The count of controlSymbols. */ int getControlSymbolsCount(); /** *
     */////////////////////////////////////////////////////////////////
     * Vocabulary management
     *
     * Defines control symbols used as an indicator to
     * change the behavior of the decoder. <s> and </s> are pre-defined.
     * We can use this field to encode various meta information,
     * including language indicator in multilingual model.
     * These symbols are not visible to users, but visible to
     * the decoder. Note that when the input sentence contains control symbols,
     * they are not treated as one token, but segmented into normal pieces.
     * Control symbols must be inserted independently from the segmentation.
     * 
* * repeated string control_symbols = 30; * @param index The index of the element to return. * @return The controlSymbols at the given index. */ java.lang.String getControlSymbols(int index); /** *
     */////////////////////////////////////////////////////////////////
     * Vocabulary management
     *
     * Defines control symbols used as an indicator to
     * change the behavior of the decoder. <s> and </s> are pre-defined.
     * We can use this field to encode various meta information,
     * including language indicator in multilingual model.
     * These symbols are not visible to users, but visible to
     * the decoder. Note that when the input sentence contains control symbols,
     * they are not treated as one token, but segmented into normal pieces.
     * Control symbols must be inserted independently from the segmentation.
     * 
* * repeated string control_symbols = 30; * @param index The index of the value to return. * @return The bytes of the controlSymbols at the given index. */ com.google.protobuf.ByteString getControlSymbolsBytes(int index); /** *
     * Defines user defined symbols.
     * These symbols are added with extremely high score
     * so they are always treated as one unique symbol in any context.
     * Typical usage of user_defined_symbols is placeholder for named entities.
     * 
* * repeated string user_defined_symbols = 31; * @return A list containing the userDefinedSymbols. */ java.util.List getUserDefinedSymbolsList(); /** *
     * Defines user defined symbols.
     * These symbols are added with extremely high score
     * so they are always treated as one unique symbol in any context.
     * Typical usage of user_defined_symbols is placeholder for named entities.
     * 
* * repeated string user_defined_symbols = 31; * @return The count of userDefinedSymbols. */ int getUserDefinedSymbolsCount(); /** *
     * Defines user defined symbols.
     * These symbols are added with extremely high score
     * so they are always treated as one unique symbol in any context.
     * Typical usage of user_defined_symbols is placeholder for named entities.
     * 
* * repeated string user_defined_symbols = 31; * @param index The index of the element to return. * @return The userDefinedSymbols at the given index. */ java.lang.String getUserDefinedSymbols(int index); /** *
     * Defines user defined symbols.
     * These symbols are added with extremely high score
     * so they are always treated as one unique symbol in any context.
     * Typical usage of user_defined_symbols is placeholder for named entities.
     * 
* * repeated string user_defined_symbols = 31; * @param index The index of the value to return. * @return The bytes of the userDefinedSymbols at the given index. */ com.google.protobuf.ByteString getUserDefinedSymbolsBytes(int index); /** *
     * Defines required characters. Each UTF8 character in this string is included
     * in the character set regardless of character_coverage value. Unlike
     * user_defined_symbols, these characters have scores based on the frequency
     * on input sentences, and the model can form subwords using characters
     * in this field.
     * 
* * optional string required_chars = 36; * @return Whether the requiredChars field is set. */ boolean hasRequiredChars(); /** *
     * Defines required characters. Each UTF8 character in this string is included
     * in the character set regardless of character_coverage value. Unlike
     * user_defined_symbols, these characters have scores based on the frequency
     * on input sentences, and the model can form subwords using characters
     * in this field.
     * 
* * optional string required_chars = 36; * @return The requiredChars. */ java.lang.String getRequiredChars(); /** *
     * Defines required characters. Each UTF8 character in this string is included
     * in the character set regardless of character_coverage value. Unlike
     * user_defined_symbols, these characters have scores based on the frequency
     * on input sentences, and the model can form subwords using characters
     * in this field.
     * 
* * optional string required_chars = 36; * @return The bytes for requiredChars. */ com.google.protobuf.ByteString getRequiredCharsBytes(); /** *
     * Decomposes unknown pieces into UTF-8 bytes.
     * 
* * optional bool byte_fallback = 35 [default = false]; * @return Whether the byteFallback field is set. */ boolean hasByteFallback(); /** *
     * Decomposes unknown pieces into UTF-8 bytes.
     * 
* * optional bool byte_fallback = 35 [default = false]; * @return The byteFallback. */ boolean getByteFallback(); /** *
     * When creating the vocabulary file, defines whether or not to additionally
     * output the score for each piece.
     * 
* * optional bool vocabulary_output_piece_score = 32 [default = true]; * @return Whether the vocabularyOutputPieceScore field is set. */ boolean hasVocabularyOutputPieceScore(); /** *
     * When creating the vocabulary file, defines whether or not to additionally
     * output the score for each piece.
     * 
* * optional bool vocabulary_output_piece_score = 32 [default = true]; * @return The vocabularyOutputPieceScore. */ boolean getVocabularyOutputPieceScore(); /** *
     * `vocab_size` is treated as hard limit. Crash if
     * the model can not produce the vocab of size `vocab_size`,
     * When `hard_vocab_limit` is false, vocab_size is treated
     * as soft limit. Note that when model_type=char,
     * always assumes hard_vocab_limit = false.
     * 
* * optional bool hard_vocab_limit = 33 [default = true]; * @return Whether the hardVocabLimit field is set. */ boolean hasHardVocabLimit(); /** *
     * `vocab_size` is treated as hard limit. Crash if
     * the model can not produce the vocab of size `vocab_size`,
     * When `hard_vocab_limit` is false, vocab_size is treated
     * as soft limit. Note that when model_type=char,
     * always assumes hard_vocab_limit = false.
     * 
* * optional bool hard_vocab_limit = 33 [default = true]; * @return The hardVocabLimit. */ boolean getHardVocabLimit(); /** *
     * use all symbols for vocab extraction. This flag is valid
     * if model type is either CHAR or WORD
     * 
* * optional bool use_all_vocab = 34 [default = false]; * @return Whether the useAllVocab field is set. */ boolean hasUseAllVocab(); /** *
     * use all symbols for vocab extraction. This flag is valid
     * if model type is either CHAR or WORD
     * 
* * optional bool use_all_vocab = 34 [default = false]; * @return The useAllVocab. */ boolean getUseAllVocab(); /** *
     */////////////////////////////////////////////////////////////////
     * Reserved special meta tokens.
     * * -1 is not used.
     * * unk_id must not be -1.
     * Id must starts with 0 and be contigous.
     * 
* * optional int32 unk_id = 40 [default = 0]; * @return Whether the unkId field is set. */ boolean hasUnkId(); /** *
     */////////////////////////////////////////////////////////////////
     * Reserved special meta tokens.
     * * -1 is not used.
     * * unk_id must not be -1.
     * Id must starts with 0 and be contigous.
     * 
* * optional int32 unk_id = 40 [default = 0]; * @return The unkId. */ int getUnkId(); /** *
     * <s>
     * 
* * optional int32 bos_id = 41 [default = 1]; * @return Whether the bosId field is set. */ boolean hasBosId(); /** *
     * <s>
     * 
* * optional int32 bos_id = 41 [default = 1]; * @return The bosId. */ int getBosId(); /** *
     * </s>
     * 
* * optional int32 eos_id = 42 [default = 2]; * @return Whether the eosId field is set. */ boolean hasEosId(); /** *
     * </s>
     * 
* * optional int32 eos_id = 42 [default = 2]; * @return The eosId. */ int getEosId(); /** *
     * <pad> (padding)
     * 
* * optional int32 pad_id = 43 [default = -1]; * @return Whether the padId field is set. */ boolean hasPadId(); /** *
     * <pad> (padding)
     * 
* * optional int32 pad_id = 43 [default = -1]; * @return The padId. */
    int getPadId();

    /**
     * optional string unk_piece = 45 [default = "&lt;unk&gt;"];
     * @return Whether the unkPiece field is set.
     */
    boolean hasUnkPiece();

    /**
     * optional string unk_piece = 45 [default = "&lt;unk&gt;"];
     * @return The unkPiece.
     */
    java.lang.String getUnkPiece();

    /**
     * optional string unk_piece = 45 [default = "&lt;unk&gt;"];
     * @return The bytes for unkPiece.
     */
    com.google.protobuf.ByteString getUnkPieceBytes();

    /**
     * optional string bos_piece = 46 [default = "&lt;s&gt;"];
     * @return Whether the bosPiece field is set.
     */
    boolean hasBosPiece();

    /**
     * optional string bos_piece = 46 [default = "&lt;s&gt;"];
     * @return The bosPiece.
     */
    java.lang.String getBosPiece();

    /**
     * optional string bos_piece = 46 [default = "&lt;s&gt;"];
     * @return The bytes for bosPiece.
     */
    com.google.protobuf.ByteString getBosPieceBytes();

    /**
     * optional string eos_piece = 47 [default = "&lt;/s&gt;"];
     * @return Whether the eosPiece field is set.
     */
    boolean hasEosPiece();

    /**
     * optional string eos_piece = 47 [default = "&lt;/s&gt;"];
     * @return The eosPiece.
     */
    java.lang.String getEosPiece();

    /**
     * optional string eos_piece = 47 [default = "&lt;/s&gt;"];
     * @return The bytes for eosPiece.
     */
    com.google.protobuf.ByteString getEosPieceBytes();

    /**
     * optional string pad_piece = 48 [default = "&lt;pad&gt;"];
     * @return Whether the padPiece field is set.
     */
    boolean hasPadPiece();

    /**
     * optional string pad_piece = 48 [default = "&lt;pad&gt;"];
     * @return The padPiece.
     */
    java.lang.String getPadPiece();

    /**
     * optional string pad_piece = 48 [default = "&lt;pad&gt;"];
     * @return The bytes for padPiece.
     */
    com.google.protobuf.ByteString getPadPieceBytes();

    /**
     * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
     * since this character can be useful both for user and
     * developer. We can easily figure out that &lt;unk&gt; is emitted.
     *
     * optional string unk_surface = 44 [default = " \342\201\207 "];
     * @return Whether the unkSurface field is set.
     */
    boolean hasUnkSurface();

    /**
     * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
     * since this character can be useful both for user and
     * developer. We can easily figure out that &lt;unk&gt; is emitted.
     *
     * optional string unk_surface = 44 [default = " \342\201\207 "];
     * @return The unkSurface.
     */
    java.lang.String getUnkSurface();

    /**
     * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
     * since this character can be useful both for user and
     * developer. We can easily figure out that &lt;unk&gt; is emitted.
     *
     * optional string unk_surface = 44 [default = " \342\201\207 "];
     * @return The bytes for unkSurface.
     */
    com.google.protobuf.ByteString getUnkSurfaceBytes();

    /**
     * Increase bit depth to allow unigram model training on large
     * (&gt;10M sentences) corpora. A side effect of enabling this flag
     * is increased memory usage.
     *
     * optional bool train_extremely_large_corpus = 49 [default = false];
     * @return Whether the trainExtremelyLargeCorpus field is set.
     */
    boolean hasTrainExtremelyLargeCorpus();

    /**
     * Increase bit depth to allow unigram model training on large
     * (&gt;10M sentences) corpora. A side effect of enabling this flag
     * is increased memory usage.
     *
     * optional bool train_extremely_large_corpus = 49 [default = false];
     * @return The trainExtremelyLargeCorpus.
     */
    boolean getTrainExtremelyLargeCorpus();
  }

  /**
   *
   * TrainerSpec encodes a various parameters for SentencePiece training.
   * 
* * Protobuf type {@code sentencepiece.TrainerSpec} */ public static final class TrainerSpec extends com.google.protobuf.GeneratedMessageV3.ExtendableMessage< TrainerSpec> implements // @@protoc_insertion_point(message_implements:sentencepiece.TrainerSpec) TrainerSpecOrBuilder { private static final long serialVersionUID = 0L; // Use TrainerSpec.newBuilder() to construct. private TrainerSpec(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder builder) { super(builder); } private TrainerSpec() { input_ = com.google.protobuf.LazyStringArrayList.emptyList(); inputFormat_ = ""; modelPrefix_ = ""; modelType_ = 1; vocabSize_ = 8000; acceptLanguage_ = com.google.protobuf.LazyStringArrayList.emptyList(); characterCoverage_ = 0.9995F; shuffleInputSentence_ = true; seedSentencepieceSize_ = 1000000; shrinkingFactor_ = 0.75F; maxSentenceLength_ = 4192; numThreads_ = 16; numSubIterations_ = 2; maxSentencepieceLength_ = 16; splitByUnicodeScript_ = true; splitByNumber_ = true; splitByWhitespace_ = true; controlSymbols_ = com.google.protobuf.LazyStringArrayList.emptyList(); userDefinedSymbols_ = com.google.protobuf.LazyStringArrayList.emptyList(); requiredChars_ = ""; vocabularyOutputPieceScore_ = true; hardVocabLimit_ = true; bosId_ = 1; eosId_ = 2; padId_ = -1; unkPiece_ = ""; bosPiece_ = ""; eosPiece_ = ""; padPiece_ = ""; unkSurface_ = com.google.protobuf.Internal.stringDefaultValue(" \342\201\207 "); } @java.lang.Override @SuppressWarnings({"unused"}) protected java.lang.Object newInstance( UnusedPrivateParameter unused) { return new TrainerSpec(); } public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_fieldAccessorTable 
.ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.TrainerSpec.class, sentencepiece.SentencepieceModel.TrainerSpec.Builder.class); } /** *
     * Model type. only have UNIGRAM now.
     * 
* * Protobuf enum {@code sentencepiece.TrainerSpec.ModelType} */ public enum ModelType implements com.google.protobuf.ProtocolMessageEnum { /** *
       * Unigram language model with dynamic algorithm
       * 
* * UNIGRAM = 1; */ UNIGRAM(1), /** *
       * Byte Pair Encoding
       * 
* * BPE = 2; */ BPE(2), /** *
       * Delimitered by whitespace.
       * 
* * WORD = 3; */ WORD(3), /** *
       * tokenizes into character sequence
       * 
* * CHAR = 4; */ CHAR(4), ; /** *
       * Unigram language model with dynamic algorithm
       * 
* * UNIGRAM = 1; */ public static final int UNIGRAM_VALUE = 1; /** *
       * Byte Pair Encoding
       * 
* * BPE = 2; */ public static final int BPE_VALUE = 2; /** *
       * Delimitered by whitespace.
       * 
* * WORD = 3; */ public static final int WORD_VALUE = 3; /** *
       * tokenizes into character sequence
       * 
* * CHAR = 4; */ public static final int CHAR_VALUE = 4; public final int getNumber() { return value; } /** * @param value The numeric wire value of the corresponding enum entry. * @return The enum associated with the given numeric wire value. * @deprecated Use {@link #forNumber(int)} instead. */ @java.lang.Deprecated public static ModelType valueOf(int value) { return forNumber(value); } /** * @param value The numeric wire value of the corresponding enum entry. * @return The enum associated with the given numeric wire value. */ public static ModelType forNumber(int value) { switch (value) { case 1: return UNIGRAM; case 2: return BPE; case 3: return WORD; case 4: return CHAR; default: return null; } } public static com.google.protobuf.Internal.EnumLiteMap internalGetValueMap() { return internalValueMap; } private static final com.google.protobuf.Internal.EnumLiteMap< ModelType> internalValueMap = new com.google.protobuf.Internal.EnumLiteMap() { public ModelType findValueByNumber(int number) { return ModelType.forNumber(number); } }; public final com.google.protobuf.Descriptors.EnumValueDescriptor getValueDescriptor() { return getDescriptor().getValues().get(ordinal()); } public final com.google.protobuf.Descriptors.EnumDescriptor getDescriptorForType() { return getDescriptor(); } public static final com.google.protobuf.Descriptors.EnumDescriptor getDescriptor() { return sentencepiece.SentencepieceModel.TrainerSpec.getDescriptor().getEnumTypes().get(0); } private static final ModelType[] VALUES = values(); public static ModelType valueOf( com.google.protobuf.Descriptors.EnumValueDescriptor desc) { if (desc.getType() != getDescriptor()) { throw new java.lang.IllegalArgumentException( "EnumValueDescriptor is not for this type."); } return VALUES[desc.getIndex()]; } private final int value; private ModelType(int value) { this.value = value; } // @@protoc_insertion_point(enum_scope:sentencepiece.TrainerSpec.ModelType) } private int bitField0_; private int bitField1_; 
public static final int INPUT_FIELD_NUMBER = 1;
    // Storage for the repeated `input` field, initialized from LazyStringArrayList.emptyList().
    @SuppressWarnings("serial")
    private com.google.protobuf.LazyStringArrayList input_ =
        com.google.protobuf.LazyStringArrayList.emptyList();

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * General parameters
     *
     * Input corpus files.
     *  Trainer accepts the following two formats:
     *  A) Monolingual: plain text, one sentence per line.
     *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
     *  When bilingual data is passed, shared vocabulary model is built.
     *  Note that the input file must be raw corpus, not a preprocessed corpus.
     *  Trainer only loads the first `input_sentence_size` sentences specified
     *  with this parameter.
     * 
* * repeated string input = 1; * @return A list containing the input. */
    public com.google.protobuf.ProtocolStringList
        getInputList() {
      // The stored list itself is returned; no copy is made here.
      return input_;
    }

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * General parameters
     *
     * Input corpus files.
     *  Trainer accepts the following two formats:
     *  A) Monolingual: plain text, one sentence per line.
     *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
     *  When bilingual data is passed, shared vocabulary model is built.
     *  Note that the input file must be raw corpus, not a preprocessed corpus.
     *  Trainer only loads the first `input_sentence_size` sentences specified
     *  with this parameter.
     * 
* * repeated string input = 1; * @return The count of input. */
    public int getInputCount() {
      final int count = input_.size();
      return count;
    }

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * General parameters
     *
     * Input corpus files.
     *  Trainer accepts the following two formats:
     *  A) Monolingual: plain text, one sentence per line.
     *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
     *  When bilingual data is passed, shared vocabulary model is built.
     *  Note that the input file must be raw corpus, not a preprocessed corpus.
     *  Trainer only loads the first `input_sentence_size` sentences specified
     *  with this parameter.
     * 
* * repeated string input = 1; * @param index The index of the element to return. * @return The input at the given index. */
    public java.lang.String getInput(int index) {
      final java.lang.String element = input_.get(index);
      return element;
    }

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * General parameters
     *
     * Input corpus files.
     *  Trainer accepts the following two formats:
     *  A) Monolingual: plain text, one sentence per line.
     *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
     *  When bilingual data is passed, shared vocabulary model is built.
     *  Note that the input file must be raw corpus, not a preprocessed corpus.
     *  Trainer only loads the first `input_sentence_size` sentences specified
     *  with this parameter.
     * 
* * repeated string input = 1; * @param index The index of the value to return. * @return The bytes of the input at the given index. */
    public com.google.protobuf.ByteString
        getInputBytes(int index) {
      final com.google.protobuf.ByteString element = input_.getByteString(index);
      return element;
    }

    public static final int INPUT_FORMAT_FIELD_NUMBER = 7;
    // Holds either a String or a ByteString; the accessors convert between the two.
    @SuppressWarnings("serial")
    private volatile java.lang.Object inputFormat_ = "";

    /**
     *
     * Input corpus format:
     * "text": one-sentence-per-line text format (default)
     * "tsv":  sentence &lt;tab&gt; freq
     * 
* * optional string input_format = 7; * @return Whether the inputFormat field is set. */
    @java.lang.Override
    public boolean hasInputFormat() {
      // Bit 0x00000001 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000001;
      return presence != 0;
    }

    /**
     *
     * Input corpus format:
     * "text": one-sentence-per-line text format (default)
     * "tsv":  sentence &lt;tab&gt; freq
     * 
* * optional string input_format = 7; * @return The inputFormat. */
    @java.lang.Override
    public java.lang.String getInputFormat() {
      final java.lang.Object cached = inputFormat_;
      if (cached instanceof java.lang.String) {
        return (java.lang.String) cached;
      }
      // Held as bytes: decode, and keep the decoded text only when the bytes are valid UTF-8.
      final com.google.protobuf.ByteString raw =
          (com.google.protobuf.ByteString) cached;
      final java.lang.String decoded = raw.toStringUtf8();
      if (raw.isValidUtf8()) {
        inputFormat_ = decoded;
      }
      return decoded;
    }

    /**
     *
     * Input corpus format:
     * "text": one-sentence-per-line text format (default)
     * "tsv":  sentence &lt;tab&gt; freq
     * 
* * optional string input_format = 7; * @return The bytes for inputFormat. */
    @java.lang.Override
    public com.google.protobuf.ByteString
        getInputFormatBytes() {
      final java.lang.Object cached = inputFormat_;
      if (!(cached instanceof java.lang.String)) {
        // Already held as bytes.
        return (com.google.protobuf.ByteString) cached;
      }
      // Encode as UTF-8 and keep the encoded form in the field.
      final com.google.protobuf.ByteString encoded =
          com.google.protobuf.ByteString.copyFromUtf8(
              (java.lang.String) cached);
      inputFormat_ = encoded;
      return encoded;
    }

    public static final int MODEL_PREFIX_FIELD_NUMBER = 2;
    // Holds either a String or a ByteString; the accessors convert between the two.
    @SuppressWarnings("serial")
    private volatile java.lang.Object modelPrefix_ = "";

    /**
     *
     * Output model file prefix.
     * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
     * 
* * optional string model_prefix = 2; * @return Whether the modelPrefix field is set. */
    @java.lang.Override
    public boolean hasModelPrefix() {
      // Bit 0x00000002 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000002;
      return presence != 0;
    }

    /**
     *
     * Output model file prefix.
     * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
     * 
* * optional string model_prefix = 2; * @return The modelPrefix. */
    @java.lang.Override
    public java.lang.String getModelPrefix() {
      final java.lang.Object cached = modelPrefix_;
      if (cached instanceof java.lang.String) {
        return (java.lang.String) cached;
      }
      // Held as bytes: decode, and keep the decoded text only when the bytes are valid UTF-8.
      final com.google.protobuf.ByteString raw =
          (com.google.protobuf.ByteString) cached;
      final java.lang.String decoded = raw.toStringUtf8();
      if (raw.isValidUtf8()) {
        modelPrefix_ = decoded;
      }
      return decoded;
    }

    /**
     *
     * Output model file prefix.
     * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
     * 
* * optional string model_prefix = 2; * @return The bytes for modelPrefix. */ @java.lang.Override public com.google.protobuf.ByteString getModelPrefixBytes() { java.lang.Object ref = modelPrefix_; if (ref instanceof java.lang.String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); modelPrefix_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } public static final int MODEL_TYPE_FIELD_NUMBER = 3; private int modelType_ = 1; /** * optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; * @return Whether the modelType field is set. */ @java.lang.Override public boolean hasModelType() { return ((bitField0_ & 0x00000004) != 0); } /** * optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; * @return The modelType. */ @java.lang.Override public sentencepiece.SentencepieceModel.TrainerSpec.ModelType getModelType() { sentencepiece.SentencepieceModel.TrainerSpec.ModelType result = sentencepiece.SentencepieceModel.TrainerSpec.ModelType.forNumber(modelType_); return result == null ? sentencepiece.SentencepieceModel.TrainerSpec.ModelType.UNIGRAM : result; } public static final int VOCAB_SIZE_FIELD_NUMBER = 4; private int vocabSize_ = 8000; /** *
     * Vocabulary size. 8k is the default size.
     * 
* * optional int32 vocab_size = 4 [default = 8000]; * @return Whether the vocabSize field is set. */
    @java.lang.Override
    public boolean hasVocabSize() {
      // Bit 0x00000008 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000008;
      return presence != 0;
    }

    /**
     *
     * Vocabulary size. 8k is the default size.
     * 
* * optional int32 vocab_size = 4 [default = 8000]; * @return The vocabSize. */
    @java.lang.Override
    public int getVocabSize() {
      // Plain read of the stored value.
      return vocabSize_;
    }

    public static final int ACCEPT_LANGUAGE_FIELD_NUMBER = 5;
    @SuppressWarnings("serial")
    private com.google.protobuf.LazyStringArrayList acceptLanguage_ =
        com.google.protobuf.LazyStringArrayList.emptyList();

    /**
     *
     * List of the languages this model can accept.
     * Since the model is language-agnostic, this field is used as a reference.
     * 
* * repeated string accept_language = 5; * @return A list containing the acceptLanguage. */
    public com.google.protobuf.ProtocolStringList
        getAcceptLanguageList() {
      // The stored list itself is returned; no copy is made here.
      return acceptLanguage_;
    }

    /**
     *
     * List of the languages this model can accept.
     * Since the model is language-agnostic, this field is used as a reference.
     * 
* * repeated string accept_language = 5; * @return The count of acceptLanguage. */
    public int getAcceptLanguageCount() {
      final int count = acceptLanguage_.size();
      return count;
    }

    /**
     *
     * List of the languages this model can accept.
     * Since the model is language-agnostic, this field is used as a reference.
     * 
* * repeated string accept_language = 5; * @param index The index of the element to return. * @return The acceptLanguage at the given index. */
    public java.lang.String getAcceptLanguage(int index) {
      final java.lang.String element = acceptLanguage_.get(index);
      return element;
    }

    /**
     *
     * List of the languages this model can accept.
     * Since the model is language-agnostic, this field is used as a reference.
     * 
* * repeated string accept_language = 5; * @param index The index of the value to return. * @return The bytes of the acceptLanguage at the given index. */
    public com.google.protobuf.ByteString
        getAcceptLanguageBytes(int index) {
      final com.google.protobuf.ByteString element = acceptLanguage_.getByteString(index);
      return element;
    }

    public static final int SELF_TEST_SAMPLE_SIZE_FIELD_NUMBER = 6;
    private int selfTestSampleSize_ = 0;

    /**
     *
     * Size of self-test samples, which are encoded in the model file.
     * 
* * optional int32 self_test_sample_size = 6 [default = 0]; * @return Whether the selfTestSampleSize field is set. */
    @java.lang.Override
    public boolean hasSelfTestSampleSize() {
      // Bit 0x00000010 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000010;
      return presence != 0;
    }

    /**
     *
     * Size of self-test samples, which are encoded in the model file.
     * 
* * optional int32 self_test_sample_size = 6 [default = 0]; * @return The selfTestSampleSize. */
    @java.lang.Override
    public int getSelfTestSampleSize() {
      // Plain read of the stored value.
      return selfTestSampleSize_;
    }

    public static final int CHARACTER_COVERAGE_FIELD_NUMBER = 10;
    private float characterCoverage_ = 0.9995F;

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * Training parameters.
     *
     * Uses characters which cover the corpus with the ratio of `chars_coverage`.
     * This parameter determines the set of basic Alphabet of sentence piece.
     * 1.0 - `chars_coverage` characters are treated as UNK.
     * See also required_chars field.
     * 
* * optional float character_coverage = 10 [default = 0.9995]; * @return Whether the characterCoverage field is set. */
    @java.lang.Override
    public boolean hasCharacterCoverage() {
      // Bit 0x00000020 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000020;
      return presence != 0;
    }

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * Training parameters.
     *
     * Uses characters which cover the corpus with the ratio of `chars_coverage`.
     * This parameter determines the set of basic Alphabet of sentence piece.
     * 1.0 - `chars_coverage` characters are treated as UNK.
     * See also required_chars field.
     * 
* * optional float character_coverage = 10 [default = 0.9995]; * @return The characterCoverage. */
    @java.lang.Override
    public float getCharacterCoverage() {
      // Plain read of the stored value.
      return characterCoverage_;
    }

    public static final int INPUT_SENTENCE_SIZE_FIELD_NUMBER = 11;
    private long inputSentenceSize_ = 0L;

    /**
     *
     * Maximum size of sentences the trainer loads from `input` parameter.
     * Trainer simply loads the `input` files in sequence.
     * It is better to shuffle the input corpus randomly.
     * 
* * optional uint64 input_sentence_size = 11 [default = 0]; * @return Whether the inputSentenceSize field is set. */
    @java.lang.Override
    public boolean hasInputSentenceSize() {
      // Bit 0x00000040 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000040;
      return presence != 0;
    }

    /**
     *
     * Maximum size of sentences the trainer loads from `input` parameter.
     * Trainer simply loads the `input` files in sequence.
     * It is better to shuffle the input corpus randomly.
     * 
* * optional uint64 input_sentence_size = 11 [default = 0]; * @return The inputSentenceSize. */
    @java.lang.Override
    public long getInputSentenceSize() {
      // Plain read of the stored value.
      return inputSentenceSize_;
    }

    public static final int SHUFFLE_INPUT_SENTENCE_FIELD_NUMBER = 19;
    private boolean shuffleInputSentence_ = true;

    /**
     * optional bool shuffle_input_sentence = 19 [default = true];
     * @return Whether the shuffleInputSentence field is set.
     */
    @java.lang.Override
    public boolean hasShuffleInputSentence() {
      final int presence = bitField0_ & 0x00000080;
      return presence != 0;
    }

    /**
     * optional bool shuffle_input_sentence = 19 [default = true];
     * @return The shuffleInputSentence.
     */
    @java.lang.Override
    public boolean getShuffleInputSentence() {
      return shuffleInputSentence_;
    }

    public static final int MINING_SENTENCE_SIZE_FIELD_NUMBER = 12;
    private int miningSentenceSize_ = 0;

    /**
     *
     * Maximum size of sentences to make seed sentence pieces.
     * Extended suffix array is constructed to extract frequent
     * sub-strings from the corpus. This uses 20N working space,
     * where N is the size of corpus.
     * 
* * optional int32 mining_sentence_size = 12 [deprecated = true]; * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated. * See sentencepiece_model.proto;l=83 * @return Whether the miningSentenceSize field is set. */
    @java.lang.Override
    @java.lang.Deprecated
    public boolean hasMiningSentenceSize() {
      // Bit 0x00000100 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000100;
      return presence != 0;
    }

    /**
     *
     * Maximum size of sentences to make seed sentence pieces.
     * Extended suffix array is constructed to extract frequent
     * sub-strings from the corpus. This uses 20N working space,
     * where N is the size of corpus.
     * 
* * optional int32 mining_sentence_size = 12 [deprecated = true]; * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated. * See sentencepiece_model.proto;l=83 * @return The miningSentenceSize. */
    @java.lang.Override
    @java.lang.Deprecated
    public int getMiningSentenceSize() {
      // Plain read of the stored value.
      return miningSentenceSize_;
    }

    public static final int TRAINING_SENTENCE_SIZE_FIELD_NUMBER = 13;
    private int trainingSentenceSize_ = 0;

    /**
     *
     * Maximum size of sentences to train sentence pieces.
     * 
* * optional int32 training_sentence_size = 13 [deprecated = true]; * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated. * See sentencepiece_model.proto;l=86 * @return Whether the trainingSentenceSize field is set. */
    @java.lang.Override
    @java.lang.Deprecated
    public boolean hasTrainingSentenceSize() {
      // Bit 0x00000200 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000200;
      return presence != 0;
    }

    /**
     *
     * Maximum size of sentences to train sentence pieces.
     * 
* * optional int32 training_sentence_size = 13 [deprecated = true]; * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated. * See sentencepiece_model.proto;l=86 * @return The trainingSentenceSize. */
    @java.lang.Override
    @java.lang.Deprecated
    public int getTrainingSentenceSize() {
      // Plain read of the stored value.
      return trainingSentenceSize_;
    }

    public static final int SEED_SENTENCEPIECE_SIZE_FIELD_NUMBER = 14;
    private int seedSentencepieceSize_ = 1000000;

    /**
     *
     * The size of seed sentencepieces.
     * `seed_sentencepiece_size` must be larger than `vocab_size`.
     * 
* * optional int32 seed_sentencepiece_size = 14 [default = 1000000]; * @return Whether the seedSentencepieceSize field is set. */
    @java.lang.Override
    public boolean hasSeedSentencepieceSize() {
      // Bit 0x00000400 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000400;
      return presence != 0;
    }

    /**
     *
     * The size of seed sentencepieces.
     * `seed_sentencepiece_size` must be larger than `vocab_size`.
     * 
* * optional int32 seed_sentencepiece_size = 14 [default = 1000000]; * @return The seedSentencepieceSize. */
    @java.lang.Override
    public int getSeedSentencepieceSize() {
      // Plain read of the stored value.
      return seedSentencepieceSize_;
    }

    public static final int SHRINKING_FACTOR_FIELD_NUMBER = 15;
    private float shrinkingFactor_ = 0.75F;

    /**
     *
     * In every EM sub-iterations, keeps top
     * `shrinking_factor` * `current sentencepieces size` with respect to
     * the loss of the sentence piece. This value should be smaller than 1.0.
     * 
* * optional float shrinking_factor = 15 [default = 0.75]; * @return Whether the shrinkingFactor field is set. */
    @java.lang.Override
    public boolean hasShrinkingFactor() {
      // Bit 0x00000800 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00000800;
      return presence != 0;
    }

    /**
     *
     * In every EM sub-iterations, keeps top
     * `shrinking_factor` * `current sentencepieces size` with respect to
     * the loss of the sentence piece. This value should be smaller than 1.0.
     * 
* * optional float shrinking_factor = 15 [default = 0.75]; * @return The shrinkingFactor. */
    @java.lang.Override
    public float getShrinkingFactor() {
      // Plain read of the stored value.
      return shrinkingFactor_;
    }

    public static final int MAX_SENTENCE_LENGTH_FIELD_NUMBER = 18;
    private int maxSentenceLength_ = 4192;

    /**
     *
     * The maximum sentence length in byte. The sentences with the length
     * larger than `max_sentence_length` is simply ignored.
     * Longer input tends to bring the following risks:
     *  * Overflow during EM training (unigram language model only)
     *  * Performance drop because of O(n log n) cost in BPE.
     * 
* * optional int32 max_sentence_length = 18 [default = 4192]; * @return Whether the maxSentenceLength field is set. */
    @java.lang.Override
    public boolean hasMaxSentenceLength() {
      // Bit 0x00001000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00001000;
      return presence != 0;
    }

    /**
     *
     * The maximum sentence length in byte. The sentences with the length
     * larger than `max_sentence_length` is simply ignored.
     * Longer input tends to bring the following risks:
     *  * Overflow during EM training (unigram language model only)
     *  * Performance drop because of O(n log n) cost in BPE.
     * 
* * optional int32 max_sentence_length = 18 [default = 4192]; * @return The maxSentenceLength. */
    @java.lang.Override
    public int getMaxSentenceLength() {
      // Plain read of the stored value.
      return maxSentenceLength_;
    }

    public static final int NUM_THREADS_FIELD_NUMBER = 16;
    private int numThreads_ = 16;

    /**
     *
     * Number of threads in the training.
     * 
* * optional int32 num_threads = 16 [default = 16]; * @return Whether the numThreads field is set. */
    @java.lang.Override
    public boolean hasNumThreads() {
      // Bit 0x00002000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00002000;
      return presence != 0;
    }

    /**
     *
     * Number of threads in the training.
     * 
* * optional int32 num_threads = 16 [default = 16]; * @return The numThreads. */
    @java.lang.Override
    public int getNumThreads() {
      // Plain read of the stored value.
      return numThreads_;
    }

    public static final int NUM_SUB_ITERATIONS_FIELD_NUMBER = 17;
    private int numSubIterations_ = 2;

    /**
     *
     * Number of EM sub iterations.
     * 
* * optional int32 num_sub_iterations = 17 [default = 2]; * @return Whether the numSubIterations field is set. */
    @java.lang.Override
    public boolean hasNumSubIterations() {
      // Bit 0x00004000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00004000;
      return presence != 0;
    }

    /**
     *
     * Number of EM sub iterations.
     * 
* * optional int32 num_sub_iterations = 17 [default = 2]; * @return The numSubIterations. */
    @java.lang.Override
    public int getNumSubIterations() {
      // Plain read of the stored value.
      return numSubIterations_;
    }

    public static final int MAX_SENTENCEPIECE_LENGTH_FIELD_NUMBER = 20;
    private int maxSentencepieceLength_ = 16;

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * SentencePiece parameters which control the shapes of sentence piece.
     *
     * Maximum length of sentencepiece.
     * 
* * optional int32 max_sentencepiece_length = 20 [default = 16]; * @return Whether the maxSentencepieceLength field is set. */
    @java.lang.Override
    public boolean hasMaxSentencepieceLength() {
      // Bit 0x00008000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00008000;
      return presence != 0;
    }

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * SentencePiece parameters which control the shapes of sentence piece.
     *
     * Maximum length of sentencepiece.
     * 
* * optional int32 max_sentencepiece_length = 20 [default = 16]; * @return The maxSentencepieceLength. */
    @java.lang.Override
    public int getMaxSentencepieceLength() {
      // Plain read of the stored value.
      return maxSentencepieceLength_;
    }

    public static final int SPLIT_BY_UNICODE_SCRIPT_FIELD_NUMBER = 21;
    private boolean splitByUnicodeScript_ = true;

    /**
     *
     * Uses Unicode script to split sentence pieces.
     * When `split_by_unicode_script` is true, we do not allow sentence piece to
     * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
     * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
     * as one script type, since Japanese word can consist of multiple scripts.
     * This exception is always applied regardless of the accept-language
     * parameter.
     * 
* * optional bool split_by_unicode_script = 21 [default = true]; * @return Whether the splitByUnicodeScript field is set. */
    @java.lang.Override
    public boolean hasSplitByUnicodeScript() {
      // Bit 0x00010000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00010000;
      return presence != 0;
    }

    /**
     *
     * Uses Unicode script to split sentence pieces.
     * When `split_by_unicode_script` is true, we do not allow sentence piece to
     * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
     * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
     * as one script type, since Japanese word can consist of multiple scripts.
     * This exception is always applied regardless of the accept-language
     * parameter.
     * 
* * optional bool split_by_unicode_script = 21 [default = true]; * @return The splitByUnicodeScript. */
    @java.lang.Override
    public boolean getSplitByUnicodeScript() {
      // Plain read of the stored value.
      return splitByUnicodeScript_;
    }

    public static final int SPLIT_BY_NUMBER_FIELD_NUMBER = 23;
    private boolean splitByNumber_ = true;

    /**
     *
     * When `split_by_number` is true, put a boundary between number and
     * non-number transition. If we want to treat "F1" is one token, set this flag
     * to be false.
     * 
* * optional bool split_by_number = 23 [default = true]; * @return Whether the splitByNumber field is set. */
    @java.lang.Override
    public boolean hasSplitByNumber() {
      // Bit 0x00020000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00020000;
      return presence != 0;
    }

    /**
     *
     * When `split_by_number` is true, put a boundary between number and
     * non-number transition. If we want to treat "F1" is one token, set this flag
     * to be false.
     * 
* * optional bool split_by_number = 23 [default = true]; * @return The splitByNumber. */
    @java.lang.Override
    public boolean getSplitByNumber() {
      // Plain read of the stored value.
      return splitByNumber_;
    }

    public static final int SPLIT_BY_WHITESPACE_FIELD_NUMBER = 22;
    private boolean splitByWhitespace_ = true;

    /**
     *
     * Use a white space to split sentence pieces.
     * When `split_by_whitespace` is false, we may have the piece containing
     * a white space in the middle. e.g., "in_the".
     * 
* * optional bool split_by_whitespace = 22 [default = true]; * @return Whether the splitByWhitespace field is set. */
    @java.lang.Override
    public boolean hasSplitByWhitespace() {
      // Bit 0x00040000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00040000;
      return presence != 0;
    }

    /**
     *
     * Use a white space to split sentence pieces.
     * When `split_by_whitespace` is false, we may have the piece containing
     * a white space in the middle. e.g., "in_the".
     * 
* * optional bool split_by_whitespace = 22 [default = true]; * @return The splitByWhitespace. */
    @java.lang.Override
    public boolean getSplitByWhitespace() {
      // Plain read of the stored value.
      return splitByWhitespace_;
    }

    public static final int TREAT_WHITESPACE_AS_SUFFIX_FIELD_NUMBER = 24;
    private boolean treatWhitespaceAsSuffix_ = false;

    /**
     *
     * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =&gt;
     * hello_. When `treat_whitespace_as_suffix` is true,
     * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
     * of sentence.
     * 
* * optional bool treat_whitespace_as_suffix = 24 [default = false]; * @return Whether the treatWhitespaceAsSuffix field is set. */
    @java.lang.Override
    public boolean hasTreatWhitespaceAsSuffix() {
      // Bit 0x00080000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00080000;
      return presence != 0;
    }

    /**
     *
     * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =&gt;
     * hello_. When `treat_whitespace_as_suffix` is true,
     * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
     * of sentence.
     * 
* * optional bool treat_whitespace_as_suffix = 24 [default = false]; * @return The treatWhitespaceAsSuffix. */
    @java.lang.Override
    public boolean getTreatWhitespaceAsSuffix() {
      // Plain read of the stored value.
      return treatWhitespaceAsSuffix_;
    }

    public static final int ALLOW_WHITESPACE_ONLY_PIECES_FIELD_NUMBER = 26;
    private boolean allowWhitespaceOnlyPieces_ = false;

    /**
     *
     * Allows pieces that only contain whitespaces instead of appearing only as
     * prefix or suffix of other pieces.
     * 
* * optional bool allow_whitespace_only_pieces = 26 [default = false]; * @return Whether the allowWhitespaceOnlyPieces field is set. */
    @java.lang.Override
    public boolean hasAllowWhitespaceOnlyPieces() {
      // Bit 0x00100000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00100000;
      return presence != 0;
    }

    /**
     *
     * Allows pieces that only contain whitespaces instead of appearing only as
     * prefix or suffix of other pieces.
     * 
* * optional bool allow_whitespace_only_pieces = 26 [default = false]; * @return The allowWhitespaceOnlyPieces. */
    @java.lang.Override
    public boolean getAllowWhitespaceOnlyPieces() {
      // Plain read of the stored value.
      return allowWhitespaceOnlyPieces_;
    }

    public static final int SPLIT_DIGITS_FIELD_NUMBER = 25;
    private boolean splitDigits_ = false;

    /**
     *
     * Split all digits (0-9) into separate pieces.
     * 
* * optional bool split_digits = 25 [default = false]; * @return Whether the splitDigits field is set. */
    @java.lang.Override
    public boolean hasSplitDigits() {
      // Bit 0x00200000 of bitField0_ is the one tested for this field.
      final int presence = bitField0_ & 0x00200000;
      return presence != 0;
    }

    /**
     *
     * Split all digits (0-9) into separate pieces.
     * 
* * optional bool split_digits = 25 [default = false]; * @return The splitDigits. */
    @java.lang.Override
    public boolean getSplitDigits() {
      // Plain read of the stored value.
      return splitDigits_;
    }

    public static final int CONTROL_SYMBOLS_FIELD_NUMBER = 30;
    @SuppressWarnings("serial")
    private com.google.protobuf.LazyStringArrayList controlSymbols_ =
        com.google.protobuf.LazyStringArrayList.emptyList();

    /**
     *
     *&#47;////////////////////////////////////////////////////////////////
     * Vocabulary management
     *
     * Defines control symbols used as an indicator to
     * change the behavior of the decoder. <s> and </s> are pre-defined.
     * We can use this field to encode various meta information,
     * including language indicator in multilingual model.
     * These symbols are not visible to users, but visible to
     * the decoder. Note that when the input sentence contains control symbols,
     * they are not treated as one token, but segmented into normal pieces.
     * Control symbols must be inserted independently from the segmentation.
     * 
* * repeated string control_symbols = 30; * @return A list containing the controlSymbols. */ public com.google.protobuf.ProtocolStringList getControlSymbolsList() { return controlSymbols_; } /** *
     */////////////////////////////////////////////////////////////////
     * Vocabulary management
     *
     * Defines control symbols used as an indicator to
     * change the behavior of the decoder. <s> and </s> are pre-defined.
     * We can use this field to encode various meta information,
     * including language indicator in multilingual model.
     * These symbols are not visible to users, but visible to
     * the decoder. Note that when the input sentence contains control symbols,
     * they are not treated as one token, but segmented into normal pieces.
     * Control symbols must be inserted independently from the segmentation.
     * 
* * repeated string control_symbols = 30; * @return The count of controlSymbols. */ public int getControlSymbolsCount() { return controlSymbols_.size(); } /** *
     */////////////////////////////////////////////////////////////////
     * Vocabulary management
     *
     * Defines control symbols used as an indicator to
     * change the behavior of the decoder. <s> and </s> are pre-defined.
     * We can use this field to encode various meta information,
     * including language indicator in multilingual model.
     * These symbols are not visible to users, but visible to
     * the decoder. Note that when the input sentence contains control symbols,
     * they are not treated as one token, but segmented into normal pieces.
     * Control symbols must be inserted independently from the segmentation.
     * 
* * repeated string control_symbols = 30; * @param index The index of the element to return. * @return The controlSymbols at the given index. */ public java.lang.String getControlSymbols(int index) { return controlSymbols_.get(index); } /** *
     */////////////////////////////////////////////////////////////////
     * Vocabulary management
     *
     * Defines control symbols used as an indicator to
     * change the behavior of the decoder. <s> and </s> are pre-defined.
     * We can use this field to encode various meta information,
     * including language indicator in multilingual model.
     * These symbols are not visible to users, but visible to
     * the decoder. Note that when the input sentence contains control symbols,
     * they are not treated as one token, but segmented into normal pieces.
     * Control symbols must be inserted independently from the segmentation.
     * 
* * repeated string control_symbols = 30; * @param index The index of the value to return. * @return The bytes of the controlSymbols at the given index. */ public com.google.protobuf.ByteString getControlSymbolsBytes(int index) { return controlSymbols_.getByteString(index); } public static final int USER_DEFINED_SYMBOLS_FIELD_NUMBER = 31; @SuppressWarnings("serial") private com.google.protobuf.LazyStringArrayList userDefinedSymbols_ = com.google.protobuf.LazyStringArrayList.emptyList(); /** *
     * <pre>
     * User defined symbols. They are added with an extremely high score so
     * that they are always treated as one unique symbol in any context.
     * A typical use of user_defined_symbols is as placeholders for named
     * entities.
     * </pre>
     *
     * <code>repeated string user_defined_symbols = 31;</code>
     * @return A list containing the userDefinedSymbols.
     */
    public com.google.protobuf.ProtocolStringList
        getUserDefinedSymbolsList() {
      return userDefinedSymbols_;
    }

    /**
     * <pre>
     * Number of user defined symbols (symbols added with an extremely high
     * score so they are always treated as one unique symbol in any context).
     * </pre>
     *
     * <code>repeated string user_defined_symbols = 31;</code>
     * @return The count of userDefinedSymbols.
     */
    public int getUserDefinedSymbolsCount() {
      final int count = userDefinedSymbols_.size();
      return count;
    }

    /**
     * <pre>
     * One user defined symbol (symbols added with an extremely high score so
     * they are always treated as one unique symbol in any context).
     * </pre>
     *
     * <code>repeated string user_defined_symbols = 31;</code>
     * @param index The index of the element to return.
     * @return The userDefinedSymbols at the given index.
     */
    public java.lang.String getUserDefinedSymbols(int index) {
      final java.lang.String symbol = userDefinedSymbols_.get(index);
      return symbol;
    }

    /**
     * <pre>
     * One user defined symbol, as bytes (symbols added with an extremely high
     * score so they are always treated as one unique symbol in any context).
     * </pre>
     *
     * <code>repeated string user_defined_symbols = 31;</code>
     * @param index The index of the value to return.
     * @return The bytes of the userDefinedSymbols at the given index.
     */
    public com.google.protobuf.ByteString
        getUserDefinedSymbolsBytes(int index) {
      final com.google.protobuf.ByteString symbolBytes =
          userDefinedSymbols_.getByteString(index);
      return symbolBytes;
    }

    public static final int REQUIRED_CHARS_FIELD_NUMBER = 36;
    @SuppressWarnings("serial")
    private volatile java.lang.Object requiredChars_ = "";

    /**
     * <pre>
     * Required characters. Every UTF8 character in this string is included
     * in the character set regardless of the character_coverage value. Unlike
     * user_defined_symbols, these characters get scores based on their
     * frequency in the input sentences, and the model can form subwords
     * using the characters in this field.
     * </pre>
     *
     * <code>optional string required_chars = 36;</code>
     * @return Whether the requiredChars field is set.
     */
    @java.lang.Override
    public boolean hasRequiredChars() {
      // Presence of this field is recorded in one bit of bitField0_.
      final int presenceBit = bitField0_ & 0x00400000;
      return presenceBit != 0;
    }

    /**
     * <pre>
     * Required characters. Every UTF8 character in this string is included
     * in the character set regardless of the character_coverage value. Unlike
     * user_defined_symbols, these characters get scores based on their
     * frequency in the input sentences, and the model can form subwords
     * using the characters in this field.
     * </pre>
     *
     * <code>optional string required_chars = 36;</code>
     * @return The requiredChars.
     */
    @java.lang.Override
    public java.lang.String getRequiredChars() {
      final java.lang.Object current = requiredChars_;
      if (current instanceof java.lang.String) {
        return (java.lang.String) current;
      }
      // The field currently holds a ByteString: decode it, and keep the
      // decoded String in the field only when the bytes are valid UTF-8.
      final com.google.protobuf.ByteString raw =
          (com.google.protobuf.ByteString) current;
      final java.lang.String decoded = raw.toStringUtf8();
      if (raw.isValidUtf8()) {
        requiredChars_ = decoded;
      }
      return decoded;
    }

    /**
     * <pre>
     * Required characters. Every UTF8 character in this string is included
     * in the character set regardless of the character_coverage value. Unlike
     * user_defined_symbols, these characters get scores based on their
     * frequency in the input sentences, and the model can form subwords
     * using the characters in this field.
     * </pre>
     *
     * <code>optional string required_chars = 36;</code>
     * @return The bytes for requiredChars.
     */
    @java.lang.Override
    public com.google.protobuf.ByteString
        getRequiredCharsBytes() {
      final java.lang.Object current = requiredChars_;
      if (!(current instanceof java.lang.String)) {
        return (com.google.protobuf.ByteString) current;
      }
      // The field currently holds a String: encode it and store the bytes back.
      final com.google.protobuf.ByteString encoded =
          com.google.protobuf.ByteString.copyFromUtf8(
              (java.lang.String) current);
      requiredChars_ = encoded;
      return encoded;
    }

    public static final int BYTE_FALLBACK_FIELD_NUMBER = 35;
    private boolean byteFallback_ = false;

    /**
     * <pre>
     * Breaks unknown pieces down into UTF-8 bytes.
     * </pre>
     *
     * <code>optional bool byte_fallback = 35 [default = false];</code>
     * @return Whether the byteFallback field is set.
     */
    @java.lang.Override
    public boolean hasByteFallback() {
      // Presence of this field is recorded in one bit of bitField0_.
      final int presenceBit = bitField0_ & 0x00800000;
      return presenceBit != 0;
    }

    /**
     * <pre>
     * Breaks unknown pieces down into UTF-8 bytes.
     * </pre>
     *
     * <code>optional bool byte_fallback = 35 [default = false];</code>
     * @return The byteFallback.
     */
    @java.lang.Override
    public boolean getByteFallback() {
      return byteFallback_;
    }

    public static final int VOCABULARY_OUTPUT_PIECE_SCORE_FIELD_NUMBER = 32;
    private boolean vocabularyOutputPieceScore_ = true;

    /**
     * <pre>
     * Controls whether the score of each piece is additionally written
     * when the vocabulary file is created.
     * </pre>
     *
     * <code>optional bool vocabulary_output_piece_score = 32 [default = true];</code>
     * @return Whether the vocabularyOutputPieceScore field is set.
     */
    @java.lang.Override
    public boolean hasVocabularyOutputPieceScore() {
      // Presence of this field is recorded in one bit of bitField0_.
      final int presenceBit = bitField0_ & 0x01000000;
      return presenceBit != 0;
    }

    /**
     * <pre>
     * Controls whether the score of each piece is additionally written
     * when the vocabulary file is created.
     * </pre>
     *
     * <code>optional bool vocabulary_output_piece_score = 32 [default = true];</code>
     * @return The vocabularyOutputPieceScore.
     */
    @java.lang.Override
    public boolean getVocabularyOutputPieceScore() {
      return vocabularyOutputPieceScore_;
    }

    public static final int HARD_VOCAB_LIMIT_FIELD_NUMBER = 33;
    private boolean hardVocabLimit_ = true;

    /**
     * <pre>
     * Treats `vocab_size` as a hard limit: crash if the model can not
     * produce a vocab of size `vocab_size`. When `hard_vocab_limit` is
     * false, vocab_size is treated as a soft limit. Note that with
     * model_type=char, hard_vocab_limit = false is always assumed.
     * </pre>
     *
     * <code>optional bool hard_vocab_limit = 33 [default = true];</code>
     * @return Whether the hardVocabLimit field is set.
     */
    @java.lang.Override
    public boolean hasHardVocabLimit() {
      // Presence of this field is recorded in one bit of bitField0_.
      final int presenceBit = bitField0_ & 0x02000000;
      return presenceBit != 0;
    }

    /**
     * <pre>
     * Treats `vocab_size` as a hard limit: crash if the model can not
     * produce a vocab of size `vocab_size`. When `hard_vocab_limit` is
     * false, vocab_size is treated as a soft limit. Note that with
     * model_type=char, hard_vocab_limit = false is always assumed.
     * </pre>
     *
     * <code>optional bool hard_vocab_limit = 33 [default = true];</code>
     * @return The hardVocabLimit.
     */
    @java.lang.Override
    public boolean getHardVocabLimit() {
      return hardVocabLimit_;
    }

    public static final int USE_ALL_VOCAB_FIELD_NUMBER = 34;
    private boolean useAllVocab_ = false;

    /**
     * <pre>
     * Uses all symbols for vocab extraction. This flag is valid
     * if the model type is either CHAR or WORD.
     * </pre>
     *
     * <code>optional bool use_all_vocab = 34 [default = false];</code>
     * @return Whether the useAllVocab field is set.
     */
    @java.lang.Override
    public boolean hasUseAllVocab() {
      // Presence of this field is recorded in one bit of bitField0_.
      final int presenceBit = bitField0_ & 0x04000000;
      return presenceBit != 0;
    }

    /**
     * <pre>
     * Uses all symbols for vocab extraction. This flag is valid
     * if the model type is either CHAR or WORD.
     * </pre>
     *
     * <code>optional bool use_all_vocab = 34 [default = false];</code>
     * @return The useAllVocab.
     */
    @java.lang.Override
    public boolean getUseAllVocab() {
      return useAllVocab_;
    }

    public static final int UNK_ID_FIELD_NUMBER = 40;
    private int unkId_ = 0;

    /**
     */////////////////////////////////////////////////////////////////
     * Reserved special meta tokens.
     * * -1 is not used.
     * * unk_id must not be -1.
     * Id must starts with 0 and be contigous.
     * 
* * optional int32 unk_id = 40 [default = 0]; * @return Whether the unkId field is set. */ @java.lang.Override public boolean hasUnkId() { return ((bitField0_ & 0x08000000) != 0); } /** *
     */////////////////////////////////////////////////////////////////
     * Reserved special meta tokens.
     * * -1 is not used.
     * * unk_id must not be -1.
     * Id must starts with 0 and be contigous.
     * 
* * optional int32 unk_id = 40 [default = 0]; * @return The unkId. */ @java.lang.Override public int getUnkId() { return unkId_; } public static final int BOS_ID_FIELD_NUMBER = 41; private int bosId_ = 1; /** *
     * <pre>
     * &lt;s&gt;
     * </pre>
     *
     * <code>optional int32 bos_id = 41 [default = 1];</code>
     * @return Whether the bosId field is set.
     */
    @java.lang.Override
    public boolean hasBosId() {
      // Presence of this field is recorded in one bit of bitField0_.
      final int presenceBit = bitField0_ & 0x10000000;
      return presenceBit != 0;
    }

    /**
     * <pre>
     * &lt;s&gt;
     * </pre>
     *
     * <code>optional int32 bos_id = 41 [default = 1];</code>
     * @return The bosId.
     */
    @java.lang.Override
    public int getBosId() {
      return bosId_;
    }

    public static final int EOS_ID_FIELD_NUMBER = 42;
    private int eosId_ = 2;

    /**
     * <pre>
     * &lt;/s&gt;
     * </pre>
     *
     * <code>optional int32 eos_id = 42 [default = 2];</code>
     * @return Whether the eosId field is set.
     */
    @java.lang.Override
    public boolean hasEosId() {
      // Presence of this field is recorded in one bit of bitField0_.
      final int presenceBit = bitField0_ & 0x20000000;
      return presenceBit != 0;
    }

    /**
     * <pre>
     * &lt;/s&gt;
     * </pre>
     *
     * <code>optional int32 eos_id = 42 [default = 2];</code>
     * @return The eosId.
     */
    @java.lang.Override
    public int getEosId() {
      return eosId_;
    }

    public static final int PAD_ID_FIELD_NUMBER = 43;
    private int padId_ = -1;

    /**
     * <pre>
     * &lt;pad&gt; (padding)
     * </pre>
     *
     * <code>optional int32 pad_id = 43 [default = -1];</code>
     * @return Whether the padId field is set.
     */
    @java.lang.Override
    public boolean hasPadId() {
      // Presence of this field is recorded in one bit of bitField0_.
      final int presenceBit = bitField0_ & 0x40000000;
      return presenceBit != 0;
    }

    /**
     * <pad> (padding)
     * 
* * optional int32 pad_id = 43 [default = -1]; * @return The padId. */ @java.lang.Override public int getPadId() { return padId_; } public static final int UNK_PIECE_FIELD_NUMBER = 45; @SuppressWarnings("serial") private volatile java.lang.Object unkPiece_ = ""; /** * optional string unk_piece = 45 [default = "<unk>"]; * @return Whether the unkPiece field is set. */ @java.lang.Override public boolean hasUnkPiece() { return ((bitField0_ & 0x80000000) != 0); } /** * optional string unk_piece = 45 [default = "<unk>"]; * @return The unkPiece. */ @java.lang.Override public java.lang.String getUnkPiece() { java.lang.Object ref = unkPiece_; if (ref instanceof java.lang.String) { return (java.lang.String) ref; } else { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { unkPiece_ = s; } return s; } } /** * optional string unk_piece = 45 [default = "<unk>"]; * @return The bytes for unkPiece. */ @java.lang.Override public com.google.protobuf.ByteString getUnkPieceBytes() { java.lang.Object ref = unkPiece_; if (ref instanceof java.lang.String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); unkPiece_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } public static final int BOS_PIECE_FIELD_NUMBER = 46; @SuppressWarnings("serial") private volatile java.lang.Object bosPiece_ = ""; /** * optional string bos_piece = 46 [default = "<s>"]; * @return Whether the bosPiece field is set. */ @java.lang.Override public boolean hasBosPiece() { return ((bitField1_ & 0x00000001) != 0); } /** * optional string bos_piece = 46 [default = "<s>"]; * @return The bosPiece. 
*/ @java.lang.Override public java.lang.String getBosPiece() { java.lang.Object ref = bosPiece_; if (ref instanceof java.lang.String) { return (java.lang.String) ref; } else { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { bosPiece_ = s; } return s; } } /** * optional string bos_piece = 46 [default = "<s>"]; * @return The bytes for bosPiece. */ @java.lang.Override public com.google.protobuf.ByteString getBosPieceBytes() { java.lang.Object ref = bosPiece_; if (ref instanceof java.lang.String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); bosPiece_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } public static final int EOS_PIECE_FIELD_NUMBER = 47; @SuppressWarnings("serial") private volatile java.lang.Object eosPiece_ = ""; /** * optional string eos_piece = 47 [default = "</s>"]; * @return Whether the eosPiece field is set. */ @java.lang.Override public boolean hasEosPiece() { return ((bitField1_ & 0x00000002) != 0); } /** * optional string eos_piece = 47 [default = "</s>"]; * @return The eosPiece. */ @java.lang.Override public java.lang.String getEosPiece() { java.lang.Object ref = eosPiece_; if (ref instanceof java.lang.String) { return (java.lang.String) ref; } else { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { eosPiece_ = s; } return s; } } /** * optional string eos_piece = 47 [default = "</s>"]; * @return The bytes for eosPiece. 
*/ @java.lang.Override public com.google.protobuf.ByteString getEosPieceBytes() { java.lang.Object ref = eosPiece_; if (ref instanceof java.lang.String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); eosPiece_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } public static final int PAD_PIECE_FIELD_NUMBER = 48; @SuppressWarnings("serial") private volatile java.lang.Object padPiece_ = ""; /** * optional string pad_piece = 48 [default = "<pad>"]; * @return Whether the padPiece field is set. */ @java.lang.Override public boolean hasPadPiece() { return ((bitField1_ & 0x00000004) != 0); } /** * optional string pad_piece = 48 [default = "<pad>"]; * @return The padPiece. */ @java.lang.Override public java.lang.String getPadPiece() { java.lang.Object ref = padPiece_; if (ref instanceof java.lang.String) { return (java.lang.String) ref; } else { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { padPiece_ = s; } return s; } } /** * optional string pad_piece = 48 [default = "<pad>"]; * @return The bytes for padPiece. */ @java.lang.Override public com.google.protobuf.ByteString getPadPieceBytes() { java.lang.Object ref = padPiece_; if (ref instanceof java.lang.String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); padPiece_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } public static final int UNK_SURFACE_FIELD_NUMBER = 44; @SuppressWarnings("serial") private volatile java.lang.Object unkSurface_ = com.google.protobuf.Internal.stringDefaultValue(" \342\201\207 "); /** *
     * <pre>
     * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
     * since this character can be useful both for the user and the
     * developer: it makes it easy to see that &lt;unk&gt; was emitted.
     * </pre>
     *
     * <code>optional string unk_surface = 44 [default = " &#92;342&#92;201&#92;207 "];</code>
     * @return Whether the unkSurface field is set.
     */
    @java.lang.Override
    public boolean hasUnkSurface() {
      // Presence of this field is recorded in one bit of bitField1_.
      final int presenceBit = bitField1_ & 0x00000008;
      return presenceBit != 0;
    }

    /**
     * <pre>
     * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
     * since this character can be useful both for the user and the
     * developer: it makes it easy to see that &lt;unk&gt; was emitted.
     * </pre>
     *
     * <code>optional string unk_surface = 44 [default = " &#92;342&#92;201&#92;207 "];</code>
     * @return The unkSurface.
     */
    @java.lang.Override
    public java.lang.String getUnkSurface() {
      final java.lang.Object current = unkSurface_;
      if (current instanceof java.lang.String) {
        return (java.lang.String) current;
      }
      // Decode the stored bytes; keep the String only when they are valid UTF-8.
      final com.google.protobuf.ByteString raw =
          (com.google.protobuf.ByteString) current;
      final java.lang.String decoded = raw.toStringUtf8();
      if (raw.isValidUtf8()) {
        unkSurface_ = decoded;
      }
      return decoded;
    }

    /**
     * <pre>
     * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
     * since this character can be useful both for the user and the
     * developer: it makes it easy to see that &lt;unk&gt; was emitted.
     * </pre>
     *
     * <code>optional string unk_surface = 44 [default = " &#92;342&#92;201&#92;207 "];</code>
     * @return The bytes for unkSurface.
     */
    @java.lang.Override
    public com.google.protobuf.ByteString
        getUnkSurfaceBytes() {
      final java.lang.Object current = unkSurface_;
      if (!(current instanceof java.lang.String)) {
        return (com.google.protobuf.ByteString) current;
      }
      // Encode the stored String and store the bytes back in the field.
      final com.google.protobuf.ByteString encoded =
          com.google.protobuf.ByteString.copyFromUtf8(
              (java.lang.String) current);
      unkSurface_ = encoded;
      return encoded;
    }

    public static final int TRAIN_EXTREMELY_LARGE_CORPUS_FIELD_NUMBER = 49;
    private boolean trainExtremelyLargeCorpus_ = false;

    /**
     * <pre>
     * Raises the bit depth so that a unigram model can be trained on large
     * (&gt;10M sentences) corpora. A side effect of enabling this flag
     * is increased memory usage.
     * </pre>
     *
     * <code>optional bool train_extremely_large_corpus = 49 [default = false];</code>
     * @return Whether the trainExtremelyLargeCorpus field is set.
     */
    @java.lang.Override
    public boolean hasTrainExtremelyLargeCorpus() {
      // Presence of this field is recorded in one bit of bitField1_.
      final int presenceBit = bitField1_ & 0x00000010;
      return presenceBit != 0;
    }

    /**
     * Increase bit depth to allow unigram model training on large
     * (&gt;10M sentences) corpora. A side effect of enabling this flag
     * is increased memory usage.
     * 
* * optional bool train_extremely_large_corpus = 49 [default = false]; * @return The trainExtremelyLargeCorpus. */
    @java.lang.Override
    public boolean getTrainExtremelyLargeCorpus() {
      return trainExtremelyLargeCorpus_;
    }

    // Cached result of isInitialized(): -1 = not computed yet, 0 = false, 1 = true.
    private byte memoizedIsInitialized = -1;

    /**
     * Reports whether this message is initialized. The only check made here is
     * {@code extensionsAreInitialized()}; the outcome is cached in
     * {@code memoizedIsInitialized} so later calls return immediately.
     *
     * @return true if the extensions are initialized, false otherwise.
     */
    @java.lang.Override
    public final boolean isInitialized() {
      byte isInitialized = memoizedIsInitialized;
      if (isInitialized == 1) return true;
      if (isInitialized == 0) return false;

      if (!extensionsAreInitialized()) {
        memoizedIsInitialized = 0;
        return false;
      }
      memoizedIsInitialized = 1;
      return true;
    }

    /**
     * Writes this message to {@code output}. Fields are emitted in ascending
     * field-number order (1 through 49): repeated string fields are written
     * once per element, optional fields only when their presence bit in
     * {@code bitField0_} / {@code bitField1_} is set. Extensions and unknown
     * fields are written last.
     *
     * @param output the stream to write to.
     * @throws java.io.IOException declared by the signature; presumably raised
     *     by the underlying write calls.
     */
    @java.lang.Override
    public void writeTo(com.google.protobuf.CodedOutputStream output)
                        throws java.io.IOException {
      com.google.protobuf.GeneratedMessageV3
        .ExtendableMessage.ExtensionWriter
          extensionWriter = newExtensionWriter();
      for (int i = 0; i < input_.size(); i++) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 1, input_.getRaw(i));
      }
      if (((bitField0_ & 0x00000002) != 0)) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 2, modelPrefix_);
      }
      if (((bitField0_ & 0x00000004) != 0)) {
        output.writeEnum(3, modelType_);
      }
      if (((bitField0_ & 0x00000008) != 0)) {
        output.writeInt32(4, vocabSize_);
      }
      for (int i = 0; i < acceptLanguage_.size(); i++) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 5, acceptLanguage_.getRaw(i));
      }
      if (((bitField0_ & 0x00000010) != 0)) {
        output.writeInt32(6, selfTestSampleSize_);
      }
      if (((bitField0_ & 0x00000001) != 0)) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 7, inputFormat_);
      }
      if (((bitField0_ & 0x00000020) != 0)) {
        output.writeFloat(10, characterCoverage_);
      }
      if (((bitField0_ & 0x00000040) != 0)) {
        output.writeUInt64(11, inputSentenceSize_);
      }
      if (((bitField0_ & 0x00000100) != 0)) {
        output.writeInt32(12, miningSentenceSize_);
      }
      if (((bitField0_ & 0x00000200) != 0)) {
        output.writeInt32(13, trainingSentenceSize_);
      }
      if (((bitField0_ & 0x00000400) != 0)) {
        output.writeInt32(14, seedSentencepieceSize_);
      }
      if (((bitField0_ & 0x00000800) != 0)) {
        output.writeFloat(15, shrinkingFactor_);
      }
      if (((bitField0_ & 0x00002000) != 0)) {
        output.writeInt32(16, numThreads_);
      }
      if (((bitField0_ & 0x00004000) != 0)) {
        output.writeInt32(17, numSubIterations_);
      }
      if (((bitField0_ & 0x00001000) != 0)) {
        output.writeInt32(18, maxSentenceLength_);
      }
      if (((bitField0_ & 0x00000080) != 0)) {
        output.writeBool(19, shuffleInputSentence_);
      }
      if (((bitField0_ & 0x00008000) != 0)) {
        output.writeInt32(20, maxSentencepieceLength_);
      }
      if (((bitField0_ & 0x00010000) != 0)) {
        output.writeBool(21, splitByUnicodeScript_);
      }
      if (((bitField0_ & 0x00040000) != 0)) {
        output.writeBool(22, splitByWhitespace_);
      }
      if (((bitField0_ & 0x00020000) != 0)) {
        output.writeBool(23, splitByNumber_);
      }
      if (((bitField0_ & 0x00080000) != 0)) {
        output.writeBool(24, treatWhitespaceAsSuffix_);
      }
      if (((bitField0_ & 0x00200000) != 0)) {
        output.writeBool(25, splitDigits_);
      }
      if (((bitField0_ & 0x00100000) != 0)) {
        output.writeBool(26, allowWhitespaceOnlyPieces_);
      }
      for (int i = 0; i < controlSymbols_.size(); i++) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 30, controlSymbols_.getRaw(i));
      }
      for (int i = 0; i < userDefinedSymbols_.size(); i++) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 31, userDefinedSymbols_.getRaw(i));
      }
      if (((bitField0_ & 0x01000000) != 0)) {
        output.writeBool(32, vocabularyOutputPieceScore_);
      }
      if (((bitField0_ & 0x02000000) != 0)) {
        output.writeBool(33, hardVocabLimit_);
      }
      if (((bitField0_ & 0x04000000) != 0)) {
        output.writeBool(34, useAllVocab_);
      }
      if (((bitField0_ & 0x00800000) != 0)) {
        output.writeBool(35, byteFallback_);
      }
      if (((bitField0_ & 0x00400000) != 0)) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 36, requiredChars_);
      }
      if (((bitField0_ & 0x08000000) != 0)) {
        output.writeInt32(40, unkId_);
      }
      if (((bitField0_ & 0x10000000) != 0)) {
        output.writeInt32(41, bosId_);
      }
      if (((bitField0_ & 0x20000000) != 0)) {
        output.writeInt32(42, eosId_);
      }
      if (((bitField0_ & 0x40000000) != 0)) {
        output.writeInt32(43, padId_);
      }
      // From field 44 on, presence bits live in bitField1_ (except field 45,
      // which uses the top bit of bitField0_).
      if (((bitField1_ & 0x00000008) != 0)) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 44, unkSurface_);
      }
      if (((bitField0_ & 0x80000000) != 0)) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 45, unkPiece_);
      }
      if (((bitField1_ & 0x00000001) != 0)) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 46, bosPiece_);
      }
      if (((bitField1_ & 0x00000002) != 0)) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 47, eosPiece_);
      }
      if (((bitField1_ & 0x00000004) != 0)) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, 48, padPiece_);
      }
      if (((bitField1_ & 0x00000010) != 0)) {
        output.writeBool(49, trainExtremelyLargeCorpus_);
      }
      extensionWriter.writeUntil(536870912, output);
      getUnknownFields().writeTo(output);
    }

    /**
     * Computes the serialized size of this message. The result is cached in
     * {@code memoizedSize}; a cached value other than -1 is returned as is.
     * The fields are visited in the same order and under the same presence
     * checks as in {@link #writeTo}, followed by the extensions and the
     * unknown fields.
     *
     * @return the total computed size.
     */
    @java.lang.Override
    public int getSerializedSize() {
      int size = memoizedSize;
      if (size != -1) return size;

      size = 0;
      {
        int dataSize = 0;
        for (int i = 0; i < input_.size(); i++) {
          dataSize += computeStringSizeNoTag(input_.getRaw(i));
        }
        size += dataSize;
        // Per-element overhead on top of the payload (presumably the tag size).
        size += 1 * getInputList().size();
      }
      if (((bitField0_ & 0x00000002) != 0)) {
        size += com.google.protobuf.GeneratedMessageV3.computeStringSize(2, modelPrefix_);
      }
      if (((bitField0_ & 0x00000004) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeEnumSize(3, modelType_);
      }
      if (((bitField0_ & 0x00000008) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(4, vocabSize_);
      }
      {
        int dataSize = 0;
        for (int i = 0; i < acceptLanguage_.size(); i++) {
          dataSize += computeStringSizeNoTag(acceptLanguage_.getRaw(i));
        }
        size += dataSize;
        size += 1 * getAcceptLanguageList().size();
      }
      if (((bitField0_ & 0x00000010) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(6, selfTestSampleSize_);
      }
      if (((bitField0_ & 0x00000001) != 0)) {
        size += com.google.protobuf.GeneratedMessageV3.computeStringSize(7, inputFormat_);
      }
      if (((bitField0_ & 0x00000020) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeFloatSize(10, characterCoverage_);
      }
      if (((bitField0_ & 0x00000040) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeUInt64Size(11, inputSentenceSize_);
      }
      if (((bitField0_ & 0x00000100) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(12, miningSentenceSize_);
      }
      if (((bitField0_ & 0x00000200) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(13, trainingSentenceSize_);
      }
      if (((bitField0_ & 0x00000400) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(14, seedSentencepieceSize_);
      }
      if (((bitField0_ & 0x00000800) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeFloatSize(15, shrinkingFactor_);
      }
      if (((bitField0_ & 0x00002000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(16, numThreads_);
      }
      if (((bitField0_ & 0x00004000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(17, numSubIterations_);
      }
      if (((bitField0_ & 0x00001000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(18, maxSentenceLength_);
      }
      if (((bitField0_ & 0x00000080) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(19, shuffleInputSentence_);
      }
      if (((bitField0_ & 0x00008000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(20, maxSentencepieceLength_);
      }
      if (((bitField0_ & 0x00010000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(21, splitByUnicodeScript_);
      }
      if (((bitField0_ & 0x00040000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(22, splitByWhitespace_);
      }
      if (((bitField0_ & 0x00020000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(23, splitByNumber_);
      }
      if (((bitField0_ & 0x00080000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(24, treatWhitespaceAsSuffix_);
      }
      if (((bitField0_ & 0x00200000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(25, splitDigits_);
      }
      if (((bitField0_ & 0x00100000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(26, allowWhitespaceOnlyPieces_);
      }
      {
        int dataSize = 0;
        for (int i = 0; i < controlSymbols_.size(); i++) {
          dataSize += computeStringSizeNoTag(controlSymbols_.getRaw(i));
        }
        size += dataSize;
        // Fields 30 and 31 add 2 per element where fields 1 and 5 add 1.
        size += 2 * getControlSymbolsList().size();
      }
      {
        int dataSize = 0;
        for (int i = 0; i < userDefinedSymbols_.size(); i++) {
          dataSize += computeStringSizeNoTag(userDefinedSymbols_.getRaw(i));
        }
        size += dataSize;
        size += 2 * getUserDefinedSymbolsList().size();
      }
      if (((bitField0_ & 0x01000000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(32, vocabularyOutputPieceScore_);
      }
      if (((bitField0_ & 0x02000000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(33, hardVocabLimit_);
      }
      if (((bitField0_ & 0x04000000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(34, useAllVocab_);
      }
      if (((bitField0_ & 0x00800000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(35, byteFallback_);
      }
      if (((bitField0_ & 0x00400000) != 0)) {
        size += com.google.protobuf.GeneratedMessageV3.computeStringSize(36, requiredChars_);
      }
      if (((bitField0_ & 0x08000000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(40, unkId_);
      }
      if (((bitField0_ & 0x10000000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(41, bosId_);
      }
      if (((bitField0_ & 0x20000000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(42, eosId_);
      }
      if (((bitField0_ & 0x40000000) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeInt32Size(43, padId_);
      }
      if (((bitField1_ & 0x00000008) != 0)) {
        size += com.google.protobuf.GeneratedMessageV3.computeStringSize(44, unkSurface_);
      }
      if (((bitField0_ & 0x80000000) != 0)) {
        size += com.google.protobuf.GeneratedMessageV3.computeStringSize(45, unkPiece_);
      }
      if (((bitField1_ & 0x00000001) != 0)) {
        size += com.google.protobuf.GeneratedMessageV3.computeStringSize(46, bosPiece_);
      }
      if (((bitField1_ & 0x00000002) != 0)) {
        size += com.google.protobuf.GeneratedMessageV3.computeStringSize(47, eosPiece_);
      }
      if (((bitField1_ & 0x00000004) != 0)) {
        size += com.google.protobuf.GeneratedMessageV3.computeStringSize(48, padPiece_);
      }
      if (((bitField1_ & 0x00000010) != 0)) {
        size += com.google.protobuf.CodedOutputStream
          .computeBoolSize(49, trainExtremelyLargeCorpus_);
      }
      size += extensionsSerializedSize();
      size += getUnknownFields().getSerializedSize();
      memoizedSize = size;
      return size;
    }

    // equals() starts here and continues on the following lines of the file;
    // the text below is kept exactly as it was.
    @java.lang.Override public boolean equals(final java.lang.Object obj) { if (obj == this) { return true; } if (!(obj instanceof sentencepiece.SentencepieceModel.TrainerSpec)) { return super.equals(obj); } sentencepiece.SentencepieceModel.TrainerSpec other = (sentencepiece.SentencepieceModel.TrainerSpec) obj; if (!getInputList() .equals(other.getInputList())) return false; if (hasInputFormat() != other.hasInputFormat()) return false; if (hasInputFormat()) { if (!getInputFormat() .equals(other.getInputFormat())) return false; } if (hasModelPrefix() != other.hasModelPrefix()) return false; if (hasModelPrefix()) { if (!getModelPrefix() .equals(other.getModelPrefix())) return false; } if (hasModelType() != other.hasModelType()) return false; if (hasModelType()) { if (modelType_ != other.modelType_) return false; } if (hasVocabSize() != other.hasVocabSize()) return false; if (hasVocabSize()) { if (getVocabSize() != other.getVocabSize()) return false; } if (!getAcceptLanguageList() .equals(other.getAcceptLanguageList())) return false; if (hasSelfTestSampleSize() != other.hasSelfTestSampleSize()) return false; if (hasSelfTestSampleSize()) { if (getSelfTestSampleSize() != other.getSelfTestSampleSize()) return false; } if (hasCharacterCoverage() != other.hasCharacterCoverage()) return false; if (hasCharacterCoverage()) { if (java.lang.Float.floatToIntBits(getCharacterCoverage()) != 
java.lang.Float.floatToIntBits( other.getCharacterCoverage())) return false; } if (hasInputSentenceSize() != other.hasInputSentenceSize()) return false; if (hasInputSentenceSize()) { if (getInputSentenceSize() != other.getInputSentenceSize()) return false; } if (hasShuffleInputSentence() != other.hasShuffleInputSentence()) return false; if (hasShuffleInputSentence()) { if (getShuffleInputSentence() != other.getShuffleInputSentence()) return false; } if (hasMiningSentenceSize() != other.hasMiningSentenceSize()) return false; if (hasMiningSentenceSize()) { if (getMiningSentenceSize() != other.getMiningSentenceSize()) return false; } if (hasTrainingSentenceSize() != other.hasTrainingSentenceSize()) return false; if (hasTrainingSentenceSize()) { if (getTrainingSentenceSize() != other.getTrainingSentenceSize()) return false; } if (hasSeedSentencepieceSize() != other.hasSeedSentencepieceSize()) return false; if (hasSeedSentencepieceSize()) { if (getSeedSentencepieceSize() != other.getSeedSentencepieceSize()) return false; } if (hasShrinkingFactor() != other.hasShrinkingFactor()) return false; if (hasShrinkingFactor()) { if (java.lang.Float.floatToIntBits(getShrinkingFactor()) != java.lang.Float.floatToIntBits( other.getShrinkingFactor())) return false; } if (hasMaxSentenceLength() != other.hasMaxSentenceLength()) return false; if (hasMaxSentenceLength()) { if (getMaxSentenceLength() != other.getMaxSentenceLength()) return false; } if (hasNumThreads() != other.hasNumThreads()) return false; if (hasNumThreads()) { if (getNumThreads() != other.getNumThreads()) return false; } if (hasNumSubIterations() != other.hasNumSubIterations()) return false; if (hasNumSubIterations()) { if (getNumSubIterations() != other.getNumSubIterations()) return false; } if (hasMaxSentencepieceLength() != other.hasMaxSentencepieceLength()) return false; if (hasMaxSentencepieceLength()) { if (getMaxSentencepieceLength() != other.getMaxSentencepieceLength()) return false; } if 
(hasSplitByUnicodeScript() != other.hasSplitByUnicodeScript()) return false; if (hasSplitByUnicodeScript()) { if (getSplitByUnicodeScript() != other.getSplitByUnicodeScript()) return false; } if (hasSplitByNumber() != other.hasSplitByNumber()) return false; if (hasSplitByNumber()) { if (getSplitByNumber() != other.getSplitByNumber()) return false; } if (hasSplitByWhitespace() != other.hasSplitByWhitespace()) return false; if (hasSplitByWhitespace()) { if (getSplitByWhitespace() != other.getSplitByWhitespace()) return false; } if (hasTreatWhitespaceAsSuffix() != other.hasTreatWhitespaceAsSuffix()) return false; if (hasTreatWhitespaceAsSuffix()) { if (getTreatWhitespaceAsSuffix() != other.getTreatWhitespaceAsSuffix()) return false; } if (hasAllowWhitespaceOnlyPieces() != other.hasAllowWhitespaceOnlyPieces()) return false; if (hasAllowWhitespaceOnlyPieces()) { if (getAllowWhitespaceOnlyPieces() != other.getAllowWhitespaceOnlyPieces()) return false; } if (hasSplitDigits() != other.hasSplitDigits()) return false; if (hasSplitDigits()) { if (getSplitDigits() != other.getSplitDigits()) return false; } if (!getControlSymbolsList() .equals(other.getControlSymbolsList())) return false; if (!getUserDefinedSymbolsList() .equals(other.getUserDefinedSymbolsList())) return false; if (hasRequiredChars() != other.hasRequiredChars()) return false; if (hasRequiredChars()) { if (!getRequiredChars() .equals(other.getRequiredChars())) return false; } if (hasByteFallback() != other.hasByteFallback()) return false; if (hasByteFallback()) { if (getByteFallback() != other.getByteFallback()) return false; } if (hasVocabularyOutputPieceScore() != other.hasVocabularyOutputPieceScore()) return false; if (hasVocabularyOutputPieceScore()) { if (getVocabularyOutputPieceScore() != other.getVocabularyOutputPieceScore()) return false; } if (hasHardVocabLimit() != other.hasHardVocabLimit()) return false; if (hasHardVocabLimit()) { if (getHardVocabLimit() != other.getHardVocabLimit()) return false; } 
if (hasUseAllVocab() != other.hasUseAllVocab()) return false;
      if (hasUseAllVocab()) {
        if (getUseAllVocab()
            != other.getUseAllVocab()) return false;
      }
      if (hasUnkId() != other.hasUnkId()) return false;
      if (hasUnkId()) {
        if (getUnkId()
            != other.getUnkId()) return false;
      }
      if (hasBosId() != other.hasBosId()) return false;
      if (hasBosId()) {
        if (getBosId()
            != other.getBosId()) return false;
      }
      if (hasEosId() != other.hasEosId()) return false;
      if (hasEosId()) {
        if (getEosId()
            != other.getEosId()) return false;
      }
      if (hasPadId() != other.hasPadId()) return false;
      if (hasPadId()) {
        if (getPadId()
            != other.getPadId()) return false;
      }
      if (hasUnkPiece() != other.hasUnkPiece()) return false;
      if (hasUnkPiece()) {
        if (!getUnkPiece()
            .equals(other.getUnkPiece())) return false;
      }
      if (hasBosPiece() != other.hasBosPiece()) return false;
      if (hasBosPiece()) {
        if (!getBosPiece()
            .equals(other.getBosPiece())) return false;
      }
      if (hasEosPiece() != other.hasEosPiece()) return false;
      if (hasEosPiece()) {
        if (!getEosPiece()
            .equals(other.getEosPiece())) return false;
      }
      if (hasPadPiece() != other.hasPadPiece()) return false;
      if (hasPadPiece()) {
        if (!getPadPiece()
            .equals(other.getPadPiece())) return false;
      }
      if (hasUnkSurface() != other.hasUnkSurface()) return false;
      if (hasUnkSurface()) {
        if (!getUnkSurface()
            .equals(other.getUnkSurface())) return false;
      }
      if (hasTrainExtremelyLargeCorpus() != other.hasTrainExtremelyLargeCorpus()) return false;
      if (hasTrainExtremelyLargeCorpus()) {
        if (getTrainExtremelyLargeCorpus()
            != other.getTrainExtremelyLargeCorpus()) return false;
      }
      // Unknown fields and extension fields also take part in equality.
      if (!getUnknownFields().equals(other.getUnknownFields())) return false;
      if (!getExtensionFields().equals(other.getExtensionFields())) return false;
      return true;
    }

    /**
     * Returns the hash code of this message.
     *
     * <p>A non-zero {@code memoizedHashCode} is returned without recomputation.
     * Otherwise the hash starts at 41 and is mixed with the descriptor's hash. Then,
     * in declaration order, each non-empty repeated field and each present field
     * contributes its field number (multiplier 37) followed by its value hash
     * (multiplier 53). Floats contribute their {@code floatToIntBits} value, and
     * longs and booleans go through {@code Internal.hashLong} and
     * {@code Internal.hashBoolean}. Extension fields and unknown fields are mixed
     * in last, and the result is stored in {@code memoizedHashCode}.
     *
     * @return the hash code of this message
     */
    @java.lang.Override
    public int hashCode() {
      if (memoizedHashCode != 0) {
        return memoizedHashCode;
      }
      int hash = 41;
      hash = (19 * hash) + getDescriptor().hashCode();
      // Repeated fields contribute only when they hold at least one element.
      if (getInputCount() > 0) {
        hash = (37 * hash) + INPUT_FIELD_NUMBER;
        hash = (53 * hash) + getInputList().hashCode();
      }
      if (hasInputFormat()) {
        hash = (37 * hash) + INPUT_FORMAT_FIELD_NUMBER;
        hash = (53 * hash) + getInputFormat().hashCode();
      }
      if (hasModelPrefix()) {
        hash = (37 * hash) + MODEL_PREFIX_FIELD_NUMBER;
        hash = (53 * hash) + getModelPrefix().hashCode();
      }
      if (hasModelType()) {
        hash = (37 * hash) + MODEL_TYPE_FIELD_NUMBER;
        // The raw modelType_ int is mixed in directly.
        hash = (53 * hash) + modelType_;
      }
      if (hasVocabSize()) {
        hash = (37 * hash) + VOCAB_SIZE_FIELD_NUMBER;
        hash = (53 * hash) + getVocabSize();
      }
      if (getAcceptLanguageCount() > 0) {
        hash = (37 * hash) + ACCEPT_LANGUAGE_FIELD_NUMBER;
        hash = (53 * hash) + getAcceptLanguageList().hashCode();
      }
      if (hasSelfTestSampleSize()) {
        hash = (37 * hash) + SELF_TEST_SAMPLE_SIZE_FIELD_NUMBER;
        hash = (53 * hash) + getSelfTestSampleSize();
      }
      if (hasCharacterCoverage()) {
        hash = (37 * hash) + CHARACTER_COVERAGE_FIELD_NUMBER;
        hash = (53 * hash) + java.lang.Float.floatToIntBits(
            getCharacterCoverage());
      }
      if (hasInputSentenceSize()) {
        hash = (37 * hash) + INPUT_SENTENCE_SIZE_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashLong(
            getInputSentenceSize());
      }
      if (hasShuffleInputSentence()) {
        hash = (37 * hash) + SHUFFLE_INPUT_SENTENCE_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getShuffleInputSentence());
      }
      if (hasMiningSentenceSize()) {
        hash = (37 * hash) + MINING_SENTENCE_SIZE_FIELD_NUMBER;
        hash = (53 * hash) + getMiningSentenceSize();
      }
      if (hasTrainingSentenceSize()) {
        hash = (37 * hash) + TRAINING_SENTENCE_SIZE_FIELD_NUMBER;
        hash = (53 * hash) + getTrainingSentenceSize();
      }
      if (hasSeedSentencepieceSize()) {
        hash = (37 * hash) + SEED_SENTENCEPIECE_SIZE_FIELD_NUMBER;
        hash = (53 * hash) + getSeedSentencepieceSize();
      }
      if (hasShrinkingFactor()) {
        hash = (37 * hash) + SHRINKING_FACTOR_FIELD_NUMBER;
        hash = (53 * hash) + java.lang.Float.floatToIntBits(
            getShrinkingFactor());
      }
      if (hasMaxSentenceLength()) {
        hash = (37 * hash) + MAX_SENTENCE_LENGTH_FIELD_NUMBER;
        hash = (53 * hash) + getMaxSentenceLength();
      }
if (hasNumThreads()) {
        hash = (37 * hash) + NUM_THREADS_FIELD_NUMBER;
        hash = (53 * hash) + getNumThreads();
      }
      if (hasNumSubIterations()) {
        hash = (37 * hash) + NUM_SUB_ITERATIONS_FIELD_NUMBER;
        hash = (53 * hash) + getNumSubIterations();
      }
      if (hasMaxSentencepieceLength()) {
        hash = (37 * hash) + MAX_SENTENCEPIECE_LENGTH_FIELD_NUMBER;
        hash = (53 * hash) + getMaxSentencepieceLength();
      }
      if (hasSplitByUnicodeScript()) {
        hash = (37 * hash) + SPLIT_BY_UNICODE_SCRIPT_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getSplitByUnicodeScript());
      }
      if (hasSplitByNumber()) {
        hash = (37 * hash) + SPLIT_BY_NUMBER_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getSplitByNumber());
      }
      if (hasSplitByWhitespace()) {
        hash = (37 * hash) + SPLIT_BY_WHITESPACE_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getSplitByWhitespace());
      }
      if (hasTreatWhitespaceAsSuffix()) {
        hash = (37 * hash) + TREAT_WHITESPACE_AS_SUFFIX_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getTreatWhitespaceAsSuffix());
      }
      if (hasAllowWhitespaceOnlyPieces()) {
        hash = (37 * hash) + ALLOW_WHITESPACE_ONLY_PIECES_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getAllowWhitespaceOnlyPieces());
      }
      if (hasSplitDigits()) {
        hash = (37 * hash) + SPLIT_DIGITS_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getSplitDigits());
      }
      if (getControlSymbolsCount() > 0) {
        hash = (37 * hash) + CONTROL_SYMBOLS_FIELD_NUMBER;
        hash = (53 * hash) + getControlSymbolsList().hashCode();
      }
      if (getUserDefinedSymbolsCount() > 0) {
        hash = (37 * hash) + USER_DEFINED_SYMBOLS_FIELD_NUMBER;
        hash = (53 * hash) + getUserDefinedSymbolsList().hashCode();
      }
      if (hasRequiredChars()) {
        hash = (37 * hash) + REQUIRED_CHARS_FIELD_NUMBER;
        hash = (53 * hash) + getRequiredChars().hashCode();
      }
      if (hasByteFallback()) {
        hash = (37 * hash) + BYTE_FALLBACK_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getByteFallback());
      }
      if (hasVocabularyOutputPieceScore()) {
        hash = (37 * hash) + VOCABULARY_OUTPUT_PIECE_SCORE_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getVocabularyOutputPieceScore());
      }
      if (hasHardVocabLimit()) {
        hash = (37 * hash) + HARD_VOCAB_LIMIT_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getHardVocabLimit());
      }
      if (hasUseAllVocab()) {
        hash = (37 * hash) + USE_ALL_VOCAB_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getUseAllVocab());
      }
      if (hasUnkId()) {
        hash = (37 * hash) + UNK_ID_FIELD_NUMBER;
        hash = (53 * hash) + getUnkId();
      }
      if (hasBosId()) {
        hash = (37 * hash) + BOS_ID_FIELD_NUMBER;
        hash = (53 * hash) + getBosId();
      }
      if (hasEosId()) {
        hash = (37 * hash) + EOS_ID_FIELD_NUMBER;
        hash = (53 * hash) + getEosId();
      }
      if (hasPadId()) {
        hash = (37 * hash) + PAD_ID_FIELD_NUMBER;
        hash = (53 * hash) + getPadId();
      }
      if (hasUnkPiece()) {
        hash = (37 * hash) + UNK_PIECE_FIELD_NUMBER;
        hash = (53 * hash) + getUnkPiece().hashCode();
      }
      if (hasBosPiece()) {
        hash = (37 * hash) + BOS_PIECE_FIELD_NUMBER;
        hash = (53 * hash) + getBosPiece().hashCode();
      }
      if (hasEosPiece()) {
        hash = (37 * hash) + EOS_PIECE_FIELD_NUMBER;
        hash = (53 * hash) + getEosPiece().hashCode();
      }
      if (hasPadPiece()) {
        hash = (37 * hash) + PAD_PIECE_FIELD_NUMBER;
        hash = (53 * hash) + getPadPiece().hashCode();
      }
      if (hasUnkSurface()) {
        hash = (37 * hash) + UNK_SURFACE_FIELD_NUMBER;
        hash = (53 * hash) + getUnkSurface().hashCode();
      }
      if (hasTrainExtremelyLargeCorpus()) {
        hash = (37 * hash) + TRAIN_EXTREMELY_LARGE_CORPUS_FIELD_NUMBER;
        hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(
            getTrainExtremelyLargeCorpus());
      }
      // Extension fields and unknown fields are mixed in last; the result is then
      // stored in memoizedHashCode.
      hash = hashFields(hash, getExtensionFields());
      hash = (29 * hash) + getUnknownFields().hashCode();
      memoizedHashCode = hash;
      return hash;
    }

    /**
     * Parses a {@code TrainerSpec} from a {@code ByteBuffer} via {@code PARSER}.
     *
     * @param data the buffer handed to {@code PARSER.parseFrom}
     * @return the message returned by {@code PARSER}
     * @throws com.google.protobuf.InvalidProtocolBufferException propagated from
     *     {@code PARSER.parseFrom}
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
        java.nio.ByteBuffer data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    /**
     * Parses a {@code TrainerSpec} from a {@code ByteBuffer} via {@code PARSER},
     * passing along the given extension registry.
     *
     * @param data the buffer handed to {@code PARSER.parseFrom}
     * @param extensionRegistry the registry handed to {@code PARSER.parseFrom}
     * @return the message returned by {@code PARSER}
     * @throws com.google.protobuf.InvalidProtocolBufferException propagated from
     *     {@code PARSER.parseFrom}
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
        java.nio.ByteBuffer data,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    /**
     * Parses a {@code TrainerSpec} from a {@code ByteString} via {@code PARSER}.
     *
     * @param data the bytes handed to {@code PARSER.parseFrom}
     * @return the message returned by {@code PARSER}
     * @throws com.google.protobuf.InvalidProtocolBufferException propagated from
     *     {@code PARSER.parseFrom}
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
        com.google.protobuf.ByteString data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    /**
     * Parses a {@code TrainerSpec} from a {@code ByteString} via {@code PARSER},
     * passing along the given extension registry.
     *
     * @param data the bytes handed to {@code PARSER.parseFrom}
     * @param extensionRegistry the registry handed to {@code PARSER.parseFrom}
     * @return the message returned by {@code PARSER}
     * @throws com.google.protobuf.InvalidProtocolBufferException propagated from
     *     {@code PARSER.parseFrom}
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
        com.google.protobuf.ByteString data,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    /**
     * Parses a {@code TrainerSpec} from a byte array via {@code PARSER}.
     *
     * @param data the bytes handed to {@code PARSER.parseFrom}
     * @return the message returned by {@code PARSER}
     * @throws com.google.protobuf.InvalidProtocolBufferException propagated from
     *     {@code PARSER.parseFrom}
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(byte[] data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    /**
     * Parses a {@code TrainerSpec} from a byte array via {@code PARSER}, passing
     * along the given extension registry.
     *
     * @param data the bytes handed to {@code PARSER.parseFrom}
     * @param extensionRegistry the registry handed to {@code PARSER.parseFrom}
     * @return the message returned by {@code PARSER}
     * @throws com.google.protobuf.InvalidProtocolBufferException propagated from
     *     {@code PARSER.parseFrom}
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
        byte[] data,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    /**
     * Parses a {@code TrainerSpec} from an input stream by delegating to
     * {@code GeneratedMessageV3.parseWithIOException} with {@code PARSER}.
     *
     * @param input the stream to read from
     * @return the message produced by the delegate
     * @throws java.io.IOException propagated from the delegate
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(java.io.InputStream input)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3
          .parseWithIOException(PARSER, input);
    }

    /**
     * Parses a {@code TrainerSpec} from an input stream by delegating to
     * {@code GeneratedMessageV3.parseWithIOException} with {@code PARSER} and the
     * given extension registry.
     *
     * @param input the stream to read from
     * @param extensionRegistry the registry handed to the delegate
     * @return the message produced by the delegate
     * @throws java.io.IOException propagated from the delegate
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
        java.io.InputStream input,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3
          .parseWithIOException(PARSER, input, extensionRegistry);
    }

    /**
     * Parses a {@code TrainerSpec} from an input stream by delegating to
     * {@code GeneratedMessageV3.parseDelimitedWithIOException} with {@code PARSER}.
     *
     * @param input the stream to read from
     * @return the message produced by the delegate
     * @throws java.io.IOException propagated from the delegate
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseDelimitedFrom(java.io.InputStream input)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3
          .parseDelimitedWithIOException(PARSER, input);
    }
/**
     * Parses a {@code TrainerSpec} by passing {@code PARSER}, {@code input} and
     * {@code extensionRegistry} to {@code GeneratedMessageV3.parseDelimitedWithIOException}.
     *
     * @param input stream to read from.
     * @param extensionRegistry registry forwarded to the parser.
     * @return the parsed message.
     * @throws java.io.IOException propagated from the delegate.
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseDelimitedFrom(
        java.io.InputStream input,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3
          .parseDelimitedWithIOException(PARSER, input, extensionRegistry);
    }

    /**
     * Parses a {@code TrainerSpec} from a {@code CodedInputStream} through
     * {@code GeneratedMessageV3.parseWithIOException}.
     *
     * @param input coded stream to read from.
     * @return the parsed message.
     * @throws java.io.IOException propagated from the delegate.
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
        com.google.protobuf.CodedInputStream input)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3
          .parseWithIOException(PARSER, input);
    }

    /**
     * Same as {@link #parseFrom(com.google.protobuf.CodedInputStream)}, additionally forwarding
     * {@code extensionRegistry} to the parser.
     *
     * @param input coded stream to read from.
     * @param extensionRegistry registry forwarded to the parser.
     * @return the parsed message.
     * @throws java.io.IOException propagated from the delegate.
     */
    public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
        com.google.protobuf.CodedInputStream input,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3
          .parseWithIOException(PARSER, input, extensionRegistry);
    }

    /** Returns a builder obtained from {@link #newBuilder()}. */
    @java.lang.Override
    public Builder newBuilderForType() {
      return newBuilder();
    }

    /** Returns the builder produced by {@code DEFAULT_INSTANCE.toBuilder()}. */
    public static Builder newBuilder() {
      return DEFAULT_INSTANCE.toBuilder();
    }

    /**
     * Returns a builder from {@code DEFAULT_INSTANCE.toBuilder()} with {@code prototype}
     * merged into it.
     *
     * @param prototype message whose contents are merged into the new builder.
     * @return the resulting builder.
     */
    public static Builder newBuilder(sentencepiece.SentencepieceModel.TrainerSpec prototype) {
      Builder seeded = DEFAULT_INSTANCE.toBuilder();
      return seeded.mergeFrom(prototype);
    }

    /**
     * Returns a new builder; unless this object is {@code DEFAULT_INSTANCE}, this message is
     * merged into the builder first.
     */
    @java.lang.Override
    public Builder toBuilder() {
      if (this == DEFAULT_INSTANCE) {
        // Nothing to merge for the default instance.
        return new Builder();
      }
      return new Builder().mergeFrom(this);
    }

    /**
     * Creates a builder attached to {@code parent}.
     *
     * @param parent builder parent handed to the {@code Builder} constructor.
     * @return the new builder.
     */
    @java.lang.Override
    protected Builder newBuilderForType(
        com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
      return new Builder(parent);
    }

    /** *
     * TrainerSpec encodes various parameters for SentencePiece training.
     * 
* * Protobuf type {@code sentencepiece.TrainerSpec} */ public static final class Builder extends com.google.protobuf.GeneratedMessageV3.ExtendableBuilder< sentencepiece.SentencepieceModel.TrainerSpec, Builder> implements // @@protoc_insertion_point(builder_implements:sentencepiece.TrainerSpec) sentencepiece.SentencepieceModel.TrainerSpecOrBuilder { public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_fieldAccessorTable .ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.TrainerSpec.class, sentencepiece.SentencepieceModel.TrainerSpec.Builder.class); } // Construct using sentencepiece.SentencepieceModel.TrainerSpec.newBuilder() private Builder() { } private Builder( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { super(parent); } @java.lang.Override public Builder clear() { super.clear(); bitField0_ = 0; bitField1_ = 0; input_ = com.google.protobuf.LazyStringArrayList.emptyList(); inputFormat_ = ""; modelPrefix_ = ""; modelType_ = 1; vocabSize_ = 8000; acceptLanguage_ = com.google.protobuf.LazyStringArrayList.emptyList(); selfTestSampleSize_ = 0; characterCoverage_ = 0.9995F; inputSentenceSize_ = 0L; shuffleInputSentence_ = true; miningSentenceSize_ = 0; trainingSentenceSize_ = 0; seedSentencepieceSize_ = 1000000; shrinkingFactor_ = 0.75F; maxSentenceLength_ = 4192; numThreads_ = 16; numSubIterations_ = 2; maxSentencepieceLength_ = 16; splitByUnicodeScript_ = true; splitByNumber_ = true; splitByWhitespace_ = true; treatWhitespaceAsSuffix_ = false; allowWhitespaceOnlyPieces_ = false; splitDigits_ = false; controlSymbols_ = com.google.protobuf.LazyStringArrayList.emptyList(); userDefinedSymbols_ = 
com.google.protobuf.LazyStringArrayList.emptyList(); requiredChars_ = ""; byteFallback_ = false; vocabularyOutputPieceScore_ = true; hardVocabLimit_ = true; useAllVocab_ = false; unkId_ = 0; bosId_ = 1; eosId_ = 2; padId_ = -1; unkPiece_ = ""; bosPiece_ = ""; eosPiece_ = ""; padPiece_ = ""; unkSurface_ = com.google.protobuf.Internal.stringDefaultValue(" \342\201\207 "); trainExtremelyLargeCorpus_ = false; return this; } @java.lang.Override public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_descriptor; } @java.lang.Override public sentencepiece.SentencepieceModel.TrainerSpec getDefaultInstanceForType() { return sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance(); } @java.lang.Override public sentencepiece.SentencepieceModel.TrainerSpec build() { sentencepiece.SentencepieceModel.TrainerSpec result = buildPartial(); if (!result.isInitialized()) { throw newUninitializedMessageException(result); } return result; } @java.lang.Override public sentencepiece.SentencepieceModel.TrainerSpec buildPartial() { sentencepiece.SentencepieceModel.TrainerSpec result = new sentencepiece.SentencepieceModel.TrainerSpec(this); if (bitField0_ != 0) { buildPartial0(result); } if (bitField1_ != 0) { buildPartial1(result); } onBuilt(); return result; } private void buildPartial0(sentencepiece.SentencepieceModel.TrainerSpec result) { int from_bitField0_ = bitField0_; if (((from_bitField0_ & 0x00000001) != 0)) { input_.makeImmutable(); result.input_ = input_; } int to_bitField0_ = 0; if (((from_bitField0_ & 0x00000002) != 0)) { result.inputFormat_ = inputFormat_; to_bitField0_ |= 0x00000001; } if (((from_bitField0_ & 0x00000004) != 0)) { result.modelPrefix_ = modelPrefix_; to_bitField0_ |= 0x00000002; } if (((from_bitField0_ & 0x00000008) != 0)) { result.modelType_ = modelType_; to_bitField0_ |= 0x00000004; } if (((from_bitField0_ & 0x00000010) != 0)) { result.vocabSize_ = 
vocabSize_; to_bitField0_ |= 0x00000008; } if (((from_bitField0_ & 0x00000020) != 0)) { acceptLanguage_.makeImmutable(); result.acceptLanguage_ = acceptLanguage_; } if (((from_bitField0_ & 0x00000040) != 0)) { result.selfTestSampleSize_ = selfTestSampleSize_; to_bitField0_ |= 0x00000010; } if (((from_bitField0_ & 0x00000080) != 0)) { result.characterCoverage_ = characterCoverage_; to_bitField0_ |= 0x00000020; } if (((from_bitField0_ & 0x00000100) != 0)) { result.inputSentenceSize_ = inputSentenceSize_; to_bitField0_ |= 0x00000040; } if (((from_bitField0_ & 0x00000200) != 0)) { result.shuffleInputSentence_ = shuffleInputSentence_; to_bitField0_ |= 0x00000080; } if (((from_bitField0_ & 0x00000400) != 0)) { result.miningSentenceSize_ = miningSentenceSize_; to_bitField0_ |= 0x00000100; } if (((from_bitField0_ & 0x00000800) != 0)) { result.trainingSentenceSize_ = trainingSentenceSize_; to_bitField0_ |= 0x00000200; } if (((from_bitField0_ & 0x00001000) != 0)) { result.seedSentencepieceSize_ = seedSentencepieceSize_; to_bitField0_ |= 0x00000400; } if (((from_bitField0_ & 0x00002000) != 0)) { result.shrinkingFactor_ = shrinkingFactor_; to_bitField0_ |= 0x00000800; } if (((from_bitField0_ & 0x00004000) != 0)) { result.maxSentenceLength_ = maxSentenceLength_; to_bitField0_ |= 0x00001000; } if (((from_bitField0_ & 0x00008000) != 0)) { result.numThreads_ = numThreads_; to_bitField0_ |= 0x00002000; } if (((from_bitField0_ & 0x00010000) != 0)) { result.numSubIterations_ = numSubIterations_; to_bitField0_ |= 0x00004000; } if (((from_bitField0_ & 0x00020000) != 0)) { result.maxSentencepieceLength_ = maxSentencepieceLength_; to_bitField0_ |= 0x00008000; } if (((from_bitField0_ & 0x00040000) != 0)) { result.splitByUnicodeScript_ = splitByUnicodeScript_; to_bitField0_ |= 0x00010000; } if (((from_bitField0_ & 0x00080000) != 0)) { result.splitByNumber_ = splitByNumber_; to_bitField0_ |= 0x00020000; } if (((from_bitField0_ & 0x00100000) != 0)) { result.splitByWhitespace_ = 
splitByWhitespace_; to_bitField0_ |= 0x00040000; } if (((from_bitField0_ & 0x00200000) != 0)) { result.treatWhitespaceAsSuffix_ = treatWhitespaceAsSuffix_; to_bitField0_ |= 0x00080000; } if (((from_bitField0_ & 0x00400000) != 0)) { result.allowWhitespaceOnlyPieces_ = allowWhitespaceOnlyPieces_; to_bitField0_ |= 0x00100000; } if (((from_bitField0_ & 0x00800000) != 0)) { result.splitDigits_ = splitDigits_; to_bitField0_ |= 0x00200000; } if (((from_bitField0_ & 0x01000000) != 0)) { controlSymbols_.makeImmutable(); result.controlSymbols_ = controlSymbols_; } if (((from_bitField0_ & 0x02000000) != 0)) { userDefinedSymbols_.makeImmutable(); result.userDefinedSymbols_ = userDefinedSymbols_; } if (((from_bitField0_ & 0x04000000) != 0)) { result.requiredChars_ = requiredChars_; to_bitField0_ |= 0x00400000; } if (((from_bitField0_ & 0x08000000) != 0)) { result.byteFallback_ = byteFallback_; to_bitField0_ |= 0x00800000; } if (((from_bitField0_ & 0x10000000) != 0)) { result.vocabularyOutputPieceScore_ = vocabularyOutputPieceScore_; to_bitField0_ |= 0x01000000; } if (((from_bitField0_ & 0x20000000) != 0)) { result.hardVocabLimit_ = hardVocabLimit_; to_bitField0_ |= 0x02000000; } if (((from_bitField0_ & 0x40000000) != 0)) { result.useAllVocab_ = useAllVocab_; to_bitField0_ |= 0x04000000; } if (((from_bitField0_ & 0x80000000) != 0)) { result.unkId_ = unkId_; to_bitField0_ |= 0x08000000; } result.bitField0_ |= to_bitField0_; } private void buildPartial1(sentencepiece.SentencepieceModel.TrainerSpec result) { int from_bitField1_ = bitField1_; int to_bitField0_ = 0; if (((from_bitField1_ & 0x00000001) != 0)) { result.bosId_ = bosId_; to_bitField0_ |= 0x10000000; } if (((from_bitField1_ & 0x00000002) != 0)) { result.eosId_ = eosId_; to_bitField0_ |= 0x20000000; } if (((from_bitField1_ & 0x00000004) != 0)) { result.padId_ = padId_; to_bitField0_ |= 0x40000000; } if (((from_bitField1_ & 0x00000008) != 0)) { result.unkPiece_ = unkPiece_; to_bitField0_ |= 0x80000000; } int to_bitField1_ = 
0; if (((from_bitField1_ & 0x00000010) != 0)) { result.bosPiece_ = bosPiece_; to_bitField1_ |= 0x00000001; } if (((from_bitField1_ & 0x00000020) != 0)) { result.eosPiece_ = eosPiece_; to_bitField1_ |= 0x00000002; } if (((from_bitField1_ & 0x00000040) != 0)) { result.padPiece_ = padPiece_; to_bitField1_ |= 0x00000004; } if (((from_bitField1_ & 0x00000080) != 0)) { result.unkSurface_ = unkSurface_; to_bitField1_ |= 0x00000008; } if (((from_bitField1_ & 0x00000100) != 0)) { result.trainExtremelyLargeCorpus_ = trainExtremelyLargeCorpus_; to_bitField1_ |= 0x00000010; } result.bitField0_ |= to_bitField0_; result.bitField1_ |= to_bitField1_; } @java.lang.Override public Builder clone() { return super.clone(); } @java.lang.Override public Builder setField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.setField(field, value); } @java.lang.Override public Builder clearField( com.google.protobuf.Descriptors.FieldDescriptor field) { return super.clearField(field); } @java.lang.Override public Builder clearOneof( com.google.protobuf.Descriptors.OneofDescriptor oneof) { return super.clearOneof(oneof); } @java.lang.Override public Builder setRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, int index, java.lang.Object value) { return super.setRepeatedField(field, index, value); } @java.lang.Override public Builder addRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.addRepeatedField(field, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.TrainerSpec, Type> extension, Type value) { return super.setExtension(extension, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.TrainerSpec, java.util.List> extension, int index, Type value) { return 
super.setExtension(extension, index, value); } @java.lang.Override public Builder addExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.TrainerSpec, java.util.List> extension, Type value) { return super.addExtension(extension, value); } @java.lang.Override public Builder clearExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.TrainerSpec, T> extension) { return super.clearExtension(extension); } @java.lang.Override public Builder mergeFrom(com.google.protobuf.Message other) { if (other instanceof sentencepiece.SentencepieceModel.TrainerSpec) { return mergeFrom((sentencepiece.SentencepieceModel.TrainerSpec)other); } else { super.mergeFrom(other); return this; } } public Builder mergeFrom(sentencepiece.SentencepieceModel.TrainerSpec other) { if (other == sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance()) return this; if (!other.input_.isEmpty()) { if (input_.isEmpty()) { input_ = other.input_; bitField0_ |= 0x00000001; } else { ensureInputIsMutable(); input_.addAll(other.input_); } onChanged(); } if (other.hasInputFormat()) { inputFormat_ = other.inputFormat_; bitField0_ |= 0x00000002; onChanged(); } if (other.hasModelPrefix()) { modelPrefix_ = other.modelPrefix_; bitField0_ |= 0x00000004; onChanged(); } if (other.hasModelType()) { setModelType(other.getModelType()); } if (other.hasVocabSize()) { setVocabSize(other.getVocabSize()); } if (!other.acceptLanguage_.isEmpty()) { if (acceptLanguage_.isEmpty()) { acceptLanguage_ = other.acceptLanguage_; bitField0_ |= 0x00000020; } else { ensureAcceptLanguageIsMutable(); acceptLanguage_.addAll(other.acceptLanguage_); } onChanged(); } if (other.hasSelfTestSampleSize()) { setSelfTestSampleSize(other.getSelfTestSampleSize()); } if (other.hasCharacterCoverage()) { setCharacterCoverage(other.getCharacterCoverage()); } if (other.hasInputSentenceSize()) { setInputSentenceSize(other.getInputSentenceSize()); } if 
(other.hasShuffleInputSentence()) { setShuffleInputSentence(other.getShuffleInputSentence()); } if (other.hasMiningSentenceSize()) { setMiningSentenceSize(other.getMiningSentenceSize()); } if (other.hasTrainingSentenceSize()) { setTrainingSentenceSize(other.getTrainingSentenceSize()); } if (other.hasSeedSentencepieceSize()) { setSeedSentencepieceSize(other.getSeedSentencepieceSize()); } if (other.hasShrinkingFactor()) { setShrinkingFactor(other.getShrinkingFactor()); } if (other.hasMaxSentenceLength()) { setMaxSentenceLength(other.getMaxSentenceLength()); } if (other.hasNumThreads()) { setNumThreads(other.getNumThreads()); } if (other.hasNumSubIterations()) { setNumSubIterations(other.getNumSubIterations()); } if (other.hasMaxSentencepieceLength()) { setMaxSentencepieceLength(other.getMaxSentencepieceLength()); } if (other.hasSplitByUnicodeScript()) { setSplitByUnicodeScript(other.getSplitByUnicodeScript()); } if (other.hasSplitByNumber()) { setSplitByNumber(other.getSplitByNumber()); } if (other.hasSplitByWhitespace()) { setSplitByWhitespace(other.getSplitByWhitespace()); } if (other.hasTreatWhitespaceAsSuffix()) { setTreatWhitespaceAsSuffix(other.getTreatWhitespaceAsSuffix()); } if (other.hasAllowWhitespaceOnlyPieces()) { setAllowWhitespaceOnlyPieces(other.getAllowWhitespaceOnlyPieces()); } if (other.hasSplitDigits()) { setSplitDigits(other.getSplitDigits()); } if (!other.controlSymbols_.isEmpty()) { if (controlSymbols_.isEmpty()) { controlSymbols_ = other.controlSymbols_; bitField0_ |= 0x01000000; } else { ensureControlSymbolsIsMutable(); controlSymbols_.addAll(other.controlSymbols_); } onChanged(); } if (!other.userDefinedSymbols_.isEmpty()) { if (userDefinedSymbols_.isEmpty()) { userDefinedSymbols_ = other.userDefinedSymbols_; bitField0_ |= 0x02000000; } else { ensureUserDefinedSymbolsIsMutable(); userDefinedSymbols_.addAll(other.userDefinedSymbols_); } onChanged(); } if (other.hasRequiredChars()) { requiredChars_ = other.requiredChars_; bitField0_ |= 
0x04000000; onChanged(); } if (other.hasByteFallback()) { setByteFallback(other.getByteFallback()); } if (other.hasVocabularyOutputPieceScore()) { setVocabularyOutputPieceScore(other.getVocabularyOutputPieceScore()); } if (other.hasHardVocabLimit()) { setHardVocabLimit(other.getHardVocabLimit()); } if (other.hasUseAllVocab()) { setUseAllVocab(other.getUseAllVocab()); } if (other.hasUnkId()) { setUnkId(other.getUnkId()); } if (other.hasBosId()) { setBosId(other.getBosId()); } if (other.hasEosId()) { setEosId(other.getEosId()); } if (other.hasPadId()) { setPadId(other.getPadId()); } if (other.hasUnkPiece()) { unkPiece_ = other.unkPiece_; bitField1_ |= 0x00000008; onChanged(); } if (other.hasBosPiece()) { bosPiece_ = other.bosPiece_; bitField1_ |= 0x00000010; onChanged(); } if (other.hasEosPiece()) { eosPiece_ = other.eosPiece_; bitField1_ |= 0x00000020; onChanged(); } if (other.hasPadPiece()) { padPiece_ = other.padPiece_; bitField1_ |= 0x00000040; onChanged(); } if (other.hasUnkSurface()) { unkSurface_ = other.unkSurface_; bitField1_ |= 0x00000080; onChanged(); } if (other.hasTrainExtremelyLargeCorpus()) { setTrainExtremelyLargeCorpus(other.getTrainExtremelyLargeCorpus()); } this.mergeExtensionFields(other); this.mergeUnknownFields(other.getUnknownFields()); onChanged(); return this; } @java.lang.Override public final boolean isInitialized() { if (!extensionsAreInitialized()) { return false; } return true; } @java.lang.Override public Builder mergeFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { if (extensionRegistry == null) { throw new java.lang.NullPointerException(); } try { boolean done = false; while (!done) { int tag = input.readTag(); switch (tag) { case 0: done = true; break; case 10: { com.google.protobuf.ByteString bs = input.readBytes(); ensureInputIsMutable(); input_.add(bs); break; } // case 10 case 18: { modelPrefix_ = input.readBytes(); bitField0_ |= 
0x00000004; break; } // case 18 case 24: { int tmpRaw = input.readEnum(); sentencepiece.SentencepieceModel.TrainerSpec.ModelType tmpValue = sentencepiece.SentencepieceModel.TrainerSpec.ModelType.forNumber(tmpRaw); if (tmpValue == null) { mergeUnknownVarintField(3, tmpRaw); } else { modelType_ = tmpRaw; bitField0_ |= 0x00000008; } break; } // case 24 case 32: { vocabSize_ = input.readInt32(); bitField0_ |= 0x00000010; break; } // case 32 case 42: { com.google.protobuf.ByteString bs = input.readBytes(); ensureAcceptLanguageIsMutable(); acceptLanguage_.add(bs); break; } // case 42 case 48: { selfTestSampleSize_ = input.readInt32(); bitField0_ |= 0x00000040; break; } // case 48 case 58: { inputFormat_ = input.readBytes(); bitField0_ |= 0x00000002; break; } // case 58 case 85: { characterCoverage_ = input.readFloat(); bitField0_ |= 0x00000080; break; } // case 85 case 88: { inputSentenceSize_ = input.readUInt64(); bitField0_ |= 0x00000100; break; } // case 88 case 96: { miningSentenceSize_ = input.readInt32(); bitField0_ |= 0x00000400; break; } // case 96 case 104: { trainingSentenceSize_ = input.readInt32(); bitField0_ |= 0x00000800; break; } // case 104 case 112: { seedSentencepieceSize_ = input.readInt32(); bitField0_ |= 0x00001000; break; } // case 112 case 125: { shrinkingFactor_ = input.readFloat(); bitField0_ |= 0x00002000; break; } // case 125 case 128: { numThreads_ = input.readInt32(); bitField0_ |= 0x00008000; break; } // case 128 case 136: { numSubIterations_ = input.readInt32(); bitField0_ |= 0x00010000; break; } // case 136 case 144: { maxSentenceLength_ = input.readInt32(); bitField0_ |= 0x00004000; break; } // case 144 case 152: { shuffleInputSentence_ = input.readBool(); bitField0_ |= 0x00000200; break; } // case 152 case 160: { maxSentencepieceLength_ = input.readInt32(); bitField0_ |= 0x00020000; break; } // case 160 case 168: { splitByUnicodeScript_ = input.readBool(); bitField0_ |= 0x00040000; break; } // case 168 case 176: { splitByWhitespace_ = 
input.readBool(); bitField0_ |= 0x00100000; break; } // case 176 case 184: { splitByNumber_ = input.readBool(); bitField0_ |= 0x00080000; break; } // case 184 case 192: { treatWhitespaceAsSuffix_ = input.readBool(); bitField0_ |= 0x00200000; break; } // case 192 case 200: { splitDigits_ = input.readBool(); bitField0_ |= 0x00800000; break; } // case 200 case 208: { allowWhitespaceOnlyPieces_ = input.readBool(); bitField0_ |= 0x00400000; break; } // case 208 case 242: { com.google.protobuf.ByteString bs = input.readBytes(); ensureControlSymbolsIsMutable(); controlSymbols_.add(bs); break; } // case 242 case 250: { com.google.protobuf.ByteString bs = input.readBytes(); ensureUserDefinedSymbolsIsMutable(); userDefinedSymbols_.add(bs); break; } // case 250 case 256: { vocabularyOutputPieceScore_ = input.readBool(); bitField0_ |= 0x10000000; break; } // case 256 case 264: { hardVocabLimit_ = input.readBool(); bitField0_ |= 0x20000000; break; } // case 264 case 272: { useAllVocab_ = input.readBool(); bitField0_ |= 0x40000000; break; } // case 272 case 280: { byteFallback_ = input.readBool(); bitField0_ |= 0x08000000; break; } // case 280 case 290: { requiredChars_ = input.readBytes(); bitField0_ |= 0x04000000; break; } // case 290 case 320: { unkId_ = input.readInt32(); bitField0_ |= 0x80000000; break; } // case 320 case 328: { bosId_ = input.readInt32(); bitField1_ |= 0x00000001; break; } // case 328 case 336: { eosId_ = input.readInt32(); bitField1_ |= 0x00000002; break; } // case 336 case 344: { padId_ = input.readInt32(); bitField1_ |= 0x00000004; break; } // case 344 case 354: { unkSurface_ = input.readBytes(); bitField1_ |= 0x00000080; break; } // case 354 case 362: { unkPiece_ = input.readBytes(); bitField1_ |= 0x00000008; break; } // case 362 case 370: { bosPiece_ = input.readBytes(); bitField1_ |= 0x00000010; break; } // case 370 case 378: { eosPiece_ = input.readBytes(); bitField1_ |= 0x00000020; break; } // case 378 case 386: { padPiece_ = input.readBytes(); 
bitField1_ |= 0x00000040; break; } // case 386 case 392: { trainExtremelyLargeCorpus_ = input.readBool(); bitField1_ |= 0x00000100; break; } // case 392 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { done = true; // was an endgroup tag } break; } // default: } // switch (tag) } // while (!done) } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.unwrapIOException(); } finally { onChanged(); } // finally return this; } private int bitField0_; private int bitField1_; private com.google.protobuf.LazyStringArrayList input_ = com.google.protobuf.LazyStringArrayList.emptyList(); private void ensureInputIsMutable() { if (!input_.isModifiable()) { input_ = new com.google.protobuf.LazyStringArrayList(input_); } bitField0_ |= 0x00000001; } /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
* * repeated string input = 1; * @return A list containing the input. */ public com.google.protobuf.ProtocolStringList getInputList() { input_.makeImmutable(); return input_; } /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
*
       * <code>repeated string input = 1;</code>
       * @return the number of {@code input} entries currently held by this builder.
       */
      public int getInputCount() {
        final int entryCount = input_.size();
        return entryCount;
      }

      /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
*
       * <code>repeated string input = 1;</code>
       * @param index position of the entry to read.
       * @return the {@code input} entry stored at {@code index}.
       */
      public java.lang.String getInput(int index) {
        final java.lang.String entry = input_.get(index);
        return entry;
      }

      /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
*
       * <code>repeated string input = 1;</code>
       * @param index position of the entry to read.
       * @return the {@code input} entry at {@code index}, as a {@code ByteString}.
       */
      public com.google.protobuf.ByteString
          getInputBytes(int index) {
        final com.google.protobuf.ByteString entryBytes = input_.getByteString(index);
        return entryBytes;
      }

      /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
*
       * <code>repeated string input = 1;</code>
       * @param index position of the entry to overwrite.
       * @param value replacement entry; must not be {@code null}.
       * @return this builder, for chaining.
       * @throws NullPointerException if {@code value} is {@code null}.
       */
      public Builder setInput(
          int index, java.lang.String value) {
        // Reject null before touching any builder state.
        java.util.Objects.requireNonNull(value);
        ensureInputIsMutable();
        input_.set(index, value);
        bitField0_ = bitField0_ | 0x00000001;
        onChanged();
        return this;
      }

      /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
*
       * <code>repeated string input = 1;</code>
       * @param value entry to append; must not be {@code null}.
       * @return this builder, for chaining.
       * @throws NullPointerException if {@code value} is {@code null}.
       */
      public Builder addInput(
          java.lang.String value) {
        // Reject null before touching any builder state.
        java.util.Objects.requireNonNull(value);
        ensureInputIsMutable();
        input_.add(value);
        bitField0_ = bitField0_ | 0x00000001;
        onChanged();
        return this;
      }

      /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
*
       * <code>repeated string input = 1;</code>
       * @param values entries to append to {@code input}.
       * @return this builder, for chaining.
       */
      public Builder addAllInput(
          java.lang.Iterable<java.lang.String> values) {
        // The element type is spelled out: with a raw Iterable the compiler cannot stop
        // non-String elements from being copied into the string list.
        ensureInputIsMutable();
        com.google.protobuf.AbstractMessageLite.Builder.addAll(
            values, input_);
        bitField0_ |= 0x00000001;
        onChanged();
        return this;
      }

      /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
*
       * <code>repeated string input = 1;</code>
       * @return this builder, for chaining.
       */
      public Builder clearInput() {
        // Swap in the shared empty list and drop the builder's bit for this field.
        input_ = com.google.protobuf.LazyStringArrayList.emptyList();
        bitField0_ &= ~0x00000001;
        onChanged();
        return this;
      }

      /** *
       *&#47;////////////////////////////////////////////////////////////////
       * General parameters
       *
       * Input corpus files.
       *  Trainer accepts the following two formats:
       *  A) Monolingual: plain text, one sentence per line.
       *  B) Bilingual:   TSV, source sentence &lt;tab&gt; target sentence
       *  When bilingual data is passed, shared vocabulary model is built.
       *  Note that the input file must be raw corpus, not a preprocessed corpus.
       *  Trainer only loads the first `input_sentence_size` sentences specified
       *  with this parameter.
       * 
*
       * <code>repeated string input = 1;</code>
       * @param value bytes of the entry to append; must not be {@code null}.
       * @return this builder, for chaining.
       * @throws NullPointerException if {@code value} is {@code null}.
       */
      public Builder addInputBytes(
          com.google.protobuf.ByteString value) {
        // Reject null before touching any builder state.
        java.util.Objects.requireNonNull(value);
        ensureInputIsMutable();
        input_.add(value);
        bitField0_ = bitField0_ | 0x00000001;
        onChanged();
        return this;
      }

      // Holds either a java.lang.String or a ByteString; the inputFormat accessors
      // convert between the two on demand.
      private java.lang.Object inputFormat_ = "";

      /** *
       * Input corpus format:
       * "text": one-sentence-per-line text format (default)
       * "tsv":  sentence &lt;tab&gt; freq
       * 
* * optional string input_format = 7; * @return Whether the inputFormat field is set. */ public boolean hasInputFormat() { return ((bitField0_ & 0x00000002) != 0); } /** *
       * Input corpus format:
       * "text": one-sentence-per-line text format (default)
       * "tsv":  sentence &lt;tab&gt; freq
       * 
*
       * <code>optional string input_format = 7;</code>
       * @return the inputFormat as a {@code String}.
       */
      public java.lang.String getInputFormat() {
        java.lang.Object current = inputFormat_;
        if (current instanceof java.lang.String) {
          return (java.lang.String) current;
        }
        // Stored as bytes: decode, and keep the decoded form only when the bytes are valid UTF-8.
        com.google.protobuf.ByteString raw =
            (com.google.protobuf.ByteString) current;
        java.lang.String decoded = raw.toStringUtf8();
        if (raw.isValidUtf8()) {
          inputFormat_ = decoded;
        }
        return decoded;
      }

      /** *
       * Input corpus format:
       * "text": one-sentence-per-line text format (default)
       * "tsv":  sentence &lt;tab&gt; freq
       * 
*
       * <code>optional string input_format = 7;</code>
       * @return the inputFormat as a {@code ByteString}.
       */
      public com.google.protobuf.ByteString
          getInputFormatBytes() {
        java.lang.Object current = inputFormat_;
        if (!(current instanceof java.lang.String)) {
          return (com.google.protobuf.ByteString) current;
        }
        // Stored as a String: encode as UTF-8 and keep the encoded form in the field.
        com.google.protobuf.ByteString encoded =
            com.google.protobuf.ByteString.copyFromUtf8(
                (java.lang.String) current);
        inputFormat_ = encoded;
        return encoded;
      }

      /** *
       * Input corpus format:
       * "text": one-sentence-per-line text format (default)
       * "tsv":  sentence &lt;tab&gt; freq
       * 
*
       * <code>optional string input_format = 7;</code>
       * @param value the inputFormat to store; must not be {@code null}.
       * @return this builder, for chaining.
       * @throws NullPointerException if {@code value} is {@code null}.
       */
      public Builder setInputFormat(
          java.lang.String value) {
        // Reject null before touching any builder state.
        java.util.Objects.requireNonNull(value);
        inputFormat_ = value;
        bitField0_ = bitField0_ | 0x00000002;
        onChanged();
        return this;
      }

      /** *
       * Input corpus format:
       * "text": one-sentence-per-line text format (default)
       * "tsv":  sentence &lt;tab&gt; freq
       * 
* <code>optional string input_format = 7;</code>
       * @return This builder for chaining.
       */
      public Builder clearInputFormat() {
        // Restore the value reported by the default instance, then drop the presence bit.
        inputFormat_ = getDefaultInstance().getInputFormat();
        bitField0_ &= ~0x00000002;
        onChanged();
        return this;
      }

      /**
       *
       * Input corpus format:
       * "text": one-sentence-per-line text format (default)
       * "tsv":  sentence &lt;tab&gt; freq
       * 
* <code>optional string input_format = 7;</code>
       * @param value The bytes for inputFormat to set.
       * @return This builder for chaining.
       */
      public Builder setInputFormatBytes(com.google.protobuf.ByteString value) {
        if (null == value) {
          throw new NullPointerException();
        }
        // The bytes are stored as-is; no UTF-8 validation happens in this method.
        inputFormat_ = value;
        bitField0_ |= 0x00000002;
        onChanged();
        return this;
      }

      // Holds model_prefix as either a String or a ByteString; the accessors convert lazily.
      private java.lang.Object modelPrefix_ = "";

      /**
       *
       * Output model file prefix.
       * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
       * 
* <code>optional string model_prefix = 2;</code>
       * @return Whether the modelPrefix field is set.
       */
      public boolean hasModelPrefix() {
        // Presence of model_prefix is tracked by bit 0x00000004.
        final boolean isSet = (bitField0_ & 0x00000004) != 0;
        return isSet;
      }

      /**
       *
       * Output model file prefix.
       * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
       * 
* <code>optional string model_prefix = 2;</code>
       * @return The modelPrefix.
       */
      public java.lang.String getModelPrefix() {
        final java.lang.Object current = modelPrefix_;
        if (current instanceof java.lang.String) {
          return (java.lang.String) current;
        }
        // Stored as bytes: decode, and keep the decoded String only when the bytes are valid UTF-8.
        final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
        final java.lang.String decoded = raw.toStringUtf8();
        if (raw.isValidUtf8()) {
          modelPrefix_ = decoded;
        }
        return decoded;
      }

      /**
       *
       * Output model file prefix.
       * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
       * 
* <code>optional string model_prefix = 2;</code>
       * @return The bytes for modelPrefix.
       */
      public com.google.protobuf.ByteString getModelPrefixBytes() {
        final java.lang.Object current = modelPrefix_;
        if (!(current instanceof java.lang.String)) {
          return (com.google.protobuf.ByteString) current;
        }
        final com.google.protobuf.ByteString encoded =
            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
        // Keep the encoded form so the next byte read takes the fast path above.
        modelPrefix_ = encoded;
        return encoded;
      }

      /**
       *
       * Output model file prefix.
       * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
       * 
* <code>optional string model_prefix = 2;</code>
       * @param value The modelPrefix to set.
       * @return This builder for chaining.
       */
      public Builder setModelPrefix(java.lang.String value) {
        if (null == value) {
          throw new NullPointerException();
        }
        modelPrefix_ = value;
        // Mark model_prefix as present.
        bitField0_ |= 0x00000004;
        onChanged();
        return this;
      }

      /**
       *
       * Output model file prefix.
       * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
       * 
* <code>optional string model_prefix = 2;</code>
       * @return This builder for chaining.
       */
      public Builder clearModelPrefix() {
        // Restore the value reported by the default instance, then drop the presence bit.
        modelPrefix_ = getDefaultInstance().getModelPrefix();
        bitField0_ &= ~0x00000004;
        onChanged();
        return this;
      }

      /**
       *
       * Output model file prefix.
       * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
       * 
* * optional string model_prefix = 2; * @param value The bytes for modelPrefix to set. * @return This builder for chaining. */ public Builder setModelPrefixBytes( com.google.protobuf.ByteString value) { if (value == null) { throw new NullPointerException(); } modelPrefix_ = value; bitField0_ |= 0x00000004; onChanged(); return this; } private int modelType_ = 1; /** * optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; * @return Whether the modelType field is set. */ @java.lang.Override public boolean hasModelType() { return ((bitField0_ & 0x00000008) != 0); } /** * optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; * @return The modelType. */ @java.lang.Override public sentencepiece.SentencepieceModel.TrainerSpec.ModelType getModelType() { sentencepiece.SentencepieceModel.TrainerSpec.ModelType result = sentencepiece.SentencepieceModel.TrainerSpec.ModelType.forNumber(modelType_); return result == null ? sentencepiece.SentencepieceModel.TrainerSpec.ModelType.UNIGRAM : result; } /** * optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; * @param value The modelType to set. * @return This builder for chaining. */ public Builder setModelType(sentencepiece.SentencepieceModel.TrainerSpec.ModelType value) { if (value == null) { throw new NullPointerException(); } bitField0_ |= 0x00000008; modelType_ = value.getNumber(); onChanged(); return this; } /** * optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; * @return This builder for chaining. */ public Builder clearModelType() { bitField0_ = (bitField0_ & ~0x00000008); modelType_ = 1; onChanged(); return this; } private int vocabSize_ = 8000; /** *
       * Vocabulary size. 8k is the default size.
       * 
* <code>optional int32 vocab_size = 4 [default = 8000];</code>
       * @return Whether the vocabSize field is set.
       */
      @java.lang.Override
      public boolean hasVocabSize() {
        // Presence of vocab_size is tracked by bit 0x00000010.
        final boolean isSet = (bitField0_ & 0x00000010) != 0;
        return isSet;
      }

      /**
       *
       * Vocabulary size. 8k is the default size.
       * 
* <code>optional int32 vocab_size = 4 [default = 8000];</code>
       * @return The vocabSize.
       */
      @java.lang.Override
      public int getVocabSize() {
        // Returns the stored value without consulting the presence bit.
        return vocabSize_;
      }

      /**
       *
       * Vocabulary size. 8k is the default size.
       * 
* <code>optional int32 vocab_size = 4 [default = 8000];</code>
       * @param value The vocabSize to set.
       * @return This builder for chaining.
       */
      public Builder setVocabSize(int value) {
        vocabSize_ = value;
        // Mark vocab_size as present.
        bitField0_ |= 0x00000010;
        onChanged();
        return this;
      }

      /**
       *
       * Vocabulary size. 8k is the default size.
       * 
* <code>optional int32 vocab_size = 4 [default = 8000];</code>
       * @return This builder for chaining.
       */
      public Builder clearVocabSize() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00000010;
        vocabSize_ = 8000;
        onChanged();
        return this;
      }

      // Backing list for accept_language; starts as the shared empty list.
      private com.google.protobuf.LazyStringArrayList acceptLanguage_ =
          com.google.protobuf.LazyStringArrayList.emptyList();

      // Replaces acceptLanguage_ with a fresh copy when it is not modifiable, then sets bit 0x00000020.
      private void ensureAcceptLanguageIsMutable() {
        final boolean writable = acceptLanguage_.isModifiable();
        if (!writable) {
          acceptLanguage_ = new com.google.protobuf.LazyStringArrayList(acceptLanguage_);
        }
        bitField0_ |= 0x00000020;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @return A list containing the acceptLanguage.
       */
      public com.google.protobuf.ProtocolStringList getAcceptLanguageList() {
        // makeImmutable() is invoked on the backing list before it is handed out.
        acceptLanguage_.makeImmutable();
        return acceptLanguage_;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @return The count of acceptLanguage.
       */
      public int getAcceptLanguageCount() {
        final int count = acceptLanguage_.size();
        return count;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @param index The index of the element to return.
       * @return The acceptLanguage at the given index.
       */
      public java.lang.String getAcceptLanguage(int index) {
        // Index validation is left to the backing list.
        final java.lang.String element = acceptLanguage_.get(index);
        return element;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @param index The index of the value to return.
       * @return The bytes of the acceptLanguage at the given index.
       */
      public com.google.protobuf.ByteString getAcceptLanguageBytes(int index) {
        // Index validation is left to the backing list.
        final com.google.protobuf.ByteString element = acceptLanguage_.getByteString(index);
        return element;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @param index The index to set the value at.
       * @param value The acceptLanguage to set.
       * @return This builder for chaining.
       */
      public Builder setAcceptLanguage(int index, java.lang.String value) {
        if (null == value) {
          throw new NullPointerException();
        }
        ensureAcceptLanguageIsMutable();
        acceptLanguage_.set(index, value);
        bitField0_ |= 0x00000020;
        onChanged();
        return this;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @param value The acceptLanguage to add.
       * @return This builder for chaining.
       */
      public Builder addAcceptLanguage(java.lang.String value) {
        if (null == value) {
          throw new NullPointerException();
        }
        ensureAcceptLanguageIsMutable();
        acceptLanguage_.add(value);
        bitField0_ |= 0x00000020;
        onChanged();
        return this;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @param values The acceptLanguage to add.
       * @return This builder for chaining.
       */
      public Builder addAllAcceptLanguage(
          java.lang.Iterable<java.lang.String> values) {
        // The parameter is typed as Iterable<String> rather than a raw Iterable, so the
        // addAll call below is checked at compile time against the String-typed list.
        ensureAcceptLanguageIsMutable();
        com.google.protobuf.AbstractMessageLite.Builder.addAll(
            values, acceptLanguage_);
        bitField0_ |= 0x00000020;
        onChanged();
        return this;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @return This builder for chaining.
       */
      public Builder clearAcceptLanguage() {
        // Swap in the shared empty list and drop the field's bit.
        acceptLanguage_ =
            com.google.protobuf.LazyStringArrayList.emptyList();
        bitField0_ = (bitField0_ & ~0x00000020);
        onChanged();
        return this;
      }

      /**
       *
       * List of the languages this model can accept.
       * Since the model is language-agnostic, this field is used as a reference.
       * 
* <code>repeated string accept_language = 5;</code>
       * @param value The bytes of the acceptLanguage to add.
       * @return This builder for chaining.
       */
      public Builder addAcceptLanguageBytes(com.google.protobuf.ByteString value) {
        if (null == value) {
          throw new NullPointerException();
        }
        ensureAcceptLanguageIsMutable();
        acceptLanguage_.add(value);
        bitField0_ |= 0x00000020;
        onChanged();
        return this;
      }

      // Backing value for self_test_sample_size; no initializer, so it starts at 0.
      private int selfTestSampleSize_ ;

      /**
       *
       * Size of self-test samples, which are encoded in the model file.
       * 
* <code>optional int32 self_test_sample_size = 6 [default = 0];</code>
       * @return Whether the selfTestSampleSize field is set.
       */
      @java.lang.Override
      public boolean hasSelfTestSampleSize() {
        // Presence of self_test_sample_size is tracked by bit 0x00000040.
        final boolean isSet = (bitField0_ & 0x00000040) != 0;
        return isSet;
      }

      /**
       *
       * Size of self-test samples, which are encoded in the model file.
       * 
* <code>optional int32 self_test_sample_size = 6 [default = 0];</code>
       * @return The selfTestSampleSize.
       */
      @java.lang.Override
      public int getSelfTestSampleSize() {
        // Returns the stored value without consulting the presence bit.
        return selfTestSampleSize_;
      }

      /**
       *
       * Size of self-test samples, which are encoded in the model file.
       * 
* <code>optional int32 self_test_sample_size = 6 [default = 0];</code>
       * @param value The selfTestSampleSize to set.
       * @return This builder for chaining.
       */
      public Builder setSelfTestSampleSize(int value) {
        selfTestSampleSize_ = value;
        // Mark self_test_sample_size as present.
        bitField0_ |= 0x00000040;
        onChanged();
        return this;
      }

      /**
       *
       * Size of self-test samples, which are encoded in the model file.
       * 
* <code>optional int32 self_test_sample_size = 6 [default = 0];</code>
       * @return This builder for chaining.
       */
      public Builder clearSelfTestSampleSize() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00000040;
        selfTestSampleSize_ = 0;
        onChanged();
        return this;
      }

      // Backing value for character_coverage; starts at the declared default of 0.9995.
      private float characterCoverage_ = 0.9995F;

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Training parameters.
       *
       * Uses characters which cover the corpus with the ratio of `chars_coverage`.
       * This parameter determines the set of basic Alphabet of sentence piece.
       * 1.0 - `chars_coverage` characters are treated as UNK.
       * See also required_chars field.
       * 
* <code>optional float character_coverage = 10 [default = 0.9995];</code>
       * @return Whether the characterCoverage field is set.
       */
      @java.lang.Override
      public boolean hasCharacterCoverage() {
        // Presence of character_coverage is tracked by bit 0x00000080.
        final boolean isSet = (bitField0_ & 0x00000080) != 0;
        return isSet;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Training parameters.
       *
       * Uses characters which cover the corpus with the ratio of `chars_coverage`.
       * This parameter determines the set of basic Alphabet of sentence piece.
       * 1.0 - `chars_coverage` characters are treated as UNK.
       * See also required_chars field.
       * 
* <code>optional float character_coverage = 10 [default = 0.9995];</code>
       * @return The characterCoverage.
       */
      @java.lang.Override
      public float getCharacterCoverage() {
        // Returns the stored value without consulting the presence bit.
        return characterCoverage_;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Training parameters.
       *
       * Uses characters which cover the corpus with the ratio of `chars_coverage`.
       * This parameter determines the set of basic Alphabet of sentence piece.
       * 1.0 - `chars_coverage` characters are treated as UNK.
       * See also required_chars field.
       * 
* <code>optional float character_coverage = 10 [default = 0.9995];</code>
       * @param value The characterCoverage to set.
       * @return This builder for chaining.
       */
      public Builder setCharacterCoverage(float value) {
        characterCoverage_ = value;
        // Mark character_coverage as present.
        bitField0_ |= 0x00000080;
        onChanged();
        return this;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Training parameters.
       *
       * Uses characters which cover the corpus with the ratio of `chars_coverage`.
       * This parameter determines the set of basic Alphabet of sentence piece.
       * 1.0 - `chars_coverage` characters are treated as UNK.
       * See also required_chars field.
       * 
* <code>optional float character_coverage = 10 [default = 0.9995];</code>
       * @return This builder for chaining.
       */
      public Builder clearCharacterCoverage() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00000080;
        characterCoverage_ = 0.9995F;
        onChanged();
        return this;
      }

      // Backing value for input_sentence_size (proto uint64 held in a Java long); starts at 0.
      private long inputSentenceSize_ ;

      /**
       *
       * Maximum size of sentences the trainer loads from `input` parameter.
       * Trainer simply loads the `input` files in sequence.
       * It is better to shuffle the input corpus randomly.
       * 
* <code>optional uint64 input_sentence_size = 11 [default = 0];</code>
       * @return Whether the inputSentenceSize field is set.
       */
      @java.lang.Override
      public boolean hasInputSentenceSize() {
        // Presence of input_sentence_size is tracked by bit 0x00000100.
        final boolean isSet = (bitField0_ & 0x00000100) != 0;
        return isSet;
      }

      /**
       *
       * Maximum size of sentences the trainer loads from `input` parameter.
       * Trainer simply loads the `input` files in sequence.
       * It is better to shuffle the input corpus randomly.
       * 
* <code>optional uint64 input_sentence_size = 11 [default = 0];</code>
       * @return The inputSentenceSize.
       */
      @java.lang.Override
      public long getInputSentenceSize() {
        // Returns the stored value without consulting the presence bit.
        return inputSentenceSize_;
      }

      /**
       *
       * Maximum size of sentences the trainer loads from `input` parameter.
       * Trainer simply loads the `input` files in sequence.
       * It is better to shuffle the input corpus randomly.
       * 
* <code>optional uint64 input_sentence_size = 11 [default = 0];</code>
       * @param value The inputSentenceSize to set.
       * @return This builder for chaining.
       */
      public Builder setInputSentenceSize(long value) {
        inputSentenceSize_ = value;
        // Mark input_sentence_size as present.
        bitField0_ |= 0x00000100;
        onChanged();
        return this;
      }

      /**
       *
       * Maximum size of sentences the trainer loads from `input` parameter.
       * Trainer simply loads the `input` files in sequence.
       * It is better to shuffle the input corpus randomly.
       * 
* * optional uint64 input_sentence_size = 11 [default = 0]; * @return This builder for chaining. */ public Builder clearInputSentenceSize() { bitField0_ = (bitField0_ & ~0x00000100); inputSentenceSize_ = 0L; onChanged(); return this; } private boolean shuffleInputSentence_ = true; /** * optional bool shuffle_input_sentence = 19 [default = true]; * @return Whether the shuffleInputSentence field is set. */ @java.lang.Override public boolean hasShuffleInputSentence() { return ((bitField0_ & 0x00000200) != 0); } /** * optional bool shuffle_input_sentence = 19 [default = true]; * @return The shuffleInputSentence. */ @java.lang.Override public boolean getShuffleInputSentence() { return shuffleInputSentence_; } /** * optional bool shuffle_input_sentence = 19 [default = true]; * @param value The shuffleInputSentence to set. * @return This builder for chaining. */ public Builder setShuffleInputSentence(boolean value) { shuffleInputSentence_ = value; bitField0_ |= 0x00000200; onChanged(); return this; } /** * optional bool shuffle_input_sentence = 19 [default = true]; * @return This builder for chaining. */ public Builder clearShuffleInputSentence() { bitField0_ = (bitField0_ & ~0x00000200); shuffleInputSentence_ = true; onChanged(); return this; } private int miningSentenceSize_ ; /** *
       * Maximum size of sentences to make seed sentence pieces.
       * Extended suffix array is constructed to extract frequent
       * sub-strings from the corpus. This uses 20N working space,
       * where N is the size of corpus.
       * 
* <code>optional int32 mining_sentence_size = 12 [deprecated = true];</code>
       * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated.
       *     See sentencepiece_model.proto;l=83
       * @return Whether the miningSentenceSize field is set.
       */
      @java.lang.Override
      @java.lang.Deprecated
      public boolean hasMiningSentenceSize() {
        // Presence of mining_sentence_size is tracked by bit 0x00000400.
        final boolean isSet = (bitField0_ & 0x00000400) != 0;
        return isSet;
      }

      /**
       *
       * Maximum size of sentences to make seed sentence pieces.
       * Extended suffix array is constructed to extract frequent
       * sub-strings from the corpus. This uses 20N working space,
       * where N is the size of corpus.
       * 
* <code>optional int32 mining_sentence_size = 12 [deprecated = true];</code>
       * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated.
       *     See sentencepiece_model.proto;l=83
       * @return The miningSentenceSize.
       */
      @java.lang.Override
      @java.lang.Deprecated
      public int getMiningSentenceSize() {
        // Returns the stored value without consulting the presence bit.
        return miningSentenceSize_;
      }

      /**
       *
       * Maximum size of sentences to make seed sentence pieces.
       * Extended suffix array is constructed to extract frequent
       * sub-strings from the corpus. This uses 20N working space,
       * where N is the size of corpus.
       * 
* <code>optional int32 mining_sentence_size = 12 [deprecated = true];</code>
       * @param value The miningSentenceSize to set.
       * @return This builder for chaining.
       */
      @java.lang.Deprecated
      public Builder setMiningSentenceSize(int value) {
        miningSentenceSize_ = value;
        // Mark mining_sentence_size as present.
        bitField0_ |= 0x00000400;
        onChanged();
        return this;
      }

      /**
       *
       * Maximum size of sentences to make seed sentence pieces.
       * Extended suffix array is constructed to extract frequent
       * sub-strings from the corpus. This uses 20N working space,
       * where N is the size of corpus.
       * 
* <code>optional int32 mining_sentence_size = 12 [deprecated = true];</code>
       * @return This builder for chaining.
       */
      @java.lang.Deprecated
      public Builder clearMiningSentenceSize() {
        // Drop the presence bit and reset the stored value to 0.
        bitField0_ &= ~0x00000400;
        miningSentenceSize_ = 0;
        onChanged();
        return this;
      }

      // Backing value for the deprecated training_sentence_size field; starts at 0.
      private int trainingSentenceSize_ ;

      /**
       *
       * Maximum size of sentences to train sentence pieces.
       * 
* <code>optional int32 training_sentence_size = 13 [deprecated = true];</code>
       * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated.
       *     See sentencepiece_model.proto;l=86
       * @return Whether the trainingSentenceSize field is set.
       */
      @java.lang.Override
      @java.lang.Deprecated
      public boolean hasTrainingSentenceSize() {
        // Presence of training_sentence_size is tracked by bit 0x00000800.
        final boolean isSet = (bitField0_ & 0x00000800) != 0;
        return isSet;
      }

      /**
       *
       * Maximum size of sentences to train sentence pieces.
       * 
* <code>optional int32 training_sentence_size = 13 [deprecated = true];</code>
       * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated.
       *     See sentencepiece_model.proto;l=86
       * @return The trainingSentenceSize.
       */
      @java.lang.Override
      @java.lang.Deprecated
      public int getTrainingSentenceSize() {
        // Returns the stored value without consulting the presence bit.
        return trainingSentenceSize_;
      }

      /**
       *
       * Maximum size of sentences to train sentence pieces.
       * 
* <code>optional int32 training_sentence_size = 13 [deprecated = true];</code>
       * @param value The trainingSentenceSize to set.
       * @return This builder for chaining.
       */
      @java.lang.Deprecated
      public Builder setTrainingSentenceSize(int value) {
        trainingSentenceSize_ = value;
        // Mark training_sentence_size as present.
        bitField0_ |= 0x00000800;
        onChanged();
        return this;
      }

      /**
       *
       * Maximum size of sentences to train sentence pieces.
       * 
* <code>optional int32 training_sentence_size = 13 [deprecated = true];</code>
       * @return This builder for chaining.
       */
      @java.lang.Deprecated
      public Builder clearTrainingSentenceSize() {
        // Drop the presence bit and reset the stored value to 0.
        bitField0_ &= ~0x00000800;
        trainingSentenceSize_ = 0;
        onChanged();
        return this;
      }

      // Backing value for seed_sentencepiece_size; starts at the declared default of 1000000.
      private int seedSentencepieceSize_ = 1000000;

      /**
       *
       * The size of seed sentencepieces.
       * `seed_sentencepiece_size` must be larger than `vocab_size`.
       * 
* <code>optional int32 seed_sentencepiece_size = 14 [default = 1000000];</code>
       * @return Whether the seedSentencepieceSize field is set.
       */
      @java.lang.Override
      public boolean hasSeedSentencepieceSize() {
        // Presence of seed_sentencepiece_size is tracked by bit 0x00001000.
        final boolean isSet = (bitField0_ & 0x00001000) != 0;
        return isSet;
      }

      /**
       *
       * The size of seed sentencepieces.
       * `seed_sentencepiece_size` must be larger than `vocab_size`.
       * 
* <code>optional int32 seed_sentencepiece_size = 14 [default = 1000000];</code>
       * @return The seedSentencepieceSize.
       */
      @java.lang.Override
      public int getSeedSentencepieceSize() {
        // Returns the stored value without consulting the presence bit.
        return seedSentencepieceSize_;
      }

      /**
       *
       * The size of seed sentencepieces.
       * `seed_sentencepiece_size` must be larger than `vocab_size`.
       * 
* <code>optional int32 seed_sentencepiece_size = 14 [default = 1000000];</code>
       * @param value The seedSentencepieceSize to set.
       * @return This builder for chaining.
       */
      public Builder setSeedSentencepieceSize(int value) {
        // No range check is performed here; the value is stored as given.
        seedSentencepieceSize_ = value;
        bitField0_ |= 0x00001000;
        onChanged();
        return this;
      }

      /**
       *
       * The size of seed sentencepieces.
       * `seed_sentencepiece_size` must be larger than `vocab_size`.
       * 
* <code>optional int32 seed_sentencepiece_size = 14 [default = 1000000];</code>
       * @return This builder for chaining.
       */
      public Builder clearSeedSentencepieceSize() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00001000;
        seedSentencepieceSize_ = 1000000;
        onChanged();
        return this;
      }

      // Backing value for shrinking_factor; starts at the declared default of 0.75.
      private float shrinkingFactor_ = 0.75F;

      /**
       *
       * In every EM sub-iterations, keeps top
       * `shrinking_factor` * `current sentencepieces size` with respect to
       * the loss of the sentence piece. This value should be smaller than 1.0.
       * 
* <code>optional float shrinking_factor = 15 [default = 0.75];</code>
       * @return Whether the shrinkingFactor field is set.
       */
      @java.lang.Override
      public boolean hasShrinkingFactor() {
        // Presence of shrinking_factor is tracked by bit 0x00002000.
        final boolean isSet = (bitField0_ & 0x00002000) != 0;
        return isSet;
      }

      /**
       *
       * In every EM sub-iterations, keeps top
       * `shrinking_factor` * `current sentencepieces size` with respect to
       * the loss of the sentence piece. This value should be smaller than 1.0.
       * 
* <code>optional float shrinking_factor = 15 [default = 0.75];</code>
       * @return The shrinkingFactor.
       */
      @java.lang.Override
      public float getShrinkingFactor() {
        // Returns the stored value without consulting the presence bit.
        return shrinkingFactor_;
      }

      /**
       *
       * In every EM sub-iterations, keeps top
       * `shrinking_factor` * `current sentencepieces size` with respect to
       * the loss of the sentence piece. This value should be smaller than 1.0.
       * 
* <code>optional float shrinking_factor = 15 [default = 0.75];</code>
       * @param value The shrinkingFactor to set.
       * @return This builder for chaining.
       */
      public Builder setShrinkingFactor(float value) {
        // No range check is performed here; the value is stored as given.
        shrinkingFactor_ = value;
        bitField0_ |= 0x00002000;
        onChanged();
        return this;
      }

      /**
       *
       * In every EM sub-iterations, keeps top
       * `shrinking_factor` * `current sentencepieces size` with respect to
       * the loss of the sentence piece. This value should be smaller than 1.0.
       * 
* <code>optional float shrinking_factor = 15 [default = 0.75];</code>
       * @return This builder for chaining.
       */
      public Builder clearShrinkingFactor() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00002000;
        shrinkingFactor_ = 0.75F;
        onChanged();
        return this;
      }

      // Backing value for max_sentence_length; starts at the declared default of 4192.
      private int maxSentenceLength_ = 4192;

      /**
       *
       * The maximum sentence length in byte. The sentences with the length
       * larger than `max_sentence_length` is simply ignored.
       * Longer input tends to bring the following risks:
       *  * Overflow during EM training (unigram language model only)
       *  * Performance drop because of O(n log n) cost in BPE.
       * 
* <code>optional int32 max_sentence_length = 18 [default = 4192];</code>
       * @return Whether the maxSentenceLength field is set.
       */
      @java.lang.Override
      public boolean hasMaxSentenceLength() {
        // Presence of max_sentence_length is tracked by bit 0x00004000.
        final boolean isSet = (bitField0_ & 0x00004000) != 0;
        return isSet;
      }

      /**
       *
       * The maximum sentence length in byte. The sentences with the length
       * larger than `max_sentence_length` is simply ignored.
       * Longer input tends to bring the following risks:
       *  * Overflow during EM training (unigram language model only)
       *  * Performance drop because of O(n log n) cost in BPE.
       * 
* <code>optional int32 max_sentence_length = 18 [default = 4192];</code>
       * @return The maxSentenceLength.
       */
      @java.lang.Override
      public int getMaxSentenceLength() {
        // Returns the stored value without consulting the presence bit.
        return maxSentenceLength_;
      }

      /**
       *
       * The maximum sentence length in byte. The sentences with the length
       * larger than `max_sentence_length` is simply ignored.
       * Longer input tends to bring the following risks:
       *  * Overflow during EM training (unigram language model only)
       *  * Performance drop because of O(n log n) cost in BPE.
       * 
* <code>optional int32 max_sentence_length = 18 [default = 4192];</code>
       * @param value The maxSentenceLength to set.
       * @return This builder for chaining.
       */
      public Builder setMaxSentenceLength(int value) {
        maxSentenceLength_ = value;
        // Mark max_sentence_length as present.
        bitField0_ |= 0x00004000;
        onChanged();
        return this;
      }

      /**
       *
       * The maximum sentence length in byte. The sentences with the length
       * larger than `max_sentence_length` is simply ignored.
       * Longer input tends to bring the following risks:
       *  * Overflow during EM training (unigram language model only)
       *  * Performance drop because of O(n log n) cost in BPE.
       * 
* <code>optional int32 max_sentence_length = 18 [default = 4192];</code>
       * @return This builder for chaining.
       */
      public Builder clearMaxSentenceLength() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00004000;
        maxSentenceLength_ = 4192;
        onChanged();
        return this;
      }

      // Backing value for num_threads; starts at the declared default of 16.
      private int numThreads_ = 16;

      /**
       *
       * Number of threads in the training.
       * 
* <code>optional int32 num_threads = 16 [default = 16];</code>
       * @return Whether the numThreads field is set.
       */
      @java.lang.Override
      public boolean hasNumThreads() {
        // Presence of num_threads is tracked by bit 0x00008000.
        final boolean isSet = (bitField0_ & 0x00008000) != 0;
        return isSet;
      }

      /**
       *
       * Number of threads in the training.
       * 
* <code>optional int32 num_threads = 16 [default = 16];</code>
       * @return The numThreads.
       */
      @java.lang.Override
      public int getNumThreads() {
        // Returns the stored value without consulting the presence bit.
        return numThreads_;
      }

      /**
       *
       * Number of threads in the training.
       * 
* <code>optional int32 num_threads = 16 [default = 16];</code>
       * @param value The numThreads to set.
       * @return This builder for chaining.
       */
      public Builder setNumThreads(int value) {
        numThreads_ = value;
        // Mark num_threads as present.
        bitField0_ |= 0x00008000;
        onChanged();
        return this;
      }

      /**
       *
       * Number of threads in the training.
       * 
* <code>optional int32 num_threads = 16 [default = 16];</code>
       * @return This builder for chaining.
       */
      public Builder clearNumThreads() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00008000;
        numThreads_ = 16;
        onChanged();
        return this;
      }

      // Backing value for num_sub_iterations; starts at the declared default of 2.
      private int numSubIterations_ = 2;

      /**
       *
       * Number of EM sub iterations.
       * 
* <code>optional int32 num_sub_iterations = 17 [default = 2];</code>
       * @return Whether the numSubIterations field is set.
       */
      @java.lang.Override
      public boolean hasNumSubIterations() {
        // Presence of num_sub_iterations is tracked by bit 0x00010000.
        final boolean isSet = (bitField0_ & 0x00010000) != 0;
        return isSet;
      }

      /**
       *
       * Number of EM sub iterations.
       * 
* <code>optional int32 num_sub_iterations = 17 [default = 2];</code>
       * @return The numSubIterations.
       */
      @java.lang.Override
      public int getNumSubIterations() {
        // Returns the stored value without consulting the presence bit.
        return numSubIterations_;
      }

      /**
       *
       * Number of EM sub iterations.
       * 
* <code>optional int32 num_sub_iterations = 17 [default = 2];</code>
       * @param value The numSubIterations to set.
       * @return This builder for chaining.
       */
      public Builder setNumSubIterations(int value) {
        numSubIterations_ = value;
        // Mark num_sub_iterations as present.
        bitField0_ |= 0x00010000;
        onChanged();
        return this;
      }

      /**
       *
       * Number of EM sub iterations.
       * 
* <code>optional int32 num_sub_iterations = 17 [default = 2];</code>
       * @return This builder for chaining.
       */
      public Builder clearNumSubIterations() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00010000;
        numSubIterations_ = 2;
        onChanged();
        return this;
      }

      // Backing value for max_sentencepiece_length; starts at the declared default of 16.
      private int maxSentencepieceLength_ = 16;

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * SentencePiece parameters which control the shapes of sentence piece.
       *
       * Maximum length of sentencepiece.
       * 
* <code>optional int32 max_sentencepiece_length = 20 [default = 16];</code>
       * @return Whether the maxSentencepieceLength field is set.
       */
      @java.lang.Override
      public boolean hasMaxSentencepieceLength() {
        // Presence of max_sentencepiece_length is tracked by bit 0x00020000.
        final boolean isSet = (bitField0_ & 0x00020000) != 0;
        return isSet;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * SentencePiece parameters which control the shapes of sentence piece.
       *
       * Maximum length of sentencepiece.
       * 
* <code>optional int32 max_sentencepiece_length = 20 [default = 16];</code>
       * @return The maxSentencepieceLength.
       */
      @java.lang.Override
      public int getMaxSentencepieceLength() {
        // Returns the stored value without consulting the presence bit.
        return maxSentencepieceLength_;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * SentencePiece parameters which control the shapes of sentence piece.
       *
       * Maximum length of sentencepiece.
       * 
* <code>optional int32 max_sentencepiece_length = 20 [default = 16];</code>
       * @param value The maxSentencepieceLength to set.
       * @return This builder for chaining.
       */
      public Builder setMaxSentencepieceLength(int value) {
        maxSentencepieceLength_ = value;
        // Mark max_sentencepiece_length as present.
        bitField0_ |= 0x00020000;
        onChanged();
        return this;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * SentencePiece parameters which control the shapes of sentence piece.
       *
       * Maximum length of sentencepiece.
       * 
* <code>optional int32 max_sentencepiece_length = 20 [default = 16];</code>
       * @return This builder for chaining.
       */
      public Builder clearMaxSentencepieceLength() {
        // Drop the presence bit and return to the declared default.
        bitField0_ &= ~0x00020000;
        maxSentencepieceLength_ = 16;
        onChanged();
        return this;
      }

      // Backing value for split_by_unicode_script; starts at the declared default of true.
      private boolean splitByUnicodeScript_ = true;

      /**
       *
       * Uses Unicode script to split sentence pieces.
       * When `split_by_unicode_script` is true, we do not allow sentence piece to
       * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
       * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
       * as one script type, since Japanese word can consist of multiple scripts.
       * This exception is always applied regardless of the accept-language
       * parameter.
       * 
* <code>optional bool split_by_unicode_script = 21 [default = true];</code>
       * @return Whether the splitByUnicodeScript field is set.
       */
      @java.lang.Override
      public boolean hasSplitByUnicodeScript() {
        // Presence of split_by_unicode_script is tracked by bit 0x00040000.
        final boolean isSet = (bitField0_ & 0x00040000) != 0;
        return isSet;
      }

      /**
       *
       * Uses Unicode script to split sentence pieces.
       * When `split_by_unicode_script` is true, we do not allow sentence piece to
       * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
       * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
       * as one script type, since Japanese word can consist of multiple scripts.
       * This exception is always applied regardless of the accept-language
       * parameter.
       * 
* <code>optional bool split_by_unicode_script = 21 [default = true];</code>
       * @return The splitByUnicodeScript.
       */
      @java.lang.Override
      public boolean getSplitByUnicodeScript() {
        // Returns the stored value without consulting the presence bit.
        return splitByUnicodeScript_;
      }

      /**
       *
       * Uses Unicode script to split sentence pieces.
       * When `split_by_unicode_script` is true, we do not allow sentence piece to
       * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
       * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
       * as one script type, since a Japanese word can consist of multiple scripts.
       * This exception is always applied regardless of the accept-language
       * parameter.
       * 
*
       * <code>optional bool split_by_unicode_script = 21 [default = true];</code>
       * @param value The splitByUnicodeScript to set.
       * @return This builder for chaining.
       */
      public Builder setSplitByUnicodeScript(boolean value) {
        this.splitByUnicodeScript_ = value;
        // Record that the field now carries an explicit value.
        this.bitField0_ |= 0x00040000;
        onChanged();
        return this;
      }

      /**
       *
       * Uses Unicode script to split sentence pieces.
       * When `split_by_unicode_script` is true, we do not allow sentence piece to
       * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
       * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
       * as one script type, since a Japanese word can consist of multiple scripts.
       * This exception is always applied regardless of the accept-language
       * parameter.
       * 
*
       * <code>optional bool split_by_unicode_script = 21 [default = true];</code>
       * @return This builder for chaining.
       */
      public Builder clearSplitByUnicodeScript() {
        // Drop the presence bit, then restore the declared default (true).
        bitField0_ &= ~0x00040000;
        splitByUnicodeScript_ = true;
        onChanged();
        return this;
      }

      // Builder-side value of split_by_number; initialised to the declared default (true).
      private boolean splitByNumber_ = true;
      /**
       *
       * When `split_by_number` is true, put a boundary between number and
       * non-number transition. If we want to treat "F1" as one token, set this flag
       * to be false.
       * 
*
       * <code>optional bool split_by_number = 23 [default = true];</code>
       * @return Whether the splitByNumber field is set.
       */
      @java.lang.Override
      public boolean hasSplitByNumber() {
        // Presence is tracked by bit 0x00080000 of bitField0_.
        final int presenceBit = bitField0_ & 0x00080000;
        return presenceBit != 0;
      }

      /**
       *
       * When `split_by_number` is true, put a boundary between number and
       * non-number transition. If we want to treat "F1" as one token, set this flag
       * to be false.
       * 
*
       * <code>optional bool split_by_number = 23 [default = true];</code>
       * @return The splitByNumber.
       */
      @java.lang.Override
      public boolean getSplitByNumber() {
        // Plain read of the builder field; the presence bit is not consulted here.
        return this.splitByNumber_;
      }

      /**
       *
       * When `split_by_number` is true, put a boundary between number and
       * non-number transition. If we want to treat "F1" as one token, set this flag
       * to be false.
       * 
*
       * <code>optional bool split_by_number = 23 [default = true];</code>
       * @param value The splitByNumber to set.
       * @return This builder for chaining.
       */
      public Builder setSplitByNumber(boolean value) {
        this.splitByNumber_ = value;
        // Record that the field now carries an explicit value.
        this.bitField0_ |= 0x00080000;
        onChanged();
        return this;
      }

      /**
       *
       * When `split_by_number` is true, put a boundary between number and
       * non-number transition. If we want to treat "F1" as one token, set this flag
       * to be false.
       * 
*
       * <code>optional bool split_by_number = 23 [default = true];</code>
       * @return This builder for chaining.
       */
      public Builder clearSplitByNumber() {
        // Drop the presence bit, then restore the declared default (true).
        bitField0_ &= ~0x00080000;
        splitByNumber_ = true;
        onChanged();
        return this;
      }

      // Builder-side value of split_by_whitespace; initialised to the declared default (true).
      private boolean splitByWhitespace_ = true;
      /**
       *
       * Use a white space to split sentence pieces.
       * When `split_by_whitespace` is false, we may have the piece containing
       * a white space in the middle. e.g., "in_the".
       * 
*
       * <code>optional bool split_by_whitespace = 22 [default = true];</code>
       * @return Whether the splitByWhitespace field is set.
       */
      @java.lang.Override
      public boolean hasSplitByWhitespace() {
        // Presence is tracked by bit 0x00100000 of bitField0_.
        final int presenceBit = bitField0_ & 0x00100000;
        return presenceBit != 0;
      }

      /**
       *
       * Use a white space to split sentence pieces.
       * When `split_by_whitespace` is false, we may have the piece containing
       * a white space in the middle. e.g., "in_the".
       * 
*
       * <code>optional bool split_by_whitespace = 22 [default = true];</code>
       * @return The splitByWhitespace.
       */
      @java.lang.Override
      public boolean getSplitByWhitespace() {
        // Plain read of the builder field; the presence bit is not consulted here.
        return this.splitByWhitespace_;
      }

      /**
       *
       * Use a white space to split sentence pieces.
       * When `split_by_whitespace` is false, we may have the piece containing
       * a white space in the middle. e.g., "in_the".
       * 
*
       * <code>optional bool split_by_whitespace = 22 [default = true];</code>
       * @param value The splitByWhitespace to set.
       * @return This builder for chaining.
       */
      public Builder setSplitByWhitespace(boolean value) {
        this.splitByWhitespace_ = value;
        // Record that the field now carries an explicit value.
        this.bitField0_ |= 0x00100000;
        onChanged();
        return this;
      }

      /**
       *
       * Use a white space to split sentence pieces.
       * When `split_by_whitespace` is false, we may have the piece containing
       * a white space in the middle. e.g., "in_the".
       * 
*
       * <code>optional bool split_by_whitespace = 22 [default = true];</code>
       * @return This builder for chaining.
       */
      public Builder clearSplitByWhitespace() {
        // Drop the presence bit, then restore the declared default (true).
        bitField0_ &= ~0x00100000;
        splitByWhitespace_ = true;
        onChanged();
        return this;
      }

      // Builder-side value of treat_whitespace_as_suffix; no initializer, so it starts as false.
      private boolean treatWhitespaceAsSuffix_;
      /**
       *
       * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
       * hello_. When `treat_whitespace_as_suffix` is true,
       * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
       * of sentence.
       * 
*
       * <code>optional bool treat_whitespace_as_suffix = 24 [default = false];</code>
       * @return Whether the treatWhitespaceAsSuffix field is set.
       */
      @java.lang.Override
      public boolean hasTreatWhitespaceAsSuffix() {
        // Presence is tracked by bit 0x00200000 of bitField0_.
        final int presenceBit = bitField0_ & 0x00200000;
        return presenceBit != 0;
      }

      /**
       *
       * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
       * hello_. When `treat_whitespace_as_suffix` is true,
       * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
       * of sentence.
       * 
*
       * <code>optional bool treat_whitespace_as_suffix = 24 [default = false];</code>
       * @return The treatWhitespaceAsSuffix.
       */
      @java.lang.Override
      public boolean getTreatWhitespaceAsSuffix() {
        // Plain read of the builder field; the presence bit is not consulted here.
        return this.treatWhitespaceAsSuffix_;
      }

      /**
       *
       * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
       * hello_. When `treat_whitespace_as_suffix` is true,
       * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
       * of sentence.
       * 
*
       * <code>optional bool treat_whitespace_as_suffix = 24 [default = false];</code>
       * @param value The treatWhitespaceAsSuffix to set.
       * @return This builder for chaining.
       */
      public Builder setTreatWhitespaceAsSuffix(boolean value) {
        this.treatWhitespaceAsSuffix_ = value;
        // Record that the field now carries an explicit value.
        this.bitField0_ |= 0x00200000;
        onChanged();
        return this;
      }

      /**
       *
       * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
       * hello_. When `treat_whitespace_as_suffix` is true,
       * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
       * of sentence.
       * 
*
       * <code>optional bool treat_whitespace_as_suffix = 24 [default = false];</code>
       * @return This builder for chaining.
       */
      public Builder clearTreatWhitespaceAsSuffix() {
        // Drop the presence bit, then restore the declared default (false).
        bitField0_ &= ~0x00200000;
        treatWhitespaceAsSuffix_ = false;
        onChanged();
        return this;
      }

      // Builder-side value of allow_whitespace_only_pieces; no initializer, so it starts as false.
      private boolean allowWhitespaceOnlyPieces_;
      /**
       *
       * Allows pieces that only contain whitespaces instead of appearing only as
       * prefix or suffix of other pieces.
       * 
*
       * <code>optional bool allow_whitespace_only_pieces = 26 [default = false];</code>
       * @return Whether the allowWhitespaceOnlyPieces field is set.
       */
      @java.lang.Override
      public boolean hasAllowWhitespaceOnlyPieces() {
        // Presence is tracked by bit 0x00400000 of bitField0_.
        final int presenceBit = bitField0_ & 0x00400000;
        return presenceBit != 0;
      }

      /**
       *
       * Allows pieces that only contain whitespaces instead of appearing only as
       * prefix or suffix of other pieces.
       * 
*
       * <code>optional bool allow_whitespace_only_pieces = 26 [default = false];</code>
       * @return The allowWhitespaceOnlyPieces.
       */
      @java.lang.Override
      public boolean getAllowWhitespaceOnlyPieces() {
        // Plain read of the builder field; the presence bit is not consulted here.
        return this.allowWhitespaceOnlyPieces_;
      }

      /**
       *
       * Allows pieces that only contain whitespaces instead of appearing only as
       * prefix or suffix of other pieces.
       * 
*
       * <code>optional bool allow_whitespace_only_pieces = 26 [default = false];</code>
       * @param value The allowWhitespaceOnlyPieces to set.
       * @return This builder for chaining.
       */
      public Builder setAllowWhitespaceOnlyPieces(boolean value) {
        this.allowWhitespaceOnlyPieces_ = value;
        // Record that the field now carries an explicit value.
        this.bitField0_ |= 0x00400000;
        onChanged();
        return this;
      }

      /**
       *
       * Allows pieces that only contain whitespaces instead of appearing only as
       * prefix or suffix of other pieces.
       * 
*
       * <code>optional bool allow_whitespace_only_pieces = 26 [default = false];</code>
       * @return This builder for chaining.
       */
      public Builder clearAllowWhitespaceOnlyPieces() {
        // Drop the presence bit, then restore the declared default (false).
        bitField0_ &= ~0x00400000;
        allowWhitespaceOnlyPieces_ = false;
        onChanged();
        return this;
      }

      // Builder-side value of split_digits; no initializer, so it starts as false.
      private boolean splitDigits_;
      /**
       *
       * Split all digits (0-9) into separate pieces.
       * 
*
       * <code>optional bool split_digits = 25 [default = false];</code>
       * @return Whether the splitDigits field is set.
       */
      @java.lang.Override
      public boolean hasSplitDigits() {
        // Presence is tracked by bit 0x00800000 of bitField0_.
        final int presenceBit = bitField0_ & 0x00800000;
        return presenceBit != 0;
      }

      /**
       *
       * Split all digits (0-9) into separate pieces.
       * 
*
       * <code>optional bool split_digits = 25 [default = false];</code>
       * @return The splitDigits.
       */
      @java.lang.Override
      public boolean getSplitDigits() {
        // Plain read of the builder field; the presence bit is not consulted here.
        return this.splitDigits_;
      }

      /**
       *
       * Split all digits (0-9) into separate pieces.
       * 
*
       * <code>optional bool split_digits = 25 [default = false];</code>
       * @param value The splitDigits to set.
       * @return This builder for chaining.
       */
      public Builder setSplitDigits(boolean value) {
        this.splitDigits_ = value;
        // Record that the field now carries an explicit value.
        this.bitField0_ |= 0x00800000;
        onChanged();
        return this;
      }

      /**
       *
       * Split all digits (0-9) into separate pieces.
       * 
*
       * <code>optional bool split_digits = 25 [default = false];</code>
       * @return This builder for chaining.
       */
      public Builder clearSplitDigits() {
        // Drop the presence bit, then restore the declared default (false).
        bitField0_ &= ~0x00800000;
        splitDigits_ = false;
        onChanged();
        return this;
      }

      // Backing list for control_symbols; starts as the shared empty list.
      private com.google.protobuf.LazyStringArrayList controlSymbols_ =
          com.google.protobuf.LazyStringArrayList.emptyList();

      // Swaps in a fresh copy when the current list is not modifiable, then sets bit 0x01000000.
      private void ensureControlSymbolsIsMutable() {
        final boolean readOnly = !controlSymbols_.isModifiable();
        if (readOnly) {
          controlSymbols_ = new com.google.protobuf.LazyStringArrayList(controlSymbols_);
        }
        bitField0_ |= 0x01000000;
      }
      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @return A list containing the controlSymbols.
       */
      public com.google.protobuf.ProtocolStringList getControlSymbolsList() {
        // makeImmutable() is invoked on the backing list before it is handed out.
        final com.google.protobuf.LazyStringArrayList symbols = controlSymbols_;
        symbols.makeImmutable();
        return symbols;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @return The count of controlSymbols.
       */
      public int getControlSymbolsCount() {
        // Size of the backing list.
        final int count = controlSymbols_.size();
        return count;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @param index The index of the element to return.
       * @return The controlSymbols at the given index.
       */
      public java.lang.String getControlSymbols(int index) {
        // Delegates straight to the backing list; no bounds check is added here.
        final java.lang.String symbol = controlSymbols_.get(index);
        return symbol;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @param index The index of the value to return.
       * @return The bytes of the controlSymbols at the given index.
       */
      public com.google.protobuf.ByteString
          getControlSymbolsBytes(int index) {
        // Delegates to the backing list's ByteString accessor.
        final com.google.protobuf.ByteString bytes = controlSymbols_.getByteString(index);
        return bytes;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @param index The index to set the value at.
       * @param value The controlSymbols to set.
       * @return This builder for chaining.
       */
      public Builder setControlSymbols(
          int index, java.lang.String value) {
        // Null values are rejected with a message-less NullPointerException.
        java.util.Objects.requireNonNull(value);
        ensureControlSymbolsIsMutable();
        controlSymbols_.set(index, value);
        this.bitField0_ |= 0x01000000;
        onChanged();
        return this;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @param value The controlSymbols to add.
       * @return This builder for chaining.
       */
      public Builder addControlSymbols(
          java.lang.String value) {
        // Null values are rejected with a message-less NullPointerException.
        java.util.Objects.requireNonNull(value);
        ensureControlSymbolsIsMutable();
        controlSymbols_.add(value);
        this.bitField0_ |= 0x01000000;
        onChanged();
        return this;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @param values The controlSymbols to add.
       * @return This builder for chaining.
       */
      public Builder addAllControlSymbols(
          java.lang.Iterable<java.lang.String> values) {
        // The element type is spelled out so callers are type-checked instead of
        // going through a raw Iterable.
        ensureControlSymbolsIsMutable();
        com.google.protobuf.AbstractMessageLite.Builder.addAll(
            values, controlSymbols_);
        bitField0_ |= 0x01000000;
        onChanged();
        return this;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @return This builder for chaining.
       */
      public Builder clearControlSymbols() {
        // Reset to the shared empty list and drop the field's bit (0x01000000).
        controlSymbols_ =
            com.google.protobuf.LazyStringArrayList.emptyList();
        bitField0_ = (bitField0_ & ~0x01000000);
        onChanged();
        return this;
      }

      /**
       *
       *&#47;////////////////////////////////////////////////////////////////
       * Vocabulary management
       *
       * Defines control symbols used as an indicator to
       * change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
       * We can use this field to encode various meta information,
       * including language indicator in multilingual model.
       * These symbols are not visible to users, but visible to
       * the decoder. Note that when the input sentence contains control symbols,
       * they are not treated as one token, but segmented into normal pieces.
       * Control symbols must be inserted independently from the segmentation.
       * 
*
       * <code>repeated string control_symbols = 30;</code>
       * @param value The bytes of the controlSymbols to add.
       * @return This builder for chaining.
       */
      public Builder addControlSymbolsBytes(
          com.google.protobuf.ByteString value) {
        // Null values are rejected with a message-less NullPointerException.
        java.util.Objects.requireNonNull(value);
        ensureControlSymbolsIsMutable();
        controlSymbols_.add(value);
        this.bitField0_ |= 0x01000000;
        onChanged();
        return this;
      }

      // Backing list for user_defined_symbols; starts as the shared empty list.
      private com.google.protobuf.LazyStringArrayList userDefinedSymbols_ =
          com.google.protobuf.LazyStringArrayList.emptyList();

      // Swaps in a fresh copy when the current list is not modifiable, then sets bit 0x02000000.
      private void ensureUserDefinedSymbolsIsMutable() {
        final boolean readOnly = !userDefinedSymbols_.isModifiable();
        if (readOnly) {
          userDefinedSymbols_ = new com.google.protobuf.LazyStringArrayList(userDefinedSymbols_);
        }
        bitField0_ |= 0x02000000;
      }
      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @return A list containing the userDefinedSymbols.
       */
      public com.google.protobuf.ProtocolStringList getUserDefinedSymbolsList() {
        // makeImmutable() is invoked on the backing list before it is handed out.
        final com.google.protobuf.LazyStringArrayList symbols = userDefinedSymbols_;
        symbols.makeImmutable();
        return symbols;
      }

      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @return The count of userDefinedSymbols.
       */
      public int getUserDefinedSymbolsCount() {
        // Size of the backing list.
        final int count = userDefinedSymbols_.size();
        return count;
      }

      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @param index The index of the element to return.
       * @return The userDefinedSymbols at the given index.
       */
      public java.lang.String getUserDefinedSymbols(int index) {
        // Delegates straight to the backing list; no bounds check is added here.
        final java.lang.String symbol = userDefinedSymbols_.get(index);
        return symbol;
      }

      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @param index The index of the value to return.
       * @return The bytes of the userDefinedSymbols at the given index.
       */
      public com.google.protobuf.ByteString
          getUserDefinedSymbolsBytes(int index) {
        // Delegates to the backing list's ByteString accessor.
        final com.google.protobuf.ByteString bytes = userDefinedSymbols_.getByteString(index);
        return bytes;
      }

      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @param index The index to set the value at.
       * @param value The userDefinedSymbols to set.
       * @return This builder for chaining.
       */
      public Builder setUserDefinedSymbols(
          int index, java.lang.String value) {
        // Null values are rejected with a message-less NullPointerException.
        java.util.Objects.requireNonNull(value);
        ensureUserDefinedSymbolsIsMutable();
        userDefinedSymbols_.set(index, value);
        this.bitField0_ |= 0x02000000;
        onChanged();
        return this;
      }

      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @param value The userDefinedSymbols to add.
       * @return This builder for chaining.
       */
      public Builder addUserDefinedSymbols(
          java.lang.String value) {
        // Null values are rejected with a message-less NullPointerException.
        java.util.Objects.requireNonNull(value);
        ensureUserDefinedSymbolsIsMutable();
        userDefinedSymbols_.add(value);
        this.bitField0_ |= 0x02000000;
        onChanged();
        return this;
      }

      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @param values The userDefinedSymbols to add.
       * @return This builder for chaining.
       */
      public Builder addAllUserDefinedSymbols(
          java.lang.Iterable<java.lang.String> values) {
        // The element type is spelled out so callers are type-checked instead of
        // going through a raw Iterable.
        ensureUserDefinedSymbolsIsMutable();
        com.google.protobuf.AbstractMessageLite.Builder.addAll(
            values, userDefinedSymbols_);
        bitField0_ |= 0x02000000;
        onChanged();
        return this;
      }

      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @return This builder for chaining.
       */
      public Builder clearUserDefinedSymbols() {
        // Reset to the shared empty list and drop the field's bit (0x02000000).
        userDefinedSymbols_ =
            com.google.protobuf.LazyStringArrayList.emptyList();
        bitField0_ = (bitField0_ & ~0x02000000);
        onChanged();
        return this;
      }

      /**
       *
       * Defines user defined symbols.
       * These symbols are added with extremely high score
       * so they are always treated as one unique symbol in any context.
       * Typical usage of user_defined_symbols is placeholder for named entities.
       * 
*
       * <code>repeated string user_defined_symbols = 31;</code>
       * @param value The bytes of the userDefinedSymbols to add.
       * @return This builder for chaining.
       */
      public Builder addUserDefinedSymbolsBytes(
          com.google.protobuf.ByteString value) {
        // Null values are rejected with a message-less NullPointerException.
        java.util.Objects.requireNonNull(value);
        ensureUserDefinedSymbolsIsMutable();
        userDefinedSymbols_.add(value);
        this.bitField0_ |= 0x02000000;
        onChanged();
        return this;
      }

      // Holds required_chars as either a String or a ByteString (see the accessors); starts as "".
      private java.lang.Object requiredChars_ = "";
      /**
       *
       * Defines required characters. Each UTF8 character in this string is included
       * in the character set regardless of character_coverage value. Unlike
       * user_defined_symbols, these characters have scores based on the frequency
       * on input sentences, and the model can form subwords using characters
       * in this field.
       * 
*
       * <code>optional string required_chars = 36;</code>
       * @return Whether the requiredChars field is set.
       */
      public boolean hasRequiredChars() {
        // Presence is tracked by bit 0x04000000 of bitField0_.
        final int presenceBit = bitField0_ & 0x04000000;
        return presenceBit != 0;
      }

      /**
       *
       * Defines required characters. Each UTF8 character in this string is included
       * in the character set regardless of character_coverage value. Unlike
       * user_defined_symbols, these characters have scores based on the frequency
       * on input sentences, and the model can form subwords using characters
       * in this field.
       * 
*
       * <code>optional string required_chars = 36;</code>
       * @return The requiredChars.
       */
      public java.lang.String getRequiredChars() {
        final java.lang.Object current = requiredChars_;
        if (current instanceof java.lang.String) {
          return (java.lang.String) current;
        }
        // Held as bytes: decode, and cache the String only when the bytes are valid UTF-8.
        final com.google.protobuf.ByteString raw =
            (com.google.protobuf.ByteString) current;
        final java.lang.String decoded = raw.toStringUtf8();
        if (raw.isValidUtf8()) {
          requiredChars_ = decoded;
        }
        return decoded;
      }

      /**
       *
       * Defines required characters. Each UTF8 character in this string is included
       * in the character set regardless of character_coverage value. Unlike
       * user_defined_symbols, these characters have scores based on the frequency
       * on input sentences, and the model can form subwords using characters
       * in this field.
       * 
*
       * <code>optional string required_chars = 36;</code>
       * @return The bytes for requiredChars.
       */
      public com.google.protobuf.ByteString
          getRequiredCharsBytes() {
        final java.lang.Object current = requiredChars_;
        if (!(current instanceof String)) {
          return (com.google.protobuf.ByteString) current;
        }
        // Held as a String: encode as UTF-8 and keep the ByteString form in the field.
        final com.google.protobuf.ByteString encoded =
            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
        requiredChars_ = encoded;
        return encoded;
      }

      /**
       *
       * Defines required characters. Each UTF8 character in this string is included
       * in the character set regardless of character_coverage value. Unlike
       * user_defined_symbols, these characters have scores based on the frequency
       * on input sentences, and the model can form subwords using characters
       * in this field.
       * 
*
       * <code>optional string required_chars = 36;</code>
       * @param value The requiredChars to set.
       * @return This builder for chaining.
       */
      public Builder setRequiredChars(
          java.lang.String value) {
        // Null values are rejected with a message-less NullPointerException.
        java.util.Objects.requireNonNull(value);
        this.requiredChars_ = value;
        this.bitField0_ |= 0x04000000;
        onChanged();
        return this;
      }

      /**
       *
       * Defines required characters. Each UTF8 character in this string is included
       * in the character set regardless of character_coverage value. Unlike
       * user_defined_symbols, these characters have scores based on the frequency
       * on input sentences, and the model can form subwords using characters
       * in this field.
       * 
*
       * <code>optional string required_chars = 36;</code>
       * @return This builder for chaining.
       */
      public Builder clearRequiredChars() {
        // Take the value held by the default instance, then drop the presence bit.
        final java.lang.String defaultChars = getDefaultInstance().getRequiredChars();
        requiredChars_ = defaultChars;
        bitField0_ &= ~0x04000000;
        onChanged();
        return this;
      }

      /**
       *
       * Defines required characters. Each UTF8 character in this string is included
       * in the character set regardless of character_coverage value. Unlike
       * user_defined_symbols, these characters have scores based on the frequency
       * on input sentences, and the model can form subwords using characters
       * in this field.
       * 
*
       * <code>optional string required_chars = 36;</code>
       * @param value The bytes for requiredChars to set.
       * @return This builder for chaining.
       */
      public Builder setRequiredCharsBytes(
          com.google.protobuf.ByteString value) {
        // Null values are rejected with a message-less NullPointerException.
        java.util.Objects.requireNonNull(value);
        this.requiredChars_ = value;
        this.bitField0_ |= 0x04000000;
        onChanged();
        return this;
      }

      // Builder-side value of byte_fallback; no initializer, so it starts as false.
      private boolean byteFallback_;
      /**
       *
       * Decomposes unknown pieces into UTF-8 bytes.
       * 
*
       * <code>optional bool byte_fallback = 35 [default = false];</code>
       * @return Whether the byteFallback field is set.
       */
      @java.lang.Override
      public boolean hasByteFallback() {
        // Presence is tracked by bit 0x08000000 of bitField0_.
        final int presenceBit = bitField0_ & 0x08000000;
        return presenceBit != 0;
      }

      /**
       *
       * Decomposes unknown pieces into UTF-8 bytes.
       * 
*
       * <code>optional bool byte_fallback = 35 [default = false];</code>
       * @return The byteFallback.
       */
      @java.lang.Override
      public boolean getByteFallback() {
        // Plain read of the builder field; the presence bit is not consulted here.
        return this.byteFallback_;
      }

      /**
       *
       * Decomposes unknown pieces into UTF-8 bytes.
       * 
*
       * <code>optional bool byte_fallback = 35 [default = false];</code>
       * @param value The byteFallback to set.
       * @return This builder for chaining.
       */
      public Builder setByteFallback(boolean value) {
        this.byteFallback_ = value;
        // Record that the field now carries an explicit value.
        this.bitField0_ |= 0x08000000;
        onChanged();
        return this;
      }

      /**
       *
       * Decomposes unknown pieces into UTF-8 bytes.
       * 
*
       * <code>optional bool byte_fallback = 35 [default = false];</code>
       * @return This builder for chaining.
       */
      public Builder clearByteFallback() {
        // Drop the presence bit, then restore the declared default (false).
        bitField0_ &= ~0x08000000;
        byteFallback_ = false;
        onChanged();
        return this;
      }

      // Builder-side value of vocabulary_output_piece_score; initialised to the declared default (true).
      private boolean vocabularyOutputPieceScore_ = true;
      /**
       *
       * When creating the vocabulary file, defines whether or not to additionally
       * output the score for each piece.
       * 
*
       * <code>optional bool vocabulary_output_piece_score = 32 [default = true];</code>
       * @return Whether the vocabularyOutputPieceScore field is set.
       */
      @java.lang.Override
      public boolean hasVocabularyOutputPieceScore() {
        // Presence is tracked by bit 0x10000000 of bitField0_.
        final int presenceBit = bitField0_ & 0x10000000;
        return presenceBit != 0;
      }

      /**
       *
       * When creating the vocabulary file, defines whether or not to additionally
       * output the score for each piece.
       * 
*
       * <code>optional bool vocabulary_output_piece_score = 32 [default = true];</code>
       * @return The vocabularyOutputPieceScore.
       */
      @java.lang.Override
      public boolean getVocabularyOutputPieceScore() {
        // Plain read of the builder field; the presence bit is not consulted here.
        return this.vocabularyOutputPieceScore_;
      }

      /**
       *
       * When creating the vocabulary file, defines whether or not to additionally
       * output the score for each piece.
       * 
*
       * <code>optional bool vocabulary_output_piece_score = 32 [default = true];</code>
       * @param value The vocabularyOutputPieceScore to set.
       * @return This builder for chaining.
       */
      public Builder setVocabularyOutputPieceScore(boolean value) {
        this.vocabularyOutputPieceScore_ = value;
        // Record that the field now carries an explicit value.
        this.bitField0_ |= 0x10000000;
        onChanged();
        return this;
      }

      /**
       *
       * When creating the vocabulary file, defines whether or not to additionally
       * output the score for each piece.
       * 
*
       * <code>optional bool vocabulary_output_piece_score = 32 [default = true];</code>
       * @return This builder for chaining.
       */
      public Builder clearVocabularyOutputPieceScore() {
        // Drop the presence bit, then restore the declared default (true).
        bitField0_ &= ~0x10000000;
        vocabularyOutputPieceScore_ = true;
        onChanged();
        return this;
      }

      // Builder-side value of hard_vocab_limit; initialised to the declared default (true).
      private boolean hardVocabLimit_ = true;
      /**
       *
       * `vocab_size` is treated as hard limit. Crash if
       * the model can not produce the vocab of size `vocab_size`,
       * When `hard_vocab_limit` is false, vocab_size is treated
       * as soft limit. Note that when model_type=char,
       * always assumes hard_vocab_limit = false.
       * 
* * optional bool hard_vocab_limit = 33 [default = true]; * @return Whether the hardVocabLimit field is set. */ @java.lang.Override public boolean hasHardVocabLimit() { return ((bitField0_ & 0x20000000) != 0); } /** *
       * `vocab_size` is treated as hard limit. Crash if
       * the model can not produce the vocab of size `vocab_size`,
       * When `hard_vocab_limit` is false, vocab_size is treated
       * as soft limit. Note that when model_type=char,
       * always assumes hard_vocab_limit = false.
       * 
* * optional bool hard_vocab_limit = 33 [default = true]; * @return The hardVocabLimit. */ @java.lang.Override public boolean getHardVocabLimit() { return hardVocabLimit_; } /** *
       * `vocab_size` is treated as hard limit. Crash if
       * the model can not produce the vocab of size `vocab_size`,
       * When `hard_vocab_limit` is false, vocab_size is treated
       * as soft limit. Note that when model_type=char,
       * always assumes hard_vocab_limit = false.
       * 
* * optional bool hard_vocab_limit = 33 [default = true]; * @param value The hardVocabLimit to set. * @return This builder for chaining. */ public Builder setHardVocabLimit(boolean value) { hardVocabLimit_ = value; bitField0_ |= 0x20000000; onChanged(); return this; } /** *
       * `vocab_size` is treated as hard limit. Crash if
       * the model can not produce the vocab of size `vocab_size`,
       * When `hard_vocab_limit` is false, vocab_size is treated
       * as soft limit. Note that when model_type=char,
       * always assumes hard_vocab_limit = false.
       * 
* * optional bool hard_vocab_limit = 33 [default = true]; * @return This builder for chaining. */ public Builder clearHardVocabLimit() { bitField0_ = (bitField0_ & ~0x20000000); hardVocabLimit_ = true; onChanged(); return this; } private boolean useAllVocab_ ; /** *
       * use all symbols for vocab extraction. This flag is valid
       * if model type is either CHAR or WORD
       * 
* * optional bool use_all_vocab = 34 [default = false]; * @return Whether the useAllVocab field is set. */ @java.lang.Override public boolean hasUseAllVocab() { return ((bitField0_ & 0x40000000) != 0); } /** *
       * use all symbols for vocab extraction. This flag is valid
       * if model type is either CHAR or WORD
       * 
* * optional bool use_all_vocab = 34 [default = false]; * @return The useAllVocab. */ @java.lang.Override public boolean getUseAllVocab() { return useAllVocab_; } /** *
       * use all symbols for vocab extraction. This flag is valid
       * if model type is either CHAR or WORD
       * 
* * optional bool use_all_vocab = 34 [default = false]; * @param value The useAllVocab to set. * @return This builder for chaining. */ public Builder setUseAllVocab(boolean value) { useAllVocab_ = value; bitField0_ |= 0x40000000; onChanged(); return this; } /** *
       * use all symbols for vocab extraction. This flag is valid
       * if model type is either CHAR or WORD
       * 
* * optional bool use_all_vocab = 34 [default = false]; * @return This builder for chaining. */ public Builder clearUseAllVocab() { bitField0_ = (bitField0_ & ~0x40000000); useAllVocab_ = false; onChanged(); return this; } private int unkId_ ; /** *
       *&#47;////////////////////////////////////////////////////////////////
       * Reserved special meta tokens.
       * * -1 is not used.
       * * unk_id must not be -1.
       * Ids must start with 0 and be contiguous.
       * 
* * optional int32 unk_id = 40 [default = 0]; * @return Whether the unkId field is set. */ @java.lang.Override public boolean hasUnkId() { return ((bitField0_ & 0x80000000) != 0); } /** *
       *&#47;////////////////////////////////////////////////////////////////
       * Reserved special meta tokens.
       * * -1 is not used.
       * * unk_id must not be -1.
       * Ids must start with 0 and be contiguous.
       * 
* * optional int32 unk_id = 40 [default = 0]; * @return The unkId. */ @java.lang.Override public int getUnkId() { return unkId_; } /** *
       *&#47;////////////////////////////////////////////////////////////////
       * Reserved special meta tokens.
       * * -1 is not used.
       * * unk_id must not be -1.
       * Ids must start with 0 and be contiguous.
       * 
* * optional int32 unk_id = 40 [default = 0]; * @param value The unkId to set. * @return This builder for chaining. */ public Builder setUnkId(int value) { unkId_ = value; bitField0_ |= 0x80000000; onChanged(); return this; } /** *
       *&#47;////////////////////////////////////////////////////////////////
       * Reserved special meta tokens.
       * * -1 is not used.
       * * unk_id must not be -1.
       * Ids must start with 0 and be contiguous.
       * 
* * optional int32 unk_id = 40 [default = 0]; * @return This builder for chaining. */ public Builder clearUnkId() { bitField0_ = (bitField0_ & ~0x80000000); unkId_ = 0; onChanged(); return this; } private int bosId_ = 1; /** *
       * &lt;s&gt;
       * 
* * optional int32 bos_id = 41 [default = 1]; * @return Whether the bosId field is set. */ @java.lang.Override public boolean hasBosId() { return ((bitField1_ & 0x00000001) != 0); } /** *
       * &lt;s&gt;
       * 
* * optional int32 bos_id = 41 [default = 1]; * @return The bosId. */ @java.lang.Override public int getBosId() { return bosId_; } /** *
       * &lt;s&gt;
       * 
* * optional int32 bos_id = 41 [default = 1]; * @param value The bosId to set. * @return This builder for chaining. */ public Builder setBosId(int value) { bosId_ = value; bitField1_ |= 0x00000001; onChanged(); return this; } /** *
       * &lt;s&gt;
       * 
* * optional int32 bos_id = 41 [default = 1]; * @return This builder for chaining. */ public Builder clearBosId() { bitField1_ = (bitField1_ & ~0x00000001); bosId_ = 1; onChanged(); return this; } private int eosId_ = 2; /** *
       * &lt;/s&gt;
       * 
* * optional int32 eos_id = 42 [default = 2]; * @return Whether the eosId field is set. */ @java.lang.Override public boolean hasEosId() { return ((bitField1_ & 0x00000002) != 0); } /** *
       * &lt;/s&gt;
       * 
* * optional int32 eos_id = 42 [default = 2]; * @return The eosId. */ @java.lang.Override public int getEosId() { return eosId_; } /** *
       * &lt;/s&gt;
       * 
* * optional int32 eos_id = 42 [default = 2]; * @param value The eosId to set. * @return This builder for chaining. */ public Builder setEosId(int value) { eosId_ = value; bitField1_ |= 0x00000002; onChanged(); return this; } /** *
       * &lt;/s&gt;
       * 
* * optional int32 eos_id = 42 [default = 2]; * @return This builder for chaining. */ public Builder clearEosId() { bitField1_ = (bitField1_ & ~0x00000002); eosId_ = 2; onChanged(); return this; } private int padId_ = -1; /** *
       * &lt;pad&gt; (padding)
       * 
* * optional int32 pad_id = 43 [default = -1]; * @return Whether the padId field is set. */ @java.lang.Override public boolean hasPadId() { return ((bitField1_ & 0x00000004) != 0); } /** *
       * &lt;pad&gt; (padding)
       * 
* * optional int32 pad_id = 43 [default = -1]; * @return The padId. */ @java.lang.Override public int getPadId() { return padId_; } /** *
       * &lt;pad&gt; (padding)
       * 
* * optional int32 pad_id = 43 [default = -1]; * @param value The padId to set. * @return This builder for chaining. */ public Builder setPadId(int value) { padId_ = value; bitField1_ |= 0x00000004; onChanged(); return this; } /** *
       * &lt;pad&gt; (padding)
       * 
* * optional int32 pad_id = 43 [default = -1]; * @return This builder for chaining. */ public Builder clearPadId() { bitField1_ = (bitField1_ & ~0x00000004); padId_ = -1; onChanged(); return this; } private java.lang.Object unkPiece_ = ""; /** * optional string unk_piece = 45 [default = "<unk>"]; * @return Whether the unkPiece field is set. */ public boolean hasUnkPiece() { return ((bitField1_ & 0x00000008) != 0); } /** * optional string unk_piece = 45 [default = "<unk>"]; * @return The unkPiece. */ public java.lang.String getUnkPiece() { java.lang.Object ref = unkPiece_; if (!(ref instanceof java.lang.String)) { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { unkPiece_ = s; } return s; } else { return (java.lang.String) ref; } } /** * optional string unk_piece = 45 [default = "<unk>"]; * @return The bytes for unkPiece. */ public com.google.protobuf.ByteString getUnkPieceBytes() { java.lang.Object ref = unkPiece_; if (ref instanceof String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); unkPiece_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } /** * optional string unk_piece = 45 [default = "<unk>"]; * @param value The unkPiece to set. * @return This builder for chaining. */ public Builder setUnkPiece( java.lang.String value) { if (value == null) { throw new NullPointerException(); } unkPiece_ = value; bitField1_ |= 0x00000008; onChanged(); return this; } /** * optional string unk_piece = 45 [default = "<unk>"]; * @return This builder for chaining. */ public Builder clearUnkPiece() { unkPiece_ = getDefaultInstance().getUnkPiece(); bitField1_ = (bitField1_ & ~0x00000008); onChanged(); return this; } /** * optional string unk_piece = 45 [default = "<unk>"]; * @param value The bytes for unkPiece to set. * @return This builder for chaining. 
*/
      public Builder setUnkPieceBytes(
          com.google.protobuf.ByteString value) {
        if (value == null) { throw new NullPointerException(); }
        unkPiece_ = value;
        bitField1_ |= 0x00000008;
        onChanged();
        return this;
      }

      // Backing store for bos_piece; the accessors below keep either a String
      // or a ByteString here. Starts at the declared proto default so a fresh
      // builder reports the same value as the documented default.
      private java.lang.Object bosPiece_ = "<s>";
      /**
       * optional string bos_piece = 46 [default = "&lt;s&gt;"];
       * @return Whether the bosPiece field is set.
       */
      public boolean hasBosPiece() {
        return ((bitField1_ & 0x00000010) != 0);
      }
      /**
       * optional string bos_piece = 46 [default = "&lt;s&gt;"];
       * @return The bosPiece.
       */
      public java.lang.String getBosPiece() {
        java.lang.Object ref = bosPiece_;
        if (!(ref instanceof java.lang.String)) {
          com.google.protobuf.ByteString bs =
              (com.google.protobuf.ByteString) ref;
          java.lang.String s = bs.toStringUtf8();
          // Cache the decoded String only when the bytes are valid UTF-8.
          if (bs.isValidUtf8()) {
            bosPiece_ = s;
          }
          return s;
        } else {
          return (java.lang.String) ref;
        }
      }
      /**
       * optional string bos_piece = 46 [default = "&lt;s&gt;"];
       * @return The bytes for bosPiece.
       */
      public com.google.protobuf.ByteString
          getBosPieceBytes() {
        java.lang.Object ref = bosPiece_;
        if (ref instanceof String) {
          com.google.protobuf.ByteString b =
              com.google.protobuf.ByteString.copyFromUtf8(
                  (java.lang.String) ref);
          // Keep the encoded form so later byte reads skip re-encoding.
          bosPiece_ = b;
          return b;
        } else {
          return (com.google.protobuf.ByteString) ref;
        }
      }
      /**
       * optional string bos_piece = 46 [default = "&lt;s&gt;"];
       * @param value The bosPiece to set.
       * @return This builder for chaining.
       */
      public Builder setBosPiece(
          java.lang.String value) {
        if (value == null) { throw new NullPointerException(); }
        bosPiece_ = value;
        bitField1_ |= 0x00000010;
        onChanged();
        return this;
      }
      /**
       * optional string bos_piece = 46 [default = "&lt;s&gt;"];
       * @return This builder for chaining.
       */
      public Builder clearBosPiece() {
        // Reset to whatever the default instance reports for this field.
        bosPiece_ = getDefaultInstance().getBosPiece();
        bitField1_ = (bitField1_ & ~0x00000010);
        onChanged();
        return this;
      }
      /**
       * optional string bos_piece = 46 [default = "&lt;s&gt;"];
       * @param value The bytes for bosPiece to set.
       * @return This builder for chaining.
*/ public Builder setBosPieceBytes( com.google.protobuf.ByteString value) { if (value == null) { throw new NullPointerException(); } bosPiece_ = value; bitField1_ |= 0x00000010; onChanged(); return this; } private java.lang.Object eosPiece_ = ""; /** * optional string eos_piece = 47 [default = "</s>"]; * @return Whether the eosPiece field is set. */ public boolean hasEosPiece() { return ((bitField1_ & 0x00000020) != 0); } /** * optional string eos_piece = 47 [default = "</s>"]; * @return The eosPiece. */ public java.lang.String getEosPiece() { java.lang.Object ref = eosPiece_; if (!(ref instanceof java.lang.String)) { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { eosPiece_ = s; } return s; } else { return (java.lang.String) ref; } } /** * optional string eos_piece = 47 [default = "</s>"]; * @return The bytes for eosPiece. */ public com.google.protobuf.ByteString getEosPieceBytes() { java.lang.Object ref = eosPiece_; if (ref instanceof String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); eosPiece_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } /** * optional string eos_piece = 47 [default = "</s>"]; * @param value The eosPiece to set. * @return This builder for chaining. */ public Builder setEosPiece( java.lang.String value) { if (value == null) { throw new NullPointerException(); } eosPiece_ = value; bitField1_ |= 0x00000020; onChanged(); return this; } /** * optional string eos_piece = 47 [default = "</s>"]; * @return This builder for chaining. */ public Builder clearEosPiece() { eosPiece_ = getDefaultInstance().getEosPiece(); bitField1_ = (bitField1_ & ~0x00000020); onChanged(); return this; } /** * optional string eos_piece = 47 [default = "</s>"]; * @param value The bytes for eosPiece to set. * @return This builder for chaining. 
*/ public Builder setEosPieceBytes( com.google.protobuf.ByteString value) { if (value == null) { throw new NullPointerException(); } eosPiece_ = value; bitField1_ |= 0x00000020; onChanged(); return this; } private java.lang.Object padPiece_ = ""; /** * optional string pad_piece = 48 [default = "<pad>"]; * @return Whether the padPiece field is set. */ public boolean hasPadPiece() { return ((bitField1_ & 0x00000040) != 0); } /** * optional string pad_piece = 48 [default = "<pad>"]; * @return The padPiece. */ public java.lang.String getPadPiece() { java.lang.Object ref = padPiece_; if (!(ref instanceof java.lang.String)) { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { padPiece_ = s; } return s; } else { return (java.lang.String) ref; } } /** * optional string pad_piece = 48 [default = "<pad>"]; * @return The bytes for padPiece. */ public com.google.protobuf.ByteString getPadPieceBytes() { java.lang.Object ref = padPiece_; if (ref instanceof String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); padPiece_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } /** * optional string pad_piece = 48 [default = "<pad>"]; * @param value The padPiece to set. * @return This builder for chaining. */ public Builder setPadPiece( java.lang.String value) { if (value == null) { throw new NullPointerException(); } padPiece_ = value; bitField1_ |= 0x00000040; onChanged(); return this; } /** * optional string pad_piece = 48 [default = "<pad>"]; * @return This builder for chaining. */ public Builder clearPadPiece() { padPiece_ = getDefaultInstance().getPadPiece(); bitField1_ = (bitField1_ & ~0x00000040); onChanged(); return this; } /** * optional string pad_piece = 48 [default = "<pad>"]; * @param value The bytes for padPiece to set. * @return This builder for chaining. 
*/ public Builder setPadPieceBytes( com.google.protobuf.ByteString value) { if (value == null) { throw new NullPointerException(); } padPiece_ = value; bitField1_ |= 0x00000040; onChanged(); return this; } private java.lang.Object unkSurface_ = com.google.protobuf.Internal.stringDefaultValue(" \342\201\207 "); /** *
       * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
       * since this character can be useful both for user and
       * developer. We can easily figure out that &lt;unk&gt; is emitted.
       * 
* * optional string unk_surface = 44 [default = " \342\201\207 "]; * @return Whether the unkSurface field is set. */ public boolean hasUnkSurface() { return ((bitField1_ & 0x00000080) != 0); } /** *
       * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
       * since this character can be useful both for user and
       * developer. We can easily figure out that &lt;unk&gt; is emitted.
       * 
* * optional string unk_surface = 44 [default = " \342\201\207 "]; * @return The unkSurface. */ public java.lang.String getUnkSurface() { java.lang.Object ref = unkSurface_; if (!(ref instanceof java.lang.String)) { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { unkSurface_ = s; } return s; } else { return (java.lang.String) ref; } } /** *
       * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
       * since this character can be useful both for user and
       * developer. We can easily figure out that &lt;unk&gt; is emitted.
       * 
* * optional string unk_surface = 44 [default = " \342\201\207 "]; * @return The bytes for unkSurface. */ public com.google.protobuf.ByteString getUnkSurfaceBytes() { java.lang.Object ref = unkSurface_; if (ref instanceof String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); unkSurface_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } /** *
       * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
       * since this character can be useful both for user and
       * developer. We can easily figure out that &lt;unk&gt; is emitted.
       * 
* * optional string unk_surface = 44 [default = " \342\201\207 "]; * @param value The unkSurface to set. * @return This builder for chaining. */ public Builder setUnkSurface( java.lang.String value) { if (value == null) { throw new NullPointerException(); } unkSurface_ = value; bitField1_ |= 0x00000080; onChanged(); return this; } /** *
       * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
       * since this character can be useful both for user and
       * developer. We can easily figure out that &lt;unk&gt; is emitted.
       * 
* * optional string unk_surface = 44 [default = " \342\201\207 "]; * @return This builder for chaining. */ public Builder clearUnkSurface() { unkSurface_ = getDefaultInstance().getUnkSurface(); bitField1_ = (bitField1_ & ~0x00000080); onChanged(); return this; } /** *
       * Encodes &lt;unk&gt; into U+2047 (DOUBLE QUESTION MARK),
       * since this character can be useful both for user and
       * developer. We can easily figure out that &lt;unk&gt; is emitted.
       * 
* * optional string unk_surface = 44 [default = " \342\201\207 "]; * @param value The bytes for unkSurface to set. * @return This builder for chaining. */ public Builder setUnkSurfaceBytes( com.google.protobuf.ByteString value) { if (value == null) { throw new NullPointerException(); } unkSurface_ = value; bitField1_ |= 0x00000080; onChanged(); return this; } private boolean trainExtremelyLargeCorpus_ ; /** *
       * Increase bit depth to allow unigram model training on large
       * (>10M sentences) corpora. A Side-effect of enabling this flag
       * is increased memory usage.
       * 
* * optional bool train_extremely_large_corpus = 49 [default = false]; * @return Whether the trainExtremelyLargeCorpus field is set. */ @java.lang.Override public boolean hasTrainExtremelyLargeCorpus() { return ((bitField1_ & 0x00000100) != 0); } /** *
       * Increase bit depth to allow unigram model training on large
       * (>10M sentences) corpora. A Side-effect of enabling this flag
       * is increased memory usage.
       * 
* * optional bool train_extremely_large_corpus = 49 [default = false]; * @return The trainExtremelyLargeCorpus. */ @java.lang.Override public boolean getTrainExtremelyLargeCorpus() { return trainExtremelyLargeCorpus_; } /** *
       * Increase bit depth to allow unigram model training on large
       * (>10M sentences) corpora. A Side-effect of enabling this flag
       * is increased memory usage.
       * 
* * optional bool train_extremely_large_corpus = 49 [default = false]; * @param value The trainExtremelyLargeCorpus to set. * @return This builder for chaining. */ public Builder setTrainExtremelyLargeCorpus(boolean value) { trainExtremelyLargeCorpus_ = value; bitField1_ |= 0x00000100; onChanged(); return this; } /** *
       * Increase bit depth to allow unigram model training on large
       * (>10M sentences) corpora. A Side-effect of enabling this flag
       * is increased memory usage.
       * 
* * optional bool train_extremely_large_corpus = 49 [default = false]; * @return This builder for chaining. */ public Builder clearTrainExtremelyLargeCorpus() { bitField1_ = (bitField1_ & ~0x00000100); trainExtremelyLargeCorpus_ = false; onChanged(); return this; } @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.setUnknownFields(unknownFields); } @java.lang.Override public final Builder mergeUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.mergeUnknownFields(unknownFields); } // @@protoc_insertion_point(builder_scope:sentencepiece.TrainerSpec) } // @@protoc_insertion_point(class_scope:sentencepiece.TrainerSpec) private static final sentencepiece.SentencepieceModel.TrainerSpec DEFAULT_INSTANCE; static { DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.TrainerSpec(); } public static sentencepiece.SentencepieceModel.TrainerSpec getDefaultInstance() { return DEFAULT_INSTANCE; } @java.lang.Deprecated public static final com.google.protobuf.Parser PARSER = new com.google.protobuf.AbstractParser() { @java.lang.Override public TrainerSpec parsePartialFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { Builder builder = newBuilder(); try { builder.mergeFrom(input, extensionRegistry); } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.setUnfinishedMessage(builder.buildPartial()); } catch (com.google.protobuf.UninitializedMessageException e) { throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial()); } catch (java.io.IOException e) { throw new com.google.protobuf.InvalidProtocolBufferException(e) .setUnfinishedMessage(builder.buildPartial()); } return builder.buildPartial(); } }; public static com.google.protobuf.Parser parser() { return PARSER; } @java.lang.Override public 
com.google.protobuf.Parser getParserForType() { return PARSER; } @java.lang.Override public sentencepiece.SentencepieceModel.TrainerSpec getDefaultInstanceForType() { return DEFAULT_INSTANCE; } } public interface NormalizerSpecOrBuilder extends // @@protoc_insertion_point(interface_extends:sentencepiece.NormalizerSpec) com.google.protobuf.GeneratedMessageV3. ExtendableMessageOrBuilder { /** *
     * name of normalization rule.
     * 
* * optional string name = 1; * @return Whether the name field is set. */ boolean hasName(); /** *
     * name of normalization rule.
     * 
* * optional string name = 1; * @return The name. */ java.lang.String getName(); /** *
     * name of normalization rule.
     * 
* * optional string name = 1; * @return The bytes for name. */ com.google.protobuf.ByteString getNameBytes(); /** *
     * Pre-compiled normalization rule created by
     * Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
     * Usually this field is set by Builder::GetNormalizerSpec() method.
     * 
* * optional bytes precompiled_charsmap = 2; * @return Whether the precompiledCharsmap field is set. */ boolean hasPrecompiledCharsmap(); /** *
     * Pre-compiled normalization rule created by
     * Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
     * Usually this field is set by Builder::GetNormalizerSpec() method.
     * 
* * optional bytes precompiled_charsmap = 2; * @return The precompiledCharsmap. */ com.google.protobuf.ByteString getPrecompiledCharsmap(); /** *
     * Adds dummy whitespace at the beginning of text in order to
     * treat "world" in "world" and "hello world" in the same way.
     * 
* * optional bool add_dummy_prefix = 3 [default = true]; * @return Whether the addDummyPrefix field is set. */ boolean hasAddDummyPrefix(); /** *
     * Adds dummy whitespace at the beginning of text in order to
     * treat "world" in "world" and "hello world" in the same way.
     * 
* * optional bool add_dummy_prefix = 3 [default = true]; * @return The addDummyPrefix. */ boolean getAddDummyPrefix(); /** *
     * Removes leading, trailing, and duplicate internal whitespace.
     * 
* * optional bool remove_extra_whitespaces = 4 [default = true]; * @return Whether the removeExtraWhitespaces field is set. */ boolean hasRemoveExtraWhitespaces(); /** *
     * Removes leading, trailing, and duplicate internal whitespace.
     * 
* * optional bool remove_extra_whitespaces = 4 [default = true]; * @return The removeExtraWhitespaces. */ boolean getRemoveExtraWhitespaces(); /** *
     * Replaces whitespace with meta symbol.
     * This field must be true to train sentence piece model.
     * 
* * optional bool escape_whitespaces = 5 [default = true]; * @return Whether the escapeWhitespaces field is set. */ boolean hasEscapeWhitespaces(); /** *
     * Replaces whitespace with meta symbol.
     * This field must be true to train sentence piece model.
     * 
* * optional bool escape_whitespaces = 5 [default = true]; * @return The escapeWhitespaces. */ boolean getEscapeWhitespaces(); /** *
     * Custom normalization rule file in TSV format.
     * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
     * This field is only used in SentencePieceTrainer::Train() method, which
     * compiles the rule into the binary rule stored in `precompiled_charsmap`.
     * 
* * optional string normalization_rule_tsv = 6; * @return Whether the normalizationRuleTsv field is set. */ boolean hasNormalizationRuleTsv(); /** *
     * Custom normalization rule file in TSV format.
     * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
     * This field is only used in SentencePieceTrainer::Train() method, which
     * compiles the rule into the binary rule stored in `precompiled_charsmap`.
     * 
* * optional string normalization_rule_tsv = 6; * @return The normalizationRuleTsv. */ java.lang.String getNormalizationRuleTsv(); /** *
     * Custom normalization rule file in TSV format.
     * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
     * This field is only used in SentencePieceTrainer::Train() method, which
     * compiles the rule into the binary rule stored in `precompiled_charsmap`.
     * 
* * optional string normalization_rule_tsv = 6; * @return The bytes for normalizationRuleTsv. */ com.google.protobuf.ByteString getNormalizationRuleTsvBytes(); } /** *
   * NormalizerSpec encodes various parameters for string normalization.
   * 
* * Protobuf type {@code sentencepiece.NormalizerSpec} */ public static final class NormalizerSpec extends com.google.protobuf.GeneratedMessageV3.ExtendableMessage< NormalizerSpec> implements // @@protoc_insertion_point(message_implements:sentencepiece.NormalizerSpec) NormalizerSpecOrBuilder { private static final long serialVersionUID = 0L; // Use NormalizerSpec.newBuilder() to construct. private NormalizerSpec(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder builder) { super(builder); } private NormalizerSpec() { name_ = ""; precompiledCharsmap_ = com.google.protobuf.ByteString.EMPTY; addDummyPrefix_ = true; removeExtraWhitespaces_ = true; escapeWhitespaces_ = true; normalizationRuleTsv_ = ""; } @java.lang.Override @SuppressWarnings({"unused"}) protected java.lang.Object newInstance( UnusedPrivateParameter unused) { return new NormalizerSpec(); } public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_fieldAccessorTable .ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.NormalizerSpec.class, sentencepiece.SentencepieceModel.NormalizerSpec.Builder.class); } private int bitField0_; public static final int NAME_FIELD_NUMBER = 1; @SuppressWarnings("serial") private volatile java.lang.Object name_ = ""; /** *
     * name of normalization rule.
     * 
* * optional string name = 1; * @return Whether the name field is set. */ @java.lang.Override public boolean hasName() { return ((bitField0_ & 0x00000001) != 0); } /** *
     * name of normalization rule.
     * 
* * optional string name = 1; * @return The name. */ @java.lang.Override public java.lang.String getName() { java.lang.Object ref = name_; if (ref instanceof java.lang.String) { return (java.lang.String) ref; } else { com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; java.lang.String s = bs.toStringUtf8(); if (bs.isValidUtf8()) { name_ = s; } return s; } } /** *
     * name of normalization rule.
     * 
* * optional string name = 1; * @return The bytes for name. */ @java.lang.Override public com.google.protobuf.ByteString getNameBytes() { java.lang.Object ref = name_; if (ref instanceof java.lang.String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); name_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } public static final int PRECOMPILED_CHARSMAP_FIELD_NUMBER = 2; private com.google.protobuf.ByteString precompiledCharsmap_ = com.google.protobuf.ByteString.EMPTY; /** *
     * Pre-compiled normalization rule created by
     * Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
     * Usually this field is set by Builder::GetNormalizerSpec() method.
     * 
* * optional bytes precompiled_charsmap = 2; * @return Whether the precompiledCharsmap field is set. */ @java.lang.Override public boolean hasPrecompiledCharsmap() { return ((bitField0_ & 0x00000002) != 0); } /** *
     * Pre-compiled normalization rule created by
     * Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
     * Usually this field is set by Builder::GetNormalizerSpec() method.
     * 
* * optional bytes precompiled_charsmap = 2; * @return The precompiledCharsmap. */ @java.lang.Override public com.google.protobuf.ByteString getPrecompiledCharsmap() { return precompiledCharsmap_; } public static final int ADD_DUMMY_PREFIX_FIELD_NUMBER = 3; private boolean addDummyPrefix_ = true; /** *
     * Adds dummy whitespace at the beginning of text in order to
     * treat "world" in "world" and "hello world" in the same way.
     * 
* * optional bool add_dummy_prefix = 3 [default = true]; * @return Whether the addDummyPrefix field is set. */ @java.lang.Override public boolean hasAddDummyPrefix() { return ((bitField0_ & 0x00000004) != 0); } /** *
     * Adds dummy whitespace at the beginning of text in order to
     * treat "world" in "world" and "hello world" in the same way.
     * 
* * optional bool add_dummy_prefix = 3 [default = true]; * @return The addDummyPrefix. */ @java.lang.Override public boolean getAddDummyPrefix() { return addDummyPrefix_; } public static final int REMOVE_EXTRA_WHITESPACES_FIELD_NUMBER = 4; private boolean removeExtraWhitespaces_ = true; /** *
     * Removes leading, trailing, and duplicate internal whitespace.
     * 
* * optional bool remove_extra_whitespaces = 4 [default = true]; * @return Whether the removeExtraWhitespaces field is set. */ @java.lang.Override public boolean hasRemoveExtraWhitespaces() { return ((bitField0_ & 0x00000008) != 0); } /** *
     * Removes leading, trailing, and duplicate internal whitespace.
     * 
* * optional bool remove_extra_whitespaces = 4 [default = true]; * @return The removeExtraWhitespaces. */ @java.lang.Override public boolean getRemoveExtraWhitespaces() { return removeExtraWhitespaces_; } public static final int ESCAPE_WHITESPACES_FIELD_NUMBER = 5; private boolean escapeWhitespaces_ = true; /** *
     * Reports whether {@code escape_whitespaces} has been set on this message. The
     * option replaces whitespace with meta symbol; it must be true to train a
     * sentence piece model.
     *
     * Proto field: {@code optional bool escape_whitespaces = 5 [default = true];}
     *
     * @return Whether the escapeWhitespaces field is set.
     */
    @java.lang.Override
    public boolean hasEscapeWhitespaces() {
      // Presence of this field is recorded in bit 0x00000010 of bitField0_.
      final boolean isSet = (bitField0_ & 0x00000010) != 0;
      return isSet;
    }

    /**
     *
     * Returns the value of {@code escape_whitespaces}: whether whitespace is replaced
     * with meta symbol. This field must be true to train a sentence piece model.
     *
     * Proto field: {@code optional bool escape_whitespaces = 5 [default = true];}
     *
     * @return The escapeWhitespaces.
     */
    @java.lang.Override
    public boolean getEscapeWhitespaces() {
      return this.escapeWhitespaces_;
    }

    /** Proto field number of {@code normalization_rule_tsv}. */
    public static final int NORMALIZATION_RULE_TSV_FIELD_NUMBER = 6;

    /**
     * Stored value of {@code normalization_rule_tsv}; initialized to the empty string.
     * Declared as {@code Object} because the accessors treat it as either a
     * {@code String} or a {@code ByteString}.
     */
    @SuppressWarnings("serial")
    private volatile java.lang.Object normalizationRuleTsv_ = "";

    /**
     *
     * Reports whether {@code normalization_rule_tsv} has been set on this message.
     * The field names a custom normalization rule file in TSV format, see
     * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
     * It is only used in SentencePieceTrainer::Train() method, which compiles the
     * rule into the binary rule stored in `precompiled_charsmap`.
     *
     * Proto field: {@code optional string normalization_rule_tsv = 6;}
     *
     * @return Whether the normalizationRuleTsv field is set.
     */
    @java.lang.Override
    public boolean hasNormalizationRuleTsv() {
      // Presence of this field is recorded in bit 0x00000020 of bitField0_.
      final int presenceBit = bitField0_ & 0x00000020;
      return presenceBit != 0;
    }

    /**
     *
     * Returns {@code normalization_rule_tsv} as a {@code String}. The field names a
     * custom normalization rule file in TSV format, see
     * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
     * It is only used in SentencePieceTrainer::Train() method, which compiles the
     * rule into the binary rule stored in `precompiled_charsmap`.
     *
     * If the stored object is a {@code ByteString}, it is decoded with
     * {@code toStringUtf8()}; the decoded string replaces the stored object only when
     * the bytes are valid UTF-8.
     *
     * Proto field: {@code optional string normalization_rule_tsv = 6;}
     *
     * @return The normalizationRuleTsv.
     */
    @java.lang.Override
    public java.lang.String getNormalizationRuleTsv() {
      // Read the volatile field once and work on the local copy.
      final java.lang.Object stored = normalizationRuleTsv_;
      if (stored instanceof java.lang.String) {
        return (java.lang.String) stored;
      }
      final com.google.protobuf.ByteString rawBytes =
          (com.google.protobuf.ByteString) stored;
      final java.lang.String decoded = rawBytes.toStringUtf8();
      if (rawBytes.isValidUtf8()) {
        // Cache the decoded form so later calls take the String branch.
        normalizationRuleTsv_ = decoded;
      }
      return decoded;
    }

    /**
     *
     * Custom normalization rule file in TSV format.
     * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
     * This field is only used in SentencePieceTrainer::Train() method, which
     * compiles the rule into the binary rule stored in `precompiled_charsmap`.
     * 
* * optional string normalization_rule_tsv = 6; * @return The bytes for normalizationRuleTsv. */ @java.lang.Override public com.google.protobuf.ByteString getNormalizationRuleTsvBytes() { java.lang.Object ref = normalizationRuleTsv_; if (ref instanceof java.lang.String) { com.google.protobuf.ByteString b = com.google.protobuf.ByteString.copyFromUtf8( (java.lang.String) ref); normalizationRuleTsv_ = b; return b; } else { return (com.google.protobuf.ByteString) ref; } } private byte memoizedIsInitialized = -1; @java.lang.Override public final boolean isInitialized() { byte isInitialized = memoizedIsInitialized; if (isInitialized == 1) return true; if (isInitialized == 0) return false; if (!extensionsAreInitialized()) { memoizedIsInitialized = 0; return false; } memoizedIsInitialized = 1; return true; } @java.lang.Override public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException { com.google.protobuf.GeneratedMessageV3 .ExtendableMessage.ExtensionWriter extensionWriter = newExtensionWriter(); if (((bitField0_ & 0x00000001) != 0)) { com.google.protobuf.GeneratedMessageV3.writeString(output, 1, name_); } if (((bitField0_ & 0x00000002) != 0)) { output.writeBytes(2, precompiledCharsmap_); } if (((bitField0_ & 0x00000004) != 0)) { output.writeBool(3, addDummyPrefix_); } if (((bitField0_ & 0x00000008) != 0)) { output.writeBool(4, removeExtraWhitespaces_); } if (((bitField0_ & 0x00000010) != 0)) { output.writeBool(5, escapeWhitespaces_); } if (((bitField0_ & 0x00000020) != 0)) { com.google.protobuf.GeneratedMessageV3.writeString(output, 6, normalizationRuleTsv_); } extensionWriter.writeUntil(536870912, output); getUnknownFields().writeTo(output); } @java.lang.Override public int getSerializedSize() { int size = memoizedSize; if (size != -1) return size; size = 0; if (((bitField0_ & 0x00000001) != 0)) { size += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, name_); } if (((bitField0_ & 0x00000002) != 0)) { size += 
com.google.protobuf.CodedOutputStream .computeBytesSize(2, precompiledCharsmap_); } if (((bitField0_ & 0x00000004) != 0)) { size += com.google.protobuf.CodedOutputStream .computeBoolSize(3, addDummyPrefix_); } if (((bitField0_ & 0x00000008) != 0)) { size += com.google.protobuf.CodedOutputStream .computeBoolSize(4, removeExtraWhitespaces_); } if (((bitField0_ & 0x00000010) != 0)) { size += com.google.protobuf.CodedOutputStream .computeBoolSize(5, escapeWhitespaces_); } if (((bitField0_ & 0x00000020) != 0)) { size += com.google.protobuf.GeneratedMessageV3.computeStringSize(6, normalizationRuleTsv_); } size += extensionsSerializedSize(); size += getUnknownFields().getSerializedSize(); memoizedSize = size; return size; } @java.lang.Override public boolean equals(final java.lang.Object obj) { if (obj == this) { return true; } if (!(obj instanceof sentencepiece.SentencepieceModel.NormalizerSpec)) { return super.equals(obj); } sentencepiece.SentencepieceModel.NormalizerSpec other = (sentencepiece.SentencepieceModel.NormalizerSpec) obj; if (hasName() != other.hasName()) return false; if (hasName()) { if (!getName() .equals(other.getName())) return false; } if (hasPrecompiledCharsmap() != other.hasPrecompiledCharsmap()) return false; if (hasPrecompiledCharsmap()) { if (!getPrecompiledCharsmap() .equals(other.getPrecompiledCharsmap())) return false; } if (hasAddDummyPrefix() != other.hasAddDummyPrefix()) return false; if (hasAddDummyPrefix()) { if (getAddDummyPrefix() != other.getAddDummyPrefix()) return false; } if (hasRemoveExtraWhitespaces() != other.hasRemoveExtraWhitespaces()) return false; if (hasRemoveExtraWhitespaces()) { if (getRemoveExtraWhitespaces() != other.getRemoveExtraWhitespaces()) return false; } if (hasEscapeWhitespaces() != other.hasEscapeWhitespaces()) return false; if (hasEscapeWhitespaces()) { if (getEscapeWhitespaces() != other.getEscapeWhitespaces()) return false; } if (hasNormalizationRuleTsv() != other.hasNormalizationRuleTsv()) return false; if 
(hasNormalizationRuleTsv()) { if (!getNormalizationRuleTsv() .equals(other.getNormalizationRuleTsv())) return false; } if (!getUnknownFields().equals(other.getUnknownFields())) return false; if (!getExtensionFields().equals(other.getExtensionFields())) return false; return true; } @java.lang.Override public int hashCode() { if (memoizedHashCode != 0) { return memoizedHashCode; } int hash = 41; hash = (19 * hash) + getDescriptor().hashCode(); if (hasName()) { hash = (37 * hash) + NAME_FIELD_NUMBER; hash = (53 * hash) + getName().hashCode(); } if (hasPrecompiledCharsmap()) { hash = (37 * hash) + PRECOMPILED_CHARSMAP_FIELD_NUMBER; hash = (53 * hash) + getPrecompiledCharsmap().hashCode(); } if (hasAddDummyPrefix()) { hash = (37 * hash) + ADD_DUMMY_PREFIX_FIELD_NUMBER; hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean( getAddDummyPrefix()); } if (hasRemoveExtraWhitespaces()) { hash = (37 * hash) + REMOVE_EXTRA_WHITESPACES_FIELD_NUMBER; hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean( getRemoveExtraWhitespaces()); } if (hasEscapeWhitespaces()) { hash = (37 * hash) + ESCAPE_WHITESPACES_FIELD_NUMBER; hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean( getEscapeWhitespaces()); } if (hasNormalizationRuleTsv()) { hash = (37 * hash) + NORMALIZATION_RULE_TSV_FIELD_NUMBER; hash = (53 * hash) + getNormalizationRuleTsv().hashCode(); } hash = hashFields(hash, getExtensionFields()); hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; } public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom( java.nio.ByteBuffer data) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom( java.nio.ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data, extensionRegistry); } public static 
sentencepiece.SentencepieceModel.NormalizerSpec parseFrom( com.google.protobuf.ByteString data) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom( com.google.protobuf.ByteString data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data, extensionRegistry); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(byte[] data) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom( byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data, extensionRegistry); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(java.io.InputStream input) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseWithIOException(PARSER, input); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom( java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseWithIOException(PARSER, input, extensionRegistry); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseDelimitedFrom(java.io.InputStream input) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseDelimitedWithIOException(PARSER, input); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseDelimitedFrom( java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseDelimitedWithIOException(PARSER, input, extensionRegistry); } public static 
sentencepiece.SentencepieceModel.NormalizerSpec parseFrom( com.google.protobuf.CodedInputStream input) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseWithIOException(PARSER, input); } public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseWithIOException(PARSER, input, extensionRegistry); } @java.lang.Override public Builder newBuilderForType() { return newBuilder(); } public static Builder newBuilder() { return DEFAULT_INSTANCE.toBuilder(); } public static Builder newBuilder(sentencepiece.SentencepieceModel.NormalizerSpec prototype) { return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype); } @java.lang.Override public Builder toBuilder() { return this == DEFAULT_INSTANCE ? new Builder() : new Builder().mergeFrom(this); } @java.lang.Override protected Builder newBuilderForType( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { Builder builder = new Builder(parent); return builder; } /** *
     * NormalizerSpec encodes various parameters for string normalization.
     * 
* * Protobuf type {@code sentencepiece.NormalizerSpec} */ public static final class Builder extends com.google.protobuf.GeneratedMessageV3.ExtendableBuilder< sentencepiece.SentencepieceModel.NormalizerSpec, Builder> implements // @@protoc_insertion_point(builder_implements:sentencepiece.NormalizerSpec) sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder { public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_fieldAccessorTable .ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.NormalizerSpec.class, sentencepiece.SentencepieceModel.NormalizerSpec.Builder.class); } // Construct using sentencepiece.SentencepieceModel.NormalizerSpec.newBuilder() private Builder() { } private Builder( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { super(parent); } @java.lang.Override public Builder clear() { super.clear(); bitField0_ = 0; name_ = ""; precompiledCharsmap_ = com.google.protobuf.ByteString.EMPTY; addDummyPrefix_ = true; removeExtraWhitespaces_ = true; escapeWhitespaces_ = true; normalizationRuleTsv_ = ""; return this; } @java.lang.Override public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_descriptor; } @java.lang.Override public sentencepiece.SentencepieceModel.NormalizerSpec getDefaultInstanceForType() { return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance(); } @java.lang.Override public sentencepiece.SentencepieceModel.NormalizerSpec build() { sentencepiece.SentencepieceModel.NormalizerSpec result = buildPartial(); if (!result.isInitialized()) { throw 
newUninitializedMessageException(result); } return result; } @java.lang.Override public sentencepiece.SentencepieceModel.NormalizerSpec buildPartial() { sentencepiece.SentencepieceModel.NormalizerSpec result = new sentencepiece.SentencepieceModel.NormalizerSpec(this); if (bitField0_ != 0) { buildPartial0(result); } onBuilt(); return result; } private void buildPartial0(sentencepiece.SentencepieceModel.NormalizerSpec result) { int from_bitField0_ = bitField0_; int to_bitField0_ = 0; if (((from_bitField0_ & 0x00000001) != 0)) { result.name_ = name_; to_bitField0_ |= 0x00000001; } if (((from_bitField0_ & 0x00000002) != 0)) { result.precompiledCharsmap_ = precompiledCharsmap_; to_bitField0_ |= 0x00000002; } if (((from_bitField0_ & 0x00000004) != 0)) { result.addDummyPrefix_ = addDummyPrefix_; to_bitField0_ |= 0x00000004; } if (((from_bitField0_ & 0x00000008) != 0)) { result.removeExtraWhitespaces_ = removeExtraWhitespaces_; to_bitField0_ |= 0x00000008; } if (((from_bitField0_ & 0x00000010) != 0)) { result.escapeWhitespaces_ = escapeWhitespaces_; to_bitField0_ |= 0x00000010; } if (((from_bitField0_ & 0x00000020) != 0)) { result.normalizationRuleTsv_ = normalizationRuleTsv_; to_bitField0_ |= 0x00000020; } result.bitField0_ |= to_bitField0_; } @java.lang.Override public Builder clone() { return super.clone(); } @java.lang.Override public Builder setField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.setField(field, value); } @java.lang.Override public Builder clearField( com.google.protobuf.Descriptors.FieldDescriptor field) { return super.clearField(field); } @java.lang.Override public Builder clearOneof( com.google.protobuf.Descriptors.OneofDescriptor oneof) { return super.clearOneof(oneof); } @java.lang.Override public Builder setRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, int index, java.lang.Object value) { return super.setRepeatedField(field, index, value); } @java.lang.Override public 
Builder addRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.addRepeatedField(field, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.NormalizerSpec, Type> extension, Type value) { return super.setExtension(extension, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.NormalizerSpec, java.util.List> extension, int index, Type value) { return super.setExtension(extension, index, value); } @java.lang.Override public Builder addExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.NormalizerSpec, java.util.List> extension, Type value) { return super.addExtension(extension, value); } @java.lang.Override public Builder clearExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.NormalizerSpec, T> extension) { return super.clearExtension(extension); } @java.lang.Override public Builder mergeFrom(com.google.protobuf.Message other) { if (other instanceof sentencepiece.SentencepieceModel.NormalizerSpec) { return mergeFrom((sentencepiece.SentencepieceModel.NormalizerSpec)other); } else { super.mergeFrom(other); return this; } } public Builder mergeFrom(sentencepiece.SentencepieceModel.NormalizerSpec other) { if (other == sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance()) return this; if (other.hasName()) { name_ = other.name_; bitField0_ |= 0x00000001; onChanged(); } if (other.hasPrecompiledCharsmap()) { setPrecompiledCharsmap(other.getPrecompiledCharsmap()); } if (other.hasAddDummyPrefix()) { setAddDummyPrefix(other.getAddDummyPrefix()); } if (other.hasRemoveExtraWhitespaces()) { setRemoveExtraWhitespaces(other.getRemoveExtraWhitespaces()); } if (other.hasEscapeWhitespaces()) { 
setEscapeWhitespaces(other.getEscapeWhitespaces()); } if (other.hasNormalizationRuleTsv()) { normalizationRuleTsv_ = other.normalizationRuleTsv_; bitField0_ |= 0x00000020; onChanged(); } this.mergeExtensionFields(other); this.mergeUnknownFields(other.getUnknownFields()); onChanged(); return this; } @java.lang.Override public final boolean isInitialized() { if (!extensionsAreInitialized()) { return false; } return true; } @java.lang.Override public Builder mergeFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { if (extensionRegistry == null) { throw new java.lang.NullPointerException(); } try { boolean done = false; while (!done) { int tag = input.readTag(); switch (tag) { case 0: done = true; break; case 10: { name_ = input.readBytes(); bitField0_ |= 0x00000001; break; } // case 10 case 18: { precompiledCharsmap_ = input.readBytes(); bitField0_ |= 0x00000002; break; } // case 18 case 24: { addDummyPrefix_ = input.readBool(); bitField0_ |= 0x00000004; break; } // case 24 case 32: { removeExtraWhitespaces_ = input.readBool(); bitField0_ |= 0x00000008; break; } // case 32 case 40: { escapeWhitespaces_ = input.readBool(); bitField0_ |= 0x00000010; break; } // case 40 case 50: { normalizationRuleTsv_ = input.readBytes(); bitField0_ |= 0x00000020; break; } // case 50 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { done = true; // was an endgroup tag } break; } // default: } // switch (tag) } // while (!done) } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.unwrapIOException(); } finally { onChanged(); } // finally return this; } private int bitField0_; private java.lang.Object name_ = ""; /** *
       * Reports whether {@code name} (the name of the normalization rule) has been
       * set on this builder.
       *
       * Proto field: {@code optional string name = 1;}
       *
       * @return Whether the name field is set.
       */
      public boolean hasName() {
        // Presence of this field is recorded in bit 0x00000001 of bitField0_.
        final int presenceBit = bitField0_ & 0x00000001;
        return presenceBit != 0;
      }

      /**
       *
       * Returns {@code name} (the name of the normalization rule) as a {@code String}.
       *
       * If the stored object is a {@code ByteString}, it is decoded with
       * {@code toStringUtf8()}; the decoded string replaces the stored object only
       * when the bytes are valid UTF-8.
       *
       * Proto field: {@code optional string name = 1;}
       *
       * @return The name.
       */
      public java.lang.String getName() {
        final java.lang.Object stored = name_;
        if (stored instanceof java.lang.String) {
          return (java.lang.String) stored;
        }
        final com.google.protobuf.ByteString rawBytes =
            (com.google.protobuf.ByteString) stored;
        final java.lang.String decoded = rawBytes.toStringUtf8();
        if (rawBytes.isValidUtf8()) {
          // Cache the decoded form so later calls take the String branch.
          name_ = decoded;
        }
        return decoded;
      }

      /**
       *
       * Returns {@code name} (the name of the normalization rule) as a
       * {@code ByteString}.
       *
       * If the stored object is a {@code String}, it is encoded with
       * {@code ByteString.copyFromUtf8} and the encoded form replaces the stored
       * object.
       *
       * Proto field: {@code optional string name = 1;}
       *
       * @return The bytes for name.
       */
      public com.google.protobuf.ByteString getNameBytes() {
        final java.lang.Object stored = name_;
        if (!(stored instanceof String)) {
          return (com.google.protobuf.ByteString) stored;
        }
        final com.google.protobuf.ByteString encoded =
            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) stored);
        name_ = encoded;
        return encoded;
      }

      /**
       *
       * Sets {@code name} (the name of the normalization rule), marks the field as
       * set, and calls {@code onChanged()}.
       *
       * Proto field: {@code optional string name = 1;}
       *
       * @param value The name to set.
       * @return This builder for chaining.
       * @throws NullPointerException if {@code value} is null.
       */
      public Builder setName(java.lang.String value) {
        if (null == value) {
          throw new NullPointerException();
        }
        this.name_ = value;
        this.bitField0_ = this.bitField0_ | 0x00000001;
        onChanged();
        return this;
      }

      /**
       *
       * Clears {@code name} (the name of the normalization rule): restores the value
       * held by the default instance, clears the presence bit, and calls
       * {@code onChanged()}.
       *
       * Proto field: {@code optional string name = 1;}
       *
       * @return This builder for chaining.
       */
      public Builder clearName() {
        this.name_ = getDefaultInstance().getName();
        this.bitField0_ &= ~0x00000001;
        onChanged();
        return this;
      }

      /**
       *
       * Sets {@code name} (the name of the normalization rule) from its byte form,
       * marks the field as set, and calls {@code onChanged()}. The bytes are stored
       * as given.
       *
       * Proto field: {@code optional string name = 1;}
       *
       * @param value The bytes for name to set.
       * @return This builder for chaining.
       * @throws NullPointerException if {@code value} is null.
       */
      public Builder setNameBytes(com.google.protobuf.ByteString value) {
        if (null == value) {
          throw new NullPointerException();
        }
        this.name_ = value;
        this.bitField0_ = this.bitField0_ | 0x00000001;
        onChanged();
        return this;
      }

      /** Builder-side value of {@code precompiled_charsmap}; initially empty. */
      private com.google.protobuf.ByteString precompiledCharsmap_ =
          com.google.protobuf.ByteString.EMPTY;

      /**
       *
       * Reports whether {@code precompiled_charsmap} has been set on this builder.
       * The field holds the pre-compiled normalization rule created by
       * Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
       * Usually this field is set by Builder::GetNormalizerSpec() method.
       *
       * Proto field: {@code optional bytes precompiled_charsmap = 2;}
       *
       * @return Whether the precompiledCharsmap field is set.
       */
      @java.lang.Override
      public boolean hasPrecompiledCharsmap() {
        // Presence of this field is recorded in bit 0x00000002 of bitField0_.
        return 0 != (bitField0_ & 0x00000002);
      }

      /**
       *
       * Pre-compiled normalization rule created by
       * Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
       * Usually this field is set by Builder::GetNormalizerSpec() method.
       * 
* * optional bytes precompiled_charsmap = 2; * @return The precompiledCharsmap. */ @java.lang.Override public com.google.protobuf.ByteString getPrecompiledCharsmap() { return precompiledCharsmap_; } /** *
       * Sets {@code precompiled_charsmap}, marks the field as set, and calls
       * {@code onChanged()}. The field holds the pre-compiled normalization rule
       * created by Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap()
       * method. Usually this field is set by Builder::GetNormalizerSpec() method.
       *
       * Proto field: {@code optional bytes precompiled_charsmap = 2;}
       *
       * @param value The precompiledCharsmap to set.
       * @return This builder for chaining.
       * @throws NullPointerException if {@code value} is null.
       */
      public Builder setPrecompiledCharsmap(com.google.protobuf.ByteString value) {
        if (null == value) {
          throw new NullPointerException();
        }
        this.precompiledCharsmap_ = value;
        this.bitField0_ = this.bitField0_ | 0x00000002;
        onChanged();
        return this;
      }

      /**
       *
       * Clears {@code precompiled_charsmap}: clears the presence bit, restores the
       * value held by the default instance, and calls {@code onChanged()}. The field
       * holds the pre-compiled normalization rule created by
       * Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
       * Usually this field is set by Builder::GetNormalizerSpec() method.
       *
       * Proto field: {@code optional bytes precompiled_charsmap = 2;}
       *
       * @return This builder for chaining.
       */
      public Builder clearPrecompiledCharsmap() {
        this.bitField0_ &= ~0x00000002;
        this.precompiledCharsmap_ = getDefaultInstance().getPrecompiledCharsmap();
        onChanged();
        return this;
      }

      /** Builder-side value of {@code add_dummy_prefix}; initialized to {@code true}. */
      private boolean addDummyPrefix_ = true;

      /**
       *
       * Reports whether {@code add_dummy_prefix} has been set on this builder. The
       * option adds dummy whitespace at the beginning of text in order to treat
       * "world" in "world" and "hello world" in the same way.
       *
       * Proto field: {@code optional bool add_dummy_prefix = 3 [default = true];}
       *
       * @return Whether the addDummyPrefix field is set.
       */
      @java.lang.Override
      public boolean hasAddDummyPrefix() {
        // Presence of this field is recorded in bit 0x00000004 of bitField0_.
        final boolean isSet = (bitField0_ & 0x00000004) != 0;
        return isSet;
      }

      /**
       *
       * Adds dummy whitespace at the beginning of text in order to
       * treat "world" in "world" and "hello world" in the same way.
       * 
* * optional bool add_dummy_prefix = 3 [default = true]; * @return The addDummyPrefix. */ @java.lang.Override public boolean getAddDummyPrefix() { return addDummyPrefix_; } /** *
       * Sets {@code add_dummy_prefix}, marks the field as set, and calls
       * {@code onChanged()}. The option adds dummy whitespace at the beginning of
       * text in order to treat "world" in "world" and "hello world" in the same way.
       *
       * Proto field: {@code optional bool add_dummy_prefix = 3 [default = true];}
       *
       * @param value The addDummyPrefix to set.
       * @return This builder for chaining.
       */
      public Builder setAddDummyPrefix(boolean value) {
        this.addDummyPrefix_ = value;
        this.bitField0_ = this.bitField0_ | 0x00000004;
        onChanged();
        return this;
      }

      /**
       *
       * Clears {@code add_dummy_prefix}: clears the presence bit, resets the value to
       * {@code true}, and calls {@code onChanged()}. The option adds dummy whitespace
       * at the beginning of text in order to treat "world" in "world" and
       * "hello world" in the same way.
       *
       * Proto field: {@code optional bool add_dummy_prefix = 3 [default = true];}
       *
       * @return This builder for chaining.
       */
      public Builder clearAddDummyPrefix() {
        this.bitField0_ &= ~0x00000004;
        this.addDummyPrefix_ = true;
        onChanged();
        return this;
      }

      /** Builder-side value of {@code remove_extra_whitespaces}; initialized to {@code true}. */
      private boolean removeExtraWhitespaces_ = true;

      /**
       *
       * Reports whether {@code remove_extra_whitespaces} has been set on this
       * builder. The option removes leading, trailing, and duplicate internal
       * whitespace.
       *
       * Proto field: {@code optional bool remove_extra_whitespaces = 4 [default = true];}
       *
       * @return Whether the removeExtraWhitespaces field is set.
       */
      @java.lang.Override
      public boolean hasRemoveExtraWhitespaces() {
        // Presence of this field is recorded in bit 0x00000008 of bitField0_.
        final int presenceBit = bitField0_ & 0x00000008;
        return presenceBit != 0;
      }

      /**
       *
       * Removes leading, trailing, and duplicate internal whitespace.
       * 
* * optional bool remove_extra_whitespaces = 4 [default = true]; * @return The removeExtraWhitespaces. */ @java.lang.Override public boolean getRemoveExtraWhitespaces() { return removeExtraWhitespaces_; } /** *
       * Sets {@code remove_extra_whitespaces}, marks the field as set, and calls
       * {@code onChanged()}. The option removes leading, trailing, and duplicate
       * internal whitespace.
       *
       * Proto field: {@code optional bool remove_extra_whitespaces = 4 [default = true];}
       *
       * @param value The removeExtraWhitespaces to set.
       * @return This builder for chaining.
       */
      public Builder setRemoveExtraWhitespaces(boolean value) {
        this.removeExtraWhitespaces_ = value;
        this.bitField0_ = this.bitField0_ | 0x00000008;
        onChanged();
        return this;
      }

      /**
       *
       * Clears {@code remove_extra_whitespaces}: clears the presence bit, resets the
       * value to {@code true}, and calls {@code onChanged()}. The option removes
       * leading, trailing, and duplicate internal whitespace.
       *
       * Proto field: {@code optional bool remove_extra_whitespaces = 4 [default = true];}
       *
       * @return This builder for chaining.
       */
      public Builder clearRemoveExtraWhitespaces() {
        this.bitField0_ &= ~0x00000008;
        this.removeExtraWhitespaces_ = true;
        onChanged();
        return this;
      }

      /** Builder-side value of {@code escape_whitespaces}; initialized to {@code true}. */
      private boolean escapeWhitespaces_ = true;

      /**
       *
       * Reports whether {@code escape_whitespaces} has been set on this builder. The
       * option replaces whitespace with meta symbol; it must be true to train a
       * sentence piece model.
       *
       * Proto field: {@code optional bool escape_whitespaces = 5 [default = true];}
       *
       * @return Whether the escapeWhitespaces field is set.
       */
      @java.lang.Override
      public boolean hasEscapeWhitespaces() {
        // Presence of this field is recorded in bit 0x00000010 of bitField0_.
        return 0 != (bitField0_ & 0x00000010);
      }

      /**
       *
       * Replaces whitespace with meta symbol.
       * This field must be true to train sentence piece model.
       * 
* * optional bool escape_whitespaces = 5 [default = true]; * @return The escapeWhitespaces. */ @java.lang.Override public boolean getEscapeWhitespaces() { return escapeWhitespaces_; } /** *
       * Sets {@code escape_whitespaces}, marks the field as set, and calls
       * {@code onChanged()}. The option replaces whitespace with meta symbol; it must
       * be true to train a sentence piece model.
       *
       * Proto field: {@code optional bool escape_whitespaces = 5 [default = true];}
       *
       * @param value The escapeWhitespaces to set.
       * @return This builder for chaining.
       */
      public Builder setEscapeWhitespaces(boolean value) {
        this.escapeWhitespaces_ = value;
        this.bitField0_ = this.bitField0_ | 0x00000010;
        onChanged();
        return this;
      }

      /**
       *
       * Clears {@code escape_whitespaces}: clears the presence bit, resets the value
       * to {@code true}, and calls {@code onChanged()}. The option replaces
       * whitespace with meta symbol; it must be true to train a sentence piece model.
       *
       * Proto field: {@code optional bool escape_whitespaces = 5 [default = true];}
       *
       * @return This builder for chaining.
       */
      public Builder clearEscapeWhitespaces() {
        this.bitField0_ &= ~0x00000010;
        this.escapeWhitespaces_ = true;
        onChanged();
        return this;
      }

      /**
       * Builder-side value of {@code normalization_rule_tsv}; initialized to the empty
       * string. Declared as {@code Object} because the accessors treat it as either a
       * {@code String} or a {@code ByteString}.
       */
      private java.lang.Object normalizationRuleTsv_ = "";

      /**
       *
       * Reports whether {@code normalization_rule_tsv} has been set on this builder.
       * The field names a custom normalization rule file in TSV format, see
       * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       * It is only used in SentencePieceTrainer::Train() method, which compiles the
       * rule into the binary rule stored in `precompiled_charsmap`.
       *
       * Proto field: {@code optional string normalization_rule_tsv = 6;}
       *
       * @return Whether the normalizationRuleTsv field is set.
       */
      public boolean hasNormalizationRuleTsv() {
        // Presence of this field is recorded in bit 0x00000020 of bitField0_.
        final int presenceBit = bitField0_ & 0x00000020;
        return presenceBit != 0;
      }

      /**
       *
       * Returns {@code normalization_rule_tsv} as a {@code String}. The field names a
       * custom normalization rule file in TSV format, see
       * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       * It is only used in SentencePieceTrainer::Train() method, which compiles the
       * rule into the binary rule stored in `precompiled_charsmap`.
       *
       * If the stored object is a {@code ByteString}, it is decoded with
       * {@code toStringUtf8()}; the decoded string replaces the stored object only
       * when the bytes are valid UTF-8.
       *
       * Proto field: {@code optional string normalization_rule_tsv = 6;}
       *
       * @return The normalizationRuleTsv.
       */
      public java.lang.String getNormalizationRuleTsv() {
        final java.lang.Object stored = normalizationRuleTsv_;
        if (stored instanceof java.lang.String) {
          return (java.lang.String) stored;
        }
        final com.google.protobuf.ByteString rawBytes =
            (com.google.protobuf.ByteString) stored;
        final java.lang.String decoded = rawBytes.toStringUtf8();
        if (rawBytes.isValidUtf8()) {
          // Cache the decoded form so later calls take the String branch.
          normalizationRuleTsv_ = decoded;
        }
        return decoded;
      }

      /**
       *
       * Returns {@code normalization_rule_tsv} as a {@code ByteString}. The field
       * names a custom normalization rule file in TSV format, see
       * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       * It is only used in SentencePieceTrainer::Train() method, which compiles the
       * rule into the binary rule stored in `precompiled_charsmap`.
       *
       * If the stored object is a {@code String}, it is encoded with
       * {@code ByteString.copyFromUtf8} and the encoded form replaces the stored
       * object.
       *
       * Proto field: {@code optional string normalization_rule_tsv = 6;}
       *
       * @return The bytes for normalizationRuleTsv.
       */
      public com.google.protobuf.ByteString getNormalizationRuleTsvBytes() {
        final java.lang.Object stored = normalizationRuleTsv_;
        if (!(stored instanceof String)) {
          return (com.google.protobuf.ByteString) stored;
        }
        final com.google.protobuf.ByteString encoded =
            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) stored);
        normalizationRuleTsv_ = encoded;
        return encoded;
      }

      /**
       *
       * Sets {@code normalization_rule_tsv}, marks the field as set, and calls
       * {@code onChanged()}. The field names a custom normalization rule file in TSV
       * format, see
       * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       * It is only used in SentencePieceTrainer::Train() method, which compiles the
       * rule into the binary rule stored in `precompiled_charsmap`.
       *
       * Proto field: {@code optional string normalization_rule_tsv = 6;}
       *
       * @param value The normalizationRuleTsv to set.
       * @return This builder for chaining.
       * @throws NullPointerException if {@code value} is null.
       */
      public Builder setNormalizationRuleTsv(java.lang.String value) {
        if (null == value) {
          throw new NullPointerException();
        }
        this.normalizationRuleTsv_ = value;
        this.bitField0_ = this.bitField0_ | 0x00000020;
        onChanged();
        return this;
      }

      /**
       *
       * Clears {@code normalization_rule_tsv}: restores the value held by the default
       * instance, clears the presence bit, and calls {@code onChanged()}. The field
       * names a custom normalization rule file in TSV format, see
       * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       * It is only used in SentencePieceTrainer::Train() method, which compiles the
       * rule into the binary rule stored in `precompiled_charsmap`.
       *
       * Proto field: {@code optional string normalization_rule_tsv = 6;}
       *
       * @return This builder for chaining.
       */
      public Builder clearNormalizationRuleTsv() {
        this.normalizationRuleTsv_ = getDefaultInstance().getNormalizationRuleTsv();
        this.bitField0_ &= ~0x00000020;
        onChanged();
        return this;
      }

      /**
       *
       * Custom normalization rule file in TSV format.
       * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       * This field is only used in SentencePieceTrainer::Train() method, which
       * compiles the rule into the binary rule stored in `precompiled_charsmap`.
       * 
* * optional string normalization_rule_tsv = 6; * @param value The bytes for normalizationRuleTsv to set. * @return This builder for chaining. */ public Builder setNormalizationRuleTsvBytes( com.google.protobuf.ByteString value) { if (value == null) { throw new NullPointerException(); } normalizationRuleTsv_ = value; bitField0_ |= 0x00000020; onChanged(); return this; } @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.setUnknownFields(unknownFields); } @java.lang.Override public final Builder mergeUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.mergeUnknownFields(unknownFields); } // @@protoc_insertion_point(builder_scope:sentencepiece.NormalizerSpec) } // @@protoc_insertion_point(class_scope:sentencepiece.NormalizerSpec) private static final sentencepiece.SentencepieceModel.NormalizerSpec DEFAULT_INSTANCE; static { DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.NormalizerSpec(); } public static sentencepiece.SentencepieceModel.NormalizerSpec getDefaultInstance() { return DEFAULT_INSTANCE; } @java.lang.Deprecated public static final com.google.protobuf.Parser PARSER = new com.google.protobuf.AbstractParser() { @java.lang.Override public NormalizerSpec parsePartialFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { Builder builder = newBuilder(); try { builder.mergeFrom(input, extensionRegistry); } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.setUnfinishedMessage(builder.buildPartial()); } catch (com.google.protobuf.UninitializedMessageException e) { throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial()); } catch (java.io.IOException e) { throw new com.google.protobuf.InvalidProtocolBufferException(e) .setUnfinishedMessage(builder.buildPartial()); } return 
builder.buildPartial(); } }; public static com.google.protobuf.Parser parser() { return PARSER; } @java.lang.Override public com.google.protobuf.Parser getParserForType() { return PARSER; } @java.lang.Override public sentencepiece.SentencepieceModel.NormalizerSpec getDefaultInstanceForType() { return DEFAULT_INSTANCE; } } public interface SelfTestDataOrBuilder extends // @@protoc_insertion_point(interface_extends:sentencepiece.SelfTestData) com.google.protobuf.GeneratedMessageV3. ExtendableMessageOrBuilder { /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ java.util.List getSamplesList(); /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ sentencepiece.SentencepieceModel.SelfTestData.Sample getSamples(int index); /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ int getSamplesCount(); /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ java.util.List getSamplesOrBuilderList(); /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder getSamplesOrBuilder( int index); } /** *
   * Proto to store samples for self-testing.
   * 
* * Protobuf type {@code sentencepiece.SelfTestData} */ public static final class SelfTestData extends com.google.protobuf.GeneratedMessageV3.ExtendableMessage< SelfTestData> implements // @@protoc_insertion_point(message_implements:sentencepiece.SelfTestData) SelfTestDataOrBuilder { private static final long serialVersionUID = 0L; // Use SelfTestData.newBuilder() to construct. private SelfTestData(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder builder) { super(builder); } private SelfTestData() { samples_ = java.util.Collections.emptyList(); } @java.lang.Override @SuppressWarnings({"unused"}) protected java.lang.Object newInstance( UnusedPrivateParameter unused) { return new SelfTestData(); } public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_fieldAccessorTable .ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.SelfTestData.class, sentencepiece.SentencepieceModel.SelfTestData.Builder.class); } public interface SampleOrBuilder extends // @@protoc_insertion_point(interface_extends:sentencepiece.SelfTestData.Sample) com.google.protobuf.MessageOrBuilder { /** * optional string input = 1; * @return Whether the input field is set. */ boolean hasInput(); /** * optional string input = 1; * @return The input. */ java.lang.String getInput(); /** * optional string input = 1; * @return The bytes for input. */ com.google.protobuf.ByteString getInputBytes(); /** * optional string expected = 2; * @return Whether the expected field is set. */ boolean hasExpected(); /** * optional string expected = 2; * @return The expected. */ java.lang.String getExpected(); /** * optional string expected = 2; * @return The bytes for expected. 
*/ com.google.protobuf.ByteString getExpectedBytes(); } /** * Protobuf type {@code sentencepiece.SelfTestData.Sample} */ public static final class Sample extends com.google.protobuf.GeneratedMessageV3 implements // @@protoc_insertion_point(message_implements:sentencepiece.SelfTestData.Sample) SampleOrBuilder { private static final long serialVersionUID = 0L; // Use Sample.newBuilder() to construct. private Sample(com.google.protobuf.GeneratedMessageV3.Builder builder) { super(builder); } private Sample() { input_ = ""; expected_ = ""; } @java.lang.Override @SuppressWarnings({"unused"}) protected java.lang.Object newInstance( UnusedPrivateParameter unused) { return new Sample(); } public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_fieldAccessorTable .ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.SelfTestData.Sample.class, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder.class); } private int bitField0_; public static final int INPUT_FIELD_NUMBER = 1; @SuppressWarnings("serial") private volatile java.lang.Object input_ = ""; /** * optional string input = 1; * @return Whether the input field is set. */ @java.lang.Override public boolean hasInput() { return ((bitField0_ & 0x00000001) != 0); } /** * optional string input = 1; * @return The input. 
*/
      @java.lang.Override
      public java.lang.String getInput() {
        final java.lang.Object current = input_;
        if (current instanceof java.lang.String) {
          return (java.lang.String) current;
        }
        // Held as a ByteString: decode it, and cache the String only when the
        // bytes are valid UTF-8.
        final com.google.protobuf.ByteString raw =
            (com.google.protobuf.ByteString) current;
        final java.lang.String decoded = raw.toStringUtf8();
        if (raw.isValidUtf8()) {
          input_ = decoded;
        }
        return decoded;
      }
      /**
       * optional string input = 1;
       * @return The bytes for input.
       */
      @java.lang.Override
      public com.google.protobuf.ByteString
          getInputBytes() {
        final java.lang.Object current = input_;
        if (!(current instanceof java.lang.String)) {
          return (com.google.protobuf.ByteString) current;
        }
        // Held as a String: encode once and keep the ByteString form.
        final com.google.protobuf.ByteString encoded =
            com.google.protobuf.ByteString.copyFromUtf8(
                (java.lang.String) current);
        input_ = encoded;
        return encoded;
      }

      public static final int EXPECTED_FIELD_NUMBER = 2;
      @SuppressWarnings("serial")
      private volatile java.lang.Object expected_ = "";
      /**
       * optional string expected = 2;
       * @return Whether the expected field is set.
       */
      @java.lang.Override
      public boolean hasExpected() {
        return (bitField0_ & 0x00000002) != 0;
      }
      /**
       * optional string expected = 2;
       * @return The expected.
       */
      @java.lang.Override
      public java.lang.String getExpected() {
        final java.lang.Object current = expected_;
        if (current instanceof java.lang.String) {
          return (java.lang.String) current;
        }
        // Held as a ByteString: decode it, and cache the String only when the
        // bytes are valid UTF-8.
        final com.google.protobuf.ByteString raw =
            (com.google.protobuf.ByteString) current;
        final java.lang.String decoded = raw.toStringUtf8();
        if (raw.isValidUtf8()) {
          expected_ = decoded;
        }
        return decoded;
      }
      /**
       * optional string expected = 2;
       * @return The bytes for expected.
*/
      @java.lang.Override
      public com.google.protobuf.ByteString
          getExpectedBytes() {
        final java.lang.Object current = expected_;
        if (!(current instanceof java.lang.String)) {
          return (com.google.protobuf.ByteString) current;
        }
        // Held as a String: encode once and keep the ByteString form.
        final com.google.protobuf.ByteString encoded =
            com.google.protobuf.ByteString.copyFromUtf8(
                (java.lang.String) current);
        expected_ = encoded;
        return encoded;
      }

      // -1 = not yet computed, 0 = known false, 1 = known true.
      private byte memoizedIsInitialized = -1;
      /** Always reports {@code true}; the answer is memoized on first call. */
      @java.lang.Override
      public final boolean isInitialized() {
        final byte cached = memoizedIsInitialized;
        if (cached == 0) {
          return false;
        }
        if (cached != 1) {
          memoizedIsInitialized = 1;
        }
        return true;
      }

      /** Writes each field whose presence bit is set, then the unknown fields. */
      @java.lang.Override
      public void writeTo(com.google.protobuf.CodedOutputStream output)
                          throws java.io.IOException {
        if ((bitField0_ & 0x00000001) != 0) {
          com.google.protobuf.GeneratedMessageV3.writeString(output, 1, input_);
        }
        if ((bitField0_ & 0x00000002) != 0) {
          com.google.protobuf.GeneratedMessageV3.writeString(output, 2, expected_);
        }
        getUnknownFields().writeTo(output);
      }

      /** Computes the serialized size once and caches it in {@code memoizedSize}. */
      @java.lang.Override
      public int getSerializedSize() {
        final int cached = memoizedSize;
        if (cached != -1) {
          return cached;
        }

        int total = 0;
        if ((bitField0_ & 0x00000001) != 0) {
          total += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, input_);
        }
        if ((bitField0_ & 0x00000002) != 0) {
          total += com.google.protobuf.GeneratedMessageV3.computeStringSize(2, expected_);
        }
        total += getUnknownFields().getSerializedSize();
        memoizedSize = total;
        return total;
      }

      /**
       * Two samples are equal when field presence, field values and unknown
       * fields all match; non-{@code Sample} arguments defer to the superclass.
       */
      @java.lang.Override
      public boolean equals(final java.lang.Object obj) {
        if (obj == this) {
          return true;
        }
        if (!(obj instanceof sentencepiece.SentencepieceModel.SelfTestData.Sample)) {
          return super.equals(obj);
        }
        final sentencepiece.SentencepieceModel.SelfTestData.Sample that =
            (sentencepiece.SentencepieceModel.SelfTestData.Sample) obj;

        if (hasInput() != that.hasInput()) {
          return false;
        }
        if (hasInput() && !getInput().equals(that.getInput())) {
          return false;
        }
        if (hasExpected() != that.hasExpected()) {
          return false;
        }
        if (hasExpected() && !getExpected().equals(that.getExpected())) {
          return false;
        }
        return getUnknownFields().equals(that.getUnknownFields());
      }

      /** Hash over the descriptor, the set fields and the unknown fields; memoized. */
      @java.lang.Override
      public int hashCode() {
        if (memoizedHashCode != 0) {
          return memoizedHashCode;
        }
        int result = 41;
        result = 19 * result + getDescriptor().hashCode();
        if (hasInput()) {
          result = 37 * result + INPUT_FIELD_NUMBER;
          result = 53 * result + getInput().hashCode();
        }
        if (hasExpected()) {
          result = 37 * result + EXPECTED_FIELD_NUMBER;
          result = 53 * result + getExpected().hashCode();
        }
        result = 29 * result + getUnknownFields().hashCode();
        memoizedHashCode = result;
        return result;
      }

      // The parseFrom overloads below all delegate to PARSER.
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
          java.nio.ByteBuffer data)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
          java.nio.ByteBuffer data,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data, extensionRegistry);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
          com.google.protobuf.ByteString data)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
          com.google.protobuf.ByteString data,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data, extensionRegistry);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(byte[] data)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
          byte[] data,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data, extensionRegistry);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(java.io.InputStream input)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseWithIOException(PARSER, input);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
          java.io.InputStream input,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseWithIOException(PARSER, input, extensionRegistry);
      }

      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseDelimitedFrom(java.io.InputStream input)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseDelimitedWithIOException(PARSER, input);
      }

      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseDelimitedFrom(
          java.io.InputStream input,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseDelimitedWithIOException(PARSER, input, extensionRegistry);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
          com.google.protobuf.CodedInputStream input)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseWithIOException(PARSER, input);
      }
      public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
          com.google.protobuf.CodedInputStream input,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseWithIOException(PARSER, input, extensionRegistry);
      }

      @java.lang.Override
      public Builder newBuilderForType() {
        return newBuilder();
      }

      /** Returns a builder obtained from the default instance. */
      public static Builder newBuilder() {
        return DEFAULT_INSTANCE.toBuilder();
      }

      /** Returns a builder with {@code prototype} merged into it. */
      public static Builder newBuilder(sentencepiece.SentencepieceModel.SelfTestData.Sample prototype) {
        return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
      }
@java.lang.Override public Builder toBuilder() { return this == DEFAULT_INSTANCE ? new Builder() : new Builder().mergeFrom(this); } @java.lang.Override protected Builder newBuilderForType( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { Builder builder = new Builder(parent); return builder; } /** * Protobuf type {@code sentencepiece.SelfTestData.Sample} */ public static final class Builder extends com.google.protobuf.GeneratedMessageV3.Builder implements // @@protoc_insertion_point(builder_implements:sentencepiece.SelfTestData.Sample) sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder { public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_fieldAccessorTable .ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.SelfTestData.Sample.class, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder.class); } // Construct using sentencepiece.SentencepieceModel.SelfTestData.Sample.newBuilder() private Builder() { } private Builder( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { super(parent); } @java.lang.Override public Builder clear() { super.clear(); bitField0_ = 0; input_ = ""; expected_ = ""; return this; } @java.lang.Override public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_descriptor; } @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData.Sample getDefaultInstanceForType() { return sentencepiece.SentencepieceModel.SelfTestData.Sample.getDefaultInstance(); } @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData.Sample 
build() { sentencepiece.SentencepieceModel.SelfTestData.Sample result = buildPartial(); if (!result.isInitialized()) { throw newUninitializedMessageException(result); } return result; } @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData.Sample buildPartial() { sentencepiece.SentencepieceModel.SelfTestData.Sample result = new sentencepiece.SentencepieceModel.SelfTestData.Sample(this); if (bitField0_ != 0) { buildPartial0(result); } onBuilt(); return result; } private void buildPartial0(sentencepiece.SentencepieceModel.SelfTestData.Sample result) { int from_bitField0_ = bitField0_; int to_bitField0_ = 0; if (((from_bitField0_ & 0x00000001) != 0)) { result.input_ = input_; to_bitField0_ |= 0x00000001; } if (((from_bitField0_ & 0x00000002) != 0)) { result.expected_ = expected_; to_bitField0_ |= 0x00000002; } result.bitField0_ |= to_bitField0_; } @java.lang.Override public Builder clone() { return super.clone(); } @java.lang.Override public Builder setField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.setField(field, value); } @java.lang.Override public Builder clearField( com.google.protobuf.Descriptors.FieldDescriptor field) { return super.clearField(field); } @java.lang.Override public Builder clearOneof( com.google.protobuf.Descriptors.OneofDescriptor oneof) { return super.clearOneof(oneof); } @java.lang.Override public Builder setRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, int index, java.lang.Object value) { return super.setRepeatedField(field, index, value); } @java.lang.Override public Builder addRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.addRepeatedField(field, value); } @java.lang.Override public Builder mergeFrom(com.google.protobuf.Message other) { if (other instanceof sentencepiece.SentencepieceModel.SelfTestData.Sample) { return 
mergeFrom((sentencepiece.SentencepieceModel.SelfTestData.Sample)other); } else { super.mergeFrom(other); return this; } } public Builder mergeFrom(sentencepiece.SentencepieceModel.SelfTestData.Sample other) { if (other == sentencepiece.SentencepieceModel.SelfTestData.Sample.getDefaultInstance()) return this; if (other.hasInput()) { input_ = other.input_; bitField0_ |= 0x00000001; onChanged(); } if (other.hasExpected()) { expected_ = other.expected_; bitField0_ |= 0x00000002; onChanged(); } this.mergeUnknownFields(other.getUnknownFields()); onChanged(); return this; } @java.lang.Override public final boolean isInitialized() { return true; } @java.lang.Override public Builder mergeFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { if (extensionRegistry == null) { throw new java.lang.NullPointerException(); } try { boolean done = false; while (!done) { int tag = input.readTag(); switch (tag) { case 0: done = true; break; case 10: { input_ = input.readBytes(); bitField0_ |= 0x00000001; break; } // case 10 case 18: { expected_ = input.readBytes(); bitField0_ |= 0x00000002; break; } // case 18 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { done = true; // was an endgroup tag } break; } // default: } // switch (tag) } // while (!done) } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.unwrapIOException(); } finally { onChanged(); } // finally return this; } private int bitField0_; private java.lang.Object input_ = ""; /** * optional string input = 1; * @return Whether the input field is set. */ public boolean hasInput() { return ((bitField0_ & 0x00000001) != 0); } /** * optional string input = 1; * @return The input. 
*/
        public java.lang.String getInput() {
          final java.lang.Object current = input_;
          if (current instanceof java.lang.String) {
            return (java.lang.String) current;
          }
          // Held as a ByteString: decode it, and cache the String only when
          // the bytes are valid UTF-8.
          final com.google.protobuf.ByteString raw =
              (com.google.protobuf.ByteString) current;
          final java.lang.String decoded = raw.toStringUtf8();
          if (raw.isValidUtf8()) {
            input_ = decoded;
          }
          return decoded;
        }
        /**
         * optional string input = 1;
         * @return The bytes for input.
         */
        public com.google.protobuf.ByteString
            getInputBytes() {
          final java.lang.Object current = input_;
          if (!(current instanceof String)) {
            return (com.google.protobuf.ByteString) current;
          }
          // Held as a String: encode once and keep the ByteString form.
          final com.google.protobuf.ByteString encoded =
              com.google.protobuf.ByteString.copyFromUtf8(
                  (java.lang.String) current);
          input_ = encoded;
          return encoded;
        }
        /**
         * optional string input = 1;
         * @param value The input to set.
         * @return This builder for chaining.
         */
        public Builder setInput(
            java.lang.String value) {
          if (value == null) {
            throw new NullPointerException();
          }
          input_ = value;
          bitField0_ |= 0x00000001;
          onChanged();
          return this;
        }
        /**
         * optional string input = 1;
         * @return This builder for chaining.
         */
        public Builder clearInput() {
          input_ = getDefaultInstance().getInput();
          bitField0_ &= ~0x00000001;
          onChanged();
          return this;
        }
        /**
         * optional string input = 1;
         * @param value The bytes for input to set.
         * @return This builder for chaining.
         */
        public Builder setInputBytes(
            com.google.protobuf.ByteString value) {
          if (value == null) {
            throw new NullPointerException();
          }
          input_ = value;
          bitField0_ |= 0x00000001;
          onChanged();
          return this;
        }

        private java.lang.Object expected_ = "";
        /**
         * optional string expected = 2;
         * @return Whether the expected field is set.
         */
        public boolean hasExpected() {
          return (bitField0_ & 0x00000002) != 0;
        }
        /**
         * optional string expected = 2;
         * @return The expected.
*/
        public java.lang.String getExpected() {
          final java.lang.Object current = expected_;
          if (current instanceof java.lang.String) {
            return (java.lang.String) current;
          }
          // Held as a ByteString: decode it, and cache the String only when
          // the bytes are valid UTF-8.
          final com.google.protobuf.ByteString raw =
              (com.google.protobuf.ByteString) current;
          final java.lang.String decoded = raw.toStringUtf8();
          if (raw.isValidUtf8()) {
            expected_ = decoded;
          }
          return decoded;
        }
        /**
         * optional string expected = 2;
         * @return The bytes for expected.
         */
        public com.google.protobuf.ByteString
            getExpectedBytes() {
          final java.lang.Object current = expected_;
          if (!(current instanceof String)) {
            return (com.google.protobuf.ByteString) current;
          }
          // Held as a String: encode once and keep the ByteString form.
          final com.google.protobuf.ByteString encoded =
              com.google.protobuf.ByteString.copyFromUtf8(
                  (java.lang.String) current);
          expected_ = encoded;
          return encoded;
        }
        /**
         * optional string expected = 2;
         * @param value The expected to set.
         * @return This builder for chaining.
         */
        public Builder setExpected(
            java.lang.String value) {
          if (value == null) {
            throw new NullPointerException();
          }
          expected_ = value;
          bitField0_ |= 0x00000002;
          onChanged();
          return this;
        }
        /**
         * optional string expected = 2;
         * @return This builder for chaining.
         */
        public Builder clearExpected() {
          expected_ = getDefaultInstance().getExpected();
          bitField0_ &= ~0x00000002;
          onChanged();
          return this;
        }
        /**
         * optional string expected = 2;
         * @param value The bytes for expected to set.
         * @return This builder for chaining.
*/ public Builder setExpectedBytes( com.google.protobuf.ByteString value) { if (value == null) { throw new NullPointerException(); } expected_ = value; bitField0_ |= 0x00000002; onChanged(); return this; } @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.setUnknownFields(unknownFields); } @java.lang.Override public final Builder mergeUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.mergeUnknownFields(unknownFields); } // @@protoc_insertion_point(builder_scope:sentencepiece.SelfTestData.Sample) } // @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData.Sample) private static final sentencepiece.SentencepieceModel.SelfTestData.Sample DEFAULT_INSTANCE; static { DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.SelfTestData.Sample(); } public static sentencepiece.SentencepieceModel.SelfTestData.Sample getDefaultInstance() { return DEFAULT_INSTANCE; } @java.lang.Deprecated public static final com.google.protobuf.Parser PARSER = new com.google.protobuf.AbstractParser() { @java.lang.Override public Sample parsePartialFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { Builder builder = newBuilder(); try { builder.mergeFrom(input, extensionRegistry); } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.setUnfinishedMessage(builder.buildPartial()); } catch (com.google.protobuf.UninitializedMessageException e) { throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial()); } catch (java.io.IOException e) { throw new com.google.protobuf.InvalidProtocolBufferException(e) .setUnfinishedMessage(builder.buildPartial()); } return builder.buildPartial(); } }; public static com.google.protobuf.Parser parser() { return PARSER; } @java.lang.Override public com.google.protobuf.Parser 
getParserForType() { return PARSER; } @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData.Sample getDefaultInstanceForType() { return DEFAULT_INSTANCE; } } public static final int SAMPLES_FIELD_NUMBER = 1; @SuppressWarnings("serial") private java.util.List samples_; /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ @java.lang.Override public java.util.List getSamplesList() { return samples_; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ @java.lang.Override public java.util.List getSamplesOrBuilderList() { return samples_; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ @java.lang.Override public int getSamplesCount() { return samples_.size(); } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData.Sample getSamples(int index) { return samples_.get(index); } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder getSamplesOrBuilder( int index) { return samples_.get(index); } private byte memoizedIsInitialized = -1; @java.lang.Override public final boolean isInitialized() { byte isInitialized = memoizedIsInitialized; if (isInitialized == 1) return true; if (isInitialized == 0) return false; if (!extensionsAreInitialized()) { memoizedIsInitialized = 0; return false; } memoizedIsInitialized = 1; return true; } @java.lang.Override public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException { com.google.protobuf.GeneratedMessageV3 .ExtendableMessage.ExtensionWriter extensionWriter = newExtensionWriter(); for (int i = 0; i < samples_.size(); i++) { output.writeMessage(1, samples_.get(i)); } extensionWriter.writeUntil(536870912, output); getUnknownFields().writeTo(output); } @java.lang.Override public int getSerializedSize() { int size = memoizedSize; if (size != -1) return size; 
size = 0; for (int i = 0; i < samples_.size(); i++) { size += com.google.protobuf.CodedOutputStream .computeMessageSize(1, samples_.get(i)); } size += extensionsSerializedSize(); size += getUnknownFields().getSerializedSize(); memoizedSize = size; return size; } @java.lang.Override public boolean equals(final java.lang.Object obj) { if (obj == this) { return true; } if (!(obj instanceof sentencepiece.SentencepieceModel.SelfTestData)) { return super.equals(obj); } sentencepiece.SentencepieceModel.SelfTestData other = (sentencepiece.SentencepieceModel.SelfTestData) obj; if (!getSamplesList() .equals(other.getSamplesList())) return false; if (!getUnknownFields().equals(other.getUnknownFields())) return false; if (!getExtensionFields().equals(other.getExtensionFields())) return false; return true; } @java.lang.Override public int hashCode() { if (memoizedHashCode != 0) { return memoizedHashCode; } int hash = 41; hash = (19 * hash) + getDescriptor().hashCode(); if (getSamplesCount() > 0) { hash = (37 * hash) + SAMPLES_FIELD_NUMBER; hash = (53 * hash) + getSamplesList().hashCode(); } hash = hashFields(hash, getExtensionFields()); hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom( java.nio.ByteBuffer data) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom( java.nio.ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data, extensionRegistry); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom( com.google.protobuf.ByteString data) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom( com.google.protobuf.ByteString 
data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data, extensionRegistry); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom(byte[] data) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom( byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { return PARSER.parseFrom(data, extensionRegistry); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom(java.io.InputStream input) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseWithIOException(PARSER, input); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom( java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseWithIOException(PARSER, input, extensionRegistry); } public static sentencepiece.SentencepieceModel.SelfTestData parseDelimitedFrom(java.io.InputStream input) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseDelimitedWithIOException(PARSER, input); } public static sentencepiece.SentencepieceModel.SelfTestData parseDelimitedFrom( java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseDelimitedWithIOException(PARSER, input, extensionRegistry); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom( com.google.protobuf.CodedInputStream input) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseWithIOException(PARSER, input); } public static sentencepiece.SentencepieceModel.SelfTestData parseFrom( com.google.protobuf.CodedInputStream 
input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { return com.google.protobuf.GeneratedMessageV3 .parseWithIOException(PARSER, input, extensionRegistry); } @java.lang.Override public Builder newBuilderForType() { return newBuilder(); } public static Builder newBuilder() { return DEFAULT_INSTANCE.toBuilder(); } public static Builder newBuilder(sentencepiece.SentencepieceModel.SelfTestData prototype) { return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype); } @java.lang.Override public Builder toBuilder() { return this == DEFAULT_INSTANCE ? new Builder() : new Builder().mergeFrom(this); } @java.lang.Override protected Builder newBuilderForType( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { Builder builder = new Builder(parent); return builder; } /** *
     * Proto to store samples for self-testing.
     * 
* * Protobuf type {@code sentencepiece.SelfTestData} */ public static final class Builder extends com.google.protobuf.GeneratedMessageV3.ExtendableBuilder< sentencepiece.SentencepieceModel.SelfTestData, Builder> implements // @@protoc_insertion_point(builder_implements:sentencepiece.SelfTestData) sentencepiece.SentencepieceModel.SelfTestDataOrBuilder { public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_fieldAccessorTable .ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.SelfTestData.class, sentencepiece.SentencepieceModel.SelfTestData.Builder.class); } // Construct using sentencepiece.SentencepieceModel.SelfTestData.newBuilder() private Builder() { } private Builder( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { super(parent); } @java.lang.Override public Builder clear() { super.clear(); bitField0_ = 0; if (samplesBuilder_ == null) { samples_ = java.util.Collections.emptyList(); } else { samples_ = null; samplesBuilder_.clear(); } bitField0_ = (bitField0_ & ~0x00000001); return this; } @java.lang.Override public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_descriptor; } @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData getDefaultInstanceForType() { return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance(); } @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData build() { sentencepiece.SentencepieceModel.SelfTestData result = buildPartial(); if (!result.isInitialized()) { throw newUninitializedMessageException(result); } return result; } 
@java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData buildPartial() { sentencepiece.SentencepieceModel.SelfTestData result = new sentencepiece.SentencepieceModel.SelfTestData(this); buildPartialRepeatedFields(result); if (bitField0_ != 0) { buildPartial0(result); } onBuilt(); return result; } private void buildPartialRepeatedFields(sentencepiece.SentencepieceModel.SelfTestData result) { if (samplesBuilder_ == null) { if (((bitField0_ & 0x00000001) != 0)) { samples_ = java.util.Collections.unmodifiableList(samples_); bitField0_ = (bitField0_ & ~0x00000001); } result.samples_ = samples_; } else { result.samples_ = samplesBuilder_.build(); } } private void buildPartial0(sentencepiece.SentencepieceModel.SelfTestData result) { int from_bitField0_ = bitField0_; } @java.lang.Override public Builder clone() { return super.clone(); } @java.lang.Override public Builder setField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.setField(field, value); } @java.lang.Override public Builder clearField( com.google.protobuf.Descriptors.FieldDescriptor field) { return super.clearField(field); } @java.lang.Override public Builder clearOneof( com.google.protobuf.Descriptors.OneofDescriptor oneof) { return super.clearOneof(oneof); } @java.lang.Override public Builder setRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, int index, java.lang.Object value) { return super.setRepeatedField(field, index, value); } @java.lang.Override public Builder addRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.addRepeatedField(field, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.SelfTestData, Type> extension, Type value) { return super.setExtension(extension, value); } @java.lang.Override public Builder setExtension( 
com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.SelfTestData, java.util.List> extension, int index, Type value) { return super.setExtension(extension, index, value); } @java.lang.Override public Builder addExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.SelfTestData, java.util.List> extension, Type value) { return super.addExtension(extension, value); } @java.lang.Override public Builder clearExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.SelfTestData, T> extension) { return super.clearExtension(extension); } @java.lang.Override public Builder mergeFrom(com.google.protobuf.Message other) { if (other instanceof sentencepiece.SentencepieceModel.SelfTestData) { return mergeFrom((sentencepiece.SentencepieceModel.SelfTestData)other); } else { super.mergeFrom(other); return this; } } public Builder mergeFrom(sentencepiece.SentencepieceModel.SelfTestData other) { if (other == sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance()) return this; if (samplesBuilder_ == null) { if (!other.samples_.isEmpty()) { if (samples_.isEmpty()) { samples_ = other.samples_; bitField0_ = (bitField0_ & ~0x00000001); } else { ensureSamplesIsMutable(); samples_.addAll(other.samples_); } onChanged(); } } else { if (!other.samples_.isEmpty()) { if (samplesBuilder_.isEmpty()) { samplesBuilder_.dispose(); samplesBuilder_ = null; samples_ = other.samples_; bitField0_ = (bitField0_ & ~0x00000001); samplesBuilder_ = com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders ? 
getSamplesFieldBuilder() : null; } else { samplesBuilder_.addAllMessages(other.samples_); } } } this.mergeExtensionFields(other); this.mergeUnknownFields(other.getUnknownFields()); onChanged(); return this; } @java.lang.Override public final boolean isInitialized() { if (!extensionsAreInitialized()) { return false; } return true; } @java.lang.Override public Builder mergeFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { if (extensionRegistry == null) { throw new java.lang.NullPointerException(); } try { boolean done = false; while (!done) { int tag = input.readTag(); switch (tag) { case 0: done = true; break; case 10: { sentencepiece.SentencepieceModel.SelfTestData.Sample m = input.readMessage( sentencepiece.SentencepieceModel.SelfTestData.Sample.PARSER, extensionRegistry); if (samplesBuilder_ == null) { ensureSamplesIsMutable(); samples_.add(m); } else { samplesBuilder_.addMessage(m); } break; } // case 10 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { done = true; // was an endgroup tag } break; } // default: } // switch (tag) } // while (!done) } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.unwrapIOException(); } finally { onChanged(); } // finally return this; } private int bitField0_; private java.util.List samples_ = java.util.Collections.emptyList(); private void ensureSamplesIsMutable() { if (!((bitField0_ & 0x00000001) != 0)) { samples_ = new java.util.ArrayList(samples_); bitField0_ |= 0x00000001; } } private com.google.protobuf.RepeatedFieldBuilderV3< sentencepiece.SentencepieceModel.SelfTestData.Sample, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder, sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder> samplesBuilder_; /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public java.util.List getSamplesList() { if (samplesBuilder_ == null) { return 
java.util.Collections.unmodifiableList(samples_); } else { return samplesBuilder_.getMessageList(); } } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public int getSamplesCount() { if (samplesBuilder_ == null) { return samples_.size(); } else { return samplesBuilder_.getCount(); } } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public sentencepiece.SentencepieceModel.SelfTestData.Sample getSamples(int index) { if (samplesBuilder_ == null) { return samples_.get(index); } else { return samplesBuilder_.getMessage(index); } } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder setSamples( int index, sentencepiece.SentencepieceModel.SelfTestData.Sample value) { if (samplesBuilder_ == null) { if (value == null) { throw new NullPointerException(); } ensureSamplesIsMutable(); samples_.set(index, value); onChanged(); } else { samplesBuilder_.setMessage(index, value); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder setSamples( int index, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder builderForValue) { if (samplesBuilder_ == null) { ensureSamplesIsMutable(); samples_.set(index, builderForValue.build()); onChanged(); } else { samplesBuilder_.setMessage(index, builderForValue.build()); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder addSamples(sentencepiece.SentencepieceModel.SelfTestData.Sample value) { if (samplesBuilder_ == null) { if (value == null) { throw new NullPointerException(); } ensureSamplesIsMutable(); samples_.add(value); onChanged(); } else { samplesBuilder_.addMessage(value); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder addSamples( int index, sentencepiece.SentencepieceModel.SelfTestData.Sample value) { if (samplesBuilder_ == null) { if (value == null) { throw new NullPointerException(); } ensureSamplesIsMutable(); 
samples_.add(index, value); onChanged(); } else { samplesBuilder_.addMessage(index, value); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder addSamples( sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder builderForValue) { if (samplesBuilder_ == null) { ensureSamplesIsMutable(); samples_.add(builderForValue.build()); onChanged(); } else { samplesBuilder_.addMessage(builderForValue.build()); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder addSamples( int index, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder builderForValue) { if (samplesBuilder_ == null) { ensureSamplesIsMutable(); samples_.add(index, builderForValue.build()); onChanged(); } else { samplesBuilder_.addMessage(index, builderForValue.build()); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder addAllSamples( java.lang.Iterable values) { if (samplesBuilder_ == null) { ensureSamplesIsMutable(); com.google.protobuf.AbstractMessageLite.Builder.addAll( values, samples_); onChanged(); } else { samplesBuilder_.addAllMessages(values); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder clearSamples() { if (samplesBuilder_ == null) { samples_ = java.util.Collections.emptyList(); bitField0_ = (bitField0_ & ~0x00000001); onChanged(); } else { samplesBuilder_.clear(); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public Builder removeSamples(int index) { if (samplesBuilder_ == null) { ensureSamplesIsMutable(); samples_.remove(index); onChanged(); } else { samplesBuilder_.remove(index); } return this; } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder getSamplesBuilder( int index) { return getSamplesFieldBuilder().getBuilder(index); } /** * repeated .sentencepiece.SelfTestData.Sample 
samples = 1; */ public sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder getSamplesOrBuilder( int index) { if (samplesBuilder_ == null) { return samples_.get(index); } else { return samplesBuilder_.getMessageOrBuilder(index); } } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public java.util.List getSamplesOrBuilderList() { if (samplesBuilder_ != null) { return samplesBuilder_.getMessageOrBuilderList(); } else { return java.util.Collections.unmodifiableList(samples_); } } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder addSamplesBuilder() { return getSamplesFieldBuilder().addBuilder( sentencepiece.SentencepieceModel.SelfTestData.Sample.getDefaultInstance()); } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder addSamplesBuilder( int index) { return getSamplesFieldBuilder().addBuilder( index, sentencepiece.SentencepieceModel.SelfTestData.Sample.getDefaultInstance()); } /** * repeated .sentencepiece.SelfTestData.Sample samples = 1; */ public java.util.List getSamplesBuilderList() { return getSamplesFieldBuilder().getBuilderList(); } private com.google.protobuf.RepeatedFieldBuilderV3< sentencepiece.SentencepieceModel.SelfTestData.Sample, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder, sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder> getSamplesFieldBuilder() { if (samplesBuilder_ == null) { samplesBuilder_ = new com.google.protobuf.RepeatedFieldBuilderV3< sentencepiece.SentencepieceModel.SelfTestData.Sample, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder, sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder>( samples_, ((bitField0_ & 0x00000001) != 0), getParentForChildren(), isClean()); samples_ = null; } return samplesBuilder_; } @java.lang.Override public final Builder setUnknownFields( final 
com.google.protobuf.UnknownFieldSet unknownFields) { return super.setUnknownFields(unknownFields); } @java.lang.Override public final Builder mergeUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.mergeUnknownFields(unknownFields); } // @@protoc_insertion_point(builder_scope:sentencepiece.SelfTestData) } // @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData) private static final sentencepiece.SentencepieceModel.SelfTestData DEFAULT_INSTANCE; static { DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.SelfTestData(); } public static sentencepiece.SentencepieceModel.SelfTestData getDefaultInstance() { return DEFAULT_INSTANCE; } @java.lang.Deprecated public static final com.google.protobuf.Parser PARSER = new com.google.protobuf.AbstractParser() { @java.lang.Override public SelfTestData parsePartialFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { Builder builder = newBuilder(); try { builder.mergeFrom(input, extensionRegistry); } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.setUnfinishedMessage(builder.buildPartial()); } catch (com.google.protobuf.UninitializedMessageException e) { throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial()); } catch (java.io.IOException e) { throw new com.google.protobuf.InvalidProtocolBufferException(e) .setUnfinishedMessage(builder.buildPartial()); } return builder.buildPartial(); } }; public static com.google.protobuf.Parser parser() { return PARSER; } @java.lang.Override public com.google.protobuf.Parser getParserForType() { return PARSER; } @java.lang.Override public sentencepiece.SentencepieceModel.SelfTestData getDefaultInstanceForType() { return DEFAULT_INSTANCE; } } public interface ModelProtoOrBuilder extends // @@protoc_insertion_point(interface_extends:sentencepiece.ModelProto) 
com.google.protobuf.GeneratedMessageV3.
          ExtendableMessageOrBuilder<ModelProto> {
    // Read-only view of a sentencepiece.ModelProto message.
    // The type argument above and the element types of the List-returning accessors
    // are spelled out explicitly so that this interface agrees with
    // ExtendableMessage< ModelProto>, which the ModelProto class extends.

    /**
     * <pre>
     * Sentence pieces with scores.
     * </pre>
     *
     * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
     */
    java.util.List<sentencepiece.SentencepieceModel.ModelProto.SentencePiece>
        getPiecesList();
    /**
     * <pre>
     * Sentence pieces with scores.
     * </pre>
     *
     * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
     */
    sentencepiece.SentencepieceModel.ModelProto.SentencePiece getPieces(int index);
    /**
     * <pre>
     * Sentence pieces with scores.
     * </pre>
     *
     * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
     */
    int getPiecesCount();
    /**
     * <pre>
     * Sentence pieces with scores.
     * </pre>
     *
     * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
     */
    java.util.List<? extends sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder>
        getPiecesOrBuilderList();
    /**
     * <pre>
     * Sentence pieces with scores.
     * </pre>
     *
     * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
     */
    sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder getPiecesOrBuilder(
        int index);

    /**
     * <pre>
     * Spec used to generate this model file.
     * </pre>
     *
     * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
     * @return Whether the trainerSpec field is set.
     */
    boolean hasTrainerSpec();
    /**
     * <pre>
     * Spec used to generate this model file.
     * </pre>
     *
     * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
     * @return The trainerSpec.
     */
    sentencepiece.SentencepieceModel.TrainerSpec getTrainerSpec();
    /**
     * <pre>
     * Spec used to generate this model file.
     * </pre>
     *
     * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
     */
    sentencepiece.SentencepieceModel.TrainerSpecOrBuilder getTrainerSpecOrBuilder();

    /**
     * <pre>
     * Spec for text normalization.
     * </pre>
     *
     * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
     * @return Whether the normalizerSpec field is set.
     */
    boolean hasNormalizerSpec();
    /**
     * <pre>
     * Spec for text normalization.
     * </pre>
     *
     * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
     * @return The normalizerSpec.
     */
    sentencepiece.SentencepieceModel.NormalizerSpec getNormalizerSpec();
    /**
     * <pre>
     * Spec for text normalization.
     * </pre>
     *
     * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
     */
    sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getNormalizerSpecOrBuilder();

    /**
     * <pre>
     * Stores sample input and its expected segmentation to verify the model.
     * </pre>
     *
     * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
     * @return Whether the selfTestData field is set.
     */
    boolean hasSelfTestData();
    /**
     * <pre>
     * Stores sample input and its expected segmentation to verify the model.
     * </pre>
     *
     * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
     * @return The selfTestData.
     */
    sentencepiece.SentencepieceModel.SelfTestData getSelfTestData();
    /**
     * <pre>
     * Stores sample input and its expected segmentation to verify the model.
     * </pre>
     *
     * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
     */
    sentencepiece.SentencepieceModel.SelfTestDataOrBuilder getSelfTestDataOrBuilder();

    /**
     * <pre>
     * Spec for text de-normalization.
     * </pre>
     *
     * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
     * @return Whether the denormalizerSpec field is set.
     */
    boolean hasDenormalizerSpec();
    /**
     * <pre>
     * Spec for text de-normalization.
     * </pre>
     *
     * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
     * @return The denormalizerSpec.
     */
    sentencepiece.SentencepieceModel.NormalizerSpec getDenormalizerSpec();
    /**
     * <pre>
     * Spec for text de-normalization.
     * </pre>
     *
     * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
     */
    sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getDenormalizerSpecOrBuilder();
  }

  /**
   *
   * ModelProto stores model parameters.
   * SentencePieceProcessor is supposed to be self-contained.
   * All settings/parameters which may change the behavior must be encoded
   * in ModelProto.
   * 
*
   * Protobuf type {@code sentencepiece.ModelProto}
   */
  public static final class ModelProto extends
      com.google.protobuf.GeneratedMessageV3.ExtendableMessage<
        ModelProto> implements
      // @@protoc_insertion_point(message_implements:sentencepiece.ModelProto)
      ModelProtoOrBuilder {
  private static final long serialVersionUID = 0L;
    // Use ModelProto.newBuilder() to construct.
    private ModelProto(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<sentencepiece.SentencepieceModel.ModelProto, ?> builder) {
      super(builder);
    }
    // Used for the default instance: starts with an empty, shared pieces list.
    private ModelProto() {
      pieces_ = java.util.Collections.emptyList();
    }

    /**
     * Creates a fresh, empty {@code ModelProto}.
     *
     * @param unused marker argument; not read by this method.
     * @return a new {@code ModelProto} instance.
     */
    @java.lang.Override
    @SuppressWarnings({"unused"})
    protected java.lang.Object newInstance(
        UnusedPrivateParameter unused) {
      return new ModelProto();
    }

    /**
     * @return the descriptor held in
     *     {@code internal_static_sentencepiece_ModelProto_descriptor}.
     */
    public static final com.google.protobuf.Descriptors.Descriptor
        getDescriptor() {
      return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_descriptor;
    }

    /**
     * @return the field accessor table for this message, initialized for the
     *     {@code ModelProto} / {@code ModelProto.Builder} class pair.
     */
    @java.lang.Override
    protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
        internalGetFieldAccessorTable() {
      return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_fieldAccessorTable
          .ensureFieldAccessorsInitialized(
              sentencepiece.SentencepieceModel.ModelProto.class, sentencepiece.SentencepieceModel.ModelProto.Builder.class);
    }

    public interface SentencePieceOrBuilder extends
        // @@protoc_insertion_point(interface_extends:sentencepiece.ModelProto.SentencePiece)
        com.google.protobuf.GeneratedMessageV3.
            ExtendableMessageOrBuilder<SentencePiece> {

      /**
       * <pre>
       * piece must not be empty.
       * </pre>
       *
       * <code>optional string piece = 1;</code>
       * @return Whether the piece field is set.
       */
      boolean hasPiece();
      /**
       * <pre>
       * piece must not be empty.
       * </pre>
       *
       * <code>optional string piece = 1;</code>
       * @return The piece.
       */
      java.lang.String getPiece();
      /**
       * <pre>
       * piece must not be empty.
       * </pre>
       *
       * <code>optional string piece = 1;</code>
       * @return The bytes for piece.
       */
      com.google.protobuf.ByteString
          getPieceBytes();

      /**
       * <code>optional float score = 2;</code>
       * @return Whether the score field is set.
       */
      boolean hasScore();
      /**
       * <code>optional float score = 2;</code>
       * @return The score.
       */
      float getScore();

      /**
       * <code>optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];</code>
       * @return Whether the type field is set.
       */
      boolean hasType();
      /**
       * <code>optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];</code>
       * @return The type.
       */
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type getType();
    }
    /**
     * Protobuf type {@code sentencepiece.ModelProto.SentencePiece}
     */
    public static final class SentencePiece extends
        com.google.protobuf.GeneratedMessageV3.ExtendableMessage<
          SentencePiece> implements
        // @@protoc_insertion_point(message_implements:sentencepiece.ModelProto.SentencePiece)
        SentencePieceOrBuilder {
    private static final long serialVersionUID = 0L;
      // Use SentencePiece.newBuilder() to construct.
private SentencePiece(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<sentencepiece.SentencepieceModel.ModelProto.SentencePiece, ?> builder) {
        super(builder);
      }
      // Used for the default instance: empty piece text, type number 1 (NORMAL).
      private SentencePiece() {
        piece_ = "";
        type_ = 1;
      }

      /**
       * Creates a fresh, empty {@code SentencePiece}.
       *
       * @param unused marker argument; not read by this method.
       * @return a new {@code SentencePiece} instance.
       */
      @java.lang.Override
      @SuppressWarnings({"unused"})
      protected java.lang.Object newInstance(
          UnusedPrivateParameter unused) {
        return new SentencePiece();
      }

      /**
       * @return the descriptor held in
       *     {@code internal_static_sentencepiece_ModelProto_SentencePiece_descriptor}.
       */
      public static final com.google.protobuf.Descriptors.Descriptor
          getDescriptor() {
        return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_descriptor;
      }

      /**
       * @return the field accessor table for this message, initialized for the
       *     {@code SentencePiece} / {@code SentencePiece.Builder} class pair.
       */
      @java.lang.Override
      protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
          internalGetFieldAccessorTable() {
        return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_fieldAccessorTable
            .ensureFieldAccessorsInitialized(
                sentencepiece.SentencepieceModel.ModelProto.SentencePiece.class, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder.class);
      }

      /**
       * Protobuf enum {@code sentencepiece.ModelProto.SentencePiece.Type}
       */
      public enum Type
          implements com.google.protobuf.ProtocolMessageEnum {
        /**
         * <pre>
         * normal symbol
         * </pre>
         *
         * <code>NORMAL = 1;</code>
         */
        NORMAL(1),
        /**
         * <pre>
         * unknown symbol. only &lt;unk&gt; for now.
         * </pre>
         *
         * <code>UNKNOWN = 2;</code>
         */
        UNKNOWN(2),
        /**
         * <pre>
         * control symbols. &lt;/s&gt;, &lt;s&gt;, &lt;2ja&gt; etc.
         * </pre>
         *
         * <code>CONTROL = 3;</code>
         */
        CONTROL(3),
        /**
         * <pre>
         * user defined symbols.
         * </pre>
         *
         * <code>USER_DEFINED = 4;</code>
         */
        USER_DEFINED(4),
        /**
         * <pre>
         * Typical usage of USER_DEFINED symbol
         * is placeholder.
         * </pre>
         *
         * <p>NOTE(review): the text above talks about USER_DEFINED, not BYTE; it looks like
         * a continuation of the USER_DEFINED comment that the generator attached to this
         * constant. Confirm BYTE's own meaning against the .proto definition.
         *
         * <code>BYTE = 6;</code>
         */
        BYTE(6),
        /**
         * <pre>
         * this piece is not used.
         * </pre>
         *
         * <code>UNUSED = 5;</code>
         */
        UNUSED(5),
        ;

        /**
         * <pre>
         * normal symbol
         * </pre>
         *
         * <code>NORMAL = 1;</code>
         */
        public static final int NORMAL_VALUE = 1;
        /**
         * <pre>
         * unknown symbol. only &lt;unk&gt; for now.
         * </pre>
         *
         * <code>UNKNOWN = 2;</code>
         */
        public static final int UNKNOWN_VALUE = 2;
        /**
         * <pre>
         * control symbols. &lt;/s&gt;, &lt;s&gt;, &lt;2ja&gt; etc.
         * </pre>
         *
         * <code>CONTROL = 3;</code>
         */
        public static final int CONTROL_VALUE = 3;
        /**
         * <pre>
         * user defined symbols.
         * </pre>
         *
         * <code>USER_DEFINED = 4;</code>
         */
        public static final int USER_DEFINED_VALUE = 4;
        /**
         * <pre>
         * Typical usage of USER_DEFINED symbol
         * is placeholder.
         * </pre>
         *
         * <p>NOTE(review): this text describes USER_DEFINED rather than BYTE; see the
         * .proto definition for BYTE's own meaning.
         *
         * <code>BYTE = 6;</code>
         */
        public static final int BYTE_VALUE = 6;
        /**
         * <pre>
         * this piece is not used.
         * </pre>
         *
         * <code>UNUSED = 5;</code>
         */
        public static final int UNUSED_VALUE = 5;


        /** @return the numeric wire value of this constant. */
        public final int getNumber() {
          return value;
        }

        /**
         * @param value The numeric wire value of the corresponding enum entry.
         * @return The enum associated with the given numeric wire value.
         * @deprecated Use {@link #forNumber(int)} instead.
         */
        @java.lang.Deprecated
        public static Type valueOf(int value) {
          return forNumber(value);
        }

        /**
         * @param value The numeric wire value of the corresponding enum entry.
         * @return The enum associated with the given numeric wire value, or
         *     {@code null} when no constant has that value.
         */
        public static Type forNumber(int value) {
          switch (value) {
            case 1: return NORMAL;
            case 2: return UNKNOWN;
            case 3: return CONTROL;
            case 4: return USER_DEFINED;
            case 6: return BYTE;
            case 5: return UNUSED;
            default: return null;
          }
        }

        /** @return the shared map that resolves wire numbers through {@link #forNumber(int)}. */
        public static com.google.protobuf.Internal.EnumLiteMap<Type>
            internalGetValueMap() {
          return internalValueMap;
        }
        private static final com.google.protobuf.Internal.EnumLiteMap<
            Type> internalValueMap =
              new com.google.protobuf.Internal.EnumLiteMap<Type>() {
                public Type findValueByNumber(int number) {
                  return Type.forNumber(number);
                }
              };

        /**
         * Looks this constant up in the enum descriptor's value list by {@code ordinal()},
         * so it relies on the constants being declared in the same order as the
         * descriptor's values.
         */
        public final com.google.protobuf.Descriptors.EnumValueDescriptor
            getValueDescriptor() {
          return getDescriptor().getValues().get(ordinal());
        }
        public final com.google.protobuf.Descriptors.EnumDescriptor
            getDescriptorForType() {
          return getDescriptor();
        }
        /** @return the first enum type declared on the {@code SentencePiece} descriptor. */
        public static final com.google.protobuf.Descriptors.EnumDescriptor
            getDescriptor() {
          return sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDescriptor().getEnumTypes().get(0);
        }

        // Cached copy of values(), indexed by descriptor index in valueOf below.
        private static final Type[] VALUES = values();

        /**
         * @param desc a value descriptor that must belong to this enum's descriptor.
         * @return the constant at the descriptor's index.
         * @throws java.lang.IllegalArgumentException if {@code desc} is for another enum type.
         */
        public static Type valueOf(
            com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
          if (desc.getType() != getDescriptor()) {
            throw new java.lang.IllegalArgumentException(
              "EnumValueDescriptor is not for this type.");
          }
          return VALUES[desc.getIndex()];
        }

        private final int value;

        private Type(int value) {
          this.value = value;
        }

        // @@protoc_insertion_point(enum_scope:sentencepiece.ModelProto.SentencePiece.Type)
      }

      // Access modifier of the bitField0_ declaration that continues on the next line.
      private
int bitField0_;
      public static final int PIECE_FIELD_NUMBER = 1;
      // Holds either a java.lang.String or a ByteString; the accessors below convert
      // between the two and cache the converted form.
      @SuppressWarnings("serial")
      private volatile java.lang.Object piece_ = "";
      /**
       * <pre>
       * piece must not be empty.
       * </pre>
       *
       * <code>optional string piece = 1;</code>
       * @return Whether the piece field is set.
       */
      @java.lang.Override
      public boolean hasPiece() {
        return ((bitField0_ & 0x00000001) != 0);
      }
      /**
       * <pre>
       * piece must not be empty.
       * </pre>
       *
       * <code>optional string piece = 1;</code>
       * @return The piece.
       */
      @java.lang.Override
      public java.lang.String getPiece() {
        java.lang.Object ref = piece_;
        if (ref instanceof java.lang.String) {
          return (java.lang.String) ref;
        } else {
          com.google.protobuf.ByteString bs =
              (com.google.protobuf.ByteString) ref;
          java.lang.String s = bs.toStringUtf8();
          // Cache the decoded String only when the bytes are valid UTF-8.
          if (bs.isValidUtf8()) {
            piece_ = s;
          }
          return s;
        }
      }
      /**
       * <pre>
       * piece must not be empty.
       * </pre>
       *
       * <code>optional string piece = 1;</code>
       * @return The bytes for piece.
       */
      @java.lang.Override
      public com.google.protobuf.ByteString
          getPieceBytes() {
        java.lang.Object ref = piece_;
        if (ref instanceof java.lang.String) {
          com.google.protobuf.ByteString b =
              com.google.protobuf.ByteString.copyFromUtf8(
                  (java.lang.String) ref);
          // Cache the encoded form so later calls return it directly.
          piece_ = b;
          return b;
        } else {
          return (com.google.protobuf.ByteString) ref;
        }
      }

      public static final int SCORE_FIELD_NUMBER = 2;
      private float score_ = 0F;
      /**
       * <code>optional float score = 2;</code>
       * @return Whether the score field is set.
       */
      @java.lang.Override
      public boolean hasScore() {
        return ((bitField0_ & 0x00000002) != 0);
      }
      /**
       * <code>optional float score = 2;</code>
       * @return The score.
       */
      @java.lang.Override
      public float getScore() {
        return score_;
      }

      public static final int TYPE_FIELD_NUMBER = 3;
      // Stored as the numeric wire value; 1 is NORMAL.
      private int type_ = 1;
      /**
       * <code>optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];</code>
       * @return Whether the type field is set.
       */
      @java.lang.Override public boolean hasType() {
        return ((bitField0_ & 0x00000004) != 0);
      }
      /**
       * <code>optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];</code>
       * @return The type; {@code NORMAL} when the stored number matches no constant.
       */
      @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type getType() {
        sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type result = sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.forNumber(type_);
        return result == null ? sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.NORMAL : result;
      }

      // Tri-state cache for isInitialized(): -1 = not computed yet, 0 = false, 1 = true.
      private byte memoizedIsInitialized = -1;
      /**
       * @return {@code true} when the extensions are initialized; the answer is
       *     memoized after the first call.
       */
      @java.lang.Override
      public final boolean isInitialized() {
        byte isInitialized = memoizedIsInitialized;
        if (isInitialized == 1) return true;
        if (isInitialized == 0) return false;

        if (!extensionsAreInitialized()) {
          memoizedIsInitialized = 0;
          return false;
        }
        memoizedIsInitialized = 1;
        return true;
      }

      /**
       * Writes the fields whose presence bit is set (piece, score, type), then the
       * extensions, then the unknown fields.
       *
       * @param output the stream to write to.
       * @throws java.io.IOException if writing to {@code output} fails.
       */
      @java.lang.Override
      public void writeTo(com.google.protobuf.CodedOutputStream output)
                          throws java.io.IOException {
        com.google.protobuf.GeneratedMessageV3
          .ExtendableMessage<sentencepiece.SentencepieceModel.ModelProto.SentencePiece>.ExtensionWriter
            extensionWriter = newExtensionWriter();
        if (((bitField0_ & 0x00000001) != 0)) {
          com.google.protobuf.GeneratedMessageV3.writeString(output, 1, piece_);
        }
        if (((bitField0_ & 0x00000002) != 0)) {
          output.writeFloat(2, score_);
        }
        if (((bitField0_ & 0x00000004) != 0)) {
          output.writeEnum(3, type_);
        }
        // 536870912 is the upper bound handed to the extension writer.
        extensionWriter.writeUntil(536870912, output);
        getUnknownFields().writeTo(output);
      }

      /**
       * @return the serialized size in bytes, computed from the same parts that
       *     {@link #writeTo} emits and memoized in {@code memoizedSize}.
       */
      @java.lang.Override
      public int getSerializedSize() {
        int size = memoizedSize;
        if (size != -1) return size;

        size = 0;
        if (((bitField0_ & 0x00000001) != 0)) {
          size += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, piece_);
        }
        if (((bitField0_ & 0x00000002) != 0)) {
          size += com.google.protobuf.CodedOutputStream
            .computeFloatSize(2, score_);
        }
        if (((bitField0_ & 0x00000004) != 0)) {
          size += com.google.protobuf.CodedOutputStream
            .computeEnumSize(3, type_);
        }
        size += extensionsSerializedSize();
        size += getUnknownFields().getSerializedSize();
        memoizedSize = size;
        return size;
      }

      /**
       * Compares field presence and values, unknown fields and extension fields.
       * Objects of another class are delegated to {@code super.equals}.
       */
      @java.lang.Override
      public boolean equals(final java.lang.Object obj) {
        if (obj == this) {
         return true;
        }
        if (!(obj instanceof sentencepiece.SentencepieceModel.ModelProto.SentencePiece)) {
          return super.equals(obj);
        }
        sentencepiece.SentencepieceModel.ModelProto.SentencePiece other = (sentencepiece.SentencepieceModel.ModelProto.SentencePiece) obj;

        if (hasPiece() != other.hasPiece()) return false;
        if (hasPiece()) {
          if (!getPiece()
              .equals(other.getPiece())) return false;
        }
        if (hasScore() != other.hasScore()) return false;
        if (hasScore()) {
          // Scores are compared by their int bit patterns rather than with ==.
          if (java.lang.Float.floatToIntBits(getScore())
              != java.lang.Float.floatToIntBits(
                  other.getScore())) return false;
        }
        if (hasType() != other.hasType()) return false;
        if (hasType()) {
          if (type_ != other.type_) return false;
        }
        if (!getUnknownFields().equals(other.getUnknownFields())) return false;
        if (!getExtensionFields().equals(other.getExtensionFields()))
          return false;
        return true;
      }

      /**
       * Hashes the descriptor, every set field, the extension fields and the unknown
       * fields. A non-zero result is memoized in {@code memoizedHashCode}.
       */
      @java.lang.Override
      public int hashCode() {
        if (memoizedHashCode != 0) {
          return memoizedHashCode;
        }
        int hash = 41;
        hash = (19 * hash) + getDescriptor().hashCode();
        if (hasPiece()) {
          hash = (37 * hash) + PIECE_FIELD_NUMBER;
          hash = (53 * hash) + getPiece().hashCode();
        }
        if (hasScore()) {
          hash = (37 * hash) + SCORE_FIELD_NUMBER;
          hash = (53 * hash) + java.lang.Float.floatToIntBits(
              getScore());
        }
        if (hasType()) {
          hash = (37 * hash) + TYPE_FIELD_NUMBER;
          hash = (53 * hash) + type_;
        }
        hash = hashFields(hash, getExtensionFields());
        hash = (29 * hash) + getUnknownFields().hashCode();
        memoizedHashCode = hash;
        return hash;
      }

      // Static parse helpers: every overload delegates to PARSER. The in-memory
      // overloads call PARSER directly; the stream overloads go through the
      // GeneratedMessageV3 *WithIOException helpers and declare java.io.IOException.
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
          java.nio.ByteBuffer data)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
          java.nio.ByteBuffer data,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data, extensionRegistry);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
          com.google.protobuf.ByteString data)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
          com.google.protobuf.ByteString data,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data, extensionRegistry);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(byte[] data)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
          byte[] data,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return PARSER.parseFrom(data, extensionRegistry);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(java.io.InputStream input)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseWithIOException(PARSER, input);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
          java.io.InputStream input,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseWithIOException(PARSER, input, extensionRegistry);
      }

      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseDelimitedFrom(java.io.InputStream input)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseDelimitedWithIOException(PARSER, input);
      }

      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseDelimitedFrom(
          java.io.InputStream input,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseDelimitedWithIOException(PARSER, input, extensionRegistry);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
          com.google.protobuf.CodedInputStream input)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseWithIOException(PARSER, input);
      }
      public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
          com.google.protobuf.CodedInputStream input,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws java.io.IOException {
        return com.google.protobuf.GeneratedMessageV3
            .parseWithIOException(PARSER, input, extensionRegistry);
      }

      @java.lang.Override
      public Builder newBuilderForType() { return newBuilder(); }
      /** @return a builder obtained from the default instance. */
      public static Builder newBuilder() {
        return DEFAULT_INSTANCE.toBuilder();
      }
      /**
       * @param prototype message whose contents are merged into the new builder.
       * @return a builder pre-populated from {@code prototype}.
       */
      public static Builder newBuilder(sentencepiece.SentencepieceModel.ModelProto.SentencePiece prototype) {
        return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
      }
      /**
       * @return an empty builder for the default instance, otherwise a builder
       *     merged from this message.
       */
      @java.lang.Override
      public Builder toBuilder() {
        return this == DEFAULT_INSTANCE
            ? new Builder() : new Builder().mergeFrom(this);
      }

      @java.lang.Override
      protected Builder newBuilderForType(
          com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
        Builder builder = new Builder(parent);
        return builder;
      }
      /**
       * Protobuf type {@code sentencepiece.ModelProto.SentencePiece}
       */
      public static final class Builder extends
          com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<
            sentencepiece.SentencepieceModel.ModelProto.SentencePiece, Builder> implements
          // @@protoc_insertion_point(builder_implements:sentencepiece.ModelProto.SentencePiece)
          sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder {
        public static final com.google.protobuf.Descriptors.Descriptor
            getDescriptor() {
          return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_descriptor;
        }

        @java.lang.Override
        protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
            internalGetFieldAccessorTable() {
          return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_fieldAccessorTable
              .ensureFieldAccessorsInitialized(
                  sentencepiece.SentencepieceModel.ModelProto.SentencePiece.class, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder.class);
        }

        // Construct using sentencepiece.SentencepieceModel.ModelProto.SentencePiece.newBuilder()
        private Builder() {

        }

private Builder( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { super(parent); } @java.lang.Override public Builder clear() { super.clear(); bitField0_ = 0; piece_ = ""; score_ = 0F; type_ = 1; return this; } @java.lang.Override public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_descriptor; } @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto.SentencePiece getDefaultInstanceForType() { return sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDefaultInstance(); } @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto.SentencePiece build() { sentencepiece.SentencepieceModel.ModelProto.SentencePiece result = buildPartial(); if (!result.isInitialized()) { throw newUninitializedMessageException(result); } return result; } @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto.SentencePiece buildPartial() { sentencepiece.SentencepieceModel.ModelProto.SentencePiece result = new sentencepiece.SentencepieceModel.ModelProto.SentencePiece(this); if (bitField0_ != 0) { buildPartial0(result); } onBuilt(); return result; } private void buildPartial0(sentencepiece.SentencepieceModel.ModelProto.SentencePiece result) { int from_bitField0_ = bitField0_; int to_bitField0_ = 0; if (((from_bitField0_ & 0x00000001) != 0)) { result.piece_ = piece_; to_bitField0_ |= 0x00000001; } if (((from_bitField0_ & 0x00000002) != 0)) { result.score_ = score_; to_bitField0_ |= 0x00000002; } if (((from_bitField0_ & 0x00000004) != 0)) { result.type_ = type_; to_bitField0_ |= 0x00000004; } result.bitField0_ |= to_bitField0_; } @java.lang.Override public Builder clone() { return super.clone(); } @java.lang.Override public Builder setField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.setField(field, value); } @java.lang.Override public Builder clearField( 
com.google.protobuf.Descriptors.FieldDescriptor field) { return super.clearField(field); } @java.lang.Override public Builder clearOneof( com.google.protobuf.Descriptors.OneofDescriptor oneof) { return super.clearOneof(oneof); } @java.lang.Override public Builder setRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, int index, java.lang.Object value) { return super.setRepeatedField(field, index, value); } @java.lang.Override public Builder addRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.addRepeatedField(field, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.ModelProto.SentencePiece, Type> extension, Type value) { return super.setExtension(extension, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.ModelProto.SentencePiece, java.util.List> extension, int index, Type value) { return super.setExtension(extension, index, value); } @java.lang.Override public Builder addExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.ModelProto.SentencePiece, java.util.List> extension, Type value) { return super.addExtension(extension, value); } @java.lang.Override public Builder clearExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.ModelProto.SentencePiece, T> extension) { return super.clearExtension(extension); } @java.lang.Override public Builder mergeFrom(com.google.protobuf.Message other) { if (other instanceof sentencepiece.SentencepieceModel.ModelProto.SentencePiece) { return mergeFrom((sentencepiece.SentencepieceModel.ModelProto.SentencePiece)other); } else { super.mergeFrom(other); return this; } } public Builder mergeFrom(sentencepiece.SentencepieceModel.ModelProto.SentencePiece other) { if 
(other == sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDefaultInstance()) return this; if (other.hasPiece()) { piece_ = other.piece_; bitField0_ |= 0x00000001; onChanged(); } if (other.hasScore()) { setScore(other.getScore()); } if (other.hasType()) { setType(other.getType()); } this.mergeExtensionFields(other); this.mergeUnknownFields(other.getUnknownFields()); onChanged(); return this; } @java.lang.Override public final boolean isInitialized() { if (!extensionsAreInitialized()) { return false; } return true; } @java.lang.Override public Builder mergeFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { if (extensionRegistry == null) { throw new java.lang.NullPointerException(); } try { boolean done = false; while (!done) { int tag = input.readTag(); switch (tag) { case 0: done = true; break; case 10: { piece_ = input.readBytes(); bitField0_ |= 0x00000001; break; } // case 10 case 21: { score_ = input.readFloat(); bitField0_ |= 0x00000002; break; } // case 21 case 24: { int tmpRaw = input.readEnum(); sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type tmpValue = sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.forNumber(tmpRaw); if (tmpValue == null) { mergeUnknownVarintField(3, tmpRaw); } else { type_ = tmpRaw; bitField0_ |= 0x00000004; } break; } // case 24 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { done = true; // was an endgroup tag } break; } // default: } // switch (tag) } // while (!done) } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.unwrapIOException(); } finally { onChanged(); } // finally return this; } private int bitField0_; private java.lang.Object piece_ = ""; /** *
         * piece must not be empty.
         * 
*
 * {@code optional string piece = 1;}
 * @return Whether the piece field is set.
 */
public boolean hasPiece() {
  final boolean pieceBitSet = (bitField0_ & 0x00000001) != 0;
  return pieceBitSet;
}

/**
 *
         * piece must not be empty.
         * 
*
 * {@code optional string piece = 1;}
 * @return The piece.
 */
public java.lang.String getPiece() {
  java.lang.Object current = piece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  com.google.protobuf.ByteString rawBytes =
      (com.google.protobuf.ByteString) current;
  java.lang.String decoded = rawBytes.toStringUtf8();
  // Cache the decoded form only when the bytes were valid UTF-8.
  if (rawBytes.isValidUtf8()) {
    piece_ = decoded;
  }
  return decoded;
}

/**
 *
         * piece must not be empty.
         * 
*
 * {@code optional string piece = 1;}
 * @return The bytes for piece.
 */
public com.google.protobuf.ByteString
    getPieceBytes() {
  java.lang.Object current = piece_;
  if (!(current instanceof String)) {
    return (com.google.protobuf.ByteString) current;
  }
  // Encode once and keep the ByteString form in the field.
  com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8(
          (java.lang.String) current);
  piece_ = encoded;
  return encoded;
}

/**
 *
         * piece must not be empty.
         * 
*
 * {@code optional string piece = 1;}
 * @param value The piece to set.
 * @return This builder for chaining.
 */
public Builder setPiece(
    java.lang.String value) {
  if (null == value) {
    throw new NullPointerException();
  }
  bitField0_ |= 0x00000001;
  piece_ = value;
  onChanged();
  return this;
}

/**
 *
         * piece must not be empty.
         * 
*
 * {@code optional string piece = 1;}
 * @return This builder for chaining.
 */
public Builder clearPiece() {
  // Restore the default instance's value, then drop the presence bit.
  final java.lang.String defaultPiece = getDefaultInstance().getPiece();
  piece_ = defaultPiece;
  bitField0_ &= ~0x00000001;
  onChanged();
  return this;
}

/**
 *
         * piece must not be empty.
         * 
* * optional string piece = 1; * @param value The bytes for piece to set. * @return This builder for chaining. */ public Builder setPieceBytes( com.google.protobuf.ByteString value) { if (value == null) { throw new NullPointerException(); } piece_ = value; bitField0_ |= 0x00000001; onChanged(); return this; } private float score_ ; /** * optional float score = 2; * @return Whether the score field is set. */ @java.lang.Override public boolean hasScore() { return ((bitField0_ & 0x00000002) != 0); } /** * optional float score = 2; * @return The score. */ @java.lang.Override public float getScore() { return score_; } /** * optional float score = 2; * @param value The score to set. * @return This builder for chaining. */ public Builder setScore(float value) { score_ = value; bitField0_ |= 0x00000002; onChanged(); return this; } /** * optional float score = 2; * @return This builder for chaining. */ public Builder clearScore() { bitField0_ = (bitField0_ & ~0x00000002); score_ = 0F; onChanged(); return this; } private int type_ = 1; /** * optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; * @return Whether the type field is set. */ @java.lang.Override public boolean hasType() { return ((bitField0_ & 0x00000004) != 0); } /** * optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; * @return The type. */ @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type getType() { sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type result = sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.forNumber(type_); return result == null ? sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.NORMAL : result; } /** * optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; * @param value The type to set. * @return This builder for chaining. 
*/ public Builder setType(sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type value) { if (value == null) { throw new NullPointerException(); } bitField0_ |= 0x00000004; type_ = value.getNumber(); onChanged(); return this; } /** * optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; * @return This builder for chaining. */ public Builder clearType() { bitField0_ = (bitField0_ & ~0x00000004); type_ = 1; onChanged(); return this; } @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.setUnknownFields(unknownFields); } @java.lang.Override public final Builder mergeUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.mergeUnknownFields(unknownFields); } // @@protoc_insertion_point(builder_scope:sentencepiece.ModelProto.SentencePiece) } // @@protoc_insertion_point(class_scope:sentencepiece.ModelProto.SentencePiece) private static final sentencepiece.SentencepieceModel.ModelProto.SentencePiece DEFAULT_INSTANCE; static { DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.ModelProto.SentencePiece(); } public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece getDefaultInstance() { return DEFAULT_INSTANCE; } @java.lang.Deprecated public static final com.google.protobuf.Parser PARSER = new com.google.protobuf.AbstractParser() { @java.lang.Override public SentencePiece parsePartialFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { Builder builder = newBuilder(); try { builder.mergeFrom(input, extensionRegistry); } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.setUnfinishedMessage(builder.buildPartial()); } catch (com.google.protobuf.UninitializedMessageException e) { throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial()); } catch 
(java.io.IOException e) { throw new com.google.protobuf.InvalidProtocolBufferException(e) .setUnfinishedMessage(builder.buildPartial()); } return builder.buildPartial(); } }; public static com.google.protobuf.Parser parser() { return PARSER; } @java.lang.Override public com.google.protobuf.Parser getParserForType() { return PARSER; } @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto.SentencePiece getDefaultInstanceForType() { return DEFAULT_INSTANCE; } } private int bitField0_; public static final int PIECES_FIELD_NUMBER = 1; @SuppressWarnings("serial") private java.util.List pieces_; /** *
     * Sentence pieces with scores.
     * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ @java.lang.Override public java.util.List getPiecesList() { return pieces_; } /** *
     * Sentence pieces with scores.
     * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ @java.lang.Override public java.util.List getPiecesOrBuilderList() { return pieces_; } /** *
     * Sentence pieces with scores.
     * 
*
 * {@code repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;}
 */
@java.lang.Override
public int getPiecesCount() {
  final int pieceCount = pieces_.size();
  return pieceCount;
}

/**
 *
     * Sentence pieces with scores.
     * 
*
 * {@code repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;}
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece getPieces(int index) {
  final sentencepiece.SentencepieceModel.ModelProto.SentencePiece element = pieces_.get(index);
  return element;
}

/**
 *
     * Sentence pieces with scores.
     * 
*
 * {@code repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;}
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder getPiecesOrBuilder(
    int index) {
  final sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder element =
      pieces_.get(index);
  return element;
}

public static final int TRAINER_SPEC_FIELD_NUMBER = 2;
private sentencepiece.SentencepieceModel.TrainerSpec trainerSpec_;
/**
 *
     * Spec used to generate this model file.
     * 
*
 * {@code optional .sentencepiece.TrainerSpec trainer_spec = 2;}
 * @return Whether the trainerSpec field is set.
 */
@java.lang.Override
public boolean hasTrainerSpec() {
  final boolean trainerSpecBitSet = (bitField0_ & 0x00000001) != 0;
  return trainerSpecBitSet;
}

/**
 *
     * Spec used to generate this model file.
     * 
*
 * {@code optional .sentencepiece.TrainerSpec trainer_spec = 2;}
 * @return The trainerSpec.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpec getTrainerSpec() {
  // An unset field is reported as the TrainerSpec default instance.
  if (trainerSpec_ == null) {
    return sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance();
  }
  return trainerSpec_;
}

/**
 *
     * Spec used to generate this model file.
     * 
*
 * {@code optional .sentencepiece.TrainerSpec trainer_spec = 2;}
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpecOrBuilder getTrainerSpecOrBuilder() {
  // An unset field is reported as the TrainerSpec default instance.
  if (trainerSpec_ == null) {
    return sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance();
  }
  return trainerSpec_;
}

public static final int NORMALIZER_SPEC_FIELD_NUMBER = 3;
private sentencepiece.SentencepieceModel.NormalizerSpec normalizerSpec_;
/**
 *
     * Spec for text normalization.
     * 
*
 * {@code optional .sentencepiece.NormalizerSpec normalizer_spec = 3;}
 * @return Whether the normalizerSpec field is set.
 */
@java.lang.Override
public boolean hasNormalizerSpec() {
  final boolean normalizerSpecBitSet = (bitField0_ & 0x00000002) != 0;
  return normalizerSpecBitSet;
}

/**
 *
     * Spec for text normalization.
     * 
*
 * {@code optional .sentencepiece.NormalizerSpec normalizer_spec = 3;}
 * @return The normalizerSpec.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpec getNormalizerSpec() {
  // An unset field is reported as the NormalizerSpec default instance.
  if (normalizerSpec_ == null) {
    return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
  }
  return normalizerSpec_;
}

/**
 *
     * Spec for text normalization.
     * 
*
 * {@code optional .sentencepiece.NormalizerSpec normalizer_spec = 3;}
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getNormalizerSpecOrBuilder() {
  // An unset field is reported as the NormalizerSpec default instance.
  if (normalizerSpec_ == null) {
    return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
  }
  return normalizerSpec_;
}

public static final int SELF_TEST_DATA_FIELD_NUMBER = 4;
private sentencepiece.SentencepieceModel.SelfTestData selfTestData_;
/**
 *
     * Stores sample input and its expected segmentation to verify the model.
     * 
*
 * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
 * @return Whether the selfTestData field is set.
 */
@java.lang.Override
public boolean hasSelfTestData() {
  final boolean selfTestDataBitSet = (bitField0_ & 0x00000004) != 0;
  return selfTestDataBitSet;
}

/**
 *
     * Stores sample input and its expected segmentation to verify the model.
     * 
*
 * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
 * @return The selfTestData.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData getSelfTestData() {
  // An unset field is reported as the SelfTestData default instance.
  if (selfTestData_ == null) {
    return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
  }
  return selfTestData_;
}

/**
 *
     * Stores sample input and its expected segmentation to verify the model.
     * 
*
 * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestDataOrBuilder getSelfTestDataOrBuilder() {
  // An unset field is reported as the SelfTestData default instance.
  if (selfTestData_ == null) {
    return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
  }
  return selfTestData_;
}

public static final int DENORMALIZER_SPEC_FIELD_NUMBER = 5;
private sentencepiece.SentencepieceModel.NormalizerSpec denormalizerSpec_;
/**
 *
     * Spec for text de-normalization.
     * 
*
 * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
 * @return Whether the denormalizerSpec field is set.
 */
@java.lang.Override
public boolean hasDenormalizerSpec() {
  final boolean denormalizerSpecBitSet = (bitField0_ & 0x00000008) != 0;
  return denormalizerSpecBitSet;
}

/**
 *
     * Spec for text de-normalization.
     * 
*
 * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
 * @return The denormalizerSpec.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpec getDenormalizerSpec() {
  // An unset field is reported as the NormalizerSpec default instance.
  if (denormalizerSpec_ == null) {
    return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
  }
  return denormalizerSpec_;
}

/**
 *
     * Spec for text de-normalization.
     * 
*
 * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getDenormalizerSpecOrBuilder() {
  return denormalizerSpec_ == null ? sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance() : denormalizerSpec_;
}

// Cached isInitialized() result: -1 = not computed yet, 0 = false, 1 = true.
private byte memoizedIsInitialized = -1;

/**
 * Returns whether every piece, every sub-message that is set, and the
 * extensions are initialized. The result is memoized in
 * {@code memoizedIsInitialized}.
 */
@java.lang.Override
public final boolean isInitialized() {
  byte isInitialized = memoizedIsInitialized;
  if (isInitialized == 1) return true;
  if (isInitialized == 0) return false;

  for (int i = 0; i < getPiecesCount(); i++) {
    if (!getPieces(i).isInitialized()) {
      memoizedIsInitialized = 0;
      return false;
    }
  }
  if (hasTrainerSpec()) {
    if (!getTrainerSpec().isInitialized()) {
      memoizedIsInitialized = 0;
      return false;
    }
  }
  if (hasNormalizerSpec()) {
    if (!getNormalizerSpec().isInitialized()) {
      memoizedIsInitialized = 0;
      return false;
    }
  }
  if (hasSelfTestData()) {
    if (!getSelfTestData().isInitialized()) {
      memoizedIsInitialized = 0;
      return false;
    }
  }
  if (hasDenormalizerSpec()) {
    if (!getDenormalizerSpec().isInitialized()) {
      memoizedIsInitialized = 0;
      return false;
    }
  }
  if (!extensionsAreInitialized()) {
    memoizedIsInitialized = 0;
    return false;
  }
  memoizedIsInitialized = 1;
  return true;
}

/**
 * Writes the pieces (field 1), then each set sub-message (fields 2-5), then the
 * extensions, then the unknown fields.
 *
 * @throws java.io.IOException declared for the writes to {@code output}
 */
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
                    throws java.io.IOException {
  // ExtensionWriter is an inner class of the generic ExtendableMessage, so the
  // enclosing type is parameterized with ModelProto instead of being used raw.
  com.google.protobuf.GeneratedMessageV3
    .ExtendableMessage<sentencepiece.SentencepieceModel.ModelProto>.ExtensionWriter
      extensionWriter = newExtensionWriter();
  for (int i = 0; i < pieces_.size(); i++) {
    output.writeMessage(1, pieces_.get(i));
  }
  if (((bitField0_ & 0x00000001) != 0)) {
    output.writeMessage(2, getTrainerSpec());
  }
  if (((bitField0_ & 0x00000002) != 0)) {
    output.writeMessage(3, getNormalizerSpec());
  }
  if (((bitField0_ & 0x00000004) != 0)) {
    output.writeMessage(4, getSelfTestData());
  }
  if (((bitField0_ & 0x00000008) != 0)) {
    output.writeMessage(5, getDenormalizerSpec());
  }
  extensionWriter.writeUntil(536870912, output);
  getUnknownFields().writeTo(output);
}

@java.lang.Override
    public int getSerializedSize() {
      // Reuse the memoized size when it has already been computed (-1 means "not yet").
      final int cached = memoizedSize;
      if (cached != -1) {
        return cached;
      }

      int total = 0;
      for (int i = 0; i < pieces_.size(); i++) {
        total += com.google.protobuf.CodedOutputStream.computeMessageSize(1, pieces_.get(i));
      }
      if ((bitField0_ & 0x00000001) != 0) {
        total += com.google.protobuf.CodedOutputStream.computeMessageSize(2, getTrainerSpec());
      }
      if ((bitField0_ & 0x00000002) != 0) {
        total += com.google.protobuf.CodedOutputStream.computeMessageSize(3, getNormalizerSpec());
      }
      if ((bitField0_ & 0x00000004) != 0) {
        total += com.google.protobuf.CodedOutputStream.computeMessageSize(4, getSelfTestData());
      }
      if ((bitField0_ & 0x00000008) != 0) {
        total += com.google.protobuf.CodedOutputStream.computeMessageSize(5, getDenormalizerSpec());
      }
      total += extensionsSerializedSize();
      total += getUnknownFields().getSerializedSize();

      memoizedSize = total;
      return total;
    }

    /**
     * Compares pieces, presence and value of each sub-message, unknown fields and
     * extension fields. Objects that are not ModelProto are delegated to
     * {@code super.equals}.
     */
    @java.lang.Override
    public boolean equals(final java.lang.Object obj) {
      if (obj == this) {
        return true;
      }
      if (!(obj instanceof sentencepiece.SentencepieceModel.ModelProto)) {
        return super.equals(obj);
      }
      final sentencepiece.SentencepieceModel.ModelProto that =
          (sentencepiece.SentencepieceModel.ModelProto) obj;

      if (!getPiecesList().equals(that.getPiecesList())) return false;

      if (hasTrainerSpec() != that.hasTrainerSpec()) return false;
      if (hasTrainerSpec() && !getTrainerSpec().equals(that.getTrainerSpec())) return false;

      if (hasNormalizerSpec() != that.hasNormalizerSpec()) return false;
      if (hasNormalizerSpec() && !getNormalizerSpec().equals(that.getNormalizerSpec())) return false;

      if (hasSelfTestData() != that.hasSelfTestData()) return false;
      if (hasSelfTestData() && !getSelfTestData().equals(that.getSelfTestData())) return false;

      if (hasDenormalizerSpec() != that.hasDenormalizerSpec()) return false;
      if (hasDenormalizerSpec() && !getDenormalizerSpec().equals(that.getDenormalizerSpec())) {
        return false;
      }

      return getUnknownFields().equals(that.getUnknownFields())
          && getExtensionFields().equals(that.getExtensionFields());
    }

    /** Hash over the descriptor, every populated field, extensions and unknown fields; memoized. */
    @java.lang.Override
    public int hashCode() {
      if (memoizedHashCode != 0) {
        return memoizedHashCode;
      }
      int h = 41;
      h = (19 * h) + getDescriptor().hashCode();
      if (getPiecesCount() > 0) {
        h = 53 * ((37 * h) + PIECES_FIELD_NUMBER) + getPiecesList().hashCode();
      }
      if (hasTrainerSpec()) {
        h = 53 * ((37 * h) + TRAINER_SPEC_FIELD_NUMBER) + getTrainerSpec().hashCode();
      }
      if (hasNormalizerSpec()) {
        h = 53 * ((37 * h) + NORMALIZER_SPEC_FIELD_NUMBER) + getNormalizerSpec().hashCode();
      }
      if (hasSelfTestData()) {
        h = 53 * ((37 * h) + SELF_TEST_DATA_FIELD_NUMBER) + getSelfTestData().hashCode();
      }
      if (hasDenormalizerSpec()) {
        h = 53 * ((37 * h) + DENORMALIZER_SPEC_FIELD_NUMBER) + getDenormalizerSpec().hashCode();
      }
      h = hashFields(h, getExtensionFields());
      h = (29 * h) + getUnknownFields().hashCode();
      memoizedHashCode = h;
      return h;
    }

    // ---- Static parse entry points; each one delegates to PARSER. ----

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(java.nio.ByteBuffer data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
        java.nio.ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
        com.google.protobuf.ByteString data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
        com.google.protobuf.ByteString data,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(byte[] data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
        byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(java.io.InputStream input)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
        java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(
          PARSER, input, extensionRegistry);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseDelimitedFrom(
        java.io.InputStream input) throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(PARSER, input);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseDelimitedFrom(
        java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(
          PARSER, input, extensionRegistry);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
        com.google.protobuf.CodedInputStream input) throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
    }

    public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
        com.google.protobuf.CodedInputStream input,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(
          PARSER, input, extensionRegistry);
    }

    // ---- Builder factories. ----

    @java.lang.Override
    public Builder newBuilderForType() {
      return newBuilder();
    }

    /** Returns a builder obtained from {@code DEFAULT_INSTANCE.toBuilder()}. */
    public static Builder newBuilder() {
      return DEFAULT_INSTANCE.toBuilder();
    }

    /** Returns a builder from the default instance with {@code prototype} merged into it. */
    public static Builder newBuilder(sentencepiece.SentencepieceModel.ModelProto prototype) {
      return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
    }

    /** Returns an empty builder for the default instance, otherwise one merged from this message. */
    @java.lang.Override
    public Builder toBuilder() {
      if (this == DEFAULT_INSTANCE) {
        return new Builder();
      }
      return new Builder().mergeFrom(this);
    }

    @java.lang.Override
    protected Builder newBuilderForType(
        com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
      return new Builder(parent);
    }

    /**
     * ModelProto stores model parameters.
     * SentencePieceProcessor is supposed to be self-contained.
     * All settings/parameters which may change the behavior must be encoded
     * in ModelProto.
     * 
* * Protobuf type {@code sentencepiece.ModelProto} */ public static final class Builder extends com.google.protobuf.GeneratedMessageV3.ExtendableBuilder< sentencepiece.SentencepieceModel.ModelProto, Builder> implements // @@protoc_insertion_point(builder_implements:sentencepiece.ModelProto) sentencepiece.SentencepieceModel.ModelProtoOrBuilder { public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_descriptor; } @java.lang.Override protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_fieldAccessorTable .ensureFieldAccessorsInitialized( sentencepiece.SentencepieceModel.ModelProto.class, sentencepiece.SentencepieceModel.ModelProto.Builder.class); } // Construct using sentencepiece.SentencepieceModel.ModelProto.newBuilder() private Builder() { maybeForceBuilderInitialization(); } private Builder( com.google.protobuf.GeneratedMessageV3.BuilderParent parent) { super(parent); maybeForceBuilderInitialization(); } private void maybeForceBuilderInitialization() { if (com.google.protobuf.GeneratedMessageV3 .alwaysUseFieldBuilders) { getPiecesFieldBuilder(); getTrainerSpecFieldBuilder(); getNormalizerSpecFieldBuilder(); getSelfTestDataFieldBuilder(); getDenormalizerSpecFieldBuilder(); } } @java.lang.Override public Builder clear() { super.clear(); bitField0_ = 0; if (piecesBuilder_ == null) { pieces_ = java.util.Collections.emptyList(); } else { pieces_ = null; piecesBuilder_.clear(); } bitField0_ = (bitField0_ & ~0x00000001); trainerSpec_ = null; if (trainerSpecBuilder_ != null) { trainerSpecBuilder_.dispose(); trainerSpecBuilder_ = null; } normalizerSpec_ = null; if (normalizerSpecBuilder_ != null) { normalizerSpecBuilder_.dispose(); normalizerSpecBuilder_ = null; } selfTestData_ = null; if (selfTestDataBuilder_ != null) { 
selfTestDataBuilder_.dispose(); selfTestDataBuilder_ = null; } denormalizerSpec_ = null; if (denormalizerSpecBuilder_ != null) { denormalizerSpecBuilder_.dispose(); denormalizerSpecBuilder_ = null; } return this; } @java.lang.Override public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_descriptor; } @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto getDefaultInstanceForType() { return sentencepiece.SentencepieceModel.ModelProto.getDefaultInstance(); } @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto build() { sentencepiece.SentencepieceModel.ModelProto result = buildPartial(); if (!result.isInitialized()) { throw newUninitializedMessageException(result); } return result; } @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto buildPartial() { sentencepiece.SentencepieceModel.ModelProto result = new sentencepiece.SentencepieceModel.ModelProto(this); buildPartialRepeatedFields(result); if (bitField0_ != 0) { buildPartial0(result); } onBuilt(); return result; } private void buildPartialRepeatedFields(sentencepiece.SentencepieceModel.ModelProto result) { if (piecesBuilder_ == null) { if (((bitField0_ & 0x00000001) != 0)) { pieces_ = java.util.Collections.unmodifiableList(pieces_); bitField0_ = (bitField0_ & ~0x00000001); } result.pieces_ = pieces_; } else { result.pieces_ = piecesBuilder_.build(); } } private void buildPartial0(sentencepiece.SentencepieceModel.ModelProto result) { int from_bitField0_ = bitField0_; int to_bitField0_ = 0; if (((from_bitField0_ & 0x00000002) != 0)) { result.trainerSpec_ = trainerSpecBuilder_ == null ? trainerSpec_ : trainerSpecBuilder_.build(); to_bitField0_ |= 0x00000001; } if (((from_bitField0_ & 0x00000004) != 0)) { result.normalizerSpec_ = normalizerSpecBuilder_ == null ? 
normalizerSpec_ : normalizerSpecBuilder_.build(); to_bitField0_ |= 0x00000002; } if (((from_bitField0_ & 0x00000008) != 0)) { result.selfTestData_ = selfTestDataBuilder_ == null ? selfTestData_ : selfTestDataBuilder_.build(); to_bitField0_ |= 0x00000004; } if (((from_bitField0_ & 0x00000010) != 0)) { result.denormalizerSpec_ = denormalizerSpecBuilder_ == null ? denormalizerSpec_ : denormalizerSpecBuilder_.build(); to_bitField0_ |= 0x00000008; } result.bitField0_ |= to_bitField0_; } @java.lang.Override public Builder clone() { return super.clone(); } @java.lang.Override public Builder setField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.setField(field, value); } @java.lang.Override public Builder clearField( com.google.protobuf.Descriptors.FieldDescriptor field) { return super.clearField(field); } @java.lang.Override public Builder clearOneof( com.google.protobuf.Descriptors.OneofDescriptor oneof) { return super.clearOneof(oneof); } @java.lang.Override public Builder setRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, int index, java.lang.Object value) { return super.setRepeatedField(field, index, value); } @java.lang.Override public Builder addRepeatedField( com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) { return super.addRepeatedField(field, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.ModelProto, Type> extension, Type value) { return super.setExtension(extension, value); } @java.lang.Override public Builder setExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.ModelProto, java.util.List> extension, int index, Type value) { return super.setExtension(extension, index, value); } @java.lang.Override public Builder addExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< 
sentencepiece.SentencepieceModel.ModelProto, java.util.List> extension, Type value) { return super.addExtension(extension, value); } @java.lang.Override public Builder clearExtension( com.google.protobuf.GeneratedMessage.GeneratedExtension< sentencepiece.SentencepieceModel.ModelProto, T> extension) { return super.clearExtension(extension); } @java.lang.Override public Builder mergeFrom(com.google.protobuf.Message other) { if (other instanceof sentencepiece.SentencepieceModel.ModelProto) { return mergeFrom((sentencepiece.SentencepieceModel.ModelProto)other); } else { super.mergeFrom(other); return this; } } public Builder mergeFrom(sentencepiece.SentencepieceModel.ModelProto other) { if (other == sentencepiece.SentencepieceModel.ModelProto.getDefaultInstance()) return this; if (piecesBuilder_ == null) { if (!other.pieces_.isEmpty()) { if (pieces_.isEmpty()) { pieces_ = other.pieces_; bitField0_ = (bitField0_ & ~0x00000001); } else { ensurePiecesIsMutable(); pieces_.addAll(other.pieces_); } onChanged(); } } else { if (!other.pieces_.isEmpty()) { if (piecesBuilder_.isEmpty()) { piecesBuilder_.dispose(); piecesBuilder_ = null; pieces_ = other.pieces_; bitField0_ = (bitField0_ & ~0x00000001); piecesBuilder_ = com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders ? 
getPiecesFieldBuilder() : null; } else { piecesBuilder_.addAllMessages(other.pieces_); } } } if (other.hasTrainerSpec()) { mergeTrainerSpec(other.getTrainerSpec()); } if (other.hasNormalizerSpec()) { mergeNormalizerSpec(other.getNormalizerSpec()); } if (other.hasSelfTestData()) { mergeSelfTestData(other.getSelfTestData()); } if (other.hasDenormalizerSpec()) { mergeDenormalizerSpec(other.getDenormalizerSpec()); } this.mergeExtensionFields(other); this.mergeUnknownFields(other.getUnknownFields()); onChanged(); return this; } @java.lang.Override public final boolean isInitialized() { for (int i = 0; i < getPiecesCount(); i++) { if (!getPieces(i).isInitialized()) { return false; } } if (hasTrainerSpec()) { if (!getTrainerSpec().isInitialized()) { return false; } } if (hasNormalizerSpec()) { if (!getNormalizerSpec().isInitialized()) { return false; } } if (hasSelfTestData()) { if (!getSelfTestData().isInitialized()) { return false; } } if (hasDenormalizerSpec()) { if (!getDenormalizerSpec().isInitialized()) { return false; } } if (!extensionsAreInitialized()) { return false; } return true; } @java.lang.Override public Builder mergeFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { if (extensionRegistry == null) { throw new java.lang.NullPointerException(); } try { boolean done = false; while (!done) { int tag = input.readTag(); switch (tag) { case 0: done = true; break; case 10: { sentencepiece.SentencepieceModel.ModelProto.SentencePiece m = input.readMessage( sentencepiece.SentencepieceModel.ModelProto.SentencePiece.PARSER, extensionRegistry); if (piecesBuilder_ == null) { ensurePiecesIsMutable(); pieces_.add(m); } else { piecesBuilder_.addMessage(m); } break; } // case 10 case 18: { input.readMessage( getTrainerSpecFieldBuilder().getBuilder(), extensionRegistry); bitField0_ |= 0x00000002; break; } // case 18 case 26: { input.readMessage( 
getNormalizerSpecFieldBuilder().getBuilder(), extensionRegistry); bitField0_ |= 0x00000004; break; } // case 26 case 34: { input.readMessage( getSelfTestDataFieldBuilder().getBuilder(), extensionRegistry); bitField0_ |= 0x00000008; break; } // case 34 case 42: { input.readMessage( getDenormalizerSpecFieldBuilder().getBuilder(), extensionRegistry); bitField0_ |= 0x00000010; break; } // case 42 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { done = true; // was an endgroup tag } break; } // default: } // switch (tag) } // while (!done) } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw e.unwrapIOException(); } finally { onChanged(); } // finally return this; } private int bitField0_; private java.util.List pieces_ = java.util.Collections.emptyList(); private void ensurePiecesIsMutable() { if (!((bitField0_ & 0x00000001) != 0)) { pieces_ = new java.util.ArrayList(pieces_); bitField0_ |= 0x00000001; } } private com.google.protobuf.RepeatedFieldBuilderV3< sentencepiece.SentencepieceModel.ModelProto.SentencePiece, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder, sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder> piecesBuilder_; /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public java.util.List getPiecesList() { if (piecesBuilder_ == null) { return java.util.Collections.unmodifiableList(pieces_); } else { return piecesBuilder_.getMessageList(); } } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public int getPiecesCount() { if (piecesBuilder_ == null) { return pieces_.size(); } else { return piecesBuilder_.getCount(); } } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public sentencepiece.SentencepieceModel.ModelProto.SentencePiece getPieces(int index) { if (piecesBuilder_ == null) { return pieces_.get(index); } else { return piecesBuilder_.getMessage(index); } } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder setPieces( int index, sentencepiece.SentencepieceModel.ModelProto.SentencePiece value) { if (piecesBuilder_ == null) { if (value == null) { throw new NullPointerException(); } ensurePiecesIsMutable(); pieces_.set(index, value); onChanged(); } else { piecesBuilder_.setMessage(index, value); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder setPieces( int index, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder builderForValue) { if (piecesBuilder_ == null) { ensurePiecesIsMutable(); pieces_.set(index, builderForValue.build()); onChanged(); } else { piecesBuilder_.setMessage(index, builderForValue.build()); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder addPieces(sentencepiece.SentencepieceModel.ModelProto.SentencePiece value) { if (piecesBuilder_ == null) { if (value == null) { throw new NullPointerException(); } ensurePiecesIsMutable(); pieces_.add(value); onChanged(); } else { piecesBuilder_.addMessage(value); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder addPieces( int index, sentencepiece.SentencepieceModel.ModelProto.SentencePiece value) { if (piecesBuilder_ == null) { if (value == null) { throw new NullPointerException(); } ensurePiecesIsMutable(); pieces_.add(index, value); onChanged(); } else { piecesBuilder_.addMessage(index, value); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder addPieces( sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder builderForValue) { if (piecesBuilder_ == null) { ensurePiecesIsMutable(); pieces_.add(builderForValue.build()); onChanged(); } else { piecesBuilder_.addMessage(builderForValue.build()); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder addPieces( int index, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder builderForValue) { if (piecesBuilder_ == null) { ensurePiecesIsMutable(); pieces_.add(index, builderForValue.build()); onChanged(); } else { piecesBuilder_.addMessage(index, builderForValue.build()); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder addAllPieces( java.lang.Iterable values) { if (piecesBuilder_ == null) { ensurePiecesIsMutable(); com.google.protobuf.AbstractMessageLite.Builder.addAll( values, pieces_); onChanged(); } else { piecesBuilder_.addAllMessages(values); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder clearPieces() { if (piecesBuilder_ == null) { pieces_ = java.util.Collections.emptyList(); bitField0_ = (bitField0_ & ~0x00000001); onChanged(); } else { piecesBuilder_.clear(); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public Builder removePieces(int index) { if (piecesBuilder_ == null) { ensurePiecesIsMutable(); pieces_.remove(index); onChanged(); } else { piecesBuilder_.remove(index); } return this; } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder getPiecesBuilder( int index) { return getPiecesFieldBuilder().getBuilder(index); } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder getPiecesOrBuilder( int index) { if (piecesBuilder_ == null) { return pieces_.get(index); } else { return piecesBuilder_.getMessageOrBuilder(index); } } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public java.util.List getPiecesOrBuilderList() { if (piecesBuilder_ != null) { return piecesBuilder_.getMessageOrBuilderList(); } else { return java.util.Collections.unmodifiableList(pieces_); } } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder addPiecesBuilder() { return getPiecesFieldBuilder().addBuilder( sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDefaultInstance()); } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder addPiecesBuilder( int index) { return getPiecesFieldBuilder().addBuilder( index, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDefaultInstance()); } /** *
       * Sentence pieces with scores.
       * 
* * repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; */ public java.util.List getPiecesBuilderList() { return getPiecesFieldBuilder().getBuilderList(); } private com.google.protobuf.RepeatedFieldBuilderV3< sentencepiece.SentencepieceModel.ModelProto.SentencePiece, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder, sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder> getPiecesFieldBuilder() { if (piecesBuilder_ == null) { piecesBuilder_ = new com.google.protobuf.RepeatedFieldBuilderV3< sentencepiece.SentencepieceModel.ModelProto.SentencePiece, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder, sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder>( pieces_, ((bitField0_ & 0x00000001) != 0), getParentForChildren(), isClean()); pieces_ = null; } return piecesBuilder_; } private sentencepiece.SentencepieceModel.TrainerSpec trainerSpec_; private com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.TrainerSpec, sentencepiece.SentencepieceModel.TrainerSpec.Builder, sentencepiece.SentencepieceModel.TrainerSpecOrBuilder> trainerSpecBuilder_; /** *
       * Spec used to generate this model file.
       * 
*
       * optional .sentencepiece.TrainerSpec trainer_spec = 2;
       *
       * @return Whether the trainerSpec field is set.
       */
      public boolean hasTrainerSpec() {
        // Bit 0x02 of bitField0_ records presence of trainer_spec in this builder.
        final int masked = bitField0_ & 0x00000002;
        return masked != 0;
      }

      /**
       * Spec used to generate this model file.
       * 
* * optional .sentencepiece.TrainerSpec trainer_spec = 2; * @return The trainerSpec. */ public sentencepiece.SentencepieceModel.TrainerSpec getTrainerSpec() { if (trainerSpecBuilder_ == null) { return trainerSpec_ == null ? sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance() : trainerSpec_; } else { return trainerSpecBuilder_.getMessage(); } } /** *
       * Spec used to generate this model file.
       * 
* * optional .sentencepiece.TrainerSpec trainer_spec = 2; */ public Builder setTrainerSpec(sentencepiece.SentencepieceModel.TrainerSpec value) { if (trainerSpecBuilder_ == null) { if (value == null) { throw new NullPointerException(); } trainerSpec_ = value; } else { trainerSpecBuilder_.setMessage(value); } bitField0_ |= 0x00000002; onChanged(); return this; } /** *
       * Spec used to generate this model file.
       * 
* * optional .sentencepiece.TrainerSpec trainer_spec = 2; */ public Builder setTrainerSpec( sentencepiece.SentencepieceModel.TrainerSpec.Builder builderForValue) { if (trainerSpecBuilder_ == null) { trainerSpec_ = builderForValue.build(); } else { trainerSpecBuilder_.setMessage(builderForValue.build()); } bitField0_ |= 0x00000002; onChanged(); return this; } /** *
       * Spec used to generate this model file.
       * 
* * optional .sentencepiece.TrainerSpec trainer_spec = 2; */ public Builder mergeTrainerSpec(sentencepiece.SentencepieceModel.TrainerSpec value) { if (trainerSpecBuilder_ == null) { if (((bitField0_ & 0x00000002) != 0) && trainerSpec_ != null && trainerSpec_ != sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance()) { getTrainerSpecBuilder().mergeFrom(value); } else { trainerSpec_ = value; } } else { trainerSpecBuilder_.mergeFrom(value); } if (trainerSpec_ != null) { bitField0_ |= 0x00000002; onChanged(); } return this; } /** *
       * Spec used to generate this model file.
       * 
* * optional .sentencepiece.TrainerSpec trainer_spec = 2; */ public Builder clearTrainerSpec() { bitField0_ = (bitField0_ & ~0x00000002); trainerSpec_ = null; if (trainerSpecBuilder_ != null) { trainerSpecBuilder_.dispose(); trainerSpecBuilder_ = null; } onChanged(); return this; } /** *
       * Spec used to generate this model file.
       * 
* * optional .sentencepiece.TrainerSpec trainer_spec = 2; */ public sentencepiece.SentencepieceModel.TrainerSpec.Builder getTrainerSpecBuilder() { bitField0_ |= 0x00000002; onChanged(); return getTrainerSpecFieldBuilder().getBuilder(); } /** *
       * Spec used to generate this model file.
       * 
* * optional .sentencepiece.TrainerSpec trainer_spec = 2; */ public sentencepiece.SentencepieceModel.TrainerSpecOrBuilder getTrainerSpecOrBuilder() { if (trainerSpecBuilder_ != null) { return trainerSpecBuilder_.getMessageOrBuilder(); } else { return trainerSpec_ == null ? sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance() : trainerSpec_; } } /** *
       * Spec used to generate this model file.
       * 
* * optional .sentencepiece.TrainerSpec trainer_spec = 2; */ private com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.TrainerSpec, sentencepiece.SentencepieceModel.TrainerSpec.Builder, sentencepiece.SentencepieceModel.TrainerSpecOrBuilder> getTrainerSpecFieldBuilder() { if (trainerSpecBuilder_ == null) { trainerSpecBuilder_ = new com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.TrainerSpec, sentencepiece.SentencepieceModel.TrainerSpec.Builder, sentencepiece.SentencepieceModel.TrainerSpecOrBuilder>( getTrainerSpec(), getParentForChildren(), isClean()); trainerSpec_ = null; } return trainerSpecBuilder_; } private sentencepiece.SentencepieceModel.NormalizerSpec normalizerSpec_; private com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder> normalizerSpecBuilder_; /** *
       * Spec for text normalization.
       * 
*
       * optional .sentencepiece.NormalizerSpec normalizer_spec = 3;
       *
       * @return Whether the normalizerSpec field is set.
       */
      public boolean hasNormalizerSpec() {
        // Bit 0x04 of bitField0_ records presence of normalizer_spec in this builder.
        final int masked = bitField0_ & 0x00000004;
        return masked != 0;
      }

      /**
       * Spec for text normalization.
       * 
* * optional .sentencepiece.NormalizerSpec normalizer_spec = 3; * @return The normalizerSpec. */ public sentencepiece.SentencepieceModel.NormalizerSpec getNormalizerSpec() { if (normalizerSpecBuilder_ == null) { return normalizerSpec_ == null ? sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance() : normalizerSpec_; } else { return normalizerSpecBuilder_.getMessage(); } } /** *
       * Spec for text normalization.
       * 
* * optional .sentencepiece.NormalizerSpec normalizer_spec = 3; */ public Builder setNormalizerSpec(sentencepiece.SentencepieceModel.NormalizerSpec value) { if (normalizerSpecBuilder_ == null) { if (value == null) { throw new NullPointerException(); } normalizerSpec_ = value; } else { normalizerSpecBuilder_.setMessage(value); } bitField0_ |= 0x00000004; onChanged(); return this; } /** *
       * Spec for text normalization.
       * 
* * optional .sentencepiece.NormalizerSpec normalizer_spec = 3; */ public Builder setNormalizerSpec( sentencepiece.SentencepieceModel.NormalizerSpec.Builder builderForValue) { if (normalizerSpecBuilder_ == null) { normalizerSpec_ = builderForValue.build(); } else { normalizerSpecBuilder_.setMessage(builderForValue.build()); } bitField0_ |= 0x00000004; onChanged(); return this; } /** *
       * Spec for text normalization.
       * 
* * optional .sentencepiece.NormalizerSpec normalizer_spec = 3; */ public Builder mergeNormalizerSpec(sentencepiece.SentencepieceModel.NormalizerSpec value) { if (normalizerSpecBuilder_ == null) { if (((bitField0_ & 0x00000004) != 0) && normalizerSpec_ != null && normalizerSpec_ != sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance()) { getNormalizerSpecBuilder().mergeFrom(value); } else { normalizerSpec_ = value; } } else { normalizerSpecBuilder_.mergeFrom(value); } if (normalizerSpec_ != null) { bitField0_ |= 0x00000004; onChanged(); } return this; } /** *
       * Spec for text normalization.
       * 
* * optional .sentencepiece.NormalizerSpec normalizer_spec = 3; */ public Builder clearNormalizerSpec() { bitField0_ = (bitField0_ & ~0x00000004); normalizerSpec_ = null; if (normalizerSpecBuilder_ != null) { normalizerSpecBuilder_.dispose(); normalizerSpecBuilder_ = null; } onChanged(); return this; } /** *
       * Spec for text normalization.
       * 
* * optional .sentencepiece.NormalizerSpec normalizer_spec = 3; */ public sentencepiece.SentencepieceModel.NormalizerSpec.Builder getNormalizerSpecBuilder() { bitField0_ |= 0x00000004; onChanged(); return getNormalizerSpecFieldBuilder().getBuilder(); } /** *
       * Spec for text normalization.
       * 
* * optional .sentencepiece.NormalizerSpec normalizer_spec = 3; */ public sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getNormalizerSpecOrBuilder() { if (normalizerSpecBuilder_ != null) { return normalizerSpecBuilder_.getMessageOrBuilder(); } else { return normalizerSpec_ == null ? sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance() : normalizerSpec_; } } /** *
       * Spec for text normalization.
       * 
* * optional .sentencepiece.NormalizerSpec normalizer_spec = 3; */ private com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder> getNormalizerSpecFieldBuilder() { if (normalizerSpecBuilder_ == null) { normalizerSpecBuilder_ = new com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder>( getNormalizerSpec(), getParentForChildren(), isClean()); normalizerSpec_ = null; } return normalizerSpecBuilder_; } private sentencepiece.SentencepieceModel.SelfTestData selfTestData_; private com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.SelfTestData, sentencepiece.SentencepieceModel.SelfTestData.Builder, sentencepiece.SentencepieceModel.SelfTestDataOrBuilder> selfTestDataBuilder_; /** *
       * Stores sample input and its expected segmentation to verify the model.
       *
       * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
       * @return Whether the selfTestData field is set.
       */
      public boolean hasSelfTestData() {
        final int selfTestDataBit = bitField0_ & 0x00000008;
        return selfTestDataBit != 0;
      }
      /**
       *
       * Stores sample input and its expected segmentation to verify the model.
       *
       * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
       * @return The selfTestData.
       */
      public sentencepiece.SentencepieceModel.SelfTestData getSelfTestData() {
        if (selfTestDataBuilder_ != null) {
          return selfTestDataBuilder_.getMessage();
        }
        if (selfTestData_ != null) {
          return selfTestData_;
        }
        return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
      }
      /**
       *
       * Stores sample input and its expected segmentation to verify the model.
       *
       * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
       *
       * <p>Stores {@code value} as the field value and marks the field as set.
       * A {@code null} value is rejected with a {@link NullPointerException} when
       * no field builder exists.
       */
      public Builder setSelfTestData(sentencepiece.SentencepieceModel.SelfTestData value) {
        if (selfTestDataBuilder_ != null) {
          selfTestDataBuilder_.setMessage(value);
        } else if (value == null) {
          throw new NullPointerException();
        } else {
          selfTestData_ = value;
        }
        bitField0_ |= 0x00000008;
        onChanged();
        return this;
      }
      /**
       *
       * Stores sample input and its expected segmentation to verify the model.
       *
       * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
       *
       * <p>Builds {@code builderForValue} once and stores the resulting message,
       * either directly or through the field builder when one exists, then marks
       * the field as set.
       */
      public Builder setSelfTestData(
          sentencepiece.SentencepieceModel.SelfTestData.Builder builderForValue) {
        final sentencepiece.SentencepieceModel.SelfTestData built = builderForValue.build();
        if (selfTestDataBuilder_ != null) {
          selfTestDataBuilder_.setMessage(built);
        } else {
          selfTestData_ = built;
        }
        bitField0_ |= 0x00000008;
        onChanged();
        return this;
      }
      /**
       *
       * Stores sample input and its expected segmentation to verify the model.
       *
       * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
       *
       * <p>Merges {@code value} into the current value when the field is set to a
       * non-default message; otherwise {@code value} simply becomes the field value.
       */
      public Builder mergeSelfTestData(sentencepiece.SentencepieceModel.SelfTestData value) {
        if (selfTestDataBuilder_ != null) {
          selfTestDataBuilder_.mergeFrom(value);
        } else {
          final boolean hasMergeTarget =
              (bitField0_ & 0x00000008) != 0
                  && selfTestData_ != null
                  && selfTestData_
                      != sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
          if (hasMergeTarget) {
            getSelfTestDataBuilder().mergeFrom(value);
          } else {
            selfTestData_ = value;
          }
        }
        // The has-bit is only raised here while a plain message is stored in the field.
        if (selfTestData_ != null) {
          bitField0_ |= 0x00000008;
          onChanged();
        }
        return this;
      }
      /**
       *
       * Stores sample input and its expected segmentation to verify the model.
       *
       * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
       *
       * <p>Clears the has-bit, drops the stored message and disposes the field
       * builder if one was created.
       */
      public Builder clearSelfTestData() {
        bitField0_ &= ~0x00000008;
        selfTestData_ = null;
        if (selfTestDataBuilder_ != null) {
          selfTestDataBuilder_.dispose();
          selfTestDataBuilder_ = null;
        }
        onChanged();
        return this;
      }
      /**
       *
       * Stores sample input and its expected segmentation to verify the model.
       *
       * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
       *
       * <p>Marks the field as set and returns the nested builder obtained from the
       * field builder.
       */
      public sentencepiece.SentencepieceModel.SelfTestData.Builder getSelfTestDataBuilder() {
        bitField0_ |= 0x00000008;
        onChanged();
        final sentencepiece.SentencepieceModel.SelfTestData.Builder nested =
            getSelfTestDataFieldBuilder().getBuilder();
        return nested;
      }
      /**
       *
       * Stores sample input and its expected segmentation to verify the model.
       * 
* * optional .sentencepiece.SelfTestData self_test_data = 4; */ public sentencepiece.SentencepieceModel.SelfTestDataOrBuilder getSelfTestDataOrBuilder() { if (selfTestDataBuilder_ != null) { return selfTestDataBuilder_.getMessageOrBuilder(); } else { return selfTestData_ == null ? sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance() : selfTestData_; } } /** *
       * Stores sample input and its expected segmentation to verify the model.
       * 
* * optional .sentencepiece.SelfTestData self_test_data = 4; */ private com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.SelfTestData, sentencepiece.SentencepieceModel.SelfTestData.Builder, sentencepiece.SentencepieceModel.SelfTestDataOrBuilder> getSelfTestDataFieldBuilder() { if (selfTestDataBuilder_ == null) { selfTestDataBuilder_ = new com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.SelfTestData, sentencepiece.SentencepieceModel.SelfTestData.Builder, sentencepiece.SentencepieceModel.SelfTestDataOrBuilder>( getSelfTestData(), getParentForChildren(), isClean()); selfTestData_ = null; } return selfTestDataBuilder_; } private sentencepiece.SentencepieceModel.NormalizerSpec denormalizerSpec_; private com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder> denormalizerSpecBuilder_; /** *
       * Spec for text de-normalization.
       *
       * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
       * @return Whether the denormalizerSpec field is set.
       */
      public boolean hasDenormalizerSpec() {
        final int denormalizerSpecBit = bitField0_ & 0x00000010;
        return denormalizerSpecBit != 0;
      }
      /**
       *
       * Spec for text de-normalization.
       *
       * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
       * @return The denormalizerSpec.
       */
      public sentencepiece.SentencepieceModel.NormalizerSpec getDenormalizerSpec() {
        if (denormalizerSpecBuilder_ != null) {
          return denormalizerSpecBuilder_.getMessage();
        }
        if (denormalizerSpec_ != null) {
          return denormalizerSpec_;
        }
        return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
      }
      /**
       *
       * Spec for text de-normalization.
       *
       * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
       *
       * <p>Stores {@code value} as the field value and marks the field as set.
       * A {@code null} value is rejected with a {@link NullPointerException} when
       * no field builder exists.
       */
      public Builder setDenormalizerSpec(sentencepiece.SentencepieceModel.NormalizerSpec value) {
        if (denormalizerSpecBuilder_ != null) {
          denormalizerSpecBuilder_.setMessage(value);
        } else if (value == null) {
          throw new NullPointerException();
        } else {
          denormalizerSpec_ = value;
        }
        bitField0_ |= 0x00000010;
        onChanged();
        return this;
      }
      /**
       *
       * Spec for text de-normalization.
       *
       * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
       *
       * <p>Builds {@code builderForValue} once and stores the resulting message,
       * either directly or through the field builder when one exists, then marks
       * the field as set.
       */
      public Builder setDenormalizerSpec(
          sentencepiece.SentencepieceModel.NormalizerSpec.Builder builderForValue) {
        final sentencepiece.SentencepieceModel.NormalizerSpec built = builderForValue.build();
        if (denormalizerSpecBuilder_ != null) {
          denormalizerSpecBuilder_.setMessage(built);
        } else {
          denormalizerSpec_ = built;
        }
        bitField0_ |= 0x00000010;
        onChanged();
        return this;
      }
      /**
       *
       * Spec for text de-normalization.
       *
       * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
       *
       * <p>Merges {@code value} into the current value when the field is set to a
       * non-default message; otherwise {@code value} simply becomes the field value.
       */
      public Builder mergeDenormalizerSpec(sentencepiece.SentencepieceModel.NormalizerSpec value) {
        if (denormalizerSpecBuilder_ != null) {
          denormalizerSpecBuilder_.mergeFrom(value);
        } else {
          final boolean hasMergeTarget =
              (bitField0_ & 0x00000010) != 0
                  && denormalizerSpec_ != null
                  && denormalizerSpec_
                      != sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
          if (hasMergeTarget) {
            getDenormalizerSpecBuilder().mergeFrom(value);
          } else {
            denormalizerSpec_ = value;
          }
        }
        // The has-bit is only raised here while a plain message is stored in the field.
        if (denormalizerSpec_ != null) {
          bitField0_ |= 0x00000010;
          onChanged();
        }
        return this;
      }
      /**
       *
       * Spec for text de-normalization.
       *
       * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
       *
       * <p>Clears the has-bit, drops the stored message and disposes the field
       * builder if one was created.
       */
      public Builder clearDenormalizerSpec() {
        bitField0_ &= ~0x00000010;
        denormalizerSpec_ = null;
        if (denormalizerSpecBuilder_ != null) {
          denormalizerSpecBuilder_.dispose();
          denormalizerSpecBuilder_ = null;
        }
        onChanged();
        return this;
      }
      /**
       *
       * Spec for text de-normalization.
       *
       * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
       *
       * <p>Marks the field as set and returns the nested builder obtained from the
       * field builder.
       */
      public sentencepiece.SentencepieceModel.NormalizerSpec.Builder getDenormalizerSpecBuilder() {
        bitField0_ |= 0x00000010;
        onChanged();
        final sentencepiece.SentencepieceModel.NormalizerSpec.Builder nested =
            getDenormalizerSpecFieldBuilder().getBuilder();
        return nested;
      }
      /**
       *
       * Spec for text de-normalization.
       * 
* * optional .sentencepiece.NormalizerSpec denormalizer_spec = 5; */ public sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getDenormalizerSpecOrBuilder() { if (denormalizerSpecBuilder_ != null) { return denormalizerSpecBuilder_.getMessageOrBuilder(); } else { return denormalizerSpec_ == null ? sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance() : denormalizerSpec_; } } /** *
       * Spec for text de-normalization.
       * 
* * optional .sentencepiece.NormalizerSpec denormalizer_spec = 5; */ private com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder> getDenormalizerSpecFieldBuilder() { if (denormalizerSpecBuilder_ == null) { denormalizerSpecBuilder_ = new com.google.protobuf.SingleFieldBuilderV3< sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder>( getDenormalizerSpec(), getParentForChildren(), isClean()); denormalizerSpec_ = null; } return denormalizerSpecBuilder_; } @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.setUnknownFields(unknownFields); } @java.lang.Override public final Builder mergeUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { return super.mergeUnknownFields(unknownFields); } // @@protoc_insertion_point(builder_scope:sentencepiece.ModelProto) } // @@protoc_insertion_point(class_scope:sentencepiece.ModelProto) private static final sentencepiece.SentencepieceModel.ModelProto DEFAULT_INSTANCE; static { DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.ModelProto(); } public static sentencepiece.SentencepieceModel.ModelProto getDefaultInstance() { return DEFAULT_INSTANCE; } @java.lang.Deprecated public static final com.google.protobuf.Parser PARSER = new com.google.protobuf.AbstractParser() { @java.lang.Override public ModelProto parsePartialFrom( com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { Builder builder = newBuilder(); try { builder.mergeFrom(input, extensionRegistry); } catch (com.google.protobuf.InvalidProtocolBufferException e) { throw 
e.setUnfinishedMessage(builder.buildPartial()); } catch (com.google.protobuf.UninitializedMessageException e) { throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial()); } catch (java.io.IOException e) { throw new com.google.protobuf.InvalidProtocolBufferException(e) .setUnfinishedMessage(builder.buildPartial()); } return builder.buildPartial(); } }; public static com.google.protobuf.Parser parser() { return PARSER; } @java.lang.Override public com.google.protobuf.Parser getParserForType() { return PARSER; } @java.lang.Override public sentencepiece.SentencepieceModel.ModelProto getDefaultInstanceForType() { return DEFAULT_INSTANCE; } } private static final com.google.protobuf.Descriptors.Descriptor internal_static_sentencepiece_TrainerSpec_descriptor; private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internal_static_sentencepiece_TrainerSpec_fieldAccessorTable; private static final com.google.protobuf.Descriptors.Descriptor internal_static_sentencepiece_NormalizerSpec_descriptor; private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internal_static_sentencepiece_NormalizerSpec_fieldAccessorTable; private static final com.google.protobuf.Descriptors.Descriptor internal_static_sentencepiece_SelfTestData_descriptor; private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internal_static_sentencepiece_SelfTestData_fieldAccessorTable; private static final com.google.protobuf.Descriptors.Descriptor internal_static_sentencepiece_SelfTestData_Sample_descriptor; private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internal_static_sentencepiece_SelfTestData_Sample_fieldAccessorTable; private static final com.google.protobuf.Descriptors.Descriptor internal_static_sentencepiece_ModelProto_descriptor; private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internal_static_sentencepiece_ModelProto_fieldAccessorTable; private 
static final com.google.protobuf.Descriptors.Descriptor
    internal_static_sentencepiece_ModelProto_SentencePiece_descriptor;
  private static final
    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
      internal_static_sentencepiece_ModelProto_SentencePiece_fieldAccessorTable;

  /** Returns the file descriptor built by the static initializer below. */
  public static com.google.protobuf.Descriptors.FileDescriptor
      getDescriptor() {
    return descriptor;
  }
  private static  com.google.protobuf.Descriptors.FileDescriptor
      descriptor;
  static {
    // Serialized FileDescriptorProto, split into fixed-size string chunks.
    // Every string default is preceded by its length byte, so the defaults
    // "<unk>" (\005), "<s>" (\003), "</s>" (\004) and "<pad>" (\005) must be
    // present verbatim. Field number 32 is encoded as a space character, which
    // is why "\030" is followed by two spaces for vocabulary_output_piece_score.
    java.lang.String[] descriptorData = {
      "\n\031sentencepiece_model.proto\022\rsentencepie" +
      "ce\"\316\n\n\013TrainerSpec\022\r\n\005input\030\001 \003(\t\022\024\n\014inp" +
      "ut_format\030\007 \001(\t\022\024\n\014model_prefix\030\002 \001(\t\022A\n" +
      "\nmodel_type\030\003 \001(\0162$.sentencepiece.Traine" +
      "rSpec.ModelType:\007UNIGRAM\022\030\n\nvocab_size\030\004" +
      " \001(\005:\0048000\022\027\n\017accept_language\030\005 \003(\t\022 \n\025s" +
      "elf_test_sample_size\030\006 \001(\005:\0010\022\"\n\022charact" +
      "er_coverage\030\n \001(\002:\0060.9995\022\036\n\023input_sente" +
      "nce_size\030\013 \001(\004:\0010\022$\n\026shuffle_input_sente" +
      "nce\030\023 \001(\010:\004true\022 \n\024mining_sentence_size\030" +
      "\014 \001(\005B\002\030\001\022\"\n\026training_sentence_size\030\r \001(" +
      "\005B\002\030\001\022(\n\027seed_sentencepiece_size\030\016 \001(\005:\007" +
      "1000000\022\036\n\020shrinking_factor\030\017 \001(\002:\0040.75\022" +
      "!\n\023max_sentence_length\030\022 \001(\005:\0044192\022\027\n\013nu" +
      "m_threads\030\020 \001(\005:\00216\022\035\n\022num_sub_iteration" +
      "s\030\021 \001(\005:\0012\022$\n\030max_sentencepiece_length\030\024" +
      " \001(\005:\00216\022%\n\027split_by_unicode_script\030\025 \001(" +
      "\010:\004true\022\035\n\017split_by_number\030\027 \001(\010:\004true\022!" +
      "\n\023split_by_whitespace\030\026 \001(\010:\004true\022)\n\032tre" +
      "at_whitespace_as_suffix\030\030 \001(\010:\005false\022+\n\034" +
      "allow_whitespace_only_pieces\030\032 \001(\010:\005fals" +
      "e\022\033\n\014split_digits\030\031 \001(\010:\005false\022\027\n\017contro" +
      "l_symbols\030\036 \003(\t\022\034\n\024user_defined_symbols\030" +
      "\037 \003(\t\022\026\n\016required_chars\030$ \001(\t\022\034\n\rbyte_fa" +
      "llback\030# \001(\010:\005false\022+\n\035vocabulary_output" +
      "_piece_score\030  \001(\010:\004true\022\036\n\020hard_vocab_l" +
      "imit\030! \001(\010:\004true\022\034\n\ruse_all_vocab\030\" \001(\010:" +
      "\005false\022\021\n\006unk_id\030( \001(\005:\0010\022\021\n\006bos_id\030) \001(" +
      "\005:\0011\022\021\n\006eos_id\030* \001(\005:\0012\022\022\n\006pad_id\030+ \001(\005:" +
      "\002-1\022\030\n\tunk_piece\030- \001(\t:\005<unk>\022\026\n\tbos_pie" +
      "ce\030. \001(\t:\003<s>\022\027\n\teos_piece\030/ \001(\t:\004</s>\022\030" +
      "\n\tpad_piece\0300 \001(\t:\005<pad>\022\032\n\013unk_surface\030" +
      ", \001(\t:\005 \342\201\207 \022+\n\034train_extremely_large_co" +
      "rpus\0301 \001(\010:\005false\"5\n\tModelType\022\013\n\007UNIGRA" +
      "M\020\001\022\007\n\003BPE\020\002\022\010\n\004WORD\020\003\022\010\n\004CHAR\020\004*\t\010\310\001\020\200\200" +
      "\200\200\002\"\321\001\n\016NormalizerSpec\022\014\n\004name\030\001 \001(\t\022\034\n\024" +
      "precompiled_charsmap\030\002 \001(\014\022\036\n\020add_dummy_" +
      "prefix\030\003 \001(\010:\004true\022&\n\030remove_extra_white" +
      "spaces\030\004 \001(\010:\004true\022 \n\022escape_whitespaces" +
      "\030\005 \001(\010:\004true\022\036\n\026normalization_rule_tsv\030\006" +
      " \001(\t*\t\010\310\001\020\200\200\200\200\002\"y\n\014SelfTestData\0223\n\007sampl" +
      "es\030\001 \003(\0132\".sentencepiece.SelfTestData.Sa" +
      "mple\032)\n\006Sample\022\r\n\005input\030\001 \001(\t\022\020\n\010expecte" +
      "d\030\002 \001(\t*\t\010\310\001\020\200\200\200\200\002\"\376\003\n\nModelProto\0227\n\006pie" +
      "ces\030\001 \003(\0132\'.sentencepiece.ModelProto.Sen" +
      "tencePiece\0220\n\014trainer_spec\030\002 \001(\0132\032.sente" +
      "ncepiece.TrainerSpec\0226\n\017normalizer_spec\030" +
      "\003 \001(\0132\035.sentencepiece.NormalizerSpec\0223\n\016" +
      "self_test_data\030\004 \001(\0132\033.sentencepiece.Sel" +
      "fTestData\0228\n\021denormalizer_spec\030\005 \001(\0132\035.s" +
      "entencepiece.NormalizerSpec\032\322\001\n\rSentence" +
      "Piece\022\r\n\005piece\030\001 \001(\t\022\r\n\005score\030\002 \001(\002\022B\n\004t" +
      "ype\030\003 \001(\0162,.sentencepiece.ModelProto.Sen" +
      "tencePiece.Type:\006NORMAL\"T\n\004Type\022\n\n\006NORMA" +
      "L\020\001\022\013\n\007UNKNOWN\020\002\022\013\n\007CONTROL\020\003\022\020\n\014USER_DE" +
      "FINED\020\004\022\010\n\004BYTE\020\006\022\n\n\006UNUSED\020\005*\t\010\310\001\020\200\200\200\200\002" +
      "*\t\010\310\001\020\200\200\200\200\002B\002H\003"
    };
    descriptor = com.google.protobuf.Descriptors.FileDescriptor
      .internalBuildGeneratedFileFrom(descriptorData,
        new com.google.protobuf.Descriptors.FileDescriptor[] {
        });
    // Top-level messages are looked up by their index in the file descriptor;
    // nested messages by their index inside the enclosing message descriptor.
    internal_static_sentencepiece_TrainerSpec_descriptor =
      getDescriptor().getMessageTypes().get(0);
    internal_static_sentencepiece_TrainerSpec_fieldAccessorTable = new
      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
        internal_static_sentencepiece_TrainerSpec_descriptor,
        new java.lang.String[] { "Input", "InputFormat", "ModelPrefix", "ModelType", "VocabSize", "AcceptLanguage", "SelfTestSampleSize", "CharacterCoverage", "InputSentenceSize", "ShuffleInputSentence", "MiningSentenceSize", "TrainingSentenceSize", "SeedSentencepieceSize", "ShrinkingFactor", "MaxSentenceLength", "NumThreads", "NumSubIterations", "MaxSentencepieceLength", "SplitByUnicodeScript", "SplitByNumber", "SplitByWhitespace", "TreatWhitespaceAsSuffix", "AllowWhitespaceOnlyPieces", "SplitDigits", "ControlSymbols", "UserDefinedSymbols", "RequiredChars", "ByteFallback", "VocabularyOutputPieceScore", "HardVocabLimit", "UseAllVocab", "UnkId", "BosId", "EosId", "PadId", "UnkPiece", "BosPiece", "EosPiece", "PadPiece", "UnkSurface", "TrainExtremelyLargeCorpus", });
    internal_static_sentencepiece_NormalizerSpec_descriptor =
      getDescriptor().getMessageTypes().get(1);
    internal_static_sentencepiece_NormalizerSpec_fieldAccessorTable = new
      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
        internal_static_sentencepiece_NormalizerSpec_descriptor,
        new java.lang.String[] { "Name", "PrecompiledCharsmap", "AddDummyPrefix", "RemoveExtraWhitespaces", "EscapeWhitespaces", "NormalizationRuleTsv", });
    internal_static_sentencepiece_SelfTestData_descriptor =
      getDescriptor().getMessageTypes().get(2);
    internal_static_sentencepiece_SelfTestData_fieldAccessorTable = new
      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
        internal_static_sentencepiece_SelfTestData_descriptor,
        new java.lang.String[] { "Samples", });
    internal_static_sentencepiece_SelfTestData_Sample_descriptor =
      internal_static_sentencepiece_SelfTestData_descriptor.getNestedTypes().get(0);
    internal_static_sentencepiece_SelfTestData_Sample_fieldAccessorTable = new
      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
        internal_static_sentencepiece_SelfTestData_Sample_descriptor,
        new java.lang.String[] { "Input", "Expected", });
    internal_static_sentencepiece_ModelProto_descriptor =
      getDescriptor().getMessageTypes().get(3);
    internal_static_sentencepiece_ModelProto_fieldAccessorTable = new
      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
        internal_static_sentencepiece_ModelProto_descriptor,
        new java.lang.String[] { "Pieces", "TrainerSpec", "NormalizerSpec", "SelfTestData", "DenormalizerSpec", });
    internal_static_sentencepiece_ModelProto_SentencePiece_descriptor =
      internal_static_sentencepiece_ModelProto_descriptor.getNestedTypes().get(0);
    internal_static_sentencepiece_ModelProto_SentencePiece_fieldAccessorTable = new
      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
        internal_static_sentencepiece_ModelProto_SentencePiece_descriptor,
        new java.lang.String[] { "Piece", "Score", "Type", });
  }

  // @@protoc_insertion_point(outer_class_scope)
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy