// sentencepiece.SentencepieceModel Maven / Gradle / Ivy
// The newest version!
// Generated by the protocol buffer compiler. DO NOT EDIT!
// source: sentencepiece_model.proto
// Protobuf Java Version: 3.25.5
package sentencepiece;
public final class SentencepieceModel {
private SentencepieceModel() {}
/**
 * Lite-registry variant of extension registration.
 * The body is empty, so nothing is added to {@code registry}.
 *
 * @param registry the lite extension registry; not modified by this method
 */
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistryLite registry) {
  // Nothing to register.
}
/**
 * Full-registry variant of extension registration; forwards to the
 * {@code ExtensionRegistryLite} overload.
 *
 * @param registry the extension registry handed on to the lite overload
 */
public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {
  // Widen the static type so overload resolution picks the lite variant
  // instead of calling this method again.
  final com.google.protobuf.ExtensionRegistryLite liteView = registry;
  registerAllExtensions(liteView);
}
public interface TrainerSpecOrBuilder extends
// @@protoc_insertion_point(interface_extends:sentencepiece.TrainerSpec)
com.google.protobuf.GeneratedMessageV3.
ExtendableMessageOrBuilder {
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @return A list containing the input.
*/
java.util.List
getInputList();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @return The count of input.
*/
int getInputCount();
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param index The index of the element to return.
* @return The input at the given index.
*/
java.lang.String getInput(int index);
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param index The index of the value to return.
* @return The bytes of the input at the given index.
*/
com.google.protobuf.ByteString
getInputBytes(int index);
/**
*
* Input corpus format:
* "text": one-sentence-per-line text format (default)
* "tsv": sentence <tab> freq
*
*
* optional string input_format = 7;
* @return Whether the inputFormat field is set.
*/
boolean hasInputFormat();
/**
*
* Input corpus format:
* "text": one-sentence-per-line text format (default)
* "tsv": sentence <tab> freq
*
*
* optional string input_format = 7;
* @return The inputFormat.
*/
java.lang.String getInputFormat();
/**
*
* Input corpus format:
* "text": one-sentence-per-line text format (default)
* "tsv": sentence <tab> freq
*
*
* optional string input_format = 7;
* @return The bytes for inputFormat.
*/
com.google.protobuf.ByteString
getInputFormatBytes();
/**
*
* Output model file prefix.
* <model_prefix>.model and <model_prefix>.vocab are generated.
*
*
* optional string model_prefix = 2;
* @return Whether the modelPrefix field is set.
*/
boolean hasModelPrefix();
/**
*
* Output model file prefix.
* <model_prefix>.model and <model_prefix>.vocab are generated.
*
*
* optional string model_prefix = 2;
* @return The modelPrefix.
*/
java.lang.String getModelPrefix();
/**
*
* Output model file prefix.
* <model_prefix>.model and <model_prefix>.vocab are generated.
*
*
* optional string model_prefix = 2;
* @return The bytes for modelPrefix.
*/
com.google.protobuf.ByteString
getModelPrefixBytes();
/**
* optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
* @return Whether the modelType field is set.
*/
boolean hasModelType();
/**
* optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
* @return The modelType.
*/
sentencepiece.SentencepieceModel.TrainerSpec.ModelType getModelType();
/**
*
* Vocabulary size. 8k is the default size.
*
*
* optional int32 vocab_size = 4 [default = 8000];
* @return Whether the vocabSize field is set.
*/
boolean hasVocabSize();
/**
*
* Vocabulary size. 8k is the default size.
*
*
* optional int32 vocab_size = 4 [default = 8000];
* @return The vocabSize.
*/
int getVocabSize();
/**
 *
 * List of the languages this model can accept.
 * Since the model is language-agnostic, this field is used as a reference.
 *
 *
 * repeated string accept_language = 5;
 * @return A list containing the acceptLanguage.
 */
java.util.List<java.lang.String>
    getAcceptLanguageList();
/**
*
* List of the languages this model can accept.
* Since the model is language-agnostic, this field is used as a reference.
*
*
* repeated string accept_language = 5;
* @return The count of acceptLanguage.
*/
int getAcceptLanguageCount();
/**
*
* List of the languages this model can accept.
* Since the model is language-agnostic, this field is used as a reference.
*
*
* repeated string accept_language = 5;
* @param index The index of the element to return.
* @return The acceptLanguage at the given index.
*/
java.lang.String getAcceptLanguage(int index);
/**
*
* List of the languages this model can accept.
* Since the model is language-agnostic, this field is used as a reference.
*
*
* repeated string accept_language = 5;
* @param index The index of the value to return.
* @return The bytes of the acceptLanguage at the given index.
*/
com.google.protobuf.ByteString
getAcceptLanguageBytes(int index);
/**
*
* Size of self-test samples, which are encoded in the model file.
*
*
* optional int32 self_test_sample_size = 6 [default = 0];
* @return Whether the selfTestSampleSize field is set.
*/
boolean hasSelfTestSampleSize();
/**
*
* Size of self-test samples, which are encoded in the model file.
*
*
* optional int32 self_test_sample_size = 6 [default = 0];
* @return The selfTestSampleSize.
*/
int getSelfTestSampleSize();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Training parameters.
*
* Uses characters which cover the corpus with the ratio of `chars_coverage`.
* This parameter determines the set of basic Alphabet of sentence piece.
* 1.0 - `chars_coverage` characters are treated as UNK.
* See also required_chars field.
*
*
* optional float character_coverage = 10 [default = 0.9995];
* @return Whether the characterCoverage field is set.
*/
boolean hasCharacterCoverage();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Training parameters.
*
* Uses characters which cover the corpus with the ratio of `chars_coverage`.
* This parameter determines the set of basic Alphabet of sentence piece.
* 1.0 - `chars_coverage` characters are treated as UNK.
* See also required_chars field.
*
*
* optional float character_coverage = 10 [default = 0.9995];
* @return The characterCoverage.
*/
float getCharacterCoverage();
/**
*
* Maximum size of sentences the trainer loads from `input` parameter.
* Trainer simply loads the `input` files in sequence.
* It is better to shuffle the input corpus randomly.
*
*
* optional uint64 input_sentence_size = 11 [default = 0];
* @return Whether the inputSentenceSize field is set.
*/
boolean hasInputSentenceSize();
/**
*
* Maximum size of sentences the trainer loads from `input` parameter.
* Trainer simply loads the `input` files in sequence.
* It is better to shuffle the input corpus randomly.
*
*
* optional uint64 input_sentence_size = 11 [default = 0];
* @return The inputSentenceSize.
*/
long getInputSentenceSize();
/**
* optional bool shuffle_input_sentence = 19 [default = true];
* @return Whether the shuffleInputSentence field is set.
*/
boolean hasShuffleInputSentence();
/**
* optional bool shuffle_input_sentence = 19 [default = true];
* @return The shuffleInputSentence.
*/
boolean getShuffleInputSentence();
/**
*
* Maximum size of sentences to make seed sentence pieces.
* Extended suffix array is constructed to extract frequent
* sub-strings from the corpus. This uses 20N working space,
* where N is the size of corpus.
*
*
* optional int32 mining_sentence_size = 12 [deprecated = true];
* @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated.
* See sentencepiece_model.proto;l=83
* @return Whether the miningSentenceSize field is set.
*/
@java.lang.Deprecated boolean hasMiningSentenceSize();
/**
*
* Maximum size of sentences to make seed sentence pieces.
* Extended suffix array is constructed to extract frequent
* sub-strings from the corpus. This uses 20N working space,
* where N is the size of corpus.
*
*
* optional int32 mining_sentence_size = 12 [deprecated = true];
* @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated.
* See sentencepiece_model.proto;l=83
* @return The miningSentenceSize.
*/
@java.lang.Deprecated int getMiningSentenceSize();
/**
*
* Maximum size of sentences to train sentence pieces.
*
*
* optional int32 training_sentence_size = 13 [deprecated = true];
* @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated.
* See sentencepiece_model.proto;l=86
* @return Whether the trainingSentenceSize field is set.
*/
@java.lang.Deprecated boolean hasTrainingSentenceSize();
/**
*
* Maximum size of sentences to train sentence pieces.
*
*
* optional int32 training_sentence_size = 13 [deprecated = true];
* @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated.
* See sentencepiece_model.proto;l=86
* @return The trainingSentenceSize.
*/
@java.lang.Deprecated int getTrainingSentenceSize();
/**
*
* The size of seed sentencepieces.
* `seed_sentencepiece_size` must be larger than `vocab_size`.
*
*
* optional int32 seed_sentencepiece_size = 14 [default = 1000000];
* @return Whether the seedSentencepieceSize field is set.
*/
boolean hasSeedSentencepieceSize();
/**
*
* The size of seed sentencepieces.
* `seed_sentencepiece_size` must be larger than `vocab_size`.
*
*
* optional int32 seed_sentencepiece_size = 14 [default = 1000000];
* @return The seedSentencepieceSize.
*/
int getSeedSentencepieceSize();
/**
*
* In every EM sub-iterations, keeps top
* `shrinking_factor` * `current sentencepieces size` with respect to
* the loss of the sentence piece. This value should be smaller than 1.0.
*
*
* optional float shrinking_factor = 15 [default = 0.75];
* @return Whether the shrinkingFactor field is set.
*/
boolean hasShrinkingFactor();
/**
*
* In every EM sub-iterations, keeps top
* `shrinking_factor` * `current sentencepieces size` with respect to
* the loss of the sentence piece. This value should be smaller than 1.0.
*
*
* optional float shrinking_factor = 15 [default = 0.75];
* @return The shrinkingFactor.
*/
float getShrinkingFactor();
/**
*
* The maximum sentence length in byte. The sentences with the length
* larger than `max_sentence_length` is simply ignored.
* Longer input tends to bring the following risks:
* * Overflow during EM training (unigram language model only)
* * Performance drop because of O(n log n) cost in BPE.
*
*
* optional int32 max_sentence_length = 18 [default = 4192];
* @return Whether the maxSentenceLength field is set.
*/
boolean hasMaxSentenceLength();
/**
*
* The maximum sentence length in byte. The sentences with the length
* larger than `max_sentence_length` is simply ignored.
* Longer input tends to bring the following risks:
* * Overflow during EM training (unigram language model only)
* * Performance drop because of O(n log n) cost in BPE.
*
*
* optional int32 max_sentence_length = 18 [default = 4192];
* @return The maxSentenceLength.
*/
int getMaxSentenceLength();
/**
*
* Number of threads in the training.
*
*
* optional int32 num_threads = 16 [default = 16];
* @return Whether the numThreads field is set.
*/
boolean hasNumThreads();
/**
*
* Number of threads in the training.
*
*
* optional int32 num_threads = 16 [default = 16];
* @return The numThreads.
*/
int getNumThreads();
/**
*
* Number of EM sub iterations.
*
*
* optional int32 num_sub_iterations = 17 [default = 2];
* @return Whether the numSubIterations field is set.
*/
boolean hasNumSubIterations();
/**
*
* Number of EM sub iterations.
*
*
* optional int32 num_sub_iterations = 17 [default = 2];
* @return The numSubIterations.
*/
int getNumSubIterations();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* SentencePiece parameters which control the shapes of sentence piece.
*
* Maximum length of sentencepiece.
*
*
* optional int32 max_sentencepiece_length = 20 [default = 16];
* @return Whether the maxSentencepieceLength field is set.
*/
boolean hasMaxSentencepieceLength();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* SentencePiece parameters which control the shapes of sentence piece.
*
* Maximum length of sentencepiece.
*
*
* optional int32 max_sentencepiece_length = 20 [default = 16];
* @return The maxSentencepieceLength.
*/
int getMaxSentencepieceLength();
/**
*
* Uses Unicode script to split sentence pieces.
* When `split_by_unicode_script` is true, we do not allow sentence piece to
* include multiple Unicode scripts, e.g. "F1" is not a valid piece.
* Exception: CJ characters (Hiragana/Katakana/Han) are all handled
* as one script type, since Japanese word can consist of multiple scripts.
* This exception is always applied regardless of the accept-language
* parameter.
*
*
* optional bool split_by_unicode_script = 21 [default = true];
* @return Whether the splitByUnicodeScript field is set.
*/
boolean hasSplitByUnicodeScript();
/**
*
* Uses Unicode script to split sentence pieces.
* When `split_by_unicode_script` is true, we do not allow sentence piece to
* include multiple Unicode scripts, e.g. "F1" is not a valid piece.
* Exception: CJ characters (Hiragana/Katakana/Han) are all handled
* as one script type, since Japanese word can consist of multiple scripts.
* This exception is always applied regardless of the accept-language
* parameter.
*
*
* optional bool split_by_unicode_script = 21 [default = true];
* @return The splitByUnicodeScript.
*/
boolean getSplitByUnicodeScript();
/**
*
* When `split_by_number` is true, put a boundary between number and
* non-number transition. If we want to treat "F1" as one token, set this flag
* to be false.
*
*
* optional bool split_by_number = 23 [default = true];
* @return Whether the splitByNumber field is set.
*/
boolean hasSplitByNumber();
/**
*
* When `split_by_number` is true, put a boundary between number and
* non-number transition. If we want to treat "F1" as one token, set this flag
* to be false.
*
*
* optional bool split_by_number = 23 [default = true];
* @return The splitByNumber.
*/
boolean getSplitByNumber();
/**
*
* Use a white space to split sentence pieces.
* When `split_by_whitespace` is false, we may have the piece containing
* a white space in the middle. e.g., "in_the".
*
*
* optional bool split_by_whitespace = 22 [default = true];
* @return Whether the splitByWhitespace field is set.
*/
boolean hasSplitByWhitespace();
/**
*
* Use a white space to split sentence pieces.
* When `split_by_whitespace` is false, we may have the piece containing
* a white space in the middle. e.g., "in_the".
*
*
* optional bool split_by_whitespace = 22 [default = true];
* @return The splitByWhitespace.
*/
boolean getSplitByWhitespace();
/**
*
* Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
* hello_. When `treat_whitespace_as_suffix` is true,
* NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
* of sentence.
*
*
* optional bool treat_whitespace_as_suffix = 24 [default = false];
* @return Whether the treatWhitespaceAsSuffix field is set.
*/
boolean hasTreatWhitespaceAsSuffix();
/**
*
* Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
* hello_. When `treat_whitespace_as_suffix` is true,
* NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
* of sentence.
*
*
* optional bool treat_whitespace_as_suffix = 24 [default = false];
* @return The treatWhitespaceAsSuffix.
*/
boolean getTreatWhitespaceAsSuffix();
/**
*
* Allows pieces that only contain whitespaces instead of appearing only as
* prefix or suffix of other pieces.
*
*
* optional bool allow_whitespace_only_pieces = 26 [default = false];
* @return Whether the allowWhitespaceOnlyPieces field is set.
*/
boolean hasAllowWhitespaceOnlyPieces();
/**
*
* Allows pieces that only contain whitespaces instead of appearing only as
* prefix or suffix of other pieces.
*
*
* optional bool allow_whitespace_only_pieces = 26 [default = false];
* @return The allowWhitespaceOnlyPieces.
*/
boolean getAllowWhitespaceOnlyPieces();
/**
*
* Split all digits (0-9) into separate pieces.
*
*
* optional bool split_digits = 25 [default = false];
* @return Whether the splitDigits field is set.
*/
boolean hasSplitDigits();
/**
*
* Split all digits (0-9) into separate pieces.
*
*
* optional bool split_digits = 25 [default = false];
* @return The splitDigits.
*/
boolean getSplitDigits();
/**
*
*/////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. <s> and </s> are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @return A list containing the controlSymbols.
*/
java.util.List
getControlSymbolsList();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. <s> and </s> are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @return The count of controlSymbols.
*/
int getControlSymbolsCount();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. <s> and </s> are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param index The index of the element to return.
* @return The controlSymbols at the given index.
*/
java.lang.String getControlSymbols(int index);
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. <s> and </s> are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param index The index of the value to return.
* @return The bytes of the controlSymbols at the given index.
*/
com.google.protobuf.ByteString
getControlSymbolsBytes(int index);
/**
 *
 * Defines user defined symbols.
 * These symbols are added with extremely high score
 * so they are always treated as one unique symbol in any context.
 * Typical usage of user_defined_symbols is placeholder for named entities.
 *
 *
 * repeated string user_defined_symbols = 31;
 * @return A list containing the userDefinedSymbols.
 */
java.util.List<java.lang.String>
    getUserDefinedSymbolsList();
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @return The count of userDefinedSymbols.
*/
int getUserDefinedSymbolsCount();
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @param index The index of the element to return.
* @return The userDefinedSymbols at the given index.
*/
java.lang.String getUserDefinedSymbols(int index);
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @param index The index of the value to return.
* @return The bytes of the userDefinedSymbols at the given index.
*/
com.google.protobuf.ByteString
getUserDefinedSymbolsBytes(int index);
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @return Whether the requiredChars field is set.
*/
boolean hasRequiredChars();
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @return The requiredChars.
*/
java.lang.String getRequiredChars();
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @return The bytes for requiredChars.
*/
com.google.protobuf.ByteString
getRequiredCharsBytes();
/**
*
* Decomposes unknown pieces into UTF-8 bytes.
*
*
* optional bool byte_fallback = 35 [default = false];
* @return Whether the byteFallback field is set.
*/
boolean hasByteFallback();
/**
*
* Decomposes unknown pieces into UTF-8 bytes.
*
*
* optional bool byte_fallback = 35 [default = false];
* @return The byteFallback.
*/
boolean getByteFallback();
/**
*
* When creating the vocabulary file, defines whether or not to additionally
* output the score for each piece.
*
*
* optional bool vocabulary_output_piece_score = 32 [default = true];
* @return Whether the vocabularyOutputPieceScore field is set.
*/
boolean hasVocabularyOutputPieceScore();
/**
*
* When creating the vocabulary file, defines whether or not to additionally
* output the score for each piece.
*
*
* optional bool vocabulary_output_piece_score = 32 [default = true];
* @return The vocabularyOutputPieceScore.
*/
boolean getVocabularyOutputPieceScore();
/**
*
* `vocab_size` is treated as hard limit. Crash if
* the model can not produce the vocab of size `vocab_size`,
* When `hard_vocab_limit` is false, vocab_size is treated
* as soft limit. Note that when model_type=char,
* always assumes hard_vocab_limit = false.
*
*
* optional bool hard_vocab_limit = 33 [default = true];
* @return Whether the hardVocabLimit field is set.
*/
boolean hasHardVocabLimit();
/**
*
* `vocab_size` is treated as hard limit. Crash if
* the model can not produce the vocab of size `vocab_size`,
* When `hard_vocab_limit` is false, vocab_size is treated
* as soft limit. Note that when model_type=char,
* always assumes hard_vocab_limit = false.
*
*
* optional bool hard_vocab_limit = 33 [default = true];
* @return The hardVocabLimit.
*/
boolean getHardVocabLimit();
/**
*
* use all symbols for vocab extraction. This flag is valid
* if model type is either CHAR or WORD
*
*
* optional bool use_all_vocab = 34 [default = false];
* @return Whether the useAllVocab field is set.
*/
boolean hasUseAllVocab();
/**
*
* use all symbols for vocab extraction. This flag is valid
* if model type is either CHAR or WORD
*
*
* optional bool use_all_vocab = 34 [default = false];
* @return The useAllVocab.
*/
boolean getUseAllVocab();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Reserved special meta tokens.
* * -1 is not used.
* * unk_id must not be -1.
* Ids must start with 0 and be contiguous.
*
*
* optional int32 unk_id = 40 [default = 0];
* @return Whether the unkId field is set.
*/
boolean hasUnkId();
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Reserved special meta tokens.
* * -1 is not used.
* * unk_id must not be -1.
* Ids must start with 0 and be contiguous.
*
*
* optional int32 unk_id = 40 [default = 0];
* @return The unkId.
*/
int getUnkId();
/**
*
* <s>
*
*
* optional int32 bos_id = 41 [default = 1];
* @return Whether the bosId field is set.
*/
boolean hasBosId();
/**
*
* <s>
*
*
* optional int32 bos_id = 41 [default = 1];
* @return The bosId.
*/
int getBosId();
/**
*
* </s>
*
*
* optional int32 eos_id = 42 [default = 2];
* @return Whether the eosId field is set.
*/
boolean hasEosId();
/**
*
* </s>
*
*
* optional int32 eos_id = 42 [default = 2];
* @return The eosId.
*/
int getEosId();
/**
*
* <pad> (padding)
*
*
* optional int32 pad_id = 43 [default = -1];
* @return Whether the padId field is set.
*/
boolean hasPadId();
/**
*
* <pad> (padding)
*
*
* optional int32 pad_id = 43 [default = -1];
* @return The padId.
*/
int getPadId();
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @return Whether the unkPiece field is set.
*/
boolean hasUnkPiece();
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @return The unkPiece.
*/
java.lang.String getUnkPiece();
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @return The bytes for unkPiece.
*/
com.google.protobuf.ByteString
getUnkPieceBytes();
/**
* optional string bos_piece = 46 [default = "<s>"];
* @return Whether the bosPiece field is set.
*/
boolean hasBosPiece();
/**
* optional string bos_piece = 46 [default = "<s>"];
* @return The bosPiece.
*/
java.lang.String getBosPiece();
/**
* optional string bos_piece = 46 [default = "<s>"];
* @return The bytes for bosPiece.
*/
com.google.protobuf.ByteString
getBosPieceBytes();
/**
* optional string eos_piece = 47 [default = "</s>"];
* @return Whether the eosPiece field is set.
*/
boolean hasEosPiece();
/**
* optional string eos_piece = 47 [default = "</s>"];
* @return The eosPiece.
*/
java.lang.String getEosPiece();
/**
* optional string eos_piece = 47 [default = "</s>"];
* @return The bytes for eosPiece.
*/
com.google.protobuf.ByteString
getEosPieceBytes();
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @return Whether the padPiece field is set.
*/
boolean hasPadPiece();
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @return The padPiece.
*/
java.lang.String getPadPiece();
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @return The bytes for padPiece.
*/
com.google.protobuf.ByteString
getPadPieceBytes();
/**
*
* Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that <unk> is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @return Whether the unkSurface field is set.
*/
boolean hasUnkSurface();
/**
*
* Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that <unk> is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @return The unkSurface.
*/
java.lang.String getUnkSurface();
/**
*
* Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that <unk> is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @return The bytes for unkSurface.
*/
com.google.protobuf.ByteString
getUnkSurfaceBytes();
/**
*
* Increase bit depth to allow unigram model training on large
* (>10M sentences) corpora. A Side-effect of enabling this flag
* is increased memory usage.
*
*
* optional bool train_extremely_large_corpus = 49 [default = false];
* @return Whether the trainExtremelyLargeCorpus field is set.
*/
boolean hasTrainExtremelyLargeCorpus();
/**
*
* Increase bit depth to allow unigram model training on large
* (>10M sentences) corpora. A Side-effect of enabling this flag
* is increased memory usage.
*
*
* optional bool train_extremely_large_corpus = 49 [default = false];
* @return The trainExtremelyLargeCorpus.
*/
boolean getTrainExtremelyLargeCorpus();
}
/**
*
* TrainerSpec encodes various parameters for SentencePiece training.
*
*
* Protobuf type {@code sentencepiece.TrainerSpec}
*/
public static final class TrainerSpec extends
com.google.protobuf.GeneratedMessageV3.ExtendableMessage<
TrainerSpec> implements
// @@protoc_insertion_point(message_implements:sentencepiece.TrainerSpec)
TrainerSpecOrBuilder {
private static final long serialVersionUID = 0L;
/**
 * Builder-based constructor; simply hands {@code builder} to the superclass constructor.
 * Use TrainerSpec.newBuilder() to construct.
 *
 * @param builder the extendable builder passed to {@code super}; parameterized on
 *     {@code TrainerSpec} to match the {@code ExtendableMessage<TrainerSpec>} superclass
 *     and avoid a raw-type unchecked call
 */
private TrainerSpec(
    com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<
        sentencepiece.SentencepieceModel.TrainerSpec, ?> builder) {
  super(builder);
}
/**
 * No-argument constructor that seeds every field having a non-zero / non-false default
 * with the default declared for it in the message definition.
 *
 * <p>The four special-piece strings are initialised to their declared defaults
 * ({@code "<unk>"}, {@code "<s>"}, {@code "</s>"}, {@code "<pad>"}) rather than to the
 * empty string, matching the {@code [default = ...]} options documented on the
 * corresponding accessors.
 */
private TrainerSpec() {
  // Repeated string fields start out as the shared empty list.
  input_ =
      com.google.protobuf.LazyStringArrayList.emptyList();
  inputFormat_ = "";
  modelPrefix_ = "";
  // 1 is the wire number of ModelType.UNIGRAM, the declared default.
  modelType_ = 1;
  vocabSize_ = 8000;
  acceptLanguage_ =
      com.google.protobuf.LazyStringArrayList.emptyList();
  characterCoverage_ = 0.9995F;
  shuffleInputSentence_ = true;
  seedSentencepieceSize_ = 1000000;
  shrinkingFactor_ = 0.75F;
  maxSentenceLength_ = 4192;
  numThreads_ = 16;
  numSubIterations_ = 2;
  maxSentencepieceLength_ = 16;
  splitByUnicodeScript_ = true;
  splitByNumber_ = true;
  splitByWhitespace_ = true;
  controlSymbols_ =
      com.google.protobuf.LazyStringArrayList.emptyList();
  userDefinedSymbols_ =
      com.google.protobuf.LazyStringArrayList.emptyList();
  requiredChars_ = "";
  vocabularyOutputPieceScore_ = true;
  hardVocabLimit_ = true;
  bosId_ = 1;
  eosId_ = 2;
  padId_ = -1;
  // Declared defaults for the reserved meta pieces (fields 45-48).
  unkPiece_ = "<unk>";
  bosPiece_ = "<s>";
  eosPiece_ = "</s>";
  padPiece_ = "<pad>";
  // Octal escapes carry the UTF-8 bytes of U+2047; stringDefaultValue decodes them.
  unkSurface_ = com.google.protobuf.Internal.stringDefaultValue(" \342\201\207 ");
}
/**
 * Supplies a new {@code TrainerSpec} built by the no-argument constructor.
 *
 * @param unused marker argument; never read
 * @return a newly allocated {@code TrainerSpec}
 */
@java.lang.Override
@SuppressWarnings("unused")
protected java.lang.Object newInstance(
    UnusedPrivateParameter unused) {
  final TrainerSpec fresh = new TrainerSpec();
  return fresh;
}
/**
 * Returns the descriptor stored in the outer class's
 * {@code internal_static_sentencepiece_TrainerSpec_descriptor} field.
 *
 * @return the descriptor for {@code sentencepiece.TrainerSpec}
 */
public static final com.google.protobuf.Descriptors.Descriptor
    getDescriptor() {
  final com.google.protobuf.Descriptors.Descriptor descriptor =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_descriptor;
  return descriptor;
}
/**
 * Returns the outer class's field accessor table for this message after
 * calling {@code ensureFieldAccessorsInitialized} with the message and
 * builder classes.
 *
 * @return the value returned by {@code ensureFieldAccessorsInitialized}
 */
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
    internalGetFieldAccessorTable() {
  final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable table =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_fieldAccessorTable;
  return table.ensureFieldAccessorsInitialized(
      sentencepiece.SentencepieceModel.TrainerSpec.class,
      sentencepiece.SentencepieceModel.TrainerSpec.Builder.class);
}
/**
*
* Model type. only have UNIGRAM now.
*
*
* Protobuf enum {@code sentencepiece.TrainerSpec.ModelType}
*/
public enum ModelType
implements com.google.protobuf.ProtocolMessageEnum {
/**
*
* Unigram language model with dynamic algorithm
*
*
* UNIGRAM = 1;
*/
UNIGRAM(1),
/**
*
* Byte Pair Encoding
*
*
* BPE = 2;
*/
BPE(2),
/**
*
* Delimitered by whitespace.
*
*
* WORD = 3;
*/
WORD(3),
/**
*
* tokenizes into character sequence
*
*
* CHAR = 4;
*/
CHAR(4),
;
/**
*
* Unigram language model with dynamic algorithm
*
*
* UNIGRAM = 1;
*/
public static final int UNIGRAM_VALUE = 1;
/**
*
* Byte Pair Encoding
*
*
* BPE = 2;
*/
public static final int BPE_VALUE = 2;
/**
*
* Delimitered by whitespace.
*
*
* WORD = 3;
*/
public static final int WORD_VALUE = 3;
/**
*
* tokenizes into character sequence
*
*
* CHAR = 4;
*/
public static final int CHAR_VALUE = 4;
public final int getNumber() {
return value;
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
@java.lang.Deprecated
public static ModelType valueOf(int value) {
return forNumber(value);
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
*/
public static ModelType forNumber(int value) {
switch (value) {
case 1: return UNIGRAM;
case 2: return BPE;
case 3: return WORD;
case 4: return CHAR;
default: return null;
}
}
public static com.google.protobuf.Internal.EnumLiteMap
internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<
ModelType> internalValueMap =
new com.google.protobuf.Internal.EnumLiteMap() {
public ModelType findValueByNumber(int number) {
return ModelType.forNumber(number);
}
};
public final com.google.protobuf.Descriptors.EnumValueDescriptor
getValueDescriptor() {
return getDescriptor().getValues().get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptor() {
return sentencepiece.SentencepieceModel.TrainerSpec.getDescriptor().getEnumTypes().get(0);
}
private static final ModelType[] VALUES = values();
public static ModelType valueOf(
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
if (desc.getType() != getDescriptor()) {
throw new java.lang.IllegalArgumentException(
"EnumValueDescriptor is not for this type.");
}
return VALUES[desc.getIndex()];
}
private final int value;
private ModelType(int value) {
this.value = value;
}
// @@protoc_insertion_point(enum_scope:sentencepiece.TrainerSpec.ModelType)
}
// Presence bits for optional fields: each hasXxx() accessor in this class
// tests one bit of this word (e.g. 0x00000001 for input_format).
private int bitField0_;
// Second presence word. NOTE(review): presumably covers the optional fields
// that do not fit in bitField0_ — it is not read in this part of the file; confirm.
private int bitField1_;
public static final int INPUT_FIELD_NUMBER = 1;
@SuppressWarnings("serial")
private com.google.protobuf.LazyStringArrayList input_ =
com.google.protobuf.LazyStringArrayList.emptyList();
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @return A list containing the input.
*/
public com.google.protobuf.ProtocolStringList
getInputList() {
return input_;
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @return The count of input.
*/
public int getInputCount() {
return input_.size();
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param index The index of the element to return.
* @return The input at the given index.
*/
public java.lang.String getInput(int index) {
return input_.get(index);
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param index The index of the value to return.
* @return The bytes of the input at the given index.
*/
public com.google.protobuf.ByteString
getInputBytes(int index) {
return input_.getByteString(index);
}
public static final int INPUT_FORMAT_FIELD_NUMBER = 7;
// Holds either a java.lang.String or a ByteString; getInputFormat() and
// getInputFormatBytes() switch between the two forms.
@SuppressWarnings("serial")
private volatile java.lang.Object inputFormat_ = "";
/**
 * Reports whether {@code input_format} is set (bit 0x00000001 of {@code bitField0_}).
 *
 * <pre>
 * Input corpus format:
 * "text": one-sentence-per-line text format (default)
 * "tsv":  sentence &lt;tab&gt; freq
 * </pre>
 *
 * <code>optional string input_format = 7;</code>
 * @return Whether the inputFormat field is set.
 */
@java.lang.Override
public boolean hasInputFormat() {
  final int presenceBit = 0x00000001;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns {@code input_format} as a string.
 *
 * <pre>
 * Input corpus format:
 * "text": one-sentence-per-line text format (default)
 * "tsv":  sentence &lt;tab&gt; freq
 * </pre>
 *
 * <p>When the field currently holds a {@code ByteString}, it is decoded as
 * UTF-8; the decoded string is stored back only if the bytes are valid UTF-8.
 *
 * <code>optional string input_format = 7;</code>
 * @return The inputFormat.
 */
@java.lang.Override
public java.lang.String getInputFormat() {
  final java.lang.Object current = inputFormat_;
  if (!(current instanceof java.lang.String)) {
    final com.google.protobuf.ByteString raw =
        (com.google.protobuf.ByteString) current;
    final java.lang.String decoded = raw.toStringUtf8();
    if (raw.isValidUtf8()) {
      inputFormat_ = decoded;
    }
    return decoded;
  }
  return (java.lang.String) current;
}
/**
 * Returns {@code input_format} as a {@code ByteString}.
 *
 * <pre>
 * Input corpus format:
 * "text": one-sentence-per-line text format (default)
 * "tsv":  sentence &lt;tab&gt; freq
 * </pre>
 *
 * <p>When the field currently holds a {@code String}, it is encoded as UTF-8
 * and the encoded form is stored back into the field.
 *
 * <code>optional string input_format = 7;</code>
 * @return The bytes for inputFormat.
 */
@java.lang.Override
public com.google.protobuf.ByteString
    getInputFormatBytes() {
  final java.lang.Object current = inputFormat_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8(
          (java.lang.String) current);
  inputFormat_ = encoded;
  return encoded;
}
public static final int MODEL_PREFIX_FIELD_NUMBER = 2;
// Holds either a java.lang.String or a ByteString; getModelPrefix() and
// getModelPrefixBytes() switch between the two forms.
@SuppressWarnings("serial")
private volatile java.lang.Object modelPrefix_ = "";
/**
 * Reports whether {@code model_prefix} is set (bit 0x00000002 of {@code bitField0_}).
 *
 * <pre>
 * Output model file prefix.
 * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
 * </pre>
 *
 * <code>optional string model_prefix = 2;</code>
 * @return Whether the modelPrefix field is set.
 */
@java.lang.Override
public boolean hasModelPrefix() {
  final int presenceBit = 0x00000002;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns {@code model_prefix} as a string.
 *
 * <pre>
 * Output model file prefix.
 * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
 * </pre>
 *
 * <p>When the field currently holds a {@code ByteString}, it is decoded as
 * UTF-8; the decoded string is stored back only if the bytes are valid UTF-8.
 *
 * <code>optional string model_prefix = 2;</code>
 * @return The modelPrefix.
 */
@java.lang.Override
public java.lang.String getModelPrefix() {
  final java.lang.Object current = modelPrefix_;
  if (!(current instanceof java.lang.String)) {
    final com.google.protobuf.ByteString raw =
        (com.google.protobuf.ByteString) current;
    final java.lang.String decoded = raw.toStringUtf8();
    if (raw.isValidUtf8()) {
      modelPrefix_ = decoded;
    }
    return decoded;
  }
  return (java.lang.String) current;
}
/**
 * Returns {@code model_prefix} as a {@code ByteString}.
 *
 * <pre>
 * Output model file prefix.
 * &lt;model_prefix&gt;.model and &lt;model_prefix&gt;.vocab are generated.
 * </pre>
 *
 * <p>When the field currently holds a {@code String}, it is encoded as UTF-8
 * and the encoded form is stored back into the field.
 *
 * <code>optional string model_prefix = 2;</code>
 * @return The bytes for modelPrefix.
 */
@java.lang.Override
public com.google.protobuf.ByteString
    getModelPrefixBytes() {
  final java.lang.Object current = modelPrefix_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8(
          (java.lang.String) current);
  modelPrefix_ = encoded;
  return encoded;
}
public static final int MODEL_TYPE_FIELD_NUMBER = 3;
// Stored as the numeric enum value; 1 is ModelType.UNIGRAM.
private int modelType_ = 1;
/**
 * Reports whether {@code model_type} is set (bit 0x00000004 of {@code bitField0_}).
 *
 * <code>optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];</code>
 * @return Whether the modelType field is set.
 */
@java.lang.Override
public boolean hasModelType() {
  final int presenceBit = 0x00000004;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the model type, falling back to {@code UNIGRAM} when the stored
 * number does not map to a known constant.
 *
 * <code>optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];</code>
 * @return The modelType.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpec.ModelType getModelType() {
  final sentencepiece.SentencepieceModel.TrainerSpec.ModelType known =
      sentencepiece.SentencepieceModel.TrainerSpec.ModelType.forNumber(modelType_);
  if (known != null) {
    return known;
  }
  return sentencepiece.SentencepieceModel.TrainerSpec.ModelType.UNIGRAM;
}
public static final int VOCAB_SIZE_FIELD_NUMBER = 4;
// Backing field for vocab_size; initialised to the declared default.
private int vocabSize_ = 8000;
/**
 * Reports whether {@code vocab_size} is set (bit 0x00000008 of {@code bitField0_}).
 *
 * <pre>
 * Vocabulary size. 8k is the default size.
 * </pre>
 *
 * <code>optional int32 vocab_size = 4 [default = 8000];</code>
 * @return Whether the vocabSize field is set.
 */
@java.lang.Override
public boolean hasVocabSize() {
  final int presenceBit = 0x00000008;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the vocabulary size.
 *
 * <pre>
 * Vocabulary size. 8k is the default size.
 * </pre>
 *
 * <code>optional int32 vocab_size = 4 [default = 8000];</code>
 * @return The vocabSize.
 */
@java.lang.Override
public int getVocabSize() {
  return this.vocabSize_;
}
public static final int ACCEPT_LANGUAGE_FIELD_NUMBER = 5;
// Backing list for the repeated string field `accept_language`.
@SuppressWarnings("serial")
private com.google.protobuf.LazyStringArrayList acceptLanguage_ =
    com.google.protobuf.LazyStringArrayList.emptyList();
/**
 * Returns the languages this model can accept.
 *
 * <pre>
 * List of the languages this model can accept.
 * Since the model is language-agnostic, this field is used as a reference.
 * </pre>
 *
 * <code>repeated string accept_language = 5;</code>
 * @return A list containing the acceptLanguage.
 */
public com.google.protobuf.ProtocolStringList
    getAcceptLanguageList() {
  return this.acceptLanguage_;
}
/**
 * Returns how many accepted languages are listed.
 *
 * <pre>
 * List of the languages this model can accept.
 * Since the model is language-agnostic, this field is used as a reference.
 * </pre>
 *
 * <code>repeated string accept_language = 5;</code>
 * @return The count of acceptLanguage.
 */
public int getAcceptLanguageCount() {
  final int count = acceptLanguage_.size();
  return count;
}
/**
 * Returns one accepted language by position.
 *
 * <pre>
 * List of the languages this model can accept.
 * Since the model is language-agnostic, this field is used as a reference.
 * </pre>
 *
 * <code>repeated string accept_language = 5;</code>
 * @param index The index of the element to return.
 * @return The acceptLanguage at the given index.
 */
public java.lang.String getAcceptLanguage(int index) {
  final java.lang.String language = acceptLanguage_.get(index);
  return language;
}
/**
 * Returns one accepted language by position, as bytes.
 *
 * <pre>
 * List of the languages this model can accept.
 * Since the model is language-agnostic, this field is used as a reference.
 * </pre>
 *
 * <code>repeated string accept_language = 5;</code>
 * @param index The index of the value to return.
 * @return The bytes of the acceptLanguage at the given index.
 */
public com.google.protobuf.ByteString
    getAcceptLanguageBytes(int index) {
  final com.google.protobuf.ByteString languageBytes =
      acceptLanguage_.getByteString(index);
  return languageBytes;
}
public static final int SELF_TEST_SAMPLE_SIZE_FIELD_NUMBER = 6;
// Backing field for self_test_sample_size.
private int selfTestSampleSize_ = 0;
/**
 * Reports whether {@code self_test_sample_size} is set
 * (bit 0x00000010 of {@code bitField0_}).
 *
 * <pre>
 * Size of self-test samples, which are encoded in the model file.
 * </pre>
 *
 * <code>optional int32 self_test_sample_size = 6 [default = 0];</code>
 * @return Whether the selfTestSampleSize field is set.
 */
@java.lang.Override
public boolean hasSelfTestSampleSize() {
  final int presenceBit = 0x00000010;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the number of self-test samples.
 *
 * <pre>
 * Size of self-test samples, which are encoded in the model file.
 * </pre>
 *
 * <code>optional int32 self_test_sample_size = 6 [default = 0];</code>
 * @return The selfTestSampleSize.
 */
@java.lang.Override
public int getSelfTestSampleSize() {
  return this.selfTestSampleSize_;
}
public static final int CHARACTER_COVERAGE_FIELD_NUMBER = 10;
private float characterCoverage_ = 0.9995F;
/**
*
*/////////////////////////////////////////////////////////////////
* Training parameters.
*
* Uses characters which cover the corpus with the ratio of `chars_coverage`.
* This parameter determines the set of basic Alphabet of sentence piece.
* 1.0 - `chars_coverage` characters are treated as UNK.
* See also required_chars field.
*
*
* optional float character_coverage = 10 [default = 0.9995];
* @return Whether the characterCoverage field is set.
*/
@java.lang.Override
public boolean hasCharacterCoverage() {
return ((bitField0_ & 0x00000020) != 0);
}
/**
*
*/////////////////////////////////////////////////////////////////
* Training parameters.
*
* Uses characters which cover the corpus with the ratio of `chars_coverage`.
* This parameter determines the set of basic Alphabet of sentence piece.
* 1.0 - `chars_coverage` characters are treated as UNK.
* See also required_chars field.
*
*
* optional float character_coverage = 10 [default = 0.9995];
* @return The characterCoverage.
*/
@java.lang.Override
public float getCharacterCoverage() {
return characterCoverage_;
}
public static final int INPUT_SENTENCE_SIZE_FIELD_NUMBER = 11;
// Backing field for input_sentence_size (a uint64 in the proto, held in a long).
private long inputSentenceSize_ = 0L;
/**
 * Reports whether {@code input_sentence_size} is set
 * (bit 0x00000040 of {@code bitField0_}).
 *
 * <pre>
 * Maximum size of sentences the trainer loads from `input` parameter.
 * Trainer simply loads the `input` files in sequence.
 * It is better to shuffle the input corpus randomly.
 * </pre>
 *
 * <code>optional uint64 input_sentence_size = 11 [default = 0];</code>
 * @return Whether the inputSentenceSize field is set.
 */
@java.lang.Override
public boolean hasInputSentenceSize() {
  final int presenceBit = 0x00000040;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the maximum number of sentences loaded from {@code input}.
 *
 * <pre>
 * Maximum size of sentences the trainer loads from `input` parameter.
 * Trainer simply loads the `input` files in sequence.
 * It is better to shuffle the input corpus randomly.
 * </pre>
 *
 * <code>optional uint64 input_sentence_size = 11 [default = 0];</code>
 * @return The inputSentenceSize.
 */
@java.lang.Override
public long getInputSentenceSize() {
  return this.inputSentenceSize_;
}
public static final int SHUFFLE_INPUT_SENTENCE_FIELD_NUMBER = 19;
// Backing field for shuffle_input_sentence; initialised to the declared default.
private boolean shuffleInputSentence_ = true;
/**
 * Reports whether {@code shuffle_input_sentence} is set
 * (bit 0x00000080 of {@code bitField0_}).
 *
 * <code>optional bool shuffle_input_sentence = 19 [default = true];</code>
 * @return Whether the shuffleInputSentence field is set.
 */
@java.lang.Override
public boolean hasShuffleInputSentence() {
  final int presenceBit = 0x00000080;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of {@code shuffle_input_sentence}.
 *
 * <code>optional bool shuffle_input_sentence = 19 [default = true];</code>
 * @return The shuffleInputSentence.
 */
@java.lang.Override
public boolean getShuffleInputSentence() {
  return this.shuffleInputSentence_;
}
public static final int MINING_SENTENCE_SIZE_FIELD_NUMBER = 12;
// Backing field for the deprecated mining_sentence_size.
private int miningSentenceSize_ = 0;
/**
 * Reports whether {@code mining_sentence_size} is set
 * (bit 0x00000100 of {@code bitField0_}).
 *
 * <pre>
 * Maximum size of sentences to make seed sentence pieces.
 * Extended suffix array is constructed to extract frequent
 * sub-strings from the corpus. This uses 20N working space,
 * where N is the size of corpus.
 * </pre>
 *
 * <code>optional int32 mining_sentence_size = 12 [deprecated = true];</code>
 * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated.
 *     See sentencepiece_model.proto;l=83
 * @return Whether the miningSentenceSize field is set.
 */
@java.lang.Override
@java.lang.Deprecated
public boolean hasMiningSentenceSize() {
  final int presenceBit = 0x00000100;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of the deprecated {@code mining_sentence_size}.
 *
 * <pre>
 * Maximum size of sentences to make seed sentence pieces.
 * Extended suffix array is constructed to extract frequent
 * sub-strings from the corpus. This uses 20N working space,
 * where N is the size of corpus.
 * </pre>
 *
 * <code>optional int32 mining_sentence_size = 12 [deprecated = true];</code>
 * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated.
 *     See sentencepiece_model.proto;l=83
 * @return The miningSentenceSize.
 */
@java.lang.Override
@java.lang.Deprecated
public int getMiningSentenceSize() {
  return this.miningSentenceSize_;
}
public static final int TRAINING_SENTENCE_SIZE_FIELD_NUMBER = 13;
// Backing field for the deprecated training_sentence_size.
private int trainingSentenceSize_ = 0;
/**
 * Reports whether {@code training_sentence_size} is set
 * (bit 0x00000200 of {@code bitField0_}).
 *
 * <pre>
 * Maximum size of sentences to train sentence pieces.
 * </pre>
 *
 * <code>optional int32 training_sentence_size = 13 [deprecated = true];</code>
 * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated.
 *     See sentencepiece_model.proto;l=86
 * @return Whether the trainingSentenceSize field is set.
 */
@java.lang.Override
@java.lang.Deprecated
public boolean hasTrainingSentenceSize() {
  final int presenceBit = 0x00000200;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of the deprecated {@code training_sentence_size}.
 *
 * <pre>
 * Maximum size of sentences to train sentence pieces.
 * </pre>
 *
 * <code>optional int32 training_sentence_size = 13 [deprecated = true];</code>
 * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated.
 *     See sentencepiece_model.proto;l=86
 * @return The trainingSentenceSize.
 */
@java.lang.Override
@java.lang.Deprecated
public int getTrainingSentenceSize() {
  return this.trainingSentenceSize_;
}
public static final int SEED_SENTENCEPIECE_SIZE_FIELD_NUMBER = 14;
// Backing field for seed_sentencepiece_size; initialised to the declared default.
private int seedSentencepieceSize_ = 1000000;
/**
 * Reports whether {@code seed_sentencepiece_size} is set
 * (bit 0x00000400 of {@code bitField0_}).
 *
 * <pre>
 * The size of seed sentencepieces.
 * `seed_sentencepiece_size` must be larger than `vocab_size`.
 * </pre>
 *
 * <code>optional int32 seed_sentencepiece_size = 14 [default = 1000000];</code>
 * @return Whether the seedSentencepieceSize field is set.
 */
@java.lang.Override
public boolean hasSeedSentencepieceSize() {
  final int presenceBit = 0x00000400;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the size of the seed sentencepieces.
 *
 * <pre>
 * The size of seed sentencepieces.
 * `seed_sentencepiece_size` must be larger than `vocab_size`.
 * </pre>
 *
 * <code>optional int32 seed_sentencepiece_size = 14 [default = 1000000];</code>
 * @return The seedSentencepieceSize.
 */
@java.lang.Override
public int getSeedSentencepieceSize() {
  return this.seedSentencepieceSize_;
}
public static final int SHRINKING_FACTOR_FIELD_NUMBER = 15;
// Backing field for shrinking_factor; initialised to the declared default.
private float shrinkingFactor_ = 0.75F;
/**
 * Reports whether {@code shrinking_factor} is set
 * (bit 0x00000800 of {@code bitField0_}).
 *
 * <pre>
 * In every EM sub-iterations, keeps top
 * `shrinking_factor` * `current sentencepieces size` with respect to
 * the loss of the sentence piece. This value should be smaller than 1.0.
 * </pre>
 *
 * <code>optional float shrinking_factor = 15 [default = 0.75];</code>
 * @return Whether the shrinkingFactor field is set.
 */
@java.lang.Override
public boolean hasShrinkingFactor() {
  final int presenceBit = 0x00000800;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the shrinking factor applied in EM sub-iterations.
 *
 * <pre>
 * In every EM sub-iterations, keeps top
 * `shrinking_factor` * `current sentencepieces size` with respect to
 * the loss of the sentence piece. This value should be smaller than 1.0.
 * </pre>
 *
 * <code>optional float shrinking_factor = 15 [default = 0.75];</code>
 * @return The shrinkingFactor.
 */
@java.lang.Override
public float getShrinkingFactor() {
  return this.shrinkingFactor_;
}
public static final int MAX_SENTENCE_LENGTH_FIELD_NUMBER = 18;
// Backing field for max_sentence_length; initialised to the declared default.
private int maxSentenceLength_ = 4192;
/**
 * Reports whether {@code max_sentence_length} is set
 * (bit 0x00001000 of {@code bitField0_}).
 *
 * <pre>
 * The maximum sentence length in byte. The sentences with the length
 * larger than `max_sentence_length` is simply ignored.
 * Longer input tends to bring the following risks:
 *  * Overflow during EM training (unigram language model only)
 *  * Performance drop because of O(n log n) cost in BPE.
 * </pre>
 *
 * <code>optional int32 max_sentence_length = 18 [default = 4192];</code>
 * @return Whether the maxSentenceLength field is set.
 */
@java.lang.Override
public boolean hasMaxSentenceLength() {
  final int presenceBit = 0x00001000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the maximum sentence length in bytes.
 *
 * <pre>
 * The maximum sentence length in byte. The sentences with the length
 * larger than `max_sentence_length` is simply ignored.
 * Longer input tends to bring the following risks:
 *  * Overflow during EM training (unigram language model only)
 *  * Performance drop because of O(n log n) cost in BPE.
 * </pre>
 *
 * <code>optional int32 max_sentence_length = 18 [default = 4192];</code>
 * @return The maxSentenceLength.
 */
@java.lang.Override
public int getMaxSentenceLength() {
  return this.maxSentenceLength_;
}
public static final int NUM_THREADS_FIELD_NUMBER = 16;
// Backing field for num_threads; initialised to the declared default.
private int numThreads_ = 16;
/**
 * Reports whether {@code num_threads} is set
 * (bit 0x00002000 of {@code bitField0_}).
 *
 * <pre>
 * Number of threads in the training.
 * </pre>
 *
 * <code>optional int32 num_threads = 16 [default = 16];</code>
 * @return Whether the numThreads field is set.
 */
@java.lang.Override
public boolean hasNumThreads() {
  final int presenceBit = 0x00002000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the number of training threads.
 *
 * <pre>
 * Number of threads in the training.
 * </pre>
 *
 * <code>optional int32 num_threads = 16 [default = 16];</code>
 * @return The numThreads.
 */
@java.lang.Override
public int getNumThreads() {
  return this.numThreads_;
}
public static final int NUM_SUB_ITERATIONS_FIELD_NUMBER = 17;
// Backing field for num_sub_iterations; initialised to the declared default.
private int numSubIterations_ = 2;
/**
 * Reports whether {@code num_sub_iterations} is set
 * (bit 0x00004000 of {@code bitField0_}).
 *
 * <pre>
 * Number of EM sub iterations.
 * </pre>
 *
 * <code>optional int32 num_sub_iterations = 17 [default = 2];</code>
 * @return Whether the numSubIterations field is set.
 */
@java.lang.Override
public boolean hasNumSubIterations() {
  final int presenceBit = 0x00004000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the number of EM sub-iterations.
 *
 * <pre>
 * Number of EM sub iterations.
 * </pre>
 *
 * <code>optional int32 num_sub_iterations = 17 [default = 2];</code>
 * @return The numSubIterations.
 */
@java.lang.Override
public int getNumSubIterations() {
  return this.numSubIterations_;
}
public static final int MAX_SENTENCEPIECE_LENGTH_FIELD_NUMBER = 20;
private int maxSentencepieceLength_ = 16;
/**
*
*/////////////////////////////////////////////////////////////////
* SentencePiece parameters which control the shapes of sentence piece.
*
* Maximum length of sentencepiece.
*
*
* optional int32 max_sentencepiece_length = 20 [default = 16];
* @return Whether the maxSentencepieceLength field is set.
*/
@java.lang.Override
public boolean hasMaxSentencepieceLength() {
return ((bitField0_ & 0x00008000) != 0);
}
/**
*
*/////////////////////////////////////////////////////////////////
* SentencePiece parameters which control the shapes of sentence piece.
*
* Maximum length of sentencepiece.
*
*
* optional int32 max_sentencepiece_length = 20 [default = 16];
* @return The maxSentencepieceLength.
*/
@java.lang.Override
public int getMaxSentencepieceLength() {
return maxSentencepieceLength_;
}
public static final int SPLIT_BY_UNICODE_SCRIPT_FIELD_NUMBER = 21;
// Backing field for split_by_unicode_script; initialised to the declared default.
private boolean splitByUnicodeScript_ = true;
/**
 * Reports whether {@code split_by_unicode_script} is set
 * (bit 0x00010000 of {@code bitField0_}).
 *
 * <pre>
 * Uses Unicode script to split sentence pieces.
 * When `split_by_unicode_script` is true, we do not allow sentence piece to
 * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
 * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
 * as one script type, since Japanese word can consist of multiple scripts.
 * This exception is always applied regardless of the accept-language
 * parameter.
 * </pre>
 *
 * <code>optional bool split_by_unicode_script = 21 [default = true];</code>
 * @return Whether the splitByUnicodeScript field is set.
 */
@java.lang.Override
public boolean hasSplitByUnicodeScript() {
  final int presenceBit = 0x00010000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of {@code split_by_unicode_script}.
 *
 * <pre>
 * Uses Unicode script to split sentence pieces.
 * When `split_by_unicode_script` is true, we do not allow sentence piece to
 * include multiple Unicode scripts, e.g. "F1" is not a valid piece.
 * Exception: CJ characters (Hiragana/Katakana/Han) are all handled
 * as one script type, since Japanese word can consist of multiple scripts.
 * This exception is always applied regardless of the accept-language
 * parameter.
 * </pre>
 *
 * <code>optional bool split_by_unicode_script = 21 [default = true];</code>
 * @return The splitByUnicodeScript.
 */
@java.lang.Override
public boolean getSplitByUnicodeScript() {
  return this.splitByUnicodeScript_;
}
public static final int SPLIT_BY_NUMBER_FIELD_NUMBER = 23;
// Backing field for split_by_number; initialised to the declared default.
private boolean splitByNumber_ = true;
/**
 * Reports whether {@code split_by_number} is set
 * (bit 0x00020000 of {@code bitField0_}).
 *
 * <pre>
 * When `split_by_number` is true, put a boundary between number and
 * non-number transition. If we want to treat "F1" is one token, set this flag
 * to be false.
 * </pre>
 *
 * <code>optional bool split_by_number = 23 [default = true];</code>
 * @return Whether the splitByNumber field is set.
 */
@java.lang.Override
public boolean hasSplitByNumber() {
  final int presenceBit = 0x00020000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of {@code split_by_number}.
 *
 * <pre>
 * When `split_by_number` is true, put a boundary between number and
 * non-number transition. If we want to treat "F1" is one token, set this flag
 * to be false.
 * </pre>
 *
 * <code>optional bool split_by_number = 23 [default = true];</code>
 * @return The splitByNumber.
 */
@java.lang.Override
public boolean getSplitByNumber() {
  return this.splitByNumber_;
}
public static final int SPLIT_BY_WHITESPACE_FIELD_NUMBER = 22;
// Backing field for split_by_whitespace; initialised to the declared default.
private boolean splitByWhitespace_ = true;
/**
 * Reports whether {@code split_by_whitespace} is set
 * (bit 0x00040000 of {@code bitField0_}).
 *
 * <pre>
 * Use a white space to split sentence pieces.
 * When `split_by_whitespace` is false, we may have the piece containing
 * a white space in the middle. e.g., "in_the".
 * </pre>
 *
 * <code>optional bool split_by_whitespace = 22 [default = true];</code>
 * @return Whether the splitByWhitespace field is set.
 */
@java.lang.Override
public boolean hasSplitByWhitespace() {
  final int presenceBit = 0x00040000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of {@code split_by_whitespace}.
 *
 * <pre>
 * Use a white space to split sentence pieces.
 * When `split_by_whitespace` is false, we may have the piece containing
 * a white space in the middle. e.g., "in_the".
 * </pre>
 *
 * <code>optional bool split_by_whitespace = 22 [default = true];</code>
 * @return The splitByWhitespace.
 */
@java.lang.Override
public boolean getSplitByWhitespace() {
  return this.splitByWhitespace_;
}
public static final int TREAT_WHITESPACE_AS_SUFFIX_FIELD_NUMBER = 24;
// Backing field for treat_whitespace_as_suffix.
private boolean treatWhitespaceAsSuffix_ = false;
/**
 * Reports whether {@code treat_whitespace_as_suffix} is set
 * (bit 0x00080000 of {@code bitField0_}).
 *
 * <pre>
 * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =&gt;
 * hello_. When `treat_whitespace_as_suffix` is true,
 * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
 * of sentence.
 * </pre>
 *
 * <code>optional bool treat_whitespace_as_suffix = 24 [default = false];</code>
 * @return Whether the treatWhitespaceAsSuffix field is set.
 */
@java.lang.Override
public boolean hasTreatWhitespaceAsSuffix() {
  final int presenceBit = 0x00080000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of {@code treat_whitespace_as_suffix}.
 *
 * <pre>
 * Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =&gt;
 * hello_. When `treat_whitespace_as_suffix` is true,
 * NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
 * of sentence.
 * </pre>
 *
 * <code>optional bool treat_whitespace_as_suffix = 24 [default = false];</code>
 * @return The treatWhitespaceAsSuffix.
 */
@java.lang.Override
public boolean getTreatWhitespaceAsSuffix() {
  return this.treatWhitespaceAsSuffix_;
}
public static final int ALLOW_WHITESPACE_ONLY_PIECES_FIELD_NUMBER = 26;
// Backing field for allow_whitespace_only_pieces.
private boolean allowWhitespaceOnlyPieces_ = false;
/**
 * Reports whether {@code allow_whitespace_only_pieces} is set
 * (bit 0x00100000 of {@code bitField0_}).
 *
 * <pre>
 * Allows pieces that only contain whitespaces instead of appearing only as
 * prefix or suffix of other pieces.
 * </pre>
 *
 * <code>optional bool allow_whitespace_only_pieces = 26 [default = false];</code>
 * @return Whether the allowWhitespaceOnlyPieces field is set.
 */
@java.lang.Override
public boolean hasAllowWhitespaceOnlyPieces() {
  final int presenceBit = 0x00100000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of {@code allow_whitespace_only_pieces}.
 *
 * <pre>
 * Allows pieces that only contain whitespaces instead of appearing only as
 * prefix or suffix of other pieces.
 * </pre>
 *
 * <code>optional bool allow_whitespace_only_pieces = 26 [default = false];</code>
 * @return The allowWhitespaceOnlyPieces.
 */
@java.lang.Override
public boolean getAllowWhitespaceOnlyPieces() {
  return this.allowWhitespaceOnlyPieces_;
}
public static final int SPLIT_DIGITS_FIELD_NUMBER = 25;
// Backing field for split_digits.
private boolean splitDigits_ = false;
/**
 * Reports whether {@code split_digits} is set
 * (bit 0x00200000 of {@code bitField0_}).
 *
 * <pre>
 * Split all digits (0-9) into separate pieces.
 * </pre>
 *
 * <code>optional bool split_digits = 25 [default = false];</code>
 * @return Whether the splitDigits field is set.
 */
@java.lang.Override
public boolean hasSplitDigits() {
  final int presenceBit = 0x00200000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the value of {@code split_digits}.
 *
 * <pre>
 * Split all digits (0-9) into separate pieces.
 * </pre>
 *
 * <code>optional bool split_digits = 25 [default = false];</code>
 * @return The splitDigits.
 */
@java.lang.Override
public boolean getSplitDigits() {
  return this.splitDigits_;
}
public static final int CONTROL_SYMBOLS_FIELD_NUMBER = 30;
@SuppressWarnings("serial")
private com.google.protobuf.LazyStringArrayList controlSymbols_ =
com.google.protobuf.LazyStringArrayList.emptyList();
/**
*
*/////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. <s> and </s> are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @return A list containing the controlSymbols.
*/
public com.google.protobuf.ProtocolStringList
getControlSymbolsList() {
return controlSymbols_;
}
/**
*
*/////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. <s> and </s> are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @return The count of controlSymbols.
*/
public int getControlSymbolsCount() {
return controlSymbols_.size();
}
/**
*
*/////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. <s> and </s> are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param index The index of the element to return.
* @return The controlSymbols at the given index.
*/
public java.lang.String getControlSymbols(int index) {
return controlSymbols_.get(index);
}
/**
*
*/////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. <s> and </s> are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param index The index of the value to return.
* @return The bytes of the controlSymbols at the given index.
*/
public com.google.protobuf.ByteString
getControlSymbolsBytes(int index) {
return controlSymbols_.getByteString(index);
}
public static final int USER_DEFINED_SYMBOLS_FIELD_NUMBER = 31;
// Backing list for the repeated string field `user_defined_symbols`.
@SuppressWarnings("serial")
private com.google.protobuf.LazyStringArrayList userDefinedSymbols_ =
    com.google.protobuf.LazyStringArrayList.emptyList();
/**
 * Returns the user-defined symbols.
 *
 * <pre>
 * Defines user defined symbols.
 * These symbols are added with extremely high score
 * so they are always treated as one unique symbol in any context.
 * Typical usage of user_defined_symbols is placeholder for named entities.
 * </pre>
 *
 * <code>repeated string user_defined_symbols = 31;</code>
 * @return A list containing the userDefinedSymbols.
 */
public com.google.protobuf.ProtocolStringList
    getUserDefinedSymbolsList() {
  return this.userDefinedSymbols_;
}
/**
 * Returns how many {@code user_defined_symbols} entries this message holds.
 *
 * <p>User defined symbols are added with extremely high score so they are always
 * treated as one unique symbol in any context. Typical usage is a placeholder for
 * named entities.
 *
 * <p>{@code repeated string user_defined_symbols = 31;}
 *
 * @return The count of userDefinedSymbols.
 */
public int getUserDefinedSymbolsCount() {
  final int count = this.userDefinedSymbols_.size();
  return count;
}
/**
 * Returns one {@code user_defined_symbols} entry as a string.
 *
 * <p>User defined symbols are added with extremely high score so they are always
 * treated as one unique symbol in any context. Typical usage is a placeholder for
 * named entities.
 *
 * <p>{@code repeated string user_defined_symbols = 31;}
 *
 * @param index The index of the element to return.
 * @return The userDefinedSymbols at the given index.
 */
public java.lang.String getUserDefinedSymbols(int index) {
  // Plain delegation to the backing list; no bounds check is performed here.
  final java.lang.String symbol = this.userDefinedSymbols_.get(index);
  return symbol;
}
/**
 * Returns the bytes of one {@code user_defined_symbols} entry.
 *
 * <p>User defined symbols are added with extremely high score so they are always
 * treated as one unique symbol in any context. Typical usage is a placeholder for
 * named entities.
 *
 * <p>{@code repeated string user_defined_symbols = 31;}
 *
 * @param index The index of the value to return.
 * @return The bytes of the userDefinedSymbols at the given index.
 */
public com.google.protobuf.ByteString getUserDefinedSymbolsBytes(int index) {
  // Plain delegation to the backing list; no bounds check is performed here.
  final com.google.protobuf.ByteString bytes = this.userDefinedSymbols_.getByteString(index);
  return bytes;
}
// Proto field number of `optional string required_chars` (36).
public static final int REQUIRED_CHARS_FIELD_NUMBER = 36;
// Backing storage for required_chars. Typed as Object because the accessors keep
// either a java.lang.String or a ByteString here and swap between the two forms.
@SuppressWarnings("serial")
private volatile java.lang.Object requiredChars_ = "";
/**
 * Reports whether {@code required_chars} has been set.
 *
 * <p>The field defines required characters: each UTF8 character in the string is
 * included in the character set regardless of the character_coverage value. Unlike
 * user_defined_symbols, these characters have scores based on the frequency on input
 * sentences, and the model can form subwords using characters in this field.
 *
 * <p>{@code optional string required_chars = 36;}
 *
 * @return Whether the requiredChars field is set.
 */
@java.lang.Override
public boolean hasRequiredChars() {
  final int presenceBit = 0x00400000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns {@code required_chars} as a string.
 *
 * <p>The field defines required characters: each UTF8 character in the string is
 * included in the character set regardless of the character_coverage value. Unlike
 * user_defined_symbols, these characters have scores based on the frequency on input
 * sentences, and the model can form subwords using characters in this field.
 *
 * <p>The backing field may hold a {@code String} or a {@code ByteString}. A
 * {@code ByteString} is decoded as UTF-8, and the decoded string is stored back into
 * the field only when the bytes are valid UTF-8.
 *
 * <p>{@code optional string required_chars = 36;}
 *
 * @return The requiredChars.
 */
@java.lang.Override
public java.lang.String getRequiredChars() {
  final java.lang.Object current = requiredChars_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString rawBytes = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = rawBytes.toStringUtf8();
  if (rawBytes.isValidUtf8()) {
    // Cache the decoded form so later calls take the fast path above.
    requiredChars_ = decoded;
  }
  return decoded;
}
/**
 * Returns {@code required_chars} as UTF-8 bytes.
 *
 * <p>The field defines required characters: each UTF8 character in the string is
 * included in the character set regardless of the character_coverage value. Unlike
 * user_defined_symbols, these characters have scores based on the frequency on input
 * sentences, and the model can form subwords using characters in this field.
 *
 * <p>If the backing field currently holds a {@code String}, it is encoded with
 * {@code ByteString.copyFromUtf8} and the resulting {@code ByteString} replaces the
 * string in the field.
 *
 * <p>{@code optional string required_chars = 36;}
 *
 * @return The bytes for requiredChars.
 */
@java.lang.Override
public com.google.protobuf.ByteString getRequiredCharsBytes() {
  final java.lang.Object current = requiredChars_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  requiredChars_ = encoded;
  return encoded;
}
// Proto field number of `optional bool byte_fallback` (35).
public static final int BYTE_FALLBACK_FIELD_NUMBER = 35;
// Backing value for byte_fallback; initialized to the declared proto default (false).
private boolean byteFallback_ = false;
/**
 * Reports whether {@code byte_fallback} has been set. The option decomposes unknown
 * pieces into UTF-8 bytes.
 *
 * <p>{@code optional bool byte_fallback = 35 [default = false];}
 *
 * @return Whether the byteFallback field is set.
 */
@java.lang.Override
public boolean hasByteFallback() {
  final int presenceBit = 0x00800000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the stored {@code byte_fallback} value. The option decomposes unknown pieces
 * into UTF-8 bytes.
 *
 * <p>{@code optional bool byte_fallback = 35 [default = false];}
 *
 * @return The byteFallback.
 */
@java.lang.Override
public boolean getByteFallback() {
  return this.byteFallback_;
}
// Proto field number of `optional bool vocabulary_output_piece_score` (32).
public static final int VOCABULARY_OUTPUT_PIECE_SCORE_FIELD_NUMBER = 32;
// Backing value for vocabulary_output_piece_score; initialized to the declared proto default (true).
private boolean vocabularyOutputPieceScore_ = true;
/**
 * Reports whether {@code vocabulary_output_piece_score} has been set. When creating
 * the vocabulary file, the option defines whether or not to additionally output the
 * score for each piece.
 *
 * <p>{@code optional bool vocabulary_output_piece_score = 32 [default = true];}
 *
 * @return Whether the vocabularyOutputPieceScore field is set.
 */
@java.lang.Override
public boolean hasVocabularyOutputPieceScore() {
  final int presenceBit = 0x01000000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the stored {@code vocabulary_output_piece_score} value. When creating the
 * vocabulary file, the option defines whether or not to additionally output the score
 * for each piece.
 *
 * <p>{@code optional bool vocabulary_output_piece_score = 32 [default = true];}
 *
 * @return The vocabularyOutputPieceScore.
 */
@java.lang.Override
public boolean getVocabularyOutputPieceScore() {
  return this.vocabularyOutputPieceScore_;
}
// Proto field number of `optional bool hard_vocab_limit` (33).
public static final int HARD_VOCAB_LIMIT_FIELD_NUMBER = 33;
// Backing value for hard_vocab_limit; initialized to the declared proto default (true).
private boolean hardVocabLimit_ = true;
/**
 * Reports whether {@code hard_vocab_limit} has been set.
 *
 * <p>With the option enabled, {@code vocab_size} is treated as a hard limit: crash if
 * the model can not produce the vocab of size {@code vocab_size}. When it is false,
 * {@code vocab_size} is treated as a soft limit. Note that when model_type=char,
 * hard_vocab_limit = false is always assumed.
 *
 * <p>{@code optional bool hard_vocab_limit = 33 [default = true];}
 *
 * @return Whether the hardVocabLimit field is set.
 */
@java.lang.Override
public boolean hasHardVocabLimit() {
  final int presenceBit = 0x02000000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the stored {@code hard_vocab_limit} value.
 *
 * <p>With the option enabled, {@code vocab_size} is treated as a hard limit: crash if
 * the model can not produce the vocab of size {@code vocab_size}. When it is false,
 * {@code vocab_size} is treated as a soft limit. Note that when model_type=char,
 * hard_vocab_limit = false is always assumed.
 *
 * <p>{@code optional bool hard_vocab_limit = 33 [default = true];}
 *
 * @return The hardVocabLimit.
 */
@java.lang.Override
public boolean getHardVocabLimit() {
  return this.hardVocabLimit_;
}
// Proto field number of `optional bool use_all_vocab` (34).
public static final int USE_ALL_VOCAB_FIELD_NUMBER = 34;
// Backing value for use_all_vocab; initialized to the declared proto default (false).
private boolean useAllVocab_ = false;
/**
 * Reports whether {@code use_all_vocab} has been set. The option uses all symbols for
 * vocab extraction; the flag is valid if the model type is either CHAR or WORD.
 *
 * <p>{@code optional bool use_all_vocab = 34 [default = false];}
 *
 * @return Whether the useAllVocab field is set.
 */
@java.lang.Override
public boolean hasUseAllVocab() {
  final int presenceBit = 0x04000000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the stored {@code use_all_vocab} value. The option uses all symbols for
 * vocab extraction; the flag is valid if the model type is either CHAR or WORD.
 *
 * <p>{@code optional bool use_all_vocab = 34 [default = false];}
 *
 * @return The useAllVocab.
 */
@java.lang.Override
public boolean getUseAllVocab() {
  return this.useAllVocab_;
}
// Proto field number of `optional int32 unk_id` (40).
public static final int UNK_ID_FIELD_NUMBER = 40;
// Backing value for unk_id; initialized to the declared proto default (0).
private int unkId_ = 0;
/**
*
*/////////////////////////////////////////////////////////////////
* Reserved special meta tokens.
* * -1 is not used.
* * unk_id must not be -1.
* Id must starts with 0 and be contigous.
*
*
* optional int32 unk_id = 40 [default = 0];
* @return Whether the unkId field is set.
*/
@java.lang.Override
public boolean hasUnkId() {
return ((bitField0_ & 0x08000000) != 0);
}
/**
*
*/////////////////////////////////////////////////////////////////
* Reserved special meta tokens.
* * -1 is not used.
* * unk_id must not be -1.
* Id must starts with 0 and be contigous.
*
*
* optional int32 unk_id = 40 [default = 0];
* @return The unkId.
*/
@java.lang.Override
public int getUnkId() {
return unkId_;
}
// Proto field number of `optional int32 bos_id` (41).
public static final int BOS_ID_FIELD_NUMBER = 41;
// Backing value for bos_id; initialized to the declared proto default (1).
private int bosId_ = 1;
/**
 * Reports whether {@code bos_id}, the id documented for {@code <s>}, has been set.
 *
 * <p>{@code optional int32 bos_id = 41 [default = 1];}
 *
 * @return Whether the bosId field is set.
 */
@java.lang.Override
public boolean hasBosId() {
  final int presenceBit = 0x10000000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the stored {@code bos_id} value, the id documented for {@code <s>}.
 *
 * <p>{@code optional int32 bos_id = 41 [default = 1];}
 *
 * @return The bosId.
 */
@java.lang.Override
public int getBosId() {
  return this.bosId_;
}
// Proto field number of `optional int32 eos_id` (42).
public static final int EOS_ID_FIELD_NUMBER = 42;
// Backing value for eos_id; initialized to the declared proto default (2).
private int eosId_ = 2;
/**
 * Reports whether {@code eos_id}, the id documented for {@code </s>}, has been set.
 *
 * <p>{@code optional int32 eos_id = 42 [default = 2];}
 *
 * @return Whether the eosId field is set.
 */
@java.lang.Override
public boolean hasEosId() {
  final int presenceBit = 0x20000000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the stored {@code eos_id} value, the id documented for {@code </s>}.
 *
 * <p>{@code optional int32 eos_id = 42 [default = 2];}
 *
 * @return The eosId.
 */
@java.lang.Override
public int getEosId() {
  return this.eosId_;
}
// Proto field number of `optional int32 pad_id` (43).
public static final int PAD_ID_FIELD_NUMBER = 43;
// Backing value for pad_id; initialized to the declared proto default (-1).
private int padId_ = -1;
/**
 * Reports whether {@code pad_id}, the id documented for {@code <pad>} (padding), has
 * been set.
 *
 * <p>{@code optional int32 pad_id = 43 [default = -1];}
 *
 * @return Whether the padId field is set.
 */
@java.lang.Override
public boolean hasPadId() {
  final int presenceBit = 0x40000000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns the stored {@code pad_id} value, the id documented for {@code <pad>}
 * (padding).
 *
 * <p>{@code optional int32 pad_id = 43 [default = -1];}
 *
 * @return The padId.
 */
@java.lang.Override
public int getPadId() {
  return this.padId_;
}
// Proto field number of `optional string unk_piece` (45).
public static final int UNK_PIECE_FIELD_NUMBER = 45;
// Backing storage for unk_piece. Typed as Object because the accessors keep either a
// java.lang.String or a ByteString here and swap between the two forms.
// NOTE(review): initialized to "" here although the declared proto default is "<unk>";
// presumably the default is assigned elsewhere (e.g. in a constructor) — confirm.
@SuppressWarnings("serial")
private volatile java.lang.Object unkPiece_ = "";
/**
 * Reports whether {@code unk_piece} has been set.
 *
 * <p>{@code optional string unk_piece = 45 [default = "<unk>"];}
 *
 * @return Whether the unkPiece field is set.
 */
@java.lang.Override
public boolean hasUnkPiece() {
  // Highest bit of bitField0_; as an int literal this mask is negative.
  final int presenceBit = 0x80000000;
  return (bitField0_ & presenceBit) != 0;
}
/**
 * Returns {@code unk_piece} as a string.
 *
 * <p>The backing field may hold a {@code String} or a {@code ByteString}. A
 * {@code ByteString} is decoded as UTF-8, and the decoded string is stored back into
 * the field only when the bytes are valid UTF-8.
 *
 * <p>{@code optional string unk_piece = 45 [default = "<unk>"];}
 *
 * @return The unkPiece.
 */
@java.lang.Override
public java.lang.String getUnkPiece() {
  final java.lang.Object current = unkPiece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString rawBytes = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = rawBytes.toStringUtf8();
  if (rawBytes.isValidUtf8()) {
    // Cache the decoded form so later calls take the fast path above.
    unkPiece_ = decoded;
  }
  return decoded;
}
/**
 * Returns {@code unk_piece} as UTF-8 bytes.
 *
 * <p>If the backing field currently holds a {@code String}, it is encoded with
 * {@code ByteString.copyFromUtf8} and the resulting {@code ByteString} replaces the
 * string in the field.
 *
 * <p>{@code optional string unk_piece = 45 [default = "<unk>"];}
 *
 * @return The bytes for unkPiece.
 */
@java.lang.Override
public com.google.protobuf.ByteString getUnkPieceBytes() {
  final java.lang.Object current = unkPiece_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  unkPiece_ = encoded;
  return encoded;
}
// Proto field number of `optional string bos_piece` (46).
public static final int BOS_PIECE_FIELD_NUMBER = 46;
// Backing storage for bos_piece. Typed as Object because the accessors keep either a
// java.lang.String or a ByteString here and swap between the two forms.
// NOTE(review): initialized to "" here although the declared proto default is "<s>";
// presumably the default is assigned elsewhere (e.g. in a constructor) — confirm.
@SuppressWarnings("serial")
private volatile java.lang.Object bosPiece_ = "";
/**
 * Reports whether {@code bos_piece} has been set.
 *
 * <p>{@code optional string bos_piece = 46 [default = "<s>"];}
 *
 * @return Whether the bosPiece field is set.
 */
@java.lang.Override
public boolean hasBosPiece() {
  // Presence for this field is tracked in the second bit field word.
  final int presenceBit = 0x00000001;
  return (bitField1_ & presenceBit) != 0;
}
/**
 * Returns {@code bos_piece} as a string.
 *
 * <p>The backing field may hold a {@code String} or a {@code ByteString}. A
 * {@code ByteString} is decoded as UTF-8, and the decoded string is stored back into
 * the field only when the bytes are valid UTF-8.
 *
 * <p>{@code optional string bos_piece = 46 [default = "<s>"];}
 *
 * @return The bosPiece.
 */
@java.lang.Override
public java.lang.String getBosPiece() {
  final java.lang.Object current = bosPiece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString rawBytes = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = rawBytes.toStringUtf8();
  if (rawBytes.isValidUtf8()) {
    // Cache the decoded form so later calls take the fast path above.
    bosPiece_ = decoded;
  }
  return decoded;
}
/**
 * Returns {@code bos_piece} as UTF-8 bytes.
 *
 * <p>If the backing field currently holds a {@code String}, it is encoded with
 * {@code ByteString.copyFromUtf8} and the resulting {@code ByteString} replaces the
 * string in the field.
 *
 * <p>{@code optional string bos_piece = 46 [default = "<s>"];}
 *
 * @return The bytes for bosPiece.
 */
@java.lang.Override
public com.google.protobuf.ByteString getBosPieceBytes() {
  final java.lang.Object current = bosPiece_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  bosPiece_ = encoded;
  return encoded;
}
// Proto field number of `optional string eos_piece` (47).
public static final int EOS_PIECE_FIELD_NUMBER = 47;
// Backing storage for eos_piece. Typed as Object because the accessors keep either a
// java.lang.String or a ByteString here and swap between the two forms.
// NOTE(review): initialized to "" here although the declared proto default is "</s>";
// presumably the default is assigned elsewhere (e.g. in a constructor) — confirm.
@SuppressWarnings("serial")
private volatile java.lang.Object eosPiece_ = "";
/**
 * Reports whether {@code eos_piece} has been set.
 *
 * <p>{@code optional string eos_piece = 47 [default = "</s>"];}
 *
 * @return Whether the eosPiece field is set.
 */
@java.lang.Override
public boolean hasEosPiece() {
  // Presence for this field is tracked in the second bit field word.
  final int presenceBit = 0x00000002;
  return (bitField1_ & presenceBit) != 0;
}
/**
 * Returns {@code eos_piece} as a string.
 *
 * <p>The backing field may hold a {@code String} or a {@code ByteString}. A
 * {@code ByteString} is decoded as UTF-8, and the decoded string is stored back into
 * the field only when the bytes are valid UTF-8.
 *
 * <p>{@code optional string eos_piece = 47 [default = "</s>"];}
 *
 * @return The eosPiece.
 */
@java.lang.Override
public java.lang.String getEosPiece() {
  final java.lang.Object current = eosPiece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString rawBytes = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = rawBytes.toStringUtf8();
  if (rawBytes.isValidUtf8()) {
    // Cache the decoded form so later calls take the fast path above.
    eosPiece_ = decoded;
  }
  return decoded;
}
/**
 * Returns {@code eos_piece} as UTF-8 bytes.
 *
 * <p>If the backing field currently holds a {@code String}, it is encoded with
 * {@code ByteString.copyFromUtf8} and the resulting {@code ByteString} replaces the
 * string in the field.
 *
 * <p>{@code optional string eos_piece = 47 [default = "</s>"];}
 *
 * @return The bytes for eosPiece.
 */
@java.lang.Override
public com.google.protobuf.ByteString getEosPieceBytes() {
  final java.lang.Object current = eosPiece_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  eosPiece_ = encoded;
  return encoded;
}
// Proto field number of `optional string pad_piece` (48).
public static final int PAD_PIECE_FIELD_NUMBER = 48;
// Backing storage for pad_piece. Typed as Object because the accessors keep either a
// java.lang.String or a ByteString here and swap between the two forms.
// NOTE(review): initialized to "" here although the declared proto default is "<pad>";
// presumably the default is assigned elsewhere (e.g. in a constructor) — confirm.
@SuppressWarnings("serial")
private volatile java.lang.Object padPiece_ = "";
/**
 * Reports whether {@code pad_piece} has been set.
 *
 * <p>{@code optional string pad_piece = 48 [default = "<pad>"];}
 *
 * @return Whether the padPiece field is set.
 */
@java.lang.Override
public boolean hasPadPiece() {
  // Presence for this field is tracked in the second bit field word.
  final int presenceBit = 0x00000004;
  return (bitField1_ & presenceBit) != 0;
}
/**
 * Returns {@code pad_piece} as a string.
 *
 * <p>The backing field may hold a {@code String} or a {@code ByteString}. A
 * {@code ByteString} is decoded as UTF-8, and the decoded string is stored back into
 * the field only when the bytes are valid UTF-8.
 *
 * <p>{@code optional string pad_piece = 48 [default = "<pad>"];}
 *
 * @return The padPiece.
 */
@java.lang.Override
public java.lang.String getPadPiece() {
  final java.lang.Object current = padPiece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString rawBytes = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = rawBytes.toStringUtf8();
  if (rawBytes.isValidUtf8()) {
    // Cache the decoded form so later calls take the fast path above.
    padPiece_ = decoded;
  }
  return decoded;
}
/**
 * Returns {@code pad_piece} as UTF-8 bytes.
 *
 * <p>If the backing field currently holds a {@code String}, it is encoded with
 * {@code ByteString.copyFromUtf8} and the resulting {@code ByteString} replaces the
 * string in the field.
 *
 * <p>{@code optional string pad_piece = 48 [default = "<pad>"];}
 *
 * @return The bytes for padPiece.
 */
@java.lang.Override
public com.google.protobuf.ByteString getPadPieceBytes() {
  final java.lang.Object current = padPiece_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  padPiece_ = encoded;
  return encoded;
}
// Proto field number of `optional string unk_surface` (44).
public static final int UNK_SURFACE_FIELD_NUMBER = 44;
// Backing storage for unk_surface. Typed as Object because the accessors keep either a
// java.lang.String or a ByteString here and swap between the two forms.
// The initial value is built from the octal-escaped literal " \342\201\207 ", which the
// field's documentation describes as U+2047 (DOUBLE QUESTION MARK) surrounded by spaces.
// NOTE(review): presumably Internal.stringDefaultValue decodes those escaped bytes as
// UTF-8 — confirm against the protobuf-java documentation.
@SuppressWarnings("serial")
private volatile java.lang.Object unkSurface_ = com.google.protobuf.Internal.stringDefaultValue(" \342\201\207 ");
/**
 * Reports whether {@code unk_surface} has been set.
 *
 * <p>The field encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK), since this
 * character can be useful both for user and developer: it makes it easy to figure out
 * that {@code <unk>} is emitted.
 *
 * <p>{@code optional string unk_surface = 44 [default = " \342\201\207 "];}
 *
 * @return Whether the unkSurface field is set.
 */
@java.lang.Override
public boolean hasUnkSurface() {
  // Presence for this field is tracked in the second bit field word.
  final int presenceBit = 0x00000008;
  return (bitField1_ & presenceBit) != 0;
}
/**
 * Returns {@code unk_surface} as a string.
 *
 * <p>The field encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK), since this
 * character can be useful both for user and developer: it makes it easy to figure out
 * that {@code <unk>} is emitted.
 *
 * <p>The backing field may hold a {@code String} or a {@code ByteString}. A
 * {@code ByteString} is decoded as UTF-8, and the decoded string is stored back into
 * the field only when the bytes are valid UTF-8.
 *
 * <p>{@code optional string unk_surface = 44 [default = " \342\201\207 "];}
 *
 * @return The unkSurface.
 */
@java.lang.Override
public java.lang.String getUnkSurface() {
  final java.lang.Object current = unkSurface_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString rawBytes = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = rawBytes.toStringUtf8();
  if (rawBytes.isValidUtf8()) {
    // Cache the decoded form so later calls take the fast path above.
    unkSurface_ = decoded;
  }
  return decoded;
}
/**
 * Returns {@code unk_surface} as UTF-8 bytes.
 *
 * <p>The field encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK), since this
 * character can be useful both for user and developer: it makes it easy to figure out
 * that {@code <unk>} is emitted.
 *
 * <p>If the backing field currently holds a {@code String}, it is encoded with
 * {@code ByteString.copyFromUtf8} and the resulting {@code ByteString} replaces the
 * string in the field.
 *
 * <p>{@code optional string unk_surface = 44 [default = " \342\201\207 "];}
 *
 * @return The bytes for unkSurface.
 */
@java.lang.Override
public com.google.protobuf.ByteString getUnkSurfaceBytes() {
  final java.lang.Object current = unkSurface_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  unkSurface_ = encoded;
  return encoded;
}
// Proto field number of `optional bool train_extremely_large_corpus` (49).
public static final int TRAIN_EXTREMELY_LARGE_CORPUS_FIELD_NUMBER = 49;
// Backing value for train_extremely_large_corpus; initialized to the declared proto default (false).
private boolean trainExtremelyLargeCorpus_ = false;
/**
 * Reports whether {@code train_extremely_large_corpus} has been set.
 *
 * <p>The option increases bit depth to allow unigram model training on large (more
 * than 10M sentences) corpora. A side-effect of enabling this flag is increased memory
 * usage.
 *
 * <p>{@code optional bool train_extremely_large_corpus = 49 [default = false];}
 *
 * @return Whether the trainExtremelyLargeCorpus field is set.
 */
@java.lang.Override
public boolean hasTrainExtremelyLargeCorpus() {
  // Presence for this field is tracked in the second bit field word.
  final int presenceBit = 0x00000010;
  return (bitField1_ & presenceBit) != 0;
}
/**
 * Returns the stored {@code train_extremely_large_corpus} value.
 *
 * <p>The option increases bit depth to allow unigram model training on large (more
 * than 10M sentences) corpora. A side-effect of enabling this flag is increased memory
 * usage.
 *
 * <p>{@code optional bool train_extremely_large_corpus = 49 [default = false];}
 *
 * @return The trainExtremelyLargeCorpus.
 */
@java.lang.Override
public boolean getTrainExtremelyLargeCorpus() {
  return this.trainExtremelyLargeCorpus_;
}
// Tri-state cache for isInitialized(): -1 = not computed yet, 0 = false, 1 = true.
private byte memoizedIsInitialized = -1;

/**
 * Reports whether this message is initialized, caching the answer.
 *
 * <p>A cached 1 or 0 is returned immediately. Otherwise the result of
 * {@code extensionsAreInitialized()} is stored in {@code memoizedIsInitialized}
 * (1 for true, 0 for false) and returned.
 *
 * @return {@code true} if the extensions are initialized, {@code false} otherwise.
 */
@java.lang.Override
public final boolean isInitialized() {
  final byte cached = memoizedIsInitialized;
  if (cached == 1) {
    return true;
  }
  if (cached == 0) {
    return false;
  }
  final boolean extensionsReady = extensionsAreInitialized();
  memoizedIsInitialized = (byte) (extensionsReady ? 1 : 0);
  return extensionsReady;
}
/**
 * Serializes this message to {@code output}.
 *
 * <p>Fields are emitted in ascending field-number order (1 through 49). Every element
 * of a repeated string field is written; an optional field is written only when its
 * presence bit in {@code bitField0_} or {@code bitField1_} is set. Presence bits do not
 * follow field-number order, so each mask below is paired with a specific field.
 * Extensions and unknown fields are written last.
 *
 * @param output the stream that receives the encoded fields.
 * @throws java.io.IOException declared for the write calls on {@code output}.
 */
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
com.google.protobuf.GeneratedMessageV3
.ExtendableMessage.ExtensionWriter
extensionWriter = newExtensionWriter();
// Fields 1-7: input, model_prefix, model_type, vocab_size, accept_language,
// self_test_sample_size, input_format.
for (int i = 0; i < input_.size(); i++) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 1, input_.getRaw(i));
}
if (((bitField0_ & 0x00000002) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 2, modelPrefix_);
}
if (((bitField0_ & 0x00000004) != 0)) {
output.writeEnum(3, modelType_);
}
if (((bitField0_ & 0x00000008) != 0)) {
output.writeInt32(4, vocabSize_);
}
for (int i = 0; i < acceptLanguage_.size(); i++) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 5, acceptLanguage_.getRaw(i));
}
if (((bitField0_ & 0x00000010) != 0)) {
output.writeInt32(6, selfTestSampleSize_);
}
if (((bitField0_ & 0x00000001) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 7, inputFormat_);
}
// Fields 10-20: coverage, sentence-size, shrinking, threading and length settings.
if (((bitField0_ & 0x00000020) != 0)) {
output.writeFloat(10, characterCoverage_);
}
if (((bitField0_ & 0x00000040) != 0)) {
output.writeUInt64(11, inputSentenceSize_);
}
if (((bitField0_ & 0x00000100) != 0)) {
output.writeInt32(12, miningSentenceSize_);
}
if (((bitField0_ & 0x00000200) != 0)) {
output.writeInt32(13, trainingSentenceSize_);
}
if (((bitField0_ & 0x00000400) != 0)) {
output.writeInt32(14, seedSentencepieceSize_);
}
if (((bitField0_ & 0x00000800) != 0)) {
output.writeFloat(15, shrinkingFactor_);
}
if (((bitField0_ & 0x00002000) != 0)) {
output.writeInt32(16, numThreads_);
}
if (((bitField0_ & 0x00004000) != 0)) {
output.writeInt32(17, numSubIterations_);
}
if (((bitField0_ & 0x00001000) != 0)) {
output.writeInt32(18, maxSentenceLength_);
}
if (((bitField0_ & 0x00000080) != 0)) {
output.writeBool(19, shuffleInputSentence_);
}
if (((bitField0_ & 0x00008000) != 0)) {
output.writeInt32(20, maxSentencepieceLength_);
}
// Fields 21-26: boolean splitting / whitespace options.
if (((bitField0_ & 0x00010000) != 0)) {
output.writeBool(21, splitByUnicodeScript_);
}
if (((bitField0_ & 0x00040000) != 0)) {
output.writeBool(22, splitByWhitespace_);
}
if (((bitField0_ & 0x00020000) != 0)) {
output.writeBool(23, splitByNumber_);
}
if (((bitField0_ & 0x00080000) != 0)) {
output.writeBool(24, treatWhitespaceAsSuffix_);
}
if (((bitField0_ & 0x00200000) != 0)) {
output.writeBool(25, splitDigits_);
}
if (((bitField0_ & 0x00100000) != 0)) {
output.writeBool(26, allowWhitespaceOnlyPieces_);
}
// Fields 30-36: control/user-defined symbols and vocabulary options.
for (int i = 0; i < controlSymbols_.size(); i++) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 30, controlSymbols_.getRaw(i));
}
for (int i = 0; i < userDefinedSymbols_.size(); i++) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 31, userDefinedSymbols_.getRaw(i));
}
if (((bitField0_ & 0x01000000) != 0)) {
output.writeBool(32, vocabularyOutputPieceScore_);
}
if (((bitField0_ & 0x02000000) != 0)) {
output.writeBool(33, hardVocabLimit_);
}
if (((bitField0_ & 0x04000000) != 0)) {
output.writeBool(34, useAllVocab_);
}
if (((bitField0_ & 0x00800000) != 0)) {
output.writeBool(35, byteFallback_);
}
if (((bitField0_ & 0x00400000) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 36, requiredChars_);
}
// Fields 40-43: reserved meta-token ids.
if (((bitField0_ & 0x08000000) != 0)) {
output.writeInt32(40, unkId_);
}
if (((bitField0_ & 0x10000000) != 0)) {
output.writeInt32(41, bosId_);
}
if (((bitField0_ & 0x20000000) != 0)) {
output.writeInt32(42, eosId_);
}
if (((bitField0_ & 0x40000000) != 0)) {
output.writeInt32(43, padId_);
}
// Fields 44-49: presence moves to bitField1_ here, except unk_piece (45), which
// uses the top bit of bitField0_.
if (((bitField1_ & 0x00000008) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 44, unkSurface_);
}
if (((bitField0_ & 0x80000000) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 45, unkPiece_);
}
if (((bitField1_ & 0x00000001) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 46, bosPiece_);
}
if (((bitField1_ & 0x00000002) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 47, eosPiece_);
}
if (((bitField1_ & 0x00000004) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 48, padPiece_);
}
if (((bitField1_ & 0x00000010) != 0)) {
output.writeBool(49, trainExtremelyLargeCorpus_);
}
// 536870912 == 2^29. NOTE(review): presumably the upper bound of the extension
// field-number range written here — confirm against the .proto extension range.
extensionWriter.writeUntil(536870912, output);
getUnknownFields().writeTo(output);
}
/**
 * Computes the serialized size of this message in bytes, caching the result.
 *
 * <p>If {@code memoizedSize} is not -1 it is returned as is. Otherwise the sizes of all
 * repeated string elements, of every optional field whose presence bit is set, of the
 * extensions and of the unknown fields are summed, stored in {@code memoizedSize} and
 * returned.
 *
 * @return the number of bytes {@code writeTo} is expected to produce.
 */
@java.lang.Override
public int getSerializedSize() {
  final int cached = memoizedSize;
  if (cached != -1) {
    return cached;
  }
  int total = 0;

  // repeated string input = 1: payload sizes plus 1 byte per element.
  int inputBytes = 0;
  for (int idx = 0; idx < input_.size(); idx++) {
    inputBytes += computeStringSizeNoTag(input_.getRaw(idx));
  }
  total += inputBytes;
  total += 1 * getInputList().size();

  if ((bitField0_ & 0x00000002) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(2, modelPrefix_);
  }
  if ((bitField0_ & 0x00000004) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeEnumSize(3, modelType_);
  }
  if ((bitField0_ & 0x00000008) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(4, vocabSize_);
  }

  // repeated string accept_language = 5: payload sizes plus 1 byte per element.
  int acceptLanguageBytes = 0;
  for (int idx = 0; idx < acceptLanguage_.size(); idx++) {
    acceptLanguageBytes += computeStringSizeNoTag(acceptLanguage_.getRaw(idx));
  }
  total += acceptLanguageBytes;
  total += 1 * getAcceptLanguageList().size();

  if ((bitField0_ & 0x00000010) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(6, selfTestSampleSize_);
  }
  if ((bitField0_ & 0x00000001) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(7, inputFormat_);
  }
  if ((bitField0_ & 0x00000020) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeFloatSize(10, characterCoverage_);
  }
  if ((bitField0_ & 0x00000040) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeUInt64Size(11, inputSentenceSize_);
  }
  if ((bitField0_ & 0x00000100) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(12, miningSentenceSize_);
  }
  if ((bitField0_ & 0x00000200) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(13, trainingSentenceSize_);
  }
  if ((bitField0_ & 0x00000400) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(14, seedSentencepieceSize_);
  }
  if ((bitField0_ & 0x00000800) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeFloatSize(15, shrinkingFactor_);
  }
  if ((bitField0_ & 0x00002000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(16, numThreads_);
  }
  if ((bitField0_ & 0x00004000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(17, numSubIterations_);
  }
  if ((bitField0_ & 0x00001000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(18, maxSentenceLength_);
  }
  if ((bitField0_ & 0x00000080) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(19, shuffleInputSentence_);
  }
  if ((bitField0_ & 0x00008000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(20, maxSentencepieceLength_);
  }
  if ((bitField0_ & 0x00010000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(21, splitByUnicodeScript_);
  }
  if ((bitField0_ & 0x00040000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(22, splitByWhitespace_);
  }
  if ((bitField0_ & 0x00020000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(23, splitByNumber_);
  }
  if ((bitField0_ & 0x00080000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(24, treatWhitespaceAsSuffix_);
  }
  if ((bitField0_ & 0x00200000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(25, splitDigits_);
  }
  if ((bitField0_ & 0x00100000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(26, allowWhitespaceOnlyPieces_);
  }

  // repeated string control_symbols = 30: payload sizes plus 2 bytes per element.
  int controlSymbolBytes = 0;
  for (int idx = 0; idx < controlSymbols_.size(); idx++) {
    controlSymbolBytes += computeStringSizeNoTag(controlSymbols_.getRaw(idx));
  }
  total += controlSymbolBytes;
  total += 2 * getControlSymbolsList().size();

  // repeated string user_defined_symbols = 31: payload sizes plus 2 bytes per element.
  int userSymbolBytes = 0;
  for (int idx = 0; idx < userDefinedSymbols_.size(); idx++) {
    userSymbolBytes += computeStringSizeNoTag(userDefinedSymbols_.getRaw(idx));
  }
  total += userSymbolBytes;
  total += 2 * getUserDefinedSymbolsList().size();

  if ((bitField0_ & 0x01000000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(32, vocabularyOutputPieceScore_);
  }
  if ((bitField0_ & 0x02000000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(33, hardVocabLimit_);
  }
  if ((bitField0_ & 0x04000000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(34, useAllVocab_);
  }
  if ((bitField0_ & 0x00800000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(35, byteFallback_);
  }
  if ((bitField0_ & 0x00400000) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(36, requiredChars_);
  }
  if ((bitField0_ & 0x08000000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(40, unkId_);
  }
  if ((bitField0_ & 0x10000000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(41, bosId_);
  }
  if ((bitField0_ & 0x20000000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(42, eosId_);
  }
  if ((bitField0_ & 0x40000000) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeInt32Size(43, padId_);
  }
  if ((bitField1_ & 0x00000008) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(44, unkSurface_);
  }
  if ((bitField0_ & 0x80000000) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(45, unkPiece_);
  }
  if ((bitField1_ & 0x00000001) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(46, bosPiece_);
  }
  if ((bitField1_ & 0x00000002) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(47, eosPiece_);
  }
  if ((bitField1_ & 0x00000004) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(48, padPiece_);
  }
  if ((bitField1_ & 0x00000010) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(49, trainExtremelyLargeCorpus_);
  }

  total += extensionsSerializedSize();
  total += getUnknownFields().getSerializedSize();
  memoizedSize = total;
  return total;
}
/**
 * Compares this message with another object field by field.
 *
 * <p>The same instance is equal to itself. An object that is not a {@code TrainerSpec}
 * is handed to {@code super.equals}. For two {@code TrainerSpec} messages, repeated
 * fields are compared as lists; for each optional field the presence flags must match
 * and, when the field is present, the values must be equal (floats are compared through
 * {@code Float.floatToIntBits}). Unknown fields and extension fields are compared last.
 *
 * @param obj the object to compare against.
 * @return {@code true} if {@code obj} is considered equal to this message.
 */
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof sentencepiece.SentencepieceModel.TrainerSpec)) {
    return super.equals(obj);
  }
  final sentencepiece.SentencepieceModel.TrainerSpec that =
      (sentencepiece.SentencepieceModel.TrainerSpec) obj;

  if (!getInputList().equals(that.getInputList())) { return false; }

  if (hasInputFormat() != that.hasInputFormat()) { return false; }
  if (hasInputFormat() && !getInputFormat().equals(that.getInputFormat())) { return false; }

  if (hasModelPrefix() != that.hasModelPrefix()) { return false; }
  if (hasModelPrefix() && !getModelPrefix().equals(that.getModelPrefix())) { return false; }

  if (hasModelType() != that.hasModelType()) { return false; }
  if (hasModelType() && modelType_ != that.modelType_) { return false; }

  if (hasVocabSize() != that.hasVocabSize()) { return false; }
  if (hasVocabSize() && getVocabSize() != that.getVocabSize()) { return false; }

  if (!getAcceptLanguageList().equals(that.getAcceptLanguageList())) { return false; }

  if (hasSelfTestSampleSize() != that.hasSelfTestSampleSize()) { return false; }
  if (hasSelfTestSampleSize()
      && getSelfTestSampleSize() != that.getSelfTestSampleSize()) { return false; }

  if (hasCharacterCoverage() != that.hasCharacterCoverage()) { return false; }
  if (hasCharacterCoverage()
      && java.lang.Float.floatToIntBits(getCharacterCoverage())
          != java.lang.Float.floatToIntBits(that.getCharacterCoverage())) { return false; }

  if (hasInputSentenceSize() != that.hasInputSentenceSize()) { return false; }
  if (hasInputSentenceSize()
      && getInputSentenceSize() != that.getInputSentenceSize()) { return false; }

  if (hasShuffleInputSentence() != that.hasShuffleInputSentence()) { return false; }
  if (hasShuffleInputSentence()
      && getShuffleInputSentence() != that.getShuffleInputSentence()) { return false; }

  if (hasMiningSentenceSize() != that.hasMiningSentenceSize()) { return false; }
  if (hasMiningSentenceSize()
      && getMiningSentenceSize() != that.getMiningSentenceSize()) { return false; }

  if (hasTrainingSentenceSize() != that.hasTrainingSentenceSize()) { return false; }
  if (hasTrainingSentenceSize()
      && getTrainingSentenceSize() != that.getTrainingSentenceSize()) { return false; }

  if (hasSeedSentencepieceSize() != that.hasSeedSentencepieceSize()) { return false; }
  if (hasSeedSentencepieceSize()
      && getSeedSentencepieceSize() != that.getSeedSentencepieceSize()) { return false; }

  if (hasShrinkingFactor() != that.hasShrinkingFactor()) { return false; }
  if (hasShrinkingFactor()
      && java.lang.Float.floatToIntBits(getShrinkingFactor())
          != java.lang.Float.floatToIntBits(that.getShrinkingFactor())) { return false; }

  if (hasMaxSentenceLength() != that.hasMaxSentenceLength()) { return false; }
  if (hasMaxSentenceLength()
      && getMaxSentenceLength() != that.getMaxSentenceLength()) { return false; }

  if (hasNumThreads() != that.hasNumThreads()) { return false; }
  if (hasNumThreads() && getNumThreads() != that.getNumThreads()) { return false; }

  if (hasNumSubIterations() != that.hasNumSubIterations()) { return false; }
  if (hasNumSubIterations()
      && getNumSubIterations() != that.getNumSubIterations()) { return false; }

  if (hasMaxSentencepieceLength() != that.hasMaxSentencepieceLength()) { return false; }
  if (hasMaxSentencepieceLength()
      && getMaxSentencepieceLength() != that.getMaxSentencepieceLength()) { return false; }

  if (hasSplitByUnicodeScript() != that.hasSplitByUnicodeScript()) { return false; }
  if (hasSplitByUnicodeScript()
      && getSplitByUnicodeScript() != that.getSplitByUnicodeScript()) { return false; }

  if (hasSplitByNumber() != that.hasSplitByNumber()) { return false; }
  if (hasSplitByNumber() && getSplitByNumber() != that.getSplitByNumber()) { return false; }

  if (hasSplitByWhitespace() != that.hasSplitByWhitespace()) { return false; }
  if (hasSplitByWhitespace()
      && getSplitByWhitespace() != that.getSplitByWhitespace()) { return false; }

  if (hasTreatWhitespaceAsSuffix() != that.hasTreatWhitespaceAsSuffix()) { return false; }
  if (hasTreatWhitespaceAsSuffix()
      && getTreatWhitespaceAsSuffix() != that.getTreatWhitespaceAsSuffix()) { return false; }

  if (hasAllowWhitespaceOnlyPieces() != that.hasAllowWhitespaceOnlyPieces()) { return false; }
  if (hasAllowWhitespaceOnlyPieces()
      && getAllowWhitespaceOnlyPieces() != that.getAllowWhitespaceOnlyPieces()) { return false; }

  if (hasSplitDigits() != that.hasSplitDigits()) { return false; }
  if (hasSplitDigits() && getSplitDigits() != that.getSplitDigits()) { return false; }

  if (!getControlSymbolsList().equals(that.getControlSymbolsList())) { return false; }
  if (!getUserDefinedSymbolsList().equals(that.getUserDefinedSymbolsList())) { return false; }

  if (hasRequiredChars() != that.hasRequiredChars()) { return false; }
  if (hasRequiredChars()
      && !getRequiredChars().equals(that.getRequiredChars())) { return false; }

  if (hasByteFallback() != that.hasByteFallback()) { return false; }
  if (hasByteFallback() && getByteFallback() != that.getByteFallback()) { return false; }

  if (hasVocabularyOutputPieceScore() != that.hasVocabularyOutputPieceScore()) { return false; }
  if (hasVocabularyOutputPieceScore()
      && getVocabularyOutputPieceScore() != that.getVocabularyOutputPieceScore()) { return false; }

  if (hasHardVocabLimit() != that.hasHardVocabLimit()) { return false; }
  if (hasHardVocabLimit() && getHardVocabLimit() != that.getHardVocabLimit()) { return false; }

  if (hasUseAllVocab() != that.hasUseAllVocab()) { return false; }
  if (hasUseAllVocab() && getUseAllVocab() != that.getUseAllVocab()) { return false; }

  if (hasUnkId() != that.hasUnkId()) { return false; }
  if (hasUnkId() && getUnkId() != that.getUnkId()) { return false; }

  if (hasBosId() != that.hasBosId()) { return false; }
  if (hasBosId() && getBosId() != that.getBosId()) { return false; }

  if (hasEosId() != that.hasEosId()) { return false; }
  if (hasEosId() && getEosId() != that.getEosId()) { return false; }

  if (hasPadId() != that.hasPadId()) { return false; }
  if (hasPadId() && getPadId() != that.getPadId()) { return false; }

  if (hasUnkPiece() != that.hasUnkPiece()) { return false; }
  if (hasUnkPiece() && !getUnkPiece().equals(that.getUnkPiece())) { return false; }

  if (hasBosPiece() != that.hasBosPiece()) { return false; }
  if (hasBosPiece() && !getBosPiece().equals(that.getBosPiece())) { return false; }

  if (hasEosPiece() != that.hasEosPiece()) { return false; }
  if (hasEosPiece() && !getEosPiece().equals(that.getEosPiece())) { return false; }

  if (hasPadPiece() != that.hasPadPiece()) { return false; }
  if (hasPadPiece() && !getPadPiece().equals(that.getPadPiece())) { return false; }

  if (hasUnkSurface() != that.hasUnkSurface()) { return false; }
  if (hasUnkSurface() && !getUnkSurface().equals(that.getUnkSurface())) { return false; }

  if (hasTrainExtremelyLargeCorpus() != that.hasTrainExtremelyLargeCorpus()) { return false; }
  if (hasTrainExtremelyLargeCorpus()
      && getTrainExtremelyLargeCorpus() != that.getTrainExtremelyLargeCorpus()) { return false; }

  if (!getUnknownFields().equals(that.getUnknownFields())) { return false; }
  return getExtensionFields().equals(that.getExtensionFields());
}
/**
 * Returns the hash code of this message, computing it on first use.
 *
 * <p>A non-zero {@code memoizedHashCode} is returned as is. Otherwise the hash
 * is built from the descriptor, every non-empty repeated field, every singular
 * field that is present, the extension fields and the unknown fields, and the
 * result is stored in {@code memoizedHashCode} before being returned.
 */
@java.lang.Override
public int hashCode() {
  int result = memoizedHashCode;
  if (result != 0) {
    return result;
  }
  result = 41;
  result = 19 * result + getDescriptor().hashCode();
  if (getInputCount() > 0) {
    result = 37 * result + INPUT_FIELD_NUMBER;
    result = 53 * result + getInputList().hashCode();
  }
  if (hasInputFormat()) {
    result = 37 * result + INPUT_FORMAT_FIELD_NUMBER;
    result = 53 * result + getInputFormat().hashCode();
  }
  if (hasModelPrefix()) {
    result = 37 * result + MODEL_PREFIX_FIELD_NUMBER;
    result = 53 * result + getModelPrefix().hashCode();
  }
  if (hasModelType()) {
    result = 37 * result + MODEL_TYPE_FIELD_NUMBER;
    // The enum contributes its raw stored number.
    result = 53 * result + modelType_;
  }
  if (hasVocabSize()) {
    result = 37 * result + VOCAB_SIZE_FIELD_NUMBER;
    result = 53 * result + getVocabSize();
  }
  if (getAcceptLanguageCount() > 0) {
    result = 37 * result + ACCEPT_LANGUAGE_FIELD_NUMBER;
    result = 53 * result + getAcceptLanguageList().hashCode();
  }
  if (hasSelfTestSampleSize()) {
    result = 37 * result + SELF_TEST_SAMPLE_SIZE_FIELD_NUMBER;
    result = 53 * result + getSelfTestSampleSize();
  }
  if (hasCharacterCoverage()) {
    result = 37 * result + CHARACTER_COVERAGE_FIELD_NUMBER;
    result = 53 * result + java.lang.Float.floatToIntBits(getCharacterCoverage());
  }
  if (hasInputSentenceSize()) {
    result = 37 * result + INPUT_SENTENCE_SIZE_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashLong(getInputSentenceSize());
  }
  if (hasShuffleInputSentence()) {
    result = 37 * result + SHUFFLE_INPUT_SENTENCE_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getShuffleInputSentence());
  }
  if (hasMiningSentenceSize()) {
    result = 37 * result + MINING_SENTENCE_SIZE_FIELD_NUMBER;
    result = 53 * result + getMiningSentenceSize();
  }
  if (hasTrainingSentenceSize()) {
    result = 37 * result + TRAINING_SENTENCE_SIZE_FIELD_NUMBER;
    result = 53 * result + getTrainingSentenceSize();
  }
  if (hasSeedSentencepieceSize()) {
    result = 37 * result + SEED_SENTENCEPIECE_SIZE_FIELD_NUMBER;
    result = 53 * result + getSeedSentencepieceSize();
  }
  if (hasShrinkingFactor()) {
    result = 37 * result + SHRINKING_FACTOR_FIELD_NUMBER;
    result = 53 * result + java.lang.Float.floatToIntBits(getShrinkingFactor());
  }
  if (hasMaxSentenceLength()) {
    result = 37 * result + MAX_SENTENCE_LENGTH_FIELD_NUMBER;
    result = 53 * result + getMaxSentenceLength();
  }
  if (hasNumThreads()) {
    result = 37 * result + NUM_THREADS_FIELD_NUMBER;
    result = 53 * result + getNumThreads();
  }
  if (hasNumSubIterations()) {
    result = 37 * result + NUM_SUB_ITERATIONS_FIELD_NUMBER;
    result = 53 * result + getNumSubIterations();
  }
  if (hasMaxSentencepieceLength()) {
    result = 37 * result + MAX_SENTENCEPIECE_LENGTH_FIELD_NUMBER;
    result = 53 * result + getMaxSentencepieceLength();
  }
  if (hasSplitByUnicodeScript()) {
    result = 37 * result + SPLIT_BY_UNICODE_SCRIPT_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getSplitByUnicodeScript());
  }
  if (hasSplitByNumber()) {
    result = 37 * result + SPLIT_BY_NUMBER_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getSplitByNumber());
  }
  if (hasSplitByWhitespace()) {
    result = 37 * result + SPLIT_BY_WHITESPACE_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getSplitByWhitespace());
  }
  if (hasTreatWhitespaceAsSuffix()) {
    result = 37 * result + TREAT_WHITESPACE_AS_SUFFIX_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getTreatWhitespaceAsSuffix());
  }
  if (hasAllowWhitespaceOnlyPieces()) {
    result = 37 * result + ALLOW_WHITESPACE_ONLY_PIECES_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getAllowWhitespaceOnlyPieces());
  }
  if (hasSplitDigits()) {
    result = 37 * result + SPLIT_DIGITS_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getSplitDigits());
  }
  if (getControlSymbolsCount() > 0) {
    result = 37 * result + CONTROL_SYMBOLS_FIELD_NUMBER;
    result = 53 * result + getControlSymbolsList().hashCode();
  }
  if (getUserDefinedSymbolsCount() > 0) {
    result = 37 * result + USER_DEFINED_SYMBOLS_FIELD_NUMBER;
    result = 53 * result + getUserDefinedSymbolsList().hashCode();
  }
  if (hasRequiredChars()) {
    result = 37 * result + REQUIRED_CHARS_FIELD_NUMBER;
    result = 53 * result + getRequiredChars().hashCode();
  }
  if (hasByteFallback()) {
    result = 37 * result + BYTE_FALLBACK_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getByteFallback());
  }
  if (hasVocabularyOutputPieceScore()) {
    result = 37 * result + VOCABULARY_OUTPUT_PIECE_SCORE_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getVocabularyOutputPieceScore());
  }
  if (hasHardVocabLimit()) {
    result = 37 * result + HARD_VOCAB_LIMIT_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getHardVocabLimit());
  }
  if (hasUseAllVocab()) {
    result = 37 * result + USE_ALL_VOCAB_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getUseAllVocab());
  }
  if (hasUnkId()) {
    result = 37 * result + UNK_ID_FIELD_NUMBER;
    result = 53 * result + getUnkId();
  }
  if (hasBosId()) {
    result = 37 * result + BOS_ID_FIELD_NUMBER;
    result = 53 * result + getBosId();
  }
  if (hasEosId()) {
    result = 37 * result + EOS_ID_FIELD_NUMBER;
    result = 53 * result + getEosId();
  }
  if (hasPadId()) {
    result = 37 * result + PAD_ID_FIELD_NUMBER;
    result = 53 * result + getPadId();
  }
  if (hasUnkPiece()) {
    result = 37 * result + UNK_PIECE_FIELD_NUMBER;
    result = 53 * result + getUnkPiece().hashCode();
  }
  if (hasBosPiece()) {
    result = 37 * result + BOS_PIECE_FIELD_NUMBER;
    result = 53 * result + getBosPiece().hashCode();
  }
  if (hasEosPiece()) {
    result = 37 * result + EOS_PIECE_FIELD_NUMBER;
    result = 53 * result + getEosPiece().hashCode();
  }
  if (hasPadPiece()) {
    result = 37 * result + PAD_PIECE_FIELD_NUMBER;
    result = 53 * result + getPadPiece().hashCode();
  }
  if (hasUnkSurface()) {
    result = 37 * result + UNK_SURFACE_FIELD_NUMBER;
    result = 53 * result + getUnkSurface().hashCode();
  }
  if (hasTrainExtremelyLargeCorpus()) {
    result = 37 * result + TRAIN_EXTREMELY_LARGE_CORPUS_FIELD_NUMBER;
    result = 53 * result + com.google.protobuf.Internal.hashBoolean(getTrainExtremelyLargeCorpus());
  }
  result = hashFields(result, getExtensionFields());
  result = 29 * result + getUnknownFields().hashCode();
  memoizedHashCode = result;
  return result;
}
/**
 * Parses a {@code TrainerSpec} from a byte buffer by delegating to {@code PARSER}.
 *
 * @param data buffer holding the serialized message
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if the parser rejects {@code data}
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(java.nio.ByteBuffer data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed = PARSER.parseFrom(data);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from a byte buffer by delegating to {@code PARSER},
 * passing along the given extension registry.
 *
 * @param data buffer holding the serialized message
 * @param extensionRegistry registry handed to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if the parser rejects {@code data}
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
    java.nio.ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      PARSER.parseFrom(data, extensionRegistry);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from a {@code ByteString} by delegating to {@code PARSER}.
 *
 * @param data serialized message bytes
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if the parser rejects {@code data}
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
    com.google.protobuf.ByteString data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed = PARSER.parseFrom(data);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from a {@code ByteString} by delegating to {@code PARSER},
 * passing along the given extension registry.
 *
 * @param data serialized message bytes
 * @param extensionRegistry registry handed to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if the parser rejects {@code data}
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
    com.google.protobuf.ByteString data,
    com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      PARSER.parseFrom(data, extensionRegistry);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from a byte array by delegating to {@code PARSER}.
 *
 * @param data serialized message bytes
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if the parser rejects {@code data}
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(byte[] data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed = PARSER.parseFrom(data);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from a byte array by delegating to {@code PARSER},
 * passing along the given extension registry.
 *
 * @param data serialized message bytes
 * @param extensionRegistry registry handed to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if the parser rejects {@code data}
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
    byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      PARSER.parseFrom(data, extensionRegistry);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from an input stream via
 * {@code GeneratedMessageV3.parseWithIOException}.
 *
 * @param input stream to read the serialized message from
 * @return the parsed message
 * @throws java.io.IOException propagated from the delegate
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(java.io.InputStream input)
    throws java.io.IOException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from an input stream via
 * {@code GeneratedMessageV3.parseWithIOException}, passing along the given
 * extension registry.
 *
 * @param input stream to read the serialized message from
 * @param extensionRegistry registry handed to the delegate
 * @return the parsed message
 * @throws java.io.IOException propagated from the delegate
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
    java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws java.io.IOException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      com.google.protobuf.GeneratedMessageV3.parseWithIOException(
          PARSER, input, extensionRegistry);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from an input stream via
 * {@code GeneratedMessageV3.parseDelimitedWithIOException}.
 *
 * @param input stream to read from
 * @return the value returned by the delegate
 * @throws java.io.IOException propagated from the delegate
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseDelimitedFrom(
    java.io.InputStream input) throws java.io.IOException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(PARSER, input);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from an input stream via
 * {@code GeneratedMessageV3.parseDelimitedWithIOException}, passing along the
 * given extension registry.
 *
 * @param input stream to read from
 * @param extensionRegistry registry handed to the delegate
 * @return the value returned by the delegate
 * @throws java.io.IOException propagated from the delegate
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseDelimitedFrom(
    java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws java.io.IOException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(
          PARSER, input, extensionRegistry);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from a {@code CodedInputStream} via
 * {@code GeneratedMessageV3.parseWithIOException}.
 *
 * @param input coded stream to read the serialized message from
 * @return the parsed message
 * @throws java.io.IOException propagated from the delegate
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
    com.google.protobuf.CodedInputStream input) throws java.io.IOException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
  return parsed;
}
/**
 * Parses a {@code TrainerSpec} from a {@code CodedInputStream} via
 * {@code GeneratedMessageV3.parseWithIOException}, passing along the given
 * extension registry.
 *
 * @param input coded stream to read the serialized message from
 * @param extensionRegistry registry handed to the delegate
 * @return the parsed message
 * @throws java.io.IOException propagated from the delegate
 */
public static sentencepiece.SentencepieceModel.TrainerSpec parseFrom(
    com.google.protobuf.CodedInputStream input,
    com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws java.io.IOException {
  final sentencepiece.SentencepieceModel.TrainerSpec parsed =
      com.google.protobuf.GeneratedMessageV3.parseWithIOException(
          PARSER, input, extensionRegistry);
  return parsed;
}
/**
 * Returns a new builder for this message type; same result as {@link #newBuilder()}.
 */
@java.lang.Override
public Builder newBuilderForType() {
  final Builder fresh = newBuilder();
  return fresh;
}
/**
 * Returns a builder obtained from {@code DEFAULT_INSTANCE.toBuilder()}.
 */
public static Builder newBuilder() {
  final Builder fresh = DEFAULT_INSTANCE.toBuilder();
  return fresh;
}
/**
 * Returns a builder obtained from {@code DEFAULT_INSTANCE.toBuilder()} with
 * {@code prototype} merged into it.
 *
 * @param prototype message whose contents are merged into the new builder
 * @return the value returned by {@code mergeFrom(prototype)}
 */
public static Builder newBuilder(sentencepiece.SentencepieceModel.TrainerSpec prototype) {
  final Builder fresh = DEFAULT_INSTANCE.toBuilder();
  return fresh.mergeFrom(prototype);
}
/**
 * Returns a builder for this message: an empty one for {@code DEFAULT_INSTANCE},
 * otherwise a new builder with this message merged in.
 */
@java.lang.Override
public Builder toBuilder() {
  if (this == DEFAULT_INSTANCE) {
    // Nothing to copy from the default instance.
    return new Builder();
  }
  return new Builder().mergeFrom(this);
}
/**
 * Creates a new builder attached to the given parent.
 *
 * @param parent parent passed to the {@code Builder} constructor
 * @return the newly constructed builder
 */
@java.lang.Override
protected Builder newBuilderForType(
    com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
  return new Builder(parent);
}
/**
 * <pre>
 * TrainerSpec encodes various parameters for SentencePiece training.
 * </pre>
 *
 * Protobuf type {@code sentencepiece.TrainerSpec}
 */
public static final class Builder extends
com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<
sentencepiece.SentencepieceModel.TrainerSpec, Builder> implements
// @@protoc_insertion_point(builder_implements:sentencepiece.TrainerSpec)
sentencepiece.SentencepieceModel.TrainerSpecOrBuilder {
/**
 * Returns the descriptor held in
 * {@code SentencepieceModel.internal_static_sentencepiece_TrainerSpec_descriptor}.
 */
public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
  final com.google.protobuf.Descriptors.Descriptor descriptor =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_descriptor;
  return descriptor;
}
/**
 * Returns the {@code TrainerSpec} field accessor table after calling
 * {@code ensureFieldAccessorsInitialized} on it with the message and builder classes.
 */
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
    internalGetFieldAccessorTable() {
  final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable table =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_fieldAccessorTable;
  return table.ensureFieldAccessorsInitialized(
      sentencepiece.SentencepieceModel.TrainerSpec.class,
      sentencepiece.SentencepieceModel.TrainerSpec.Builder.class);
}
/**
 * Creates an empty builder. Private: obtain instances through
 * {@code sentencepiece.SentencepieceModel.TrainerSpec.newBuilder()}.
 */
private Builder() {
  super();
}
/**
 * Creates an empty builder attached to {@code parent}.
 *
 * @param parent parent forwarded to the superclass constructor
 */
private Builder(com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
  super(parent);
}
/**
 * Resets this builder: clears the superclass state, zeroes both presence
 * bit fields and assigns every field its default value.
 *
 * <p>The special-piece defaults are the angle-bracketed tokens declared for
 * {@code unk_piece}, {@code bos_piece}, {@code eos_piece} and {@code pad_piece}
 * in {@code sentencepiece_model.proto}: {@code "<unk>"}, {@code "<s>"},
 * {@code "</s>"} and {@code "<pad>"}. They must not be empty strings, otherwise
 * a cleared builder would report different defaults than the schema.
 *
 * @return this builder
 */
@java.lang.Override
public Builder clear() {
  super.clear();
  bitField0_ = 0;
  bitField1_ = 0;
  input_ =
      com.google.protobuf.LazyStringArrayList.emptyList();
  inputFormat_ = "";
  modelPrefix_ = "";
  modelType_ = 1;
  vocabSize_ = 8000;
  acceptLanguage_ =
      com.google.protobuf.LazyStringArrayList.emptyList();
  selfTestSampleSize_ = 0;
  characterCoverage_ = 0.9995F;
  inputSentenceSize_ = 0L;
  shuffleInputSentence_ = true;
  miningSentenceSize_ = 0;
  trainingSentenceSize_ = 0;
  seedSentencepieceSize_ = 1000000;
  shrinkingFactor_ = 0.75F;
  maxSentenceLength_ = 4192;
  numThreads_ = 16;
  numSubIterations_ = 2;
  maxSentencepieceLength_ = 16;
  splitByUnicodeScript_ = true;
  splitByNumber_ = true;
  splitByWhitespace_ = true;
  treatWhitespaceAsSuffix_ = false;
  allowWhitespaceOnlyPieces_ = false;
  splitDigits_ = false;
  controlSymbols_ =
      com.google.protobuf.LazyStringArrayList.emptyList();
  userDefinedSymbols_ =
      com.google.protobuf.LazyStringArrayList.emptyList();
  requiredChars_ = "";
  byteFallback_ = false;
  vocabularyOutputPieceScore_ = true;
  hardVocabLimit_ = true;
  useAllVocab_ = false;
  unkId_ = 0;
  bosId_ = 1;
  eosId_ = 2;
  padId_ = -1;
  // Schema defaults for the special pieces (see the method Javadoc).
  unkPiece_ = "<unk>";
  bosPiece_ = "<s>";
  eosPiece_ = "</s>";
  padPiece_ = "<pad>";
  // Octal escapes carry the raw bytes of the default; stringDefaultValue decodes them.
  unkSurface_ = com.google.protobuf.Internal.stringDefaultValue(" \342\201\207 ");
  trainExtremelyLargeCorpus_ = false;
  return this;
}
/**
 * Returns the descriptor held in
 * {@code SentencepieceModel.internal_static_sentencepiece_TrainerSpec_descriptor}.
 */
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() {
  final com.google.protobuf.Descriptors.Descriptor descriptor =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_TrainerSpec_descriptor;
  return descriptor;
}
/**
 * Returns {@code TrainerSpec.getDefaultInstance()}.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpec getDefaultInstanceForType() {
  final sentencepiece.SentencepieceModel.TrainerSpec defaultInstance =
      sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance();
  return defaultInstance;
}
/**
 * Builds the message and verifies that it is initialized.
 *
 * @return the message produced by {@link #buildPartial()}
 * @throws RuntimeException the exception created by
 *     {@code newUninitializedMessageException} when the message reports it is
 *     not initialized
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpec build() {
  final sentencepiece.SentencepieceModel.TrainerSpec message = buildPartial();
  if (message.isInitialized()) {
    return message;
  }
  throw newUninitializedMessageException(message);
}
/**
 * Builds the message without checking initialization.
 *
 * <p>Each {@code buildPartialN} helper is only invoked when the matching
 * builder bit field has at least one bit set; {@code onBuilt()} is called
 * before returning.
 *
 * @return the newly constructed message
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpec buildPartial() {
  final sentencepiece.SentencepieceModel.TrainerSpec message =
      new sentencepiece.SentencepieceModel.TrainerSpec(this);
  if (bitField0_ != 0) {
    buildPartial0(message);
  }
  if (bitField1_ != 0) {
    buildPartial1(message);
  }
  onBuilt();
  return message;
}
/**
 * Copies into {@code result} every field tracked by the builder's
 * {@code bitField0_}.
 *
 * <p>Repeated string lists are made immutable and handed over without setting
 * a bit on the message. Each singular field that is set is copied and its
 * bit is OR-ed into {@code result.bitField0_}. Builder and message bit
 * positions differ, so every mask pair below is spelled out explicitly.
 *
 * @param result message being populated
 */
private void buildPartial0(sentencepiece.SentencepieceModel.TrainerSpec result) {
  final int builderBits = bitField0_;
  int messageBits = 0;
  if ((builderBits & 0x00000001) != 0) {
    input_.makeImmutable();
    result.input_ = input_;
  }
  if ((builderBits & 0x00000002) != 0) {
    result.inputFormat_ = inputFormat_;
    messageBits |= 0x00000001;
  }
  if ((builderBits & 0x00000004) != 0) {
    result.modelPrefix_ = modelPrefix_;
    messageBits |= 0x00000002;
  }
  if ((builderBits & 0x00000008) != 0) {
    result.modelType_ = modelType_;
    messageBits |= 0x00000004;
  }
  if ((builderBits & 0x00000010) != 0) {
    result.vocabSize_ = vocabSize_;
    messageBits |= 0x00000008;
  }
  if ((builderBits & 0x00000020) != 0) {
    acceptLanguage_.makeImmutable();
    result.acceptLanguage_ = acceptLanguage_;
  }
  if ((builderBits & 0x00000040) != 0) {
    result.selfTestSampleSize_ = selfTestSampleSize_;
    messageBits |= 0x00000010;
  }
  if ((builderBits & 0x00000080) != 0) {
    result.characterCoverage_ = characterCoverage_;
    messageBits |= 0x00000020;
  }
  if ((builderBits & 0x00000100) != 0) {
    result.inputSentenceSize_ = inputSentenceSize_;
    messageBits |= 0x00000040;
  }
  if ((builderBits & 0x00000200) != 0) {
    result.shuffleInputSentence_ = shuffleInputSentence_;
    messageBits |= 0x00000080;
  }
  if ((builderBits & 0x00000400) != 0) {
    result.miningSentenceSize_ = miningSentenceSize_;
    messageBits |= 0x00000100;
  }
  if ((builderBits & 0x00000800) != 0) {
    result.trainingSentenceSize_ = trainingSentenceSize_;
    messageBits |= 0x00000200;
  }
  if ((builderBits & 0x00001000) != 0) {
    result.seedSentencepieceSize_ = seedSentencepieceSize_;
    messageBits |= 0x00000400;
  }
  if ((builderBits & 0x00002000) != 0) {
    result.shrinkingFactor_ = shrinkingFactor_;
    messageBits |= 0x00000800;
  }
  if ((builderBits & 0x00004000) != 0) {
    result.maxSentenceLength_ = maxSentenceLength_;
    messageBits |= 0x00001000;
  }
  if ((builderBits & 0x00008000) != 0) {
    result.numThreads_ = numThreads_;
    messageBits |= 0x00002000;
  }
  if ((builderBits & 0x00010000) != 0) {
    result.numSubIterations_ = numSubIterations_;
    messageBits |= 0x00004000;
  }
  if ((builderBits & 0x00020000) != 0) {
    result.maxSentencepieceLength_ = maxSentencepieceLength_;
    messageBits |= 0x00008000;
  }
  if ((builderBits & 0x00040000) != 0) {
    result.splitByUnicodeScript_ = splitByUnicodeScript_;
    messageBits |= 0x00010000;
  }
  if ((builderBits & 0x00080000) != 0) {
    result.splitByNumber_ = splitByNumber_;
    messageBits |= 0x00020000;
  }
  if ((builderBits & 0x00100000) != 0) {
    result.splitByWhitespace_ = splitByWhitespace_;
    messageBits |= 0x00040000;
  }
  if ((builderBits & 0x00200000) != 0) {
    result.treatWhitespaceAsSuffix_ = treatWhitespaceAsSuffix_;
    messageBits |= 0x00080000;
  }
  if ((builderBits & 0x00400000) != 0) {
    result.allowWhitespaceOnlyPieces_ = allowWhitespaceOnlyPieces_;
    messageBits |= 0x00100000;
  }
  if ((builderBits & 0x00800000) != 0) {
    result.splitDigits_ = splitDigits_;
    messageBits |= 0x00200000;
  }
  if ((builderBits & 0x01000000) != 0) {
    controlSymbols_.makeImmutable();
    result.controlSymbols_ = controlSymbols_;
  }
  if ((builderBits & 0x02000000) != 0) {
    userDefinedSymbols_.makeImmutable();
    result.userDefinedSymbols_ = userDefinedSymbols_;
  }
  if ((builderBits & 0x04000000) != 0) {
    result.requiredChars_ = requiredChars_;
    messageBits |= 0x00400000;
  }
  if ((builderBits & 0x08000000) != 0) {
    result.byteFallback_ = byteFallback_;
    messageBits |= 0x00800000;
  }
  if ((builderBits & 0x10000000) != 0) {
    result.vocabularyOutputPieceScore_ = vocabularyOutputPieceScore_;
    messageBits |= 0x01000000;
  }
  if ((builderBits & 0x20000000) != 0) {
    result.hardVocabLimit_ = hardVocabLimit_;
    messageBits |= 0x02000000;
  }
  if ((builderBits & 0x40000000) != 0) {
    result.useAllVocab_ = useAllVocab_;
    messageBits |= 0x04000000;
  }
  if ((builderBits & 0x80000000) != 0) {
    result.unkId_ = unkId_;
    messageBits |= 0x08000000;
  }
  result.bitField0_ |= messageBits;
}
/**
 * Copies into {@code result} every field tracked by the builder's
 * {@code bitField1_}.
 *
 * <p>The first four builder bits map onto the top four bits of the message's
 * {@code bitField0_}; the remaining ones map onto the message's
 * {@code bitField1_}.
 *
 * @param result message being populated
 */
private void buildPartial1(sentencepiece.SentencepieceModel.TrainerSpec result) {
  final int builderBits = bitField1_;
  int messageBits0 = 0;
  int messageBits1 = 0;
  if ((builderBits & 0x00000001) != 0) {
    result.bosId_ = bosId_;
    messageBits0 |= 0x10000000;
  }
  if ((builderBits & 0x00000002) != 0) {
    result.eosId_ = eosId_;
    messageBits0 |= 0x20000000;
  }
  if ((builderBits & 0x00000004) != 0) {
    result.padId_ = padId_;
    messageBits0 |= 0x40000000;
  }
  if ((builderBits & 0x00000008) != 0) {
    result.unkPiece_ = unkPiece_;
    messageBits0 |= 0x80000000;
  }
  if ((builderBits & 0x00000010) != 0) {
    result.bosPiece_ = bosPiece_;
    messageBits1 |= 0x00000001;
  }
  if ((builderBits & 0x00000020) != 0) {
    result.eosPiece_ = eosPiece_;
    messageBits1 |= 0x00000002;
  }
  if ((builderBits & 0x00000040) != 0) {
    result.padPiece_ = padPiece_;
    messageBits1 |= 0x00000004;
  }
  if ((builderBits & 0x00000080) != 0) {
    result.unkSurface_ = unkSurface_;
    messageBits1 |= 0x00000008;
  }
  if ((builderBits & 0x00000100) != 0) {
    result.trainExtremelyLargeCorpus_ = trainExtremelyLargeCorpus_;
    messageBits1 |= 0x00000010;
  }
  result.bitField0_ |= messageBits0;
  result.bitField1_ |= messageBits1;
}
/**
 * Returns the copy produced by the superclass {@code clone()}, typed as {@code Builder}.
 */
@java.lang.Override
public Builder clone() {
  final Builder copy = super.clone();
  return copy;
}
/**
 * Forwards to the superclass {@code setField}, narrowing the return type to {@code Builder}.
 *
 * @param field descriptor of the field to set
 * @param value value to assign
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder setField(
    com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) {
  final Builder self = super.setField(field, value);
  return self;
}
/**
 * Forwards to the superclass {@code clearField}, narrowing the return type to {@code Builder}.
 *
 * @param field descriptor of the field to clear
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field) {
  final Builder self = super.clearField(field);
  return self;
}
/**
 * Forwards to the superclass {@code clearOneof}, narrowing the return type to {@code Builder}.
 *
 * @param oneof descriptor of the oneof to clear
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof) {
  final Builder self = super.clearOneof(oneof);
  return self;
}
/**
 * Forwards to the superclass {@code setRepeatedField}, narrowing the return
 * type to {@code Builder}.
 *
 * @param field descriptor of the repeated field
 * @param index position of the element to replace
 * @param value new element value
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder setRepeatedField(
    com.google.protobuf.Descriptors.FieldDescriptor field, int index, java.lang.Object value) {
  final Builder self = super.setRepeatedField(field, index, value);
  return self;
}
/**
 * Forwards to the superclass {@code addRepeatedField}, narrowing the return
 * type to {@code Builder}.
 *
 * @param field descriptor of the repeated field
 * @param value element to append
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder addRepeatedField(
    com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) {
  final Builder self = super.addRepeatedField(field, value);
  return self;
}
/**
 * Forwards to the superclass {@code setExtension}, narrowing the return type
 * to {@code Builder}.
 *
 * @param <Type> value type of the extension
 * @param extension extension to set
 * @param value value to assign
 * @return the builder returned by the superclass
 */
@java.lang.Override
public <Type> Builder setExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.TrainerSpec, Type> extension,
    Type value) {
  return super.setExtension(extension, value);
}
/**
 * Forwards to the superclass indexed {@code setExtension}, narrowing the
 * return type to {@code Builder}.
 *
 * @param <Type> element type of the repeated extension
 * @param extension repeated extension to modify
 * @param index position of the element to replace
 * @param value new element value
 * @return the builder returned by the superclass
 */
@java.lang.Override
public <Type> Builder setExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.TrainerSpec, java.util.List<Type>> extension,
    int index, Type value) {
  return super.setExtension(extension, index, value);
}
/**
 * Forwards to the superclass {@code addExtension}, narrowing the return type
 * to {@code Builder}.
 *
 * @param <Type> element type of the repeated extension
 * @param extension repeated extension to append to
 * @param value element to append
 * @return the builder returned by the superclass
 */
@java.lang.Override
public <Type> Builder addExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.TrainerSpec, java.util.List<Type>> extension,
    Type value) {
  return super.addExtension(extension, value);
}
/**
 * Forwards to the superclass {@code clearExtension}, narrowing the return
 * type to {@code Builder}.
 *
 * @param <T> value type of the extension
 * @param extension extension to clear
 * @return the builder returned by the superclass
 */
@java.lang.Override
public <T> Builder clearExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.TrainerSpec, T> extension) {
  return super.clearExtension(extension);
}
/**
 * Merges an arbitrary message into this builder.
 *
 * <p>A {@code TrainerSpec} is routed to the typed {@code mergeFrom} overload;
 * anything else goes to the superclass implementation.
 *
 * @param other message to merge from
 * @return this builder, or the value returned by the typed overload
 */
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
  if (!(other instanceof sentencepiece.SentencepieceModel.TrainerSpec)) {
    super.mergeFrom(other);
    return this;
  }
  return mergeFrom((sentencepiece.SentencepieceModel.TrainerSpec) other);
}
/**
 * Merges the contents of {@code other} into this builder.
 *
 * Returns immediately when {@code other} is the default instance. Otherwise:
 * repeated string fields adopt {@code other}'s list when this builder's list
 * is empty and append to it when not; string fields that are present in
 * {@code other} are copied directly together with their presence bit; the
 * remaining present fields go through their setters. Extension fields and
 * unknown fields are merged last, followed by a final {@code onChanged()}.
 *
 * @param other message to merge from
 * @return this builder
 */
public Builder mergeFrom(sentencepiece.SentencepieceModel.TrainerSpec other) {
if (other == sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance()) return this;
if (!other.input_.isEmpty()) {
if (input_.isEmpty()) {
// Nothing of our own yet: take other's list by reference instead of copying.
input_ = other.input_;
bitField0_ |= 0x00000001;
} else {
ensureInputIsMutable();
input_.addAll(other.input_);
}
onChanged();
}
if (other.hasInputFormat()) {
// Copies the stored field object as is rather than going through a setter.
inputFormat_ = other.inputFormat_;
bitField0_ |= 0x00000002;
onChanged();
}
if (other.hasModelPrefix()) {
modelPrefix_ = other.modelPrefix_;
bitField0_ |= 0x00000004;
onChanged();
}
if (other.hasModelType()) {
setModelType(other.getModelType());
}
if (other.hasVocabSize()) {
setVocabSize(other.getVocabSize());
}
if (!other.acceptLanguage_.isEmpty()) {
if (acceptLanguage_.isEmpty()) {
acceptLanguage_ = other.acceptLanguage_;
bitField0_ |= 0x00000020;
} else {
ensureAcceptLanguageIsMutable();
acceptLanguage_.addAll(other.acceptLanguage_);
}
onChanged();
}
if (other.hasSelfTestSampleSize()) {
setSelfTestSampleSize(other.getSelfTestSampleSize());
}
if (other.hasCharacterCoverage()) {
setCharacterCoverage(other.getCharacterCoverage());
}
if (other.hasInputSentenceSize()) {
setInputSentenceSize(other.getInputSentenceSize());
}
if (other.hasShuffleInputSentence()) {
setShuffleInputSentence(other.getShuffleInputSentence());
}
if (other.hasMiningSentenceSize()) {
setMiningSentenceSize(other.getMiningSentenceSize());
}
if (other.hasTrainingSentenceSize()) {
setTrainingSentenceSize(other.getTrainingSentenceSize());
}
if (other.hasSeedSentencepieceSize()) {
setSeedSentencepieceSize(other.getSeedSentencepieceSize());
}
if (other.hasShrinkingFactor()) {
setShrinkingFactor(other.getShrinkingFactor());
}
if (other.hasMaxSentenceLength()) {
setMaxSentenceLength(other.getMaxSentenceLength());
}
if (other.hasNumThreads()) {
setNumThreads(other.getNumThreads());
}
if (other.hasNumSubIterations()) {
setNumSubIterations(other.getNumSubIterations());
}
if (other.hasMaxSentencepieceLength()) {
setMaxSentencepieceLength(other.getMaxSentencepieceLength());
}
if (other.hasSplitByUnicodeScript()) {
setSplitByUnicodeScript(other.getSplitByUnicodeScript());
}
if (other.hasSplitByNumber()) {
setSplitByNumber(other.getSplitByNumber());
}
if (other.hasSplitByWhitespace()) {
setSplitByWhitespace(other.getSplitByWhitespace());
}
if (other.hasTreatWhitespaceAsSuffix()) {
setTreatWhitespaceAsSuffix(other.getTreatWhitespaceAsSuffix());
}
if (other.hasAllowWhitespaceOnlyPieces()) {
setAllowWhitespaceOnlyPieces(other.getAllowWhitespaceOnlyPieces());
}
if (other.hasSplitDigits()) {
setSplitDigits(other.getSplitDigits());
}
if (!other.controlSymbols_.isEmpty()) {
if (controlSymbols_.isEmpty()) {
controlSymbols_ = other.controlSymbols_;
bitField0_ |= 0x01000000;
} else {
ensureControlSymbolsIsMutable();
controlSymbols_.addAll(other.controlSymbols_);
}
onChanged();
}
if (!other.userDefinedSymbols_.isEmpty()) {
if (userDefinedSymbols_.isEmpty()) {
userDefinedSymbols_ = other.userDefinedSymbols_;
bitField0_ |= 0x02000000;
} else {
ensureUserDefinedSymbolsIsMutable();
userDefinedSymbols_.addAll(other.userDefinedSymbols_);
}
onChanged();
}
if (other.hasRequiredChars()) {
requiredChars_ = other.requiredChars_;
bitField0_ |= 0x04000000;
onChanged();
}
if (other.hasByteFallback()) {
setByteFallback(other.getByteFallback());
}
if (other.hasVocabularyOutputPieceScore()) {
setVocabularyOutputPieceScore(other.getVocabularyOutputPieceScore());
}
if (other.hasHardVocabLimit()) {
setHardVocabLimit(other.getHardVocabLimit());
}
if (other.hasUseAllVocab()) {
setUseAllVocab(other.getUseAllVocab());
}
if (other.hasUnkId()) {
setUnkId(other.getUnkId());
}
if (other.hasBosId()) {
setBosId(other.getBosId());
}
if (other.hasEosId()) {
setEosId(other.getEosId());
}
if (other.hasPadId()) {
setPadId(other.getPadId());
}
// From here on the presence bits live in bitField1_.
if (other.hasUnkPiece()) {
unkPiece_ = other.unkPiece_;
bitField1_ |= 0x00000008;
onChanged();
}
if (other.hasBosPiece()) {
bosPiece_ = other.bosPiece_;
bitField1_ |= 0x00000010;
onChanged();
}
if (other.hasEosPiece()) {
eosPiece_ = other.eosPiece_;
bitField1_ |= 0x00000020;
onChanged();
}
if (other.hasPadPiece()) {
padPiece_ = other.padPiece_;
bitField1_ |= 0x00000040;
onChanged();
}
if (other.hasUnkSurface()) {
unkSurface_ = other.unkSurface_;
bitField1_ |= 0x00000080;
onChanged();
}
if (other.hasTrainExtremelyLargeCorpus()) {
setTrainExtremelyLargeCorpus(other.getTrainExtremelyLargeCorpus());
}
this.mergeExtensionFields(other);
this.mergeUnknownFields(other.getUnknownFields());
onChanged();
return this;
}
/**
 * Reports whether this builder is initialized, which here depends solely on
 * {@code extensionsAreInitialized()}.
 */
@java.lang.Override
public final boolean isInitialized() {
  return extensionsAreInitialized();
}
/**
 * Parses fields from {@code input} and merges them into this builder.
 *
 * Tags are read in a loop until tag 0 is returned or
 * {@code parseUnknownField} reports an end-group tag. A recognised tag stores
 * the decoded value in its field and sets that field's presence bit. Repeated
 * string tags append the raw bytes to their list. Tags without a case are
 * passed to {@code parseUnknownField} together with the extension registry.
 * {@code onChanged()} runs in the {@code finally} block, so it is called even
 * when parsing fails.
 *
 * @param input stream to read from
 * @param extensionRegistry registry passed on for unrecognised tags
 * @return this builder
 * @throws java.lang.NullPointerException if {@code extensionRegistry} is null
 * @throws java.io.IOException on read failure; an
 *     {@code InvalidProtocolBufferException} is rethrown as the result of its
 *     {@code unwrapIOException()}
 */
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
case 0:
done = true;
break;
case 10: {
com.google.protobuf.ByteString bs = input.readBytes();
ensureInputIsMutable();
input_.add(bs);
break;
} // case 10
case 18: {
modelPrefix_ = input.readBytes();
bitField0_ |= 0x00000004;
break;
} // case 18
case 24: {
int tmpRaw = input.readEnum();
sentencepiece.SentencepieceModel.TrainerSpec.ModelType tmpValue =
sentencepiece.SentencepieceModel.TrainerSpec.ModelType.forNumber(tmpRaw);
if (tmpValue == null) {
// Unrecognised enum number: keep it as an unknown varint for field 3
// and leave modelType_ and its presence bit untouched.
mergeUnknownVarintField(3, tmpRaw);
} else {
modelType_ = tmpRaw;
bitField0_ |= 0x00000008;
}
break;
} // case 24
case 32: {
vocabSize_ = input.readInt32();
bitField0_ |= 0x00000010;
break;
} // case 32
case 42: {
com.google.protobuf.ByteString bs = input.readBytes();
ensureAcceptLanguageIsMutable();
acceptLanguage_.add(bs);
break;
} // case 42
case 48: {
selfTestSampleSize_ = input.readInt32();
bitField0_ |= 0x00000040;
break;
} // case 48
case 58: {
inputFormat_ = input.readBytes();
bitField0_ |= 0x00000002;
break;
} // case 58
case 85: {
characterCoverage_ = input.readFloat();
bitField0_ |= 0x00000080;
break;
} // case 85
case 88: {
inputSentenceSize_ = input.readUInt64();
bitField0_ |= 0x00000100;
break;
} // case 88
case 96: {
miningSentenceSize_ = input.readInt32();
bitField0_ |= 0x00000400;
break;
} // case 96
case 104: {
trainingSentenceSize_ = input.readInt32();
bitField0_ |= 0x00000800;
break;
} // case 104
case 112: {
seedSentencepieceSize_ = input.readInt32();
bitField0_ |= 0x00001000;
break;
} // case 112
case 125: {
shrinkingFactor_ = input.readFloat();
bitField0_ |= 0x00002000;
break;
} // case 125
case 128: {
numThreads_ = input.readInt32();
bitField0_ |= 0x00008000;
break;
} // case 128
case 136: {
numSubIterations_ = input.readInt32();
bitField0_ |= 0x00010000;
break;
} // case 136
case 144: {
maxSentenceLength_ = input.readInt32();
bitField0_ |= 0x00004000;
break;
} // case 144
case 152: {
shuffleInputSentence_ = input.readBool();
bitField0_ |= 0x00000200;
break;
} // case 152
case 160: {
maxSentencepieceLength_ = input.readInt32();
bitField0_ |= 0x00020000;
break;
} // case 160
case 168: {
splitByUnicodeScript_ = input.readBool();
bitField0_ |= 0x00040000;
break;
} // case 168
case 176: {
splitByWhitespace_ = input.readBool();
bitField0_ |= 0x00100000;
break;
} // case 176
case 184: {
splitByNumber_ = input.readBool();
bitField0_ |= 0x00080000;
break;
} // case 184
case 192: {
treatWhitespaceAsSuffix_ = input.readBool();
bitField0_ |= 0x00200000;
break;
} // case 192
case 200: {
splitDigits_ = input.readBool();
bitField0_ |= 0x00800000;
break;
} // case 200
case 208: {
allowWhitespaceOnlyPieces_ = input.readBool();
bitField0_ |= 0x00400000;
break;
} // case 208
case 242: {
com.google.protobuf.ByteString bs = input.readBytes();
ensureControlSymbolsIsMutable();
controlSymbols_.add(bs);
break;
} // case 242
case 250: {
com.google.protobuf.ByteString bs = input.readBytes();
ensureUserDefinedSymbolsIsMutable();
userDefinedSymbols_.add(bs);
break;
} // case 250
case 256: {
vocabularyOutputPieceScore_ = input.readBool();
bitField0_ |= 0x10000000;
break;
} // case 256
case 264: {
hardVocabLimit_ = input.readBool();
bitField0_ |= 0x20000000;
break;
} // case 264
case 272: {
useAllVocab_ = input.readBool();
bitField0_ |= 0x40000000;
break;
} // case 272
case 280: {
byteFallback_ = input.readBool();
bitField0_ |= 0x08000000;
break;
} // case 280
case 290: {
requiredChars_ = input.readBytes();
bitField0_ |= 0x04000000;
break;
} // case 290
case 320: {
unkId_ = input.readInt32();
bitField0_ |= 0x80000000;
break;
} // case 320
case 328: {
bosId_ = input.readInt32();
bitField1_ |= 0x00000001;
break;
} // case 328
case 336: {
eosId_ = input.readInt32();
bitField1_ |= 0x00000002;
break;
} // case 336
case 344: {
padId_ = input.readInt32();
bitField1_ |= 0x00000004;
break;
} // case 344
case 354: {
unkSurface_ = input.readBytes();
bitField1_ |= 0x00000080;
break;
} // case 354
case 362: {
unkPiece_ = input.readBytes();
bitField1_ |= 0x00000008;
break;
} // case 362
case 370: {
bosPiece_ = input.readBytes();
bitField1_ |= 0x00000010;
break;
} // case 370
case 378: {
eosPiece_ = input.readBytes();
bitField1_ |= 0x00000020;
break;
} // case 378
case 386: {
padPiece_ = input.readBytes();
bitField1_ |= 0x00000040;
break;
} // case 386
case 392: {
trainExtremelyLargeCorpus_ = input.readBool();
bitField1_ |= 0x00000100;
break;
} // case 392
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
onChanged();
} // finally
return this;
}
// Bit sets recording which builder fields have been assigned. bitField0_ is
// used up to its top bit (0x80000000); later fields use bitField1_.
private int bitField0_;
private int bitField1_;
// Backing list of the repeated string field `input`; starts out as
// LazyStringArrayList.emptyList().
private com.google.protobuf.LazyStringArrayList input_ =
com.google.protobuf.LazyStringArrayList.emptyList();
/**
 * Makes {@code input_} safe to modify: if the current list is not modifiable
 * it is replaced by a new {@code LazyStringArrayList} built from it. The
 * {@code input} bit of {@code bitField0_} is set in either case.
 */
private void ensureInputIsMutable() {
  final boolean readOnly = !input_.isModifiable();
  if (readOnly) {
    input_ = new com.google.protobuf.LazyStringArrayList(input_);
  }
  bitField0_ |= 0x00000001;
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @return A list containing the input.
*/
public com.google.protobuf.ProtocolStringList
getInputList() {
input_.makeImmutable();
return input_;
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @return The count of input.
*/
public int getInputCount() {
return input_.size();
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param index The index of the element to return.
* @return The input at the given index.
*/
public java.lang.String getInput(int index) {
return input_.get(index);
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param index The index of the value to return.
* @return The bytes of the input at the given index.
*/
public com.google.protobuf.ByteString
getInputBytes(int index) {
return input_.getByteString(index);
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param index The index to set the value at.
* @param value The input to set.
* @return This builder for chaining.
*/
public Builder setInput(
int index, java.lang.String value) {
if (value == null) { throw new NullPointerException(); }
ensureInputIsMutable();
input_.set(index, value);
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param value The input to add.
* @return This builder for chaining.
*/
public Builder addInput(
java.lang.String value) {
if (value == null) { throw new NullPointerException(); }
ensureInputIsMutable();
input_.add(value);
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param values The input to add.
* @return This builder for chaining.
*/
public Builder addAllInput(
java.lang.Iterable values) {
ensureInputIsMutable();
com.google.protobuf.AbstractMessageLite.Builder.addAll(
values, input_);
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @return This builder for chaining.
*/
public Builder clearInput() {
input_ =
com.google.protobuf.LazyStringArrayList.emptyList();
bitField0_ = (bitField0_ & ~0x00000001);;
onChanged();
return this;
}
/**
*
*/////////////////////////////////////////////////////////////////
* General parameters
*
* Input corpus files.
* Trainer accepts the following two formats:
* A) Monolingual: plain text, one sentence per line.
* B) Bilingual: TSV, source sentence <tab> target sentence
* When bilingual data is passed, shared vocabulary model is built.
* Note that the input file must be raw corpus, not a preprocessed corpus.
* Trainer only loads the first `input_sentence_size` sentences specified
* with this parameter.
*
*
* repeated string input = 1;
* @param value The bytes of the input to add.
* @return This builder for chaining.
*/
public Builder addInputBytes(
com.google.protobuf.ByteString value) {
if (value == null) { throw new NullPointerException(); }
ensureInputIsMutable();
input_.add(value);
bitField0_ |= 0x00000001;
onChanged();
return this;
}
// Value of `optional string input_format = 7`; holds either a String or a
// ByteString (the accessors below convert between the two and cache the result).
private java.lang.Object inputFormat_ = "";
/**
 * Input corpus format: "text" is one-sentence-per-line text (default), "tsv"
 * is sentence {@code <tab>} freq.
 *
 * <p>{@code optional string input_format = 7;}
 *
 * @return Whether the inputFormat field is set.
 */
public boolean hasInputFormat() {
  final int mask = 0x00000002;
  return (bitField0_ & mask) != 0;
}
/**
 * Input corpus format: "text" is one-sentence-per-line text (default), "tsv"
 * is sentence {@code <tab>} freq.
 *
 * <p>{@code optional string input_format = 7;}
 *
 * <p>When the stored value is a {@code ByteString} it is decoded as UTF-8; the
 * decoded string is cached back into the field only if the bytes are valid
 * UTF-8.
 *
 * @return The inputFormat.
 */
public java.lang.String getInputFormat() {
  final java.lang.Object current = inputFormat_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString raw =
      (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  if (raw.isValidUtf8()) {
    inputFormat_ = decoded;
  }
  return decoded;
}
/**
 * Input corpus format: "text" is one-sentence-per-line text (default), "tsv"
 * is sentence {@code <tab>} freq.
 *
 * <p>{@code optional string input_format = 7;}
 *
 * <p>When the stored value is a {@code String} it is encoded as UTF-8 and the
 * resulting {@code ByteString} replaces it in the field.
 *
 * @return The bytes for inputFormat.
 */
public com.google.protobuf.ByteString getInputFormatBytes() {
  final java.lang.Object current = inputFormat_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  inputFormat_ = encoded;
  return encoded;
}
/**
 * Input corpus format: "text" is one-sentence-per-line text (default), "tsv"
 * is sentence {@code <tab>} freq.
 *
 * <p>{@code optional string input_format = 7;}
 *
 * @param value The inputFormat to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder setInputFormat(java.lang.String value) {
  if (null == value) {
    throw new NullPointerException();
  }
  bitField0_ = bitField0_ | 0x00000002;
  inputFormat_ = value;
  onChanged();
  return this;
}
/**
 * Restores {@code input_format} to the value held by the default instance and
 * clears its bit (0x00000002) in {@code bitField0_}.
 *
 * <p>{@code optional string input_format = 7;}
 *
 * @return This builder for chaining.
 */
public Builder clearInputFormat() {
  final java.lang.String fallback = getDefaultInstance().getInputFormat();
  inputFormat_ = fallback;
  bitField0_ &= ~0x00000002;
  onChanged();
  return this;
}
/**
 * Input corpus format: "text" is one-sentence-per-line text (default), "tsv"
 * is sentence {@code <tab>} freq.
 *
 * <p>{@code optional string input_format = 7;}
 *
 * @param value The bytes for inputFormat to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder setInputFormatBytes(com.google.protobuf.ByteString value) {
  if (null == value) {
    throw new NullPointerException();
  }
  bitField0_ = bitField0_ | 0x00000002;
  inputFormat_ = value;
  onChanged();
  return this;
}
// Value of `optional string model_prefix = 2`; holds either a String or a
// ByteString (the accessors below convert between the two and cache the result).
private java.lang.Object modelPrefix_ = "";
/**
 * Output model file prefix; {@code <model_prefix>.model} and
 * {@code <model_prefix>.vocab} are generated.
 *
 * <p>{@code optional string model_prefix = 2;}
 *
 * @return Whether the modelPrefix field is set.
 */
public boolean hasModelPrefix() {
  final int mask = 0x00000004;
  return (bitField0_ & mask) != 0;
}
/**
 * Output model file prefix; {@code <model_prefix>.model} and
 * {@code <model_prefix>.vocab} are generated.
 *
 * <p>{@code optional string model_prefix = 2;}
 *
 * <p>When the stored value is a {@code ByteString} it is decoded as UTF-8; the
 * decoded string is cached back into the field only if the bytes are valid
 * UTF-8.
 *
 * @return The modelPrefix.
 */
public java.lang.String getModelPrefix() {
  final java.lang.Object current = modelPrefix_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString raw =
      (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  if (raw.isValidUtf8()) {
    modelPrefix_ = decoded;
  }
  return decoded;
}
/**
 * Output model file prefix; {@code <model_prefix>.model} and
 * {@code <model_prefix>.vocab} are generated.
 *
 * <p>{@code optional string model_prefix = 2;}
 *
 * <p>When the stored value is a {@code String} it is encoded as UTF-8 and the
 * resulting {@code ByteString} replaces it in the field.
 *
 * @return The bytes for modelPrefix.
 */
public com.google.protobuf.ByteString getModelPrefixBytes() {
  final java.lang.Object current = modelPrefix_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  modelPrefix_ = encoded;
  return encoded;
}
/**
 * Output model file prefix; {@code <model_prefix>.model} and
 * {@code <model_prefix>.vocab} are generated.
 *
 * <p>{@code optional string model_prefix = 2;}
 *
 * @param value The modelPrefix to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder setModelPrefix(java.lang.String value) {
  if (null == value) {
    throw new NullPointerException();
  }
  bitField0_ = bitField0_ | 0x00000004;
  modelPrefix_ = value;
  onChanged();
  return this;
}
/**
 * Restores {@code model_prefix} to the value held by the default instance and
 * clears its bit (0x00000004) in {@code bitField0_}.
 *
 * <p>{@code optional string model_prefix = 2;}
 *
 * @return This builder for chaining.
 */
public Builder clearModelPrefix() {
  final java.lang.String fallback = getDefaultInstance().getModelPrefix();
  modelPrefix_ = fallback;
  bitField0_ &= ~0x00000004;
  onChanged();
  return this;
}
/**
 * Output model file prefix; {@code <model_prefix>.model} and
 * {@code <model_prefix>.vocab} are generated.
 *
 * <p>{@code optional string model_prefix = 2;}
 *
 * @param value The bytes for modelPrefix to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder setModelPrefixBytes(com.google.protobuf.ByteString value) {
  if (null == value) {
    throw new NullPointerException();
  }
  bitField0_ = bitField0_ | 0x00000004;
  modelPrefix_ = value;
  onChanged();
  return this;
}
// Numeric value of `model_type` (proto field 3); 1 is the initial value and
// the value clearModelType() restores. getModelType() maps it through
// ModelType.forNumber and falls back to UNIGRAM.
private int modelType_ = 1;
/**
 * {@code optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];}
 *
 * @return Whether the modelType field is set.
 */
@java.lang.Override
public boolean hasModelType() {
  final int mask = 0x00000008;
  return (bitField0_ & mask) != 0;
}
/**
 * {@code optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];}
 *
 * <p>Falls back to {@code UNIGRAM} when the stored number has no matching
 * enum constant.
 *
 * @return The modelType.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpec.ModelType getModelType() {
  final sentencepiece.SentencepieceModel.TrainerSpec.ModelType mapped =
      sentencepiece.SentencepieceModel.TrainerSpec.ModelType.forNumber(modelType_);
  if (mapped != null) {
    return mapped;
  }
  return sentencepiece.SentencepieceModel.TrainerSpec.ModelType.UNIGRAM;
}
/**
 * {@code optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];}
 *
 * @param value The modelType to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder setModelType(sentencepiece.SentencepieceModel.TrainerSpec.ModelType value) {
  if (null == value) {
    throw new NullPointerException();
  }
  modelType_ = value.getNumber();
  bitField0_ = bitField0_ | 0x00000008;
  onChanged();
  return this;
}
/**
 * Resets {@code model_type} to 1 and clears its bit (0x00000008) in
 * {@code bitField0_}.
 *
 * <p>{@code optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];}
 *
 * @return This builder for chaining.
 */
public Builder clearModelType() {
  modelType_ = 1;
  bitField0_ &= ~0x00000008;
  onChanged();
  return this;
}
// Value of `optional int32 vocab_size = 4 [default = 8000]`; bit 0x00000010 of
// bitField0_ records whether it has been set.
private int vocabSize_ = 8000;
/**
 * Vocabulary size. 8k is the default size.
 *
 * <p>{@code optional int32 vocab_size = 4 [default = 8000];}
 *
 * @return Whether the vocabSize field is set.
 */
@java.lang.Override
public boolean hasVocabSize() {
  final int mask = 0x00000010;
  return (bitField0_ & mask) != 0;
}
/**
 * Vocabulary size. 8k is the default size.
 *
 * <p>{@code optional int32 vocab_size = 4 [default = 8000];}
 *
 * @return The vocabSize.
 */
@java.lang.Override
public int getVocabSize() {
  return this.vocabSize_;
}
/**
 * Vocabulary size. 8k is the default size.
 *
 * <p>{@code optional int32 vocab_size = 4 [default = 8000];}
 *
 * @param value The vocabSize to set.
 * @return This builder for chaining.
 */
public Builder setVocabSize(int value) {
  bitField0_ = bitField0_ | 0x00000010;
  vocabSize_ = value;
  onChanged();
  return this;
}
/**
 * Resets {@code vocab_size} to 8000 and clears its bit (0x00000010) in
 * {@code bitField0_}.
 *
 * <p>{@code optional int32 vocab_size = 4 [default = 8000];}
 *
 * @return This builder for chaining.
 */
public Builder clearVocabSize() {
  vocabSize_ = 8000;
  bitField0_ &= ~0x00000010;
  onChanged();
  return this;
}
// Backing list for `repeated string accept_language = 5`; starts out as the
// shared empty list and is replaced by a modifiable copy in
// ensureAcceptLanguageIsMutable().
private com.google.protobuf.LazyStringArrayList acceptLanguage_ =
com.google.protobuf.LazyStringArrayList.emptyList();
// Replaces acceptLanguage_ with a modifiable copy when the current list is not
// modifiable, then flags the field (bit 0x00000020) in bitField0_.
private void ensureAcceptLanguageIsMutable() {
  final boolean readOnly = !acceptLanguage_.isModifiable();
  if (readOnly) {
    acceptLanguage_ = new com.google.protobuf.LazyStringArrayList(acceptLanguage_);
  }
  bitField0_ = bitField0_ | 0x00000020;
}
/**
 * List of the languages this model can accept. Since the model is
 * language-agnostic, this field is used as a reference.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * <p>The backing list has {@code makeImmutable()} called on it before it is
 * returned.
 *
 * @return A list containing the acceptLanguage.
 */
public com.google.protobuf.ProtocolStringList getAcceptLanguageList() {
  final com.google.protobuf.LazyStringArrayList backing = acceptLanguage_;
  backing.makeImmutable();
  return backing;
}
/**
 * List of the languages this model can accept. Since the model is
 * language-agnostic, this field is used as a reference.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * @return The count of acceptLanguage.
 */
public int getAcceptLanguageCount() {
  final int count = acceptLanguage_.size();
  return count;
}
/**
 * List of the languages this model can accept. Since the model is
 * language-agnostic, this field is used as a reference.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * @param index The index of the element to return.
 * @return The acceptLanguage at the given index.
 */
public java.lang.String getAcceptLanguage(int index) {
  final java.lang.String element = acceptLanguage_.get(index);
  return element;
}
/**
 * List of the languages this model can accept. Since the model is
 * language-agnostic, this field is used as a reference.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * @param index The index of the value to return.
 * @return The bytes of the acceptLanguage at the given index.
 */
public com.google.protobuf.ByteString getAcceptLanguageBytes(int index) {
  final com.google.protobuf.ByteString bytes = acceptLanguage_.getByteString(index);
  return bytes;
}
/**
 * List of the languages this model can accept. Since the model is
 * language-agnostic, this field is used as a reference.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * @param index The index to set the value at.
 * @param value The acceptLanguage to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder setAcceptLanguage(int index, java.lang.String value) {
  if (null == value) {
    throw new NullPointerException();
  }
  ensureAcceptLanguageIsMutable();
  acceptLanguage_.set(index, value);
  bitField0_ = bitField0_ | 0x00000020;
  onChanged();
  return this;
}
/**
 * List of the languages this model can accept. Since the model is
 * language-agnostic, this field is used as a reference.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * @param value The acceptLanguage to add.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder addAcceptLanguage(java.lang.String value) {
  if (null == value) {
    throw new NullPointerException();
  }
  ensureAcceptLanguageIsMutable();
  acceptLanguage_.add(value);
  bitField0_ = bitField0_ | 0x00000020;
  onChanged();
  return this;
}
/**
 * Appends every element of {@code values} to the list of languages this model
 * can accept. Since the model is language-agnostic, this field is used as a
 * reference.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * <p>The parameter is declared as {@code Iterable<String>} instead of a raw
 * {@code Iterable}, so the {@code addAll} call below is type-checked rather
 * than an unchecked conversion.
 *
 * @param values The acceptLanguage to add.
 * @return This builder for chaining.
 */
public Builder addAllAcceptLanguage(
    java.lang.Iterable<java.lang.String> values) {
  ensureAcceptLanguageIsMutable();
  com.google.protobuf.AbstractMessageLite.Builder.addAll(
      values, acceptLanguage_);
  bitField0_ |= 0x00000020;
  onChanged();
  return this;
}
/**
 * Resets {@code accept_language} to the shared empty list and clears its bit
 * (0x00000020) in {@code bitField0_}.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * @return This builder for chaining.
 */
public Builder clearAcceptLanguage() {
  acceptLanguage_ = com.google.protobuf.LazyStringArrayList.emptyList();
  bitField0_ &= ~0x00000020;
  onChanged();
  return this;
}
/**
 * List of the languages this model can accept. Since the model is
 * language-agnostic, this field is used as a reference.
 *
 * <p>{@code repeated string accept_language = 5;}
 *
 * @param value The bytes of the acceptLanguage to add.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder addAcceptLanguageBytes(com.google.protobuf.ByteString value) {
  if (null == value) {
    throw new NullPointerException();
  }
  ensureAcceptLanguageIsMutable();
  acceptLanguage_.add(value);
  bitField0_ = bitField0_ | 0x00000020;
  onChanged();
  return this;
}
// Value of `optional int32 self_test_sample_size = 6 [default = 0]`; bit
// 0x00000040 of bitField0_ records whether it has been set.
private int selfTestSampleSize_ ;
/**
 * Size of self-test samples, which are encoded in the model file.
 *
 * <p>{@code optional int32 self_test_sample_size = 6 [default = 0];}
 *
 * @return Whether the selfTestSampleSize field is set.
 */
@java.lang.Override
public boolean hasSelfTestSampleSize() {
  final int mask = 0x00000040;
  return (bitField0_ & mask) != 0;
}
/**
 * Size of self-test samples, which are encoded in the model file.
 *
 * <p>{@code optional int32 self_test_sample_size = 6 [default = 0];}
 *
 * @return The selfTestSampleSize.
 */
@java.lang.Override
public int getSelfTestSampleSize() {
  return this.selfTestSampleSize_;
}
/**
 * Size of self-test samples, which are encoded in the model file.
 *
 * <p>{@code optional int32 self_test_sample_size = 6 [default = 0];}
 *
 * @param value The selfTestSampleSize to set.
 * @return This builder for chaining.
 */
public Builder setSelfTestSampleSize(int value) {
  bitField0_ = bitField0_ | 0x00000040;
  selfTestSampleSize_ = value;
  onChanged();
  return this;
}
/**
 * Resets {@code self_test_sample_size} to 0 and clears its bit (0x00000040)
 * in {@code bitField0_}.
 *
 * <p>{@code optional int32 self_test_sample_size = 6 [default = 0];}
 *
 * @return This builder for chaining.
 */
public Builder clearSelfTestSampleSize() {
  selfTestSampleSize_ = 0;
  bitField0_ &= ~0x00000040;
  onChanged();
  return this;
}
// Value of `optional float character_coverage = 10 [default = 0.9995]`; bit
// 0x00000080 of bitField0_ records whether it has been set.
private float characterCoverage_ = 0.9995F;
/**
*
*/////////////////////////////////////////////////////////////////
* Training parameters.
*
* Uses characters which cover the corpus with the ratio of `chars_coverage`.
* This parameter determines the set of basic Alphabet of sentence piece.
* 1.0 - `chars_coverage` characters are treated as UNK.
* See also required_chars field.
*
*
* optional float character_coverage = 10 [default = 0.9995];
* @return Whether the characterCoverage field is set.
*/
@java.lang.Override
public boolean hasCharacterCoverage() {
return ((bitField0_ & 0x00000080) != 0);
}
/**
*
*/////////////////////////////////////////////////////////////////
* Training parameters.
*
* Uses characters which cover the corpus with the ratio of `chars_coverage`.
* This parameter determines the set of basic Alphabet of sentence piece.
* 1.0 - `chars_coverage` characters are treated as UNK.
* See also required_chars field.
*
*
* optional float character_coverage = 10 [default = 0.9995];
* @return The characterCoverage.
*/
@java.lang.Override
public float getCharacterCoverage() {
return characterCoverage_;
}
/**
*
*/////////////////////////////////////////////////////////////////
* Training parameters.
*
* Uses characters which cover the corpus with the ratio of `chars_coverage`.
* This parameter determines the set of basic Alphabet of sentence piece.
* 1.0 - `chars_coverage` characters are treated as UNK.
* See also required_chars field.
*
*
* optional float character_coverage = 10 [default = 0.9995];
* @param value The characterCoverage to set.
* @return This builder for chaining.
*/
public Builder setCharacterCoverage(float value) {
characterCoverage_ = value;
bitField0_ |= 0x00000080;
onChanged();
return this;
}
/**
*
*/////////////////////////////////////////////////////////////////
* Training parameters.
*
* Uses characters which cover the corpus with the ratio of `chars_coverage`.
* This parameter determines the set of basic Alphabet of sentence piece.
* 1.0 - `chars_coverage` characters are treated as UNK.
* See also required_chars field.
*
*
* optional float character_coverage = 10 [default = 0.9995];
* @return This builder for chaining.
*/
public Builder clearCharacterCoverage() {
bitField0_ = (bitField0_ & ~0x00000080);
characterCoverage_ = 0.9995F;
onChanged();
return this;
}
// Value of `optional uint64 input_sentence_size = 11 [default = 0]`, stored in
// a Java long; bit 0x00000100 of bitField0_ records whether it has been set.
private long inputSentenceSize_ ;
/**
 * Maximum size of sentences the trainer loads from the {@code input}
 * parameter. Trainer simply loads the {@code input} files in sequence. It is
 * better to shuffle the input corpus randomly.
 *
 * <p>{@code optional uint64 input_sentence_size = 11 [default = 0];}
 *
 * @return Whether the inputSentenceSize field is set.
 */
@java.lang.Override
public boolean hasInputSentenceSize() {
  final int mask = 0x00000100;
  return (bitField0_ & mask) != 0;
}
/**
 * Maximum size of sentences the trainer loads from the {@code input}
 * parameter. Trainer simply loads the {@code input} files in sequence. It is
 * better to shuffle the input corpus randomly.
 *
 * <p>{@code optional uint64 input_sentence_size = 11 [default = 0];}
 *
 * @return The inputSentenceSize.
 */
@java.lang.Override
public long getInputSentenceSize() {
  return this.inputSentenceSize_;
}
/**
 * Maximum size of sentences the trainer loads from the {@code input}
 * parameter. Trainer simply loads the {@code input} files in sequence. It is
 * better to shuffle the input corpus randomly.
 *
 * <p>{@code optional uint64 input_sentence_size = 11 [default = 0];}
 *
 * @param value The inputSentenceSize to set.
 * @return This builder for chaining.
 */
public Builder setInputSentenceSize(long value) {
  bitField0_ = bitField0_ | 0x00000100;
  inputSentenceSize_ = value;
  onChanged();
  return this;
}
/**
 * Resets {@code input_sentence_size} to 0 and clears its bit (0x00000100) in
 * {@code bitField0_}.
 *
 * <p>{@code optional uint64 input_sentence_size = 11 [default = 0];}
 *
 * @return This builder for chaining.
 */
public Builder clearInputSentenceSize() {
  inputSentenceSize_ = 0L;
  bitField0_ &= ~0x00000100;
  onChanged();
  return this;
}
// Value of `optional bool shuffle_input_sentence = 19 [default = true]`; bit
// 0x00000200 of bitField0_ records whether it has been set.
private boolean shuffleInputSentence_ = true;
/**
 * {@code optional bool shuffle_input_sentence = 19 [default = true];}
 *
 * @return Whether the shuffleInputSentence field is set.
 */
@java.lang.Override
public boolean hasShuffleInputSentence() {
  final int mask = 0x00000200;
  return (bitField0_ & mask) != 0;
}
/**
 * {@code optional bool shuffle_input_sentence = 19 [default = true];}
 *
 * @return The shuffleInputSentence.
 */
@java.lang.Override
public boolean getShuffleInputSentence() {
  return this.shuffleInputSentence_;
}
/**
 * {@code optional bool shuffle_input_sentence = 19 [default = true];}
 *
 * @param value The shuffleInputSentence to set.
 * @return This builder for chaining.
 */
public Builder setShuffleInputSentence(boolean value) {
  bitField0_ = bitField0_ | 0x00000200;
  shuffleInputSentence_ = value;
  onChanged();
  return this;
}
/**
 * Resets {@code shuffle_input_sentence} to {@code true} and clears its bit
 * (0x00000200) in {@code bitField0_}.
 *
 * <p>{@code optional bool shuffle_input_sentence = 19 [default = true];}
 *
 * @return This builder for chaining.
 */
public Builder clearShuffleInputSentence() {
  shuffleInputSentence_ = true;
  bitField0_ &= ~0x00000200;
  onChanged();
  return this;
}
// Value of the deprecated `optional int32 mining_sentence_size = 12`; bit
// 0x00000400 of bitField0_ records whether it has been set.
private int miningSentenceSize_ ;
/**
 * Maximum size of sentences to make seed sentence pieces. Extended suffix
 * array is constructed to extract frequent sub-strings from the corpus. This
 * uses 20N working space, where N is the size of corpus.
 *
 * <p>{@code optional int32 mining_sentence_size = 12 [deprecated = true];}
 *
 * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated.
 *     See sentencepiece_model.proto;l=83
 * @return Whether the miningSentenceSize field is set.
 */
@java.lang.Override
@java.lang.Deprecated
public boolean hasMiningSentenceSize() {
  final int mask = 0x00000400;
  return (bitField0_ & mask) != 0;
}
/**
 * Maximum size of sentences to make seed sentence pieces. Extended suffix
 * array is constructed to extract frequent sub-strings from the corpus. This
 * uses 20N working space, where N is the size of corpus.
 *
 * <p>{@code optional int32 mining_sentence_size = 12 [deprecated = true];}
 *
 * @deprecated sentencepiece.TrainerSpec.mining_sentence_size is deprecated.
 *     See sentencepiece_model.proto;l=83
 * @return The miningSentenceSize.
 */
@java.lang.Override
@java.lang.Deprecated
public int getMiningSentenceSize() {
  return this.miningSentenceSize_;
}
/**
 * Maximum size of sentences to make seed sentence pieces. Extended suffix
 * array is constructed to extract frequent sub-strings from the corpus. This
 * uses 20N working space, where N is the size of corpus.
 *
 * <p>{@code optional int32 mining_sentence_size = 12 [deprecated = true];}
 *
 * @param value The miningSentenceSize to set.
 * @return This builder for chaining.
 */
@java.lang.Deprecated
public Builder setMiningSentenceSize(int value) {
  bitField0_ = bitField0_ | 0x00000400;
  miningSentenceSize_ = value;
  onChanged();
  return this;
}
/**
 * Resets {@code mining_sentence_size} to 0 and clears its bit (0x00000400) in
 * {@code bitField0_}.
 *
 * <p>{@code optional int32 mining_sentence_size = 12 [deprecated = true];}
 *
 * @return This builder for chaining.
 */
@java.lang.Deprecated
public Builder clearMiningSentenceSize() {
  miningSentenceSize_ = 0;
  bitField0_ &= ~0x00000400;
  onChanged();
  return this;
}
// Value of the deprecated `optional int32 training_sentence_size = 13`; bit
// 0x00000800 of bitField0_ records whether it has been set.
private int trainingSentenceSize_ ;
/**
 * Maximum size of sentences to train sentence pieces.
 *
 * <p>{@code optional int32 training_sentence_size = 13 [deprecated = true];}
 *
 * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated.
 *     See sentencepiece_model.proto;l=86
 * @return Whether the trainingSentenceSize field is set.
 */
@java.lang.Override
@java.lang.Deprecated
public boolean hasTrainingSentenceSize() {
  final int mask = 0x00000800;
  return (bitField0_ & mask) != 0;
}
/**
 * Maximum size of sentences to train sentence pieces.
 *
 * <p>{@code optional int32 training_sentence_size = 13 [deprecated = true];}
 *
 * @deprecated sentencepiece.TrainerSpec.training_sentence_size is deprecated.
 *     See sentencepiece_model.proto;l=86
 * @return The trainingSentenceSize.
 */
@java.lang.Override
@java.lang.Deprecated
public int getTrainingSentenceSize() {
  return this.trainingSentenceSize_;
}
/**
 * Maximum size of sentences to train sentence pieces.
 *
 * <p>{@code optional int32 training_sentence_size = 13 [deprecated = true];}
 *
 * @param value The trainingSentenceSize to set.
 * @return This builder for chaining.
 */
@java.lang.Deprecated
public Builder setTrainingSentenceSize(int value) {
  bitField0_ = bitField0_ | 0x00000800;
  trainingSentenceSize_ = value;
  onChanged();
  return this;
}
/**
 * Resets {@code training_sentence_size} to 0 and clears its bit (0x00000800)
 * in {@code bitField0_}.
 *
 * <p>{@code optional int32 training_sentence_size = 13 [deprecated = true];}
 *
 * @return This builder for chaining.
 */
@java.lang.Deprecated
public Builder clearTrainingSentenceSize() {
  trainingSentenceSize_ = 0;
  bitField0_ &= ~0x00000800;
  onChanged();
  return this;
}
// Value of `optional int32 seed_sentencepiece_size = 14 [default = 1000000]`;
// bit 0x00001000 of bitField0_ records whether it has been set.
private int seedSentencepieceSize_ = 1000000;
/**
 * The size of seed sentencepieces. {@code seed_sentencepiece_size} must be
 * larger than {@code vocab_size}.
 *
 * <p>{@code optional int32 seed_sentencepiece_size = 14 [default = 1000000];}
 *
 * @return Whether the seedSentencepieceSize field is set.
 */
@java.lang.Override
public boolean hasSeedSentencepieceSize() {
  final int mask = 0x00001000;
  return (bitField0_ & mask) != 0;
}
/**
 * The size of seed sentencepieces. {@code seed_sentencepiece_size} must be
 * larger than {@code vocab_size}.
 *
 * <p>{@code optional int32 seed_sentencepiece_size = 14 [default = 1000000];}
 *
 * @return The seedSentencepieceSize.
 */
@java.lang.Override
public int getSeedSentencepieceSize() {
  return this.seedSentencepieceSize_;
}
/**
 * The size of seed sentencepieces. {@code seed_sentencepiece_size} must be
 * larger than {@code vocab_size}.
 *
 * <p>{@code optional int32 seed_sentencepiece_size = 14 [default = 1000000];}
 *
 * @param value The seedSentencepieceSize to set.
 * @return This builder for chaining.
 */
public Builder setSeedSentencepieceSize(int value) {
  bitField0_ = bitField0_ | 0x00001000;
  seedSentencepieceSize_ = value;
  onChanged();
  return this;
}
/**
 * Resets {@code seed_sentencepiece_size} to 1000000 and clears its bit
 * (0x00001000) in {@code bitField0_}.
 *
 * <p>{@code optional int32 seed_sentencepiece_size = 14 [default = 1000000];}
 *
 * @return This builder for chaining.
 */
public Builder clearSeedSentencepieceSize() {
  seedSentencepieceSize_ = 1000000;
  bitField0_ &= ~0x00001000;
  onChanged();
  return this;
}
// Value of `optional float shrinking_factor = 15 [default = 0.75]`; bit
// 0x00002000 of bitField0_ records whether it has been set.
private float shrinkingFactor_ = 0.75F;
/**
 * In every EM sub-iterations, keeps top {@code shrinking_factor} *
 * {@code current sentencepieces size} with respect to the loss of the
 * sentence piece. This value should be smaller than 1.0.
 *
 * <p>{@code optional float shrinking_factor = 15 [default = 0.75];}
 *
 * @return Whether the shrinkingFactor field is set.
 */
@java.lang.Override
public boolean hasShrinkingFactor() {
  final int mask = 0x00002000;
  return (bitField0_ & mask) != 0;
}
/**
 * In every EM sub-iterations, keeps top {@code shrinking_factor} *
 * {@code current sentencepieces size} with respect to the loss of the
 * sentence piece. This value should be smaller than 1.0.
 *
 * <p>{@code optional float shrinking_factor = 15 [default = 0.75];}
 *
 * @return The shrinkingFactor.
 */
@java.lang.Override
public float getShrinkingFactor() {
  return this.shrinkingFactor_;
}
/**
 * In every EM sub-iterations, keeps top {@code shrinking_factor} *
 * {@code current sentencepieces size} with respect to the loss of the
 * sentence piece. This value should be smaller than 1.0.
 *
 * <p>{@code optional float shrinking_factor = 15 [default = 0.75];}
 *
 * @param value The shrinkingFactor to set.
 * @return This builder for chaining.
 */
public Builder setShrinkingFactor(float value) {
  bitField0_ = bitField0_ | 0x00002000;
  shrinkingFactor_ = value;
  onChanged();
  return this;
}
/**
 * Resets {@code shrinking_factor} to 0.75 and clears its bit (0x00002000) in
 * {@code bitField0_}.
 *
 * <p>{@code optional float shrinking_factor = 15 [default = 0.75];}
 *
 * @return This builder for chaining.
 */
public Builder clearShrinkingFactor() {
  shrinkingFactor_ = 0.75F;
  bitField0_ &= ~0x00002000;
  onChanged();
  return this;
}
// Value of `optional int32 max_sentence_length = 18 [default = 4192]`; bit
// 0x00004000 of bitField0_ records whether it has been set.
private int maxSentenceLength_ = 4192;
/**
 * The maximum sentence length in byte. The sentences with the length larger
 * than {@code max_sentence_length} is simply ignored. Longer input tends to
 * bring the following risks:
 * <ul>
 *   <li>Overflow during EM training (unigram language model only)
 *   <li>Performance drop because of O(n log n) cost in BPE.
 * </ul>
 *
 * <p>{@code optional int32 max_sentence_length = 18 [default = 4192];}
 *
 * @return Whether the maxSentenceLength field is set.
 */
@java.lang.Override
public boolean hasMaxSentenceLength() {
  final int mask = 0x00004000;
  return (bitField0_ & mask) != 0;
}
/**
 * The maximum sentence length in byte. The sentences with the length larger
 * than {@code max_sentence_length} is simply ignored. Longer input tends to
 * bring the following risks:
 * <ul>
 *   <li>Overflow during EM training (unigram language model only)
 *   <li>Performance drop because of O(n log n) cost in BPE.
 * </ul>
 *
 * <p>{@code optional int32 max_sentence_length = 18 [default = 4192];}
 *
 * @return The maxSentenceLength.
 */
@java.lang.Override
public int getMaxSentenceLength() {
  return this.maxSentenceLength_;
}
/**
*
* The maximum sentence length in byte. The sentences with the length
* larger than `max_sentence_length` is simply ignored.
* Longer input tends to bring the following risks:
* * Overflow during EM training (unigram language model only)
* * Performance drop because of O(n log n) cost in BPE.
*
*
* optional int32 max_sentence_length = 18 [default = 4192];
* @param value The maxSentenceLength to set.
* @return This builder for chaining.
*/
public Builder setMaxSentenceLength(int value) {
  // Mark the field present, then store the caller's value.
  bitField0_ = bitField0_ | 0x00004000;
  maxSentenceLength_ = value;
  onChanged();
  return this;
}
/**
*
* The maximum sentence length in bytes. Sentences whose length is
* larger than `max_sentence_length` are simply ignored.
* Longer input tends to bring the following risks:
* * Overflow during EM training (unigram language model only)
* * Performance drop because of O(n log n) cost in BPE.
*
*
* optional int32 max_sentence_length = 18 [default = 4192];
* @return This builder for chaining.
*/
public Builder clearMaxSentenceLength() {
  // Restore the default (4192) and drop the presence bit.
  maxSentenceLength_ = 4192;
  bitField0_ &= ~0x00004000;
  onChanged();
  return this;
}
// Builder storage for num_threads; starts at the declared default of 16.
private int numThreads_ = 16;
/**
*
* Number of threads in the training.
*
*
* optional int32 num_threads = 16 [default = 16];
* @return Whether the numThreads field is set.
*/
@java.lang.Override
public boolean hasNumThreads() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00008000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* Number of threads in the training.
*
*
* optional int32 num_threads = 16 [default = 16];
* @return The numThreads.
*/
@java.lang.Override
public int getNumThreads() {
  // Direct read of the value currently held by this builder.
  return this.numThreads_;
}
/**
*
* Number of threads in the training.
*
*
* optional int32 num_threads = 16 [default = 16];
* @param value The numThreads to set.
* @return This builder for chaining.
*/
public Builder setNumThreads(int value) {
  // Mark the field present, then store the caller's value.
  bitField0_ = bitField0_ | 0x00008000;
  numThreads_ = value;
  onChanged();
  return this;
}
/**
*
* Number of threads in the training.
*
*
* optional int32 num_threads = 16 [default = 16];
* @return This builder for chaining.
*/
public Builder clearNumThreads() {
  // Restore the default (16) and drop the presence bit.
  numThreads_ = 16;
  bitField0_ &= ~0x00008000;
  onChanged();
  return this;
}
// Builder storage for num_sub_iterations; starts at the declared default of 2.
private int numSubIterations_ = 2;
/**
*
* Number of EM sub iterations.
*
*
* optional int32 num_sub_iterations = 17 [default = 2];
* @return Whether the numSubIterations field is set.
*/
@java.lang.Override
public boolean hasNumSubIterations() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00010000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* Number of EM sub iterations.
*
*
* optional int32 num_sub_iterations = 17 [default = 2];
* @return The numSubIterations.
*/
@java.lang.Override
public int getNumSubIterations() {
  // Direct read of the value currently held by this builder.
  return this.numSubIterations_;
}
/**
*
* Number of EM sub iterations.
*
*
* optional int32 num_sub_iterations = 17 [default = 2];
* @param value The numSubIterations to set.
* @return This builder for chaining.
*/
public Builder setNumSubIterations(int value) {
  // Mark the field present, then store the caller's value.
  bitField0_ = bitField0_ | 0x00010000;
  numSubIterations_ = value;
  onChanged();
  return this;
}
/**
*
* Number of EM sub iterations.
*
*
* optional int32 num_sub_iterations = 17 [default = 2];
* @return This builder for chaining.
*/
public Builder clearNumSubIterations() {
  // Restore the default (2) and drop the presence bit.
  numSubIterations_ = 2;
  bitField0_ &= ~0x00010000;
  onChanged();
  return this;
}
// Builder storage for max_sentencepiece_length; starts at the declared default of 16.
private int maxSentencepieceLength_ = 16;
/**
*
*&#47;////////////////////////////////////////////////////////////////
* SentencePiece parameters which control the shapes of sentence piece.
*
* Maximum length of sentencepiece.
*
*
* optional int32 max_sentencepiece_length = 20 [default = 16];
* @return Whether the maxSentencepieceLength field is set.
*/
@java.lang.Override
public boolean hasMaxSentencepieceLength() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00020000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* SentencePiece parameters which control the shapes of sentence piece.
*
* Maximum length of sentencepiece.
*
*
* optional int32 max_sentencepiece_length = 20 [default = 16];
* @return The maxSentencepieceLength.
*/
@java.lang.Override
public int getMaxSentencepieceLength() {
  // Direct read of the value currently held by this builder.
  return this.maxSentencepieceLength_;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* SentencePiece parameters which control the shapes of sentence piece.
*
* Maximum length of sentencepiece.
*
*
* optional int32 max_sentencepiece_length = 20 [default = 16];
* @param value The maxSentencepieceLength to set.
* @return This builder for chaining.
*/
public Builder setMaxSentencepieceLength(int value) {
  // Mark the field present, then store the caller's value.
  bitField0_ = bitField0_ | 0x00020000;
  maxSentencepieceLength_ = value;
  onChanged();
  return this;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* SentencePiece parameters which control the shapes of sentence piece.
*
* Maximum length of sentencepiece.
*
*
* optional int32 max_sentencepiece_length = 20 [default = 16];
* @return This builder for chaining.
*/
public Builder clearMaxSentencepieceLength() {
  // Restore the default (16) and drop the presence bit.
  maxSentencepieceLength_ = 16;
  bitField0_ &= ~0x00020000;
  onChanged();
  return this;
}
// Builder storage for split_by_unicode_script; starts at the declared default of true.
private boolean splitByUnicodeScript_ = true;
/**
*
* Uses Unicode script to split sentence pieces.
* When `split_by_unicode_script` is true, we do not allow sentence piece to
* include multiple Unicode scripts, e.g. "F1" is not a valid piece.
* Exception: CJ characters (Hiragana/Katakana/Han) are all handled
* as one script type, since Japanese word can consist of multiple scripts.
* This exception is always applied regardless of the accept-language
* parameter.
*
*
* optional bool split_by_unicode_script = 21 [default = true];
* @return Whether the splitByUnicodeScript field is set.
*/
@java.lang.Override
public boolean hasSplitByUnicodeScript() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00040000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* Uses Unicode script to split sentence pieces.
* When `split_by_unicode_script` is true, we do not allow sentence piece to
* include multiple Unicode scripts, e.g. "F1" is not a valid piece.
* Exception: CJ characters (Hiragana/Katakana/Han) are all handled
* as one script type, since Japanese word can consist of multiple scripts.
* This exception is always applied regardless of the accept-language
* parameter.
*
*
* optional bool split_by_unicode_script = 21 [default = true];
* @return The splitByUnicodeScript.
*/
@java.lang.Override
public boolean getSplitByUnicodeScript() {
  // Direct read of the flag currently held by this builder.
  return this.splitByUnicodeScript_;
}
/**
*
* Uses Unicode script to split sentence pieces.
* When `split_by_unicode_script` is true, we do not allow sentence piece to
* include multiple Unicode scripts, e.g. "F1" is not a valid piece.
* Exception: CJ characters (Hiragana/Katakana/Han) are all handled
* as one script type, since Japanese word can consist of multiple scripts.
* This exception is always applied regardless of the accept-language
* parameter.
*
*
* optional bool split_by_unicode_script = 21 [default = true];
* @param value The splitByUnicodeScript to set.
* @return This builder for chaining.
*/
public Builder setSplitByUnicodeScript(boolean value) {
  // Mark the field present, then store the caller's flag.
  bitField0_ = bitField0_ | 0x00040000;
  splitByUnicodeScript_ = value;
  onChanged();
  return this;
}
/**
*
* Uses Unicode script to split sentence pieces.
* When `split_by_unicode_script` is true, we do not allow sentence piece to
* include multiple Unicode scripts, e.g. "F1" is not a valid piece.
* Exception: CJ characters (Hiragana/Katakana/Han) are all handled
* as one script type, since Japanese word can consist of multiple scripts.
* This exception is always applied regardless of the accept-language
* parameter.
*
*
* optional bool split_by_unicode_script = 21 [default = true];
* @return This builder for chaining.
*/
public Builder clearSplitByUnicodeScript() {
  // Restore the default (true) and drop the presence bit.
  splitByUnicodeScript_ = true;
  bitField0_ &= ~0x00040000;
  onChanged();
  return this;
}
// Builder storage for split_by_number; starts at the declared default of true.
private boolean splitByNumber_ = true;
/**
*
* When `split_by_number` is true, puts a boundary at every transition
* between number and non-number characters. To treat "F1" as one token,
* set this flag to false.
*
*
* optional bool split_by_number = 23 [default = true];
* @return Whether the splitByNumber field is set.
*/
@java.lang.Override
public boolean hasSplitByNumber() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00080000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* When `split_by_number` is true, puts a boundary at every transition
* between number and non-number characters. To treat "F1" as one token,
* set this flag to false.
*
*
* optional bool split_by_number = 23 [default = true];
* @return The splitByNumber.
*/
@java.lang.Override
public boolean getSplitByNumber() {
  // Direct read of the flag currently held by this builder.
  return this.splitByNumber_;
}
/**
*
* When `split_by_number` is true, puts a boundary at every transition
* between number and non-number characters. To treat "F1" as one token,
* set this flag to false.
*
*
* optional bool split_by_number = 23 [default = true];
* @param value The splitByNumber to set.
* @return This builder for chaining.
*/
public Builder setSplitByNumber(boolean value) {
  // Mark the field present, then store the caller's flag.
  bitField0_ = bitField0_ | 0x00080000;
  splitByNumber_ = value;
  onChanged();
  return this;
}
/**
*
* When `split_by_number` is true, puts a boundary at every transition
* between number and non-number characters. To treat "F1" as one token,
* set this flag to false.
*
*
* optional bool split_by_number = 23 [default = true];
* @return This builder for chaining.
*/
public Builder clearSplitByNumber() {
  // Restore the default (true) and drop the presence bit.
  splitByNumber_ = true;
  bitField0_ &= ~0x00080000;
  onChanged();
  return this;
}
// Builder storage for split_by_whitespace; starts at the declared default of true.
private boolean splitByWhitespace_ = true;
/**
*
* Use a white space to split sentence pieces.
* When `split_by_whitespace` is false, we may have the piece containing
* a white space in the middle. e.g., "in_the".
*
*
* optional bool split_by_whitespace = 22 [default = true];
* @return Whether the splitByWhitespace field is set.
*/
@java.lang.Override
public boolean hasSplitByWhitespace() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00100000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* Use a white space to split sentence pieces.
* When `split_by_whitespace` is false, we may have the piece containing
* a white space in the middle. e.g., "in_the".
*
*
* optional bool split_by_whitespace = 22 [default = true];
* @return The splitByWhitespace.
*/
@java.lang.Override
public boolean getSplitByWhitespace() {
  // Direct read of the flag currently held by this builder.
  return this.splitByWhitespace_;
}
/**
*
* Use a white space to split sentence pieces.
* When `split_by_whitespace` is false, we may have the piece containing
* a white space in the middle. e.g., "in_the".
*
*
* optional bool split_by_whitespace = 22 [default = true];
* @param value The splitByWhitespace to set.
* @return This builder for chaining.
*/
public Builder setSplitByWhitespace(boolean value) {
  // Mark the field present, then store the caller's flag.
  bitField0_ = bitField0_ | 0x00100000;
  splitByWhitespace_ = value;
  onChanged();
  return this;
}
/**
*
* Use a white space to split sentence pieces.
* When `split_by_whitespace` is false, we may have the piece containing
* a white space in the middle. e.g., "in_the".
*
*
* optional bool split_by_whitespace = 22 [default = true];
* @return This builder for chaining.
*/
public Builder clearSplitByWhitespace() {
  // Restore the default (true) and drop the presence bit.
  splitByWhitespace_ = true;
  bitField0_ &= ~0x00100000;
  onChanged();
  return this;
}
// Builder storage for treat_whitespace_as_suffix; no initializer, so it starts as false (the declared default).
private boolean treatWhitespaceAsSuffix_ ;
/**
*
* Adds whitespace symbol (_) as a suffix instead of a prefix. e.g., _hello =&gt;
* hello_. When `treat_whitespace_as_suffix` is true,
* NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
* of sentence.
*
*
* optional bool treat_whitespace_as_suffix = 24 [default = false];
* @return Whether the treatWhitespaceAsSuffix field is set.
*/
@java.lang.Override
public boolean hasTreatWhitespaceAsSuffix() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00200000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* Adds whitespace symbol (_) as a suffix instead of a prefix. e.g., _hello =&gt;
* hello_. When `treat_whitespace_as_suffix` is true,
* NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
* of sentence.
*
*
* optional bool treat_whitespace_as_suffix = 24 [default = false];
* @return The treatWhitespaceAsSuffix.
*/
@java.lang.Override
public boolean getTreatWhitespaceAsSuffix() {
  // Direct read of the flag currently held by this builder.
  return this.treatWhitespaceAsSuffix_;
}
/**
*
* Adds whitespace symbol (_) as a suffix instead of a prefix. e.g., _hello =&gt;
* hello_. When `treat_whitespace_as_suffix` is true,
* NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
* of sentence.
*
*
* optional bool treat_whitespace_as_suffix = 24 [default = false];
* @param value The treatWhitespaceAsSuffix to set.
* @return This builder for chaining.
*/
public Builder setTreatWhitespaceAsSuffix(boolean value) {
  // Mark the field present, then store the caller's flag.
  bitField0_ = bitField0_ | 0x00200000;
  treatWhitespaceAsSuffix_ = value;
  onChanged();
  return this;
}
/**
*
* Adds whitespace symbol (_) as a suffix instead of a prefix. e.g., _hello =&gt;
* hello_. When `treat_whitespace_as_suffix` is true,
* NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
* of sentence.
*
*
* optional bool treat_whitespace_as_suffix = 24 [default = false];
* @return This builder for chaining.
*/
public Builder clearTreatWhitespaceAsSuffix() {
  // Restore the default (false) and drop the presence bit.
  treatWhitespaceAsSuffix_ = false;
  bitField0_ &= ~0x00200000;
  onChanged();
  return this;
}
// Builder storage for allow_whitespace_only_pieces; no initializer, so it starts as false (the declared default).
private boolean allowWhitespaceOnlyPieces_ ;
/**
*
* Allows pieces that only contain whitespaces instead of appearing only as
* prefix or suffix of other pieces.
*
*
* optional bool allow_whitespace_only_pieces = 26 [default = false];
* @return Whether the allowWhitespaceOnlyPieces field is set.
*/
@java.lang.Override
public boolean hasAllowWhitespaceOnlyPieces() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00400000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* Allows pieces that only contain whitespaces instead of appearing only as
* prefix or suffix of other pieces.
*
*
* optional bool allow_whitespace_only_pieces = 26 [default = false];
* @return The allowWhitespaceOnlyPieces.
*/
@java.lang.Override
public boolean getAllowWhitespaceOnlyPieces() {
  // Direct read of the flag currently held by this builder.
  return this.allowWhitespaceOnlyPieces_;
}
/**
*
* Allows pieces that only contain whitespaces instead of appearing only as
* prefix or suffix of other pieces.
*
*
* optional bool allow_whitespace_only_pieces = 26 [default = false];
* @param value The allowWhitespaceOnlyPieces to set.
* @return This builder for chaining.
*/
public Builder setAllowWhitespaceOnlyPieces(boolean value) {
  // Mark the field present, then store the caller's flag.
  bitField0_ = bitField0_ | 0x00400000;
  allowWhitespaceOnlyPieces_ = value;
  onChanged();
  return this;
}
/**
*
* Allows pieces that only contain whitespaces instead of appearing only as
* prefix or suffix of other pieces.
*
*
* optional bool allow_whitespace_only_pieces = 26 [default = false];
* @return This builder for chaining.
*/
public Builder clearAllowWhitespaceOnlyPieces() {
  // Restore the default (false) and drop the presence bit.
  allowWhitespaceOnlyPieces_ = false;
  bitField0_ &= ~0x00400000;
  onChanged();
  return this;
}
// Builder storage for split_digits; no initializer, so it starts as false (the declared default).
private boolean splitDigits_ ;
/**
*
* Split all digits (0-9) into separate pieces.
*
*
* optional bool split_digits = 25 [default = false];
* @return Whether the splitDigits field is set.
*/
@java.lang.Override
public boolean hasSplitDigits() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x00800000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* Split all digits (0-9) into separate pieces.
*
*
* optional bool split_digits = 25 [default = false];
* @return The splitDigits.
*/
@java.lang.Override
public boolean getSplitDigits() {
  // Direct read of the flag currently held by this builder.
  return this.splitDigits_;
}
/**
*
* Split all digits (0-9) into separate pieces.
*
*
* optional bool split_digits = 25 [default = false];
* @param value The splitDigits to set.
* @return This builder for chaining.
*/
public Builder setSplitDigits(boolean value) {
  // Mark the field present, then store the caller's flag.
  bitField0_ = bitField0_ | 0x00800000;
  splitDigits_ = value;
  onChanged();
  return this;
}
/**
*
* Split all digits (0-9) into separate pieces.
*
*
* optional bool split_digits = 25 [default = false];
* @return This builder for chaining.
*/
public Builder clearSplitDigits() {
  // Restore the default (false) and drop the presence bit.
  splitDigits_ = false;
  bitField0_ &= ~0x00800000;
  onChanged();
  return this;
}
// Builder storage for the repeated control_symbols field; starts as the shared empty list.
private com.google.protobuf.LazyStringArrayList controlSymbols_ =
com.google.protobuf.LazyStringArrayList.emptyList();
private void ensureControlSymbolsIsMutable() {
  // If the current list cannot be modified, replace it with a new
  // LazyStringArrayList built from it; the field bit is set either way.
  final boolean readOnly = !controlSymbols_.isModifiable();
  if (readOnly) {
    controlSymbols_ = new com.google.protobuf.LazyStringArrayList(controlSymbols_);
  }
  bitField0_ = bitField0_ | 0x01000000;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @return A list containing the controlSymbols.
*/
public com.google.protobuf.ProtocolStringList getControlSymbolsList() {
  // The backing list is made immutable before it is handed out.
  final com.google.protobuf.LazyStringArrayList symbols = controlSymbols_;
  symbols.makeImmutable();
  return symbols;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @return The count of controlSymbols.
*/
public int getControlSymbolsCount() {
  // Size of the backing list.
  return this.controlSymbols_.size();
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param index The index of the element to return.
* @return The controlSymbols at the given index.
*/
public java.lang.String getControlSymbols(int index) {
  // Element lookup is delegated to the backing list.
  final java.lang.String symbol = controlSymbols_.get(index);
  return symbol;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param index The index of the value to return.
* @return The bytes of the controlSymbols at the given index.
*/
public com.google.protobuf.ByteString getControlSymbolsBytes(int index) {
  // Same lookup as getControlSymbols(int), but in ByteString form.
  final com.google.protobuf.ByteString symbolBytes = controlSymbols_.getByteString(index);
  return symbolBytes;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param index The index to set the value at.
* @param value The controlSymbols to set.
* @return This builder for chaining.
*/
public Builder setControlSymbols(int index, java.lang.String value) {
  // Null elements are rejected before the list is touched.
  java.util.Objects.requireNonNull(value);
  ensureControlSymbolsIsMutable();
  controlSymbols_.set(index, value);
  bitField0_ = bitField0_ | 0x01000000;
  onChanged();
  return this;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param value The controlSymbols to add.
* @return This builder for chaining.
*/
public Builder addControlSymbols(java.lang.String value) {
  // Null elements are rejected before the list is touched.
  java.util.Objects.requireNonNull(value);
  ensureControlSymbolsIsMutable();
  controlSymbols_.add(value);
  bitField0_ = bitField0_ | 0x01000000;
  onChanged();
  return this;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param values The controlSymbols to add.
* @return This builder for chaining.
*/
public Builder addAllControlSymbols(
    java.lang.Iterable<java.lang.String> values) {
  // The parameter is typed Iterable<String> rather than a raw Iterable so
  // that non-String elements are rejected at compile time instead of being
  // pushed unchecked into the string list.
  ensureControlSymbolsIsMutable();
  com.google.protobuf.AbstractMessageLite.Builder.addAll(
      values, controlSymbols_);
  bitField0_ |= 0x01000000;
  onChanged();
  return this;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @return This builder for chaining.
*/
public Builder clearControlSymbols() {
  // Point back at the shared empty list and drop the field bit.
  controlSymbols_ = com.google.protobuf.LazyStringArrayList.emptyList();
  bitField0_ &= ~0x01000000;
  onChanged();
  return this;
}
/**
*
*&#47;////////////////////////////////////////////////////////////////
* Vocabulary management
*
* Defines control symbols used as an indicator to
* change the behavior of the decoder. &lt;s&gt; and &lt;/s&gt; are pre-defined.
* We can use this field to encode various meta information,
* including language indicator in multilingual model.
* These symbols are not visible to users, but visible to
* the decoder. Note that when the input sentence contains control symbols,
* they are not treated as one token, but segmented into normal pieces.
* Control symbols must be inserted independently from the segmentation.
*
*
* repeated string control_symbols = 30;
* @param value The bytes of the controlSymbols to add.
* @return This builder for chaining.
*/
public Builder addControlSymbolsBytes(com.google.protobuf.ByteString value) {
  // Null elements are rejected before the list is touched.
  java.util.Objects.requireNonNull(value);
  ensureControlSymbolsIsMutable();
  controlSymbols_.add(value);
  bitField0_ = bitField0_ | 0x01000000;
  onChanged();
  return this;
}
// Builder storage for the repeated user_defined_symbols field; starts as the shared empty list.
private com.google.protobuf.LazyStringArrayList userDefinedSymbols_ =
com.google.protobuf.LazyStringArrayList.emptyList();
private void ensureUserDefinedSymbolsIsMutable() {
  // If the current list cannot be modified, replace it with a new
  // LazyStringArrayList built from it; the field bit is set either way.
  final boolean readOnly = !userDefinedSymbols_.isModifiable();
  if (readOnly) {
    userDefinedSymbols_ = new com.google.protobuf.LazyStringArrayList(userDefinedSymbols_);
  }
  bitField0_ = bitField0_ | 0x02000000;
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @return A list containing the userDefinedSymbols.
*/
public com.google.protobuf.ProtocolStringList getUserDefinedSymbolsList() {
  // The backing list is made immutable before it is handed out.
  final com.google.protobuf.LazyStringArrayList symbols = userDefinedSymbols_;
  symbols.makeImmutable();
  return symbols;
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @return The count of userDefinedSymbols.
*/
public int getUserDefinedSymbolsCount() {
  // Size of the backing list.
  return this.userDefinedSymbols_.size();
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @param index The index of the element to return.
* @return The userDefinedSymbols at the given index.
*/
public java.lang.String getUserDefinedSymbols(int index) {
  // Element lookup is delegated to the backing list.
  final java.lang.String symbol = userDefinedSymbols_.get(index);
  return symbol;
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @param index The index of the value to return.
* @return The bytes of the userDefinedSymbols at the given index.
*/
public com.google.protobuf.ByteString getUserDefinedSymbolsBytes(int index) {
  // Same lookup as getUserDefinedSymbols(int), but in ByteString form.
  final com.google.protobuf.ByteString symbolBytes = userDefinedSymbols_.getByteString(index);
  return symbolBytes;
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @param index The index to set the value at.
* @param value The userDefinedSymbols to set.
* @return This builder for chaining.
*/
public Builder setUserDefinedSymbols(int index, java.lang.String value) {
  // Null elements are rejected before the list is touched.
  java.util.Objects.requireNonNull(value);
  ensureUserDefinedSymbolsIsMutable();
  userDefinedSymbols_.set(index, value);
  bitField0_ = bitField0_ | 0x02000000;
  onChanged();
  return this;
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @param value The userDefinedSymbols to add.
* @return This builder for chaining.
*/
public Builder addUserDefinedSymbols(java.lang.String value) {
  // Null elements are rejected before the list is touched.
  java.util.Objects.requireNonNull(value);
  ensureUserDefinedSymbolsIsMutable();
  userDefinedSymbols_.add(value);
  bitField0_ = bitField0_ | 0x02000000;
  onChanged();
  return this;
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @param values The userDefinedSymbols to add.
* @return This builder for chaining.
*/
public Builder addAllUserDefinedSymbols(
    java.lang.Iterable<java.lang.String> values) {
  // The parameter is typed Iterable<String> rather than a raw Iterable so
  // that non-String elements are rejected at compile time instead of being
  // pushed unchecked into the string list.
  ensureUserDefinedSymbolsIsMutable();
  com.google.protobuf.AbstractMessageLite.Builder.addAll(
      values, userDefinedSymbols_);
  bitField0_ |= 0x02000000;
  onChanged();
  return this;
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @return This builder for chaining.
*/
public Builder clearUserDefinedSymbols() {
  // Point back at the shared empty list and drop the field bit.
  userDefinedSymbols_ = com.google.protobuf.LazyStringArrayList.emptyList();
  bitField0_ &= ~0x02000000;
  onChanged();
  return this;
}
/**
*
* Defines user defined symbols.
* These symbols are added with extremely high score
* so they are always treated as one unique symbol in any context.
* Typical usage of user_defined_symbols is placeholder for named entities.
*
*
* repeated string user_defined_symbols = 31;
* @param value The bytes of the userDefinedSymbols to add.
* @return This builder for chaining.
*/
public Builder addUserDefinedSymbolsBytes(com.google.protobuf.ByteString value) {
  // Null elements are rejected before the list is touched.
  java.util.Objects.requireNonNull(value);
  ensureUserDefinedSymbolsIsMutable();
  userDefinedSymbols_.add(value);
  bitField0_ = bitField0_ | 0x02000000;
  onChanged();
  return this;
}
// Builder storage for required_chars. Typed Object because the accessors below
// keep it as either a java.lang.String or a ByteString.
private java.lang.Object requiredChars_ = "";
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @return Whether the requiredChars field is set.
*/
public boolean hasRequiredChars() {
  // Presence is tracked by one bit of bitField0_.
  final int presenceMask = 0x04000000;
  return (bitField0_ & presenceMask) != 0;
}
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @return The requiredChars.
*/
public java.lang.String getRequiredChars() {
  final java.lang.Object cached = requiredChars_;
  // Fast path: the value is already held as a String.
  if (cached instanceof java.lang.String) {
    return (java.lang.String) cached;
  }
  // Otherwise it is held as bytes: decode, and keep the decoded form only
  // when the bytes are valid UTF-8.
  final com.google.protobuf.ByteString rawBytes =
      (com.google.protobuf.ByteString) cached;
  final java.lang.String decoded = rawBytes.toStringUtf8();
  if (rawBytes.isValidUtf8()) {
    requiredChars_ = decoded;
  }
  return decoded;
}
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @return The bytes for requiredChars.
*/
public com.google.protobuf.ByteString getRequiredCharsBytes() {
  final java.lang.Object cached = requiredChars_;
  // Already held as bytes: hand it back as is.
  if (!(cached instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) cached;
  }
  // Held as a String: encode as UTF-8 and keep the encoded form.
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) cached);
  requiredChars_ = encoded;
  return encoded;
}
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @param value The requiredChars to set.
* @return This builder for chaining.
*/
public Builder setRequiredChars(java.lang.String value) {
  // A null value is rejected before any state changes.
  java.util.Objects.requireNonNull(value);
  bitField0_ = bitField0_ | 0x04000000;
  requiredChars_ = value;
  onChanged();
  return this;
}
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @return This builder for chaining.
*/
public Builder clearRequiredChars() {
  // Drop the presence bit and fall back to the default instance's value.
  bitField0_ &= ~0x04000000;
  requiredChars_ = getDefaultInstance().getRequiredChars();
  onChanged();
  return this;
}
/**
*
* Defines required characters. Each UTF8 character in this string is included
* in the character set regardless of character_coverage value. Unlike
* user_defined_symbols, these characters have scores based on the frequency
* on input sentences, and the model can form subwords using characters
* in this field.
*
*
* optional string required_chars = 36;
* @param value The bytes for requiredChars to set.
* @return This builder for chaining.
*/
public Builder setRequiredCharsBytes(com.google.protobuf.ByteString value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // The bytes are stored as given; this method does not check them for valid UTF-8.
  bitField0_ = bitField0_ | 0x04000000;
  requiredChars_ = value;
  onChanged();
  return this;
}
// Builder-side value of byte_fallback; false until a setter changes it.
private boolean byteFallback_ = false;
/**
*
* Decomposes unknown pieces into UTF-8 bytes.
*
*
* optional bool byte_fallback = 35 [default = false];
* @return Whether the byteFallback field is set.
*/
@java.lang.Override
public boolean hasByteFallback() {
  // Presence of byte_fallback is tracked by bit 0x08000000 of bitField0_.
  final int presenceBit = bitField0_ & 0x08000000;
  return presenceBit != 0;
}
/**
*
* Decomposes unknown pieces into UTF-8 bytes.
*
*
* optional bool byte_fallback = 35 [default = false];
* @return The byteFallback.
*/
@java.lang.Override
public boolean getByteFallback() {
  // Returns the stored value directly; no presence check is made here.
  return this.byteFallback_;
}
/**
*
* Decomposes unknown pieces into UTF-8 bytes.
*
*
* optional bool byte_fallback = 35 [default = false];
* @param value The byteFallback to set.
* @return This builder for chaining.
*/
public Builder setByteFallback(boolean value) {
  // Flag the field as present, store the value, then notify.
  bitField0_ = bitField0_ | 0x08000000;
  byteFallback_ = value;
  onChanged();
  return this;
}
/**
*
* Decomposes unknown pieces into UTF-8 bytes.
*
*
* optional bool byte_fallback = 35 [default = false];
* @return This builder for chaining.
*/
public Builder clearByteFallback() {
  // Restore the default (false) and drop the presence bit.
  byteFallback_ = false;
  bitField0_ &= ~0x08000000;
  onChanged();
  return this;
}
// Builder-side value of vocabulary_output_piece_score. Starts at true, the same
// value clearVocabularyOutputPieceScore() restores.
private boolean vocabularyOutputPieceScore_ = true;
/**
*
* When creating the vocabulary file, defines whether or not to additionally
* output the score for each piece.
*
*
* optional bool vocabulary_output_piece_score = 32 [default = true];
* @return Whether the vocabularyOutputPieceScore field is set.
*/
@java.lang.Override
public boolean hasVocabularyOutputPieceScore() {
  // Presence of vocabulary_output_piece_score is tracked by bit 0x10000000 of bitField0_.
  final int presenceBit = bitField0_ & 0x10000000;
  return presenceBit != 0;
}
/**
*
* When creating the vocabulary file, defines whether or not to additionally
* output the score for each piece.
*
*
* optional bool vocabulary_output_piece_score = 32 [default = true];
* @return The vocabularyOutputPieceScore.
*/
@java.lang.Override
public boolean getVocabularyOutputPieceScore() {
  // Returns the stored value directly; no presence check is made here.
  return this.vocabularyOutputPieceScore_;
}
/**
*
* When creating the vocabulary file, defines whether or not to additionally
* output the score for each piece.
*
*
* optional bool vocabulary_output_piece_score = 32 [default = true];
* @param value The vocabularyOutputPieceScore to set.
* @return This builder for chaining.
*/
public Builder setVocabularyOutputPieceScore(boolean value) {
  // Flag the field as present, store the value, then notify.
  bitField0_ = bitField0_ | 0x10000000;
  vocabularyOutputPieceScore_ = value;
  onChanged();
  return this;
}
/**
*
* When creating the vocabulary file, defines whether or not to additionally
* output the score for each piece.
*
*
* optional bool vocabulary_output_piece_score = 32 [default = true];
* @return This builder for chaining.
*/
public Builder clearVocabularyOutputPieceScore() {
  // Restore the default (true) and drop the presence bit.
  vocabularyOutputPieceScore_ = true;
  bitField0_ &= ~0x10000000;
  onChanged();
  return this;
}
// Builder-side value of hard_vocab_limit. Starts at true, the same value
// clearHardVocabLimit() restores.
private boolean hardVocabLimit_ = true;
/**
*
* `vocab_size` is treated as hard limit. Crash if
* the model can not produce the vocab of size `vocab_size`,
* When `hard_vocab_limit` is false, vocab_size is treated
* as soft limit. Note that when model_type=char,
* always assumes hard_vocab_limit = false.
*
*
* optional bool hard_vocab_limit = 33 [default = true];
* @return Whether the hardVocabLimit field is set.
*/
@java.lang.Override
public boolean hasHardVocabLimit() {
  // Presence of hard_vocab_limit is tracked by bit 0x20000000 of bitField0_.
  final int presenceBit = bitField0_ & 0x20000000;
  return presenceBit != 0;
}
/**
*
* `vocab_size` is treated as hard limit. Crash if
* the model can not produce the vocab of size `vocab_size`,
* When `hard_vocab_limit` is false, vocab_size is treated
* as soft limit. Note that when model_type=char,
* always assumes hard_vocab_limit = false.
*
*
* optional bool hard_vocab_limit = 33 [default = true];
* @return The hardVocabLimit.
*/
@java.lang.Override
public boolean getHardVocabLimit() {
  // Returns the stored value directly; no presence check is made here.
  return this.hardVocabLimit_;
}
/**
*
* `vocab_size` is treated as hard limit. Crash if
* the model can not produce the vocab of size `vocab_size`,
* When `hard_vocab_limit` is false, vocab_size is treated
* as soft limit. Note that when model_type=char,
* always assumes hard_vocab_limit = false.
*
*
* optional bool hard_vocab_limit = 33 [default = true];
* @param value The hardVocabLimit to set.
* @return This builder for chaining.
*/
public Builder setHardVocabLimit(boolean value) {
  // Flag the field as present, store the value, then notify.
  bitField0_ = bitField0_ | 0x20000000;
  hardVocabLimit_ = value;
  onChanged();
  return this;
}
/**
*
* `vocab_size` is treated as hard limit. Crash if
* the model can not produce the vocab of size `vocab_size`,
* When `hard_vocab_limit` is false, vocab_size is treated
* as soft limit. Note that when model_type=char,
* always assumes hard_vocab_limit = false.
*
*
* optional bool hard_vocab_limit = 33 [default = true];
* @return This builder for chaining.
*/
public Builder clearHardVocabLimit() {
  // Restore the default (true) and drop the presence bit.
  hardVocabLimit_ = true;
  bitField0_ &= ~0x20000000;
  onChanged();
  return this;
}
// Builder-side value of use_all_vocab; false until a setter changes it.
private boolean useAllVocab_ = false;
/**
*
* use all symbols for vocab extraction. This flag is valid
* if model type is either CHAR or WORD
*
*
* optional bool use_all_vocab = 34 [default = false];
* @return Whether the useAllVocab field is set.
*/
@java.lang.Override
public boolean hasUseAllVocab() {
  // Presence of use_all_vocab is tracked by bit 0x40000000 of bitField0_.
  final int presenceBit = bitField0_ & 0x40000000;
  return presenceBit != 0;
}
/**
*
* use all symbols for vocab extraction. This flag is valid
* if model type is either CHAR or WORD
*
*
* optional bool use_all_vocab = 34 [default = false];
* @return The useAllVocab.
*/
@java.lang.Override
public boolean getUseAllVocab() {
  // Returns the stored value directly; no presence check is made here.
  return this.useAllVocab_;
}
/**
*
* use all symbols for vocab extraction. This flag is valid
* if model type is either CHAR or WORD
*
*
* optional bool use_all_vocab = 34 [default = false];
* @param value The useAllVocab to set.
* @return This builder for chaining.
*/
public Builder setUseAllVocab(boolean value) {
  // Flag the field as present, store the value, then notify.
  bitField0_ = bitField0_ | 0x40000000;
  useAllVocab_ = value;
  onChanged();
  return this;
}
/**
*
* use all symbols for vocab extraction. This flag is valid
* if model type is either CHAR or WORD
*
*
* optional bool use_all_vocab = 34 [default = false];
* @return This builder for chaining.
*/
public Builder clearUseAllVocab() {
  // Restore the default (false) and drop the presence bit.
  useAllVocab_ = false;
  bitField0_ &= ~0x40000000;
  onChanged();
  return this;
}
// Builder-side value of unk_id; 0 until a setter changes it.
private int unkId_ = 0;
/**
*
* Reserved special meta tokens.
* * -1 is not used.
* * unk_id must not be -1.
* Ids must start with 0 and be contiguous.
*
*
* optional int32 unk_id = 40 [default = 0];
* @return Whether the unkId field is set.
*/
@java.lang.Override
public boolean hasUnkId() {
  // Presence of unk_id is tracked by the top bit (0x80000000) of bitField0_.
  final int presenceBit = bitField0_ & 0x80000000;
  return presenceBit != 0;
}
/**
*
* Reserved special meta tokens.
* * -1 is not used.
* * unk_id must not be -1.
* Ids must start with 0 and be contiguous.
*
*
* optional int32 unk_id = 40 [default = 0];
* @return The unkId.
*/
@java.lang.Override
public int getUnkId() {
  // Returns the stored value directly; no presence check is made here.
  return this.unkId_;
}
/**
*
* Reserved special meta tokens.
* * -1 is not used.
* * unk_id must not be -1.
* Ids must start with 0 and be contiguous.
*
*
* optional int32 unk_id = 40 [default = 0];
* @param value The unkId to set.
* @return This builder for chaining.
*/
public Builder setUnkId(int value) {
  // Flag the field as present, store the value, then notify.
  bitField0_ = bitField0_ | 0x80000000;
  unkId_ = value;
  onChanged();
  return this;
}
/**
*
* Reserved special meta tokens.
* * -1 is not used.
* * unk_id must not be -1.
* Ids must start with 0 and be contiguous.
*
*
* optional int32 unk_id = 40 [default = 0];
* @return This builder for chaining.
*/
public Builder clearUnkId() {
  // Restore the default (0) and drop the presence bit.
  unkId_ = 0;
  bitField0_ &= ~0x80000000;
  onChanged();
  return this;
}
// Builder-side value of bos_id. Starts at 1, the same value clearBosId() restores.
private int bosId_ = 1;
/**
*
* {@code <s>}
*
*
* optional int32 bos_id = 41 [default = 1];
* @return Whether the bosId field is set.
*/
@java.lang.Override
public boolean hasBosId() {
  // Presence of bos_id is tracked by bit 0x00000001 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000001;
  return presenceBit != 0;
}
/**
*
* {@code <s>}
*
*
* optional int32 bos_id = 41 [default = 1];
* @return The bosId.
*/
@java.lang.Override
public int getBosId() {
  // Returns the stored value directly; no presence check is made here.
  return this.bosId_;
}
/**
*
* {@code <s>}
*
*
* optional int32 bos_id = 41 [default = 1];
* @param value The bosId to set.
* @return This builder for chaining.
*/
public Builder setBosId(int value) {
  // Flag the field as present, store the value, then notify.
  bitField1_ = bitField1_ | 0x00000001;
  bosId_ = value;
  onChanged();
  return this;
}
/**
*
* {@code <s>}
*
*
* optional int32 bos_id = 41 [default = 1];
* @return This builder for chaining.
*/
public Builder clearBosId() {
  // Restore the default (1) and drop the presence bit.
  bosId_ = 1;
  bitField1_ &= ~0x00000001;
  onChanged();
  return this;
}
// Builder-side value of eos_id. Starts at 2, the same value clearEosId() restores.
private int eosId_ = 2;
/**
*
* {@code </s>}
*
*
* optional int32 eos_id = 42 [default = 2];
* @return Whether the eosId field is set.
*/
@java.lang.Override
public boolean hasEosId() {
  // Presence of eos_id is tracked by bit 0x00000002 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000002;
  return presenceBit != 0;
}
/**
*
* {@code </s>}
*
*
* optional int32 eos_id = 42 [default = 2];
* @return The eosId.
*/
@java.lang.Override
public int getEosId() {
  // Returns the stored value directly; no presence check is made here.
  return this.eosId_;
}
/**
*
* {@code </s>}
*
*
* optional int32 eos_id = 42 [default = 2];
* @param value The eosId to set.
* @return This builder for chaining.
*/
public Builder setEosId(int value) {
  // Flag the field as present, store the value, then notify.
  bitField1_ = bitField1_ | 0x00000002;
  eosId_ = value;
  onChanged();
  return this;
}
/**
*
* {@code </s>}
*
*
* optional int32 eos_id = 42 [default = 2];
* @return This builder for chaining.
*/
public Builder clearEosId() {
  // Restore the default (2) and drop the presence bit.
  eosId_ = 2;
  bitField1_ &= ~0x00000002;
  onChanged();
  return this;
}
// Builder-side value of pad_id. Starts at -1, the same value clearPadId() restores.
private int padId_ = -1;
/**
*
* {@code <pad>} (padding)
*
*
* optional int32 pad_id = 43 [default = -1];
* @return Whether the padId field is set.
*/
@java.lang.Override
public boolean hasPadId() {
  // Presence of pad_id is tracked by bit 0x00000004 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000004;
  return presenceBit != 0;
}
/**
*
* {@code <pad>} (padding)
*
*
* optional int32 pad_id = 43 [default = -1];
* @return The padId.
*/
@java.lang.Override
public int getPadId() {
  // Returns the stored value directly; no presence check is made here.
  return this.padId_;
}
/**
*
* {@code <pad>} (padding)
*
*
* optional int32 pad_id = 43 [default = -1];
* @param value The padId to set.
* @return This builder for chaining.
*/
public Builder setPadId(int value) {
  // Flag the field as present, store the value, then notify.
  bitField1_ = bitField1_ | 0x00000004;
  padId_ = value;
  onChanged();
  return this;
}
/**
*
* {@code <pad>} (padding)
*
*
* optional int32 pad_id = 43 [default = -1];
* @return This builder for chaining.
*/
public Builder clearPadId() {
  // Restore the default (-1) and drop the presence bit.
  padId_ = -1;
  bitField1_ &= ~0x00000004;
  onChanged();
  return this;
}
// Builder-side value of unk_piece, held as either a String or a ByteString.
// Initialized to the declared default ("<unk>") so an untouched builder reports
// the documented default rather than an empty string.
private java.lang.Object unkPiece_ = "<unk>";
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @return Whether the unkPiece field is set.
*/
public boolean hasUnkPiece() {
  // Presence of unk_piece is tracked by bit 0x00000008 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000008;
  return presenceBit != 0;
}
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @return The unkPiece.
*/
public java.lang.String getUnkPiece() {
  final java.lang.Object current = unkPiece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  // Held as bytes: decode, and cache the String only when the bytes are valid UTF-8.
  final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  if (raw.isValidUtf8()) {
    unkPiece_ = decoded;
  }
  return decoded;
}
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @return The bytes for unkPiece.
*/
public com.google.protobuf.ByteString getUnkPieceBytes() {
  final java.lang.Object current = unkPiece_;
  if (!(current instanceof String)) {
    // Already held as bytes; hand them back untouched.
    return (com.google.protobuf.ByteString) current;
  }
  // Held as a String: encode to UTF-8 once and keep the bytes for later calls.
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  unkPiece_ = encoded;
  return encoded;
}
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @param value The unkPiece to set.
* @return This builder for chaining.
*/
public Builder setUnkPiece(java.lang.String value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // Mark unk_piece as present (bit 0x00000008) and store the text.
  bitField1_ = bitField1_ | 0x00000008;
  unkPiece_ = value;
  onChanged();
  return this;
}
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @return This builder for chaining.
*/
public Builder clearUnkPiece() {
  // Drop the presence bit and fall back to the default instance's value.
  bitField1_ &= ~0x00000008;
  unkPiece_ = getDefaultInstance().getUnkPiece();
  onChanged();
  return this;
}
/**
* optional string unk_piece = 45 [default = "<unk>"];
* @param value The bytes for unkPiece to set.
* @return This builder for chaining.
*/
public Builder setUnkPieceBytes(com.google.protobuf.ByteString value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // The bytes are stored as given; this method does not check them for valid UTF-8.
  bitField1_ = bitField1_ | 0x00000008;
  unkPiece_ = value;
  onChanged();
  return this;
}
// Builder-side value of bos_piece, held as either a String or a ByteString.
// Initialized to the declared default ("<s>") so an untouched builder reports
// the documented default rather than an empty string.
private java.lang.Object bosPiece_ = "<s>";
/**
* optional string bos_piece = 46 [default = "<s>"];
* @return Whether the bosPiece field is set.
*/
public boolean hasBosPiece() {
  // Presence of bos_piece is tracked by bit 0x00000010 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000010;
  return presenceBit != 0;
}
/**
* optional string bos_piece = 46 [default = "<s>"];
* @return The bosPiece.
*/
public java.lang.String getBosPiece() {
  final java.lang.Object current = bosPiece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  // Held as bytes: decode, and cache the String only when the bytes are valid UTF-8.
  final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  if (raw.isValidUtf8()) {
    bosPiece_ = decoded;
  }
  return decoded;
}
/**
* optional string bos_piece = 46 [default = "<s>"];
* @return The bytes for bosPiece.
*/
public com.google.protobuf.ByteString getBosPieceBytes() {
  final java.lang.Object current = bosPiece_;
  if (!(current instanceof String)) {
    // Already held as bytes; hand them back untouched.
    return (com.google.protobuf.ByteString) current;
  }
  // Held as a String: encode to UTF-8 once and keep the bytes for later calls.
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  bosPiece_ = encoded;
  return encoded;
}
/**
* optional string bos_piece = 46 [default = "<s>"];
* @param value The bosPiece to set.
* @return This builder for chaining.
*/
public Builder setBosPiece(java.lang.String value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // Mark bos_piece as present (bit 0x00000010) and store the text.
  bitField1_ = bitField1_ | 0x00000010;
  bosPiece_ = value;
  onChanged();
  return this;
}
/**
* optional string bos_piece = 46 [default = "<s>"];
* @return This builder for chaining.
*/
public Builder clearBosPiece() {
  // Drop the presence bit and fall back to the default instance's value.
  bitField1_ &= ~0x00000010;
  bosPiece_ = getDefaultInstance().getBosPiece();
  onChanged();
  return this;
}
/**
* optional string bos_piece = 46 [default = "<s>"];
* @param value The bytes for bosPiece to set.
* @return This builder for chaining.
*/
public Builder setBosPieceBytes(com.google.protobuf.ByteString value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // The bytes are stored as given; this method does not check them for valid UTF-8.
  bitField1_ = bitField1_ | 0x00000010;
  bosPiece_ = value;
  onChanged();
  return this;
}
// Builder-side value of eos_piece, held as either a String or a ByteString.
// Initialized to the declared default ("</s>") so an untouched builder reports
// the documented default rather than an empty string.
private java.lang.Object eosPiece_ = "</s>";
/**
* optional string eos_piece = 47 [default = "</s>"];
* @return Whether the eosPiece field is set.
*/
public boolean hasEosPiece() {
  // Presence of eos_piece is tracked by bit 0x00000020 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000020;
  return presenceBit != 0;
}
/**
* optional string eos_piece = 47 [default = "</s>"];
* @return The eosPiece.
*/
public java.lang.String getEosPiece() {
  final java.lang.Object current = eosPiece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  // Held as bytes: decode, and cache the String only when the bytes are valid UTF-8.
  final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  if (raw.isValidUtf8()) {
    eosPiece_ = decoded;
  }
  return decoded;
}
/**
* optional string eos_piece = 47 [default = "</s>"];
* @return The bytes for eosPiece.
*/
public com.google.protobuf.ByteString getEosPieceBytes() {
  final java.lang.Object current = eosPiece_;
  if (!(current instanceof String)) {
    // Already held as bytes; hand them back untouched.
    return (com.google.protobuf.ByteString) current;
  }
  // Held as a String: encode to UTF-8 once and keep the bytes for later calls.
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  eosPiece_ = encoded;
  return encoded;
}
/**
* optional string eos_piece = 47 [default = "</s>"];
* @param value The eosPiece to set.
* @return This builder for chaining.
*/
public Builder setEosPiece(java.lang.String value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // Mark eos_piece as present (bit 0x00000020) and store the text.
  bitField1_ = bitField1_ | 0x00000020;
  eosPiece_ = value;
  onChanged();
  return this;
}
/**
* optional string eos_piece = 47 [default = "</s>"];
* @return This builder for chaining.
*/
public Builder clearEosPiece() {
  // Drop the presence bit and fall back to the default instance's value.
  bitField1_ &= ~0x00000020;
  eosPiece_ = getDefaultInstance().getEosPiece();
  onChanged();
  return this;
}
/**
* optional string eos_piece = 47 [default = "</s>"];
* @param value The bytes for eosPiece to set.
* @return This builder for chaining.
*/
public Builder setEosPieceBytes(com.google.protobuf.ByteString value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // The bytes are stored as given; this method does not check them for valid UTF-8.
  bitField1_ = bitField1_ | 0x00000020;
  eosPiece_ = value;
  onChanged();
  return this;
}
// Builder-side value of pad_piece, held as either a String or a ByteString.
// Initialized to the declared default ("<pad>") so an untouched builder reports
// the documented default rather than an empty string.
private java.lang.Object padPiece_ = "<pad>";
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @return Whether the padPiece field is set.
*/
public boolean hasPadPiece() {
  // Presence of pad_piece is tracked by bit 0x00000040 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000040;
  return presenceBit != 0;
}
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @return The padPiece.
*/
public java.lang.String getPadPiece() {
  final java.lang.Object current = padPiece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  // Held as bytes: decode, and cache the String only when the bytes are valid UTF-8.
  final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  if (raw.isValidUtf8()) {
    padPiece_ = decoded;
  }
  return decoded;
}
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @return The bytes for padPiece.
*/
public com.google.protobuf.ByteString getPadPieceBytes() {
  final java.lang.Object current = padPiece_;
  if (!(current instanceof String)) {
    // Already held as bytes; hand them back untouched.
    return (com.google.protobuf.ByteString) current;
  }
  // Held as a String: encode to UTF-8 once and keep the bytes for later calls.
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  padPiece_ = encoded;
  return encoded;
}
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @param value The padPiece to set.
* @return This builder for chaining.
*/
public Builder setPadPiece(java.lang.String value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // Mark pad_piece as present (bit 0x00000040) and store the text.
  bitField1_ = bitField1_ | 0x00000040;
  padPiece_ = value;
  onChanged();
  return this;
}
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @return This builder for chaining.
*/
public Builder clearPadPiece() {
  // Drop the presence bit and fall back to the default instance's value.
  bitField1_ &= ~0x00000040;
  padPiece_ = getDefaultInstance().getPadPiece();
  onChanged();
  return this;
}
/**
* optional string pad_piece = 48 [default = "<pad>"];
* @param value The bytes for padPiece to set.
* @return This builder for chaining.
*/
public Builder setPadPieceBytes(com.google.protobuf.ByteString value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // The bytes are stored as given; this method does not check them for valid UTF-8.
  bitField1_ = bitField1_ | 0x00000040;
  padPiece_ = value;
  onChanged();
  return this;
}
// Builder-side value of unk_surface, held as either a String or a ByteString.
// The initial value comes from Internal.stringDefaultValue applied to the
// octal-escaped literal below; per the field's Javadoc this is U+2047 with a
// space on each side. NOTE(review): presumably the helper decodes the escaped
// bytes as UTF-8; confirm against the protobuf runtime.
private java.lang.Object unkSurface_ = com.google.protobuf.Internal.stringDefaultValue(" \342\201\207 ");
/**
*
* Encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that {@code <unk>} is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @return Whether the unkSurface field is set.
*/
public boolean hasUnkSurface() {
  // Presence of unk_surface is tracked by bit 0x00000080 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000080;
  return presenceBit != 0;
}
/**
*
* Encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that {@code <unk>} is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @return The unkSurface.
*/
public java.lang.String getUnkSurface() {
  final java.lang.Object current = unkSurface_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  // Held as bytes: decode, and cache the String only when the bytes are valid UTF-8.
  final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  if (raw.isValidUtf8()) {
    unkSurface_ = decoded;
  }
  return decoded;
}
/**
*
* Encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that {@code <unk>} is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @return The bytes for unkSurface.
*/
public com.google.protobuf.ByteString getUnkSurfaceBytes() {
  final java.lang.Object current = unkSurface_;
  if (!(current instanceof String)) {
    // Already held as bytes; hand them back untouched.
    return (com.google.protobuf.ByteString) current;
  }
  // Held as a String: encode to UTF-8 once and keep the bytes for later calls.
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  unkSurface_ = encoded;
  return encoded;
}
/**
*
* Encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that {@code <unk>} is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @param value The unkSurface to set.
* @return This builder for chaining.
*/
public Builder setUnkSurface(java.lang.String value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // Mark unk_surface as present (bit 0x00000080) and store the text.
  bitField1_ = bitField1_ | 0x00000080;
  unkSurface_ = value;
  onChanged();
  return this;
}
/**
*
* Encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that {@code <unk>} is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @return This builder for chaining.
*/
public Builder clearUnkSurface() {
  // Drop the presence bit and fall back to the default instance's value.
  bitField1_ &= ~0x00000080;
  unkSurface_ = getDefaultInstance().getUnkSurface();
  onChanged();
  return this;
}
/**
*
* Encodes {@code <unk>} into U+2047 (DOUBLE QUESTION MARK),
* since this character can be useful both for user and
* developer. We can easily figure out that {@code <unk>} is emitted.
*
*
* optional string unk_surface = 44 [default = " \342\201\207 "];
* @param value The bytes for unkSurface to set.
* @return This builder for chaining.
*/
public Builder setUnkSurfaceBytes(com.google.protobuf.ByteString value) {
  // Null is rejected before any builder state is touched.
  if (value == null) {
    throw new NullPointerException();
  }
  // The bytes are stored as given; this method does not check them for valid UTF-8.
  bitField1_ = bitField1_ | 0x00000080;
  unkSurface_ = value;
  onChanged();
  return this;
}
// Builder-side value of train_extremely_large_corpus; false until a setter changes it.
private boolean trainExtremelyLargeCorpus_ = false;
/**
*
* Increase bit depth to allow unigram model training on large
* (>10M sentences) corpora. A Side-effect of enabling this flag
* is increased memory usage.
*
*
* optional bool train_extremely_large_corpus = 49 [default = false];
* @return Whether the trainExtremelyLargeCorpus field is set.
*/
@java.lang.Override
public boolean hasTrainExtremelyLargeCorpus() {
  // Presence of train_extremely_large_corpus is tracked by bit 0x00000100 of bitField1_.
  final int presenceBit = bitField1_ & 0x00000100;
  return presenceBit != 0;
}
/**
*
* Increase bit depth to allow unigram model training on large
* (>10M sentences) corpora. A Side-effect of enabling this flag
* is increased memory usage.
*
*
* optional bool train_extremely_large_corpus = 49 [default = false];
* @return The trainExtremelyLargeCorpus.
*/
@java.lang.Override
public boolean getTrainExtremelyLargeCorpus() {
  // Returns the stored value directly; no presence check is made here.
  return this.trainExtremelyLargeCorpus_;
}
/**
*
* Increase bit depth to allow unigram model training on large
* (>10M sentences) corpora. A Side-effect of enabling this flag
* is increased memory usage.
*
*
* optional bool train_extremely_large_corpus = 49 [default = false];
* @param value The trainExtremelyLargeCorpus to set.
* @return This builder for chaining.
*/
public Builder setTrainExtremelyLargeCorpus(boolean value) {
  // Flag the field as present, store the value, then notify.
  bitField1_ = bitField1_ | 0x00000100;
  trainExtremelyLargeCorpus_ = value;
  onChanged();
  return this;
}
/**
*
* Increase bit depth to allow unigram model training on large
* (>10M sentences) corpora. A Side-effect of enabling this flag
* is increased memory usage.
*
*
* optional bool train_extremely_large_corpus = 49 [default = false];
* @return This builder for chaining.
*/
public Builder clearTrainExtremelyLargeCorpus() {
  // Restore the default (false) and drop the presence bit.
  trainExtremelyLargeCorpus_ = false;
  bitField1_ &= ~0x00000100;
  onChanged();
  return this;
}
@java.lang.Override
public final Builder setUnknownFields(
    final com.google.protobuf.UnknownFieldSet unknownFields) {
  // Delegates to the superclass implementation unchanged.
  final Builder self = super.setUnknownFields(unknownFields);
  return self;
}
@java.lang.Override
public final Builder mergeUnknownFields(
    final com.google.protobuf.UnknownFieldSet unknownFields) {
  // Delegates to the superclass implementation unchanged.
  final Builder self = super.mergeUnknownFields(unknownFields);
  return self;
}
// @@protoc_insertion_point(builder_scope:sentencepiece.TrainerSpec)
}
// @@protoc_insertion_point(class_scope:sentencepiece.TrainerSpec)
private static final sentencepiece.SentencepieceModel.TrainerSpec DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.TrainerSpec();
}
public static sentencepiece.SentencepieceModel.TrainerSpec getDefaultInstance() {
  // Always the same object: the class-level DEFAULT_INSTANCE.
  final sentencepiece.SentencepieceModel.TrainerSpec shared = DEFAULT_INSTANCE;
  return shared;
}
// Parser for TrainerSpec, typed with its message class so callers get a
// TrainerSpec back without casting. Deprecated in favor of parser().
@java.lang.Deprecated public static final com.google.protobuf.Parser<TrainerSpec>
    PARSER = new com.google.protobuf.AbstractParser<TrainerSpec>() {
  /**
   * Parses a message from {@code input} into a fresh builder and returns
   * {@code buildPartial()} of it.
   *
   * @throws com.google.protobuf.InvalidProtocolBufferException on any parse failure;
   *     the exception carries whatever was parsed so far as its unfinished message.
   */
  @java.lang.Override
  public TrainerSpec parsePartialFrom(
      com.google.protobuf.CodedInputStream input,
      com.google.protobuf.ExtensionRegistryLite extensionRegistry)
      throws com.google.protobuf.InvalidProtocolBufferException {
    Builder builder = newBuilder();
    try {
      builder.mergeFrom(input, extensionRegistry);
    } catch (com.google.protobuf.InvalidProtocolBufferException e) {
      // Already the right type: just attach the partial message.
      throw e.setUnfinishedMessage(builder.buildPartial());
    } catch (com.google.protobuf.UninitializedMessageException e) {
      throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
    } catch (java.io.IOException e) {
      // Wrap other I/O failures, keeping the original exception as the cause.
      throw new com.google.protobuf.InvalidProtocolBufferException(e)
          .setUnfinishedMessage(builder.buildPartial());
    }
    return builder.buildPartial();
  }
};
// Returns the class-level parser, typed so callers receive TrainerSpec
// results without casting.
public static com.google.protobuf.Parser<TrainerSpec> parser() {
  return PARSER;
}
// Instance-level access to the same parser that the static parser() returns.
@java.lang.Override
public com.google.protobuf.Parser<TrainerSpec> getParserForType() {
  return PARSER;
}
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpec getDefaultInstanceForType() {
  // Same object that the static getDefaultInstance() returns.
  final sentencepiece.SentencepieceModel.TrainerSpec shared = DEFAULT_INSTANCE;
  return shared;
}
}
public interface NormalizerSpecOrBuilder extends
// @@protoc_insertion_point(interface_extends:sentencepiece.NormalizerSpec)
com.google.protobuf.GeneratedMessageV3.
ExtendableMessageOrBuilder {
/**
*
* name of normalization rule.
*
*
* optional string name = 1;
* @return Whether the name field is set.
*/
boolean hasName();
/**
*
* name of normalization rule.
*
*
* optional string name = 1;
* @return The name.
*/
java.lang.String getName();
/**
*
* name of normalization rule.
*
*
* optional string name = 1;
* @return The bytes for name.
*/
com.google.protobuf.ByteString
getNameBytes();
/**
*
* Pre-compiled normalization rule created by
* Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
* Usually this field is set by Builder::GetNormalizerSpec() method.
*
*
* optional bytes precompiled_charsmap = 2;
* @return Whether the precompiledCharsmap field is set.
*/
boolean hasPrecompiledCharsmap();
/**
*
* Pre-compiled normalization rule created by
* Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
* Usually this field is set by Builder::GetNormalizerSpec() method.
*
*
* optional bytes precompiled_charsmap = 2;
* @return The precompiledCharsmap.
*/
com.google.protobuf.ByteString getPrecompiledCharsmap();
/**
*
* Adds dummy whitespace at the beginning of text in order to
* treat "world" in "world" and "hello world" in the same way.
*
*
* optional bool add_dummy_prefix = 3 [default = true];
* @return Whether the addDummyPrefix field is set.
*/
boolean hasAddDummyPrefix();
/**
*
* Adds dummy whitespace at the beginning of text in order to
* treat "world" in "world" and "hello world" in the same way.
*
*
* optional bool add_dummy_prefix = 3 [default = true];
* @return The addDummyPrefix.
*/
boolean getAddDummyPrefix();
/**
*
* Removes leading, trailing, and duplicate internal whitespace.
*
*
* optional bool remove_extra_whitespaces = 4 [default = true];
* @return Whether the removeExtraWhitespaces field is set.
*/
boolean hasRemoveExtraWhitespaces();
/**
*
* Removes leading, trailing, and duplicate internal whitespace.
*
*
* optional bool remove_extra_whitespaces = 4 [default = true];
* @return The removeExtraWhitespaces.
*/
boolean getRemoveExtraWhitespaces();
/**
*
* Replaces whitespace with meta symbol.
* This field must be true to train sentence piece model.
*
*
* optional bool escape_whitespaces = 5 [default = true];
* @return Whether the escapeWhitespaces field is set.
*/
boolean hasEscapeWhitespaces();
/**
*
* Replaces whitespace with meta symbol.
* This field must be true to train sentence piece model.
*
*
* optional bool escape_whitespaces = 5 [default = true];
* @return The escapeWhitespaces.
*/
boolean getEscapeWhitespaces();
/**
*
* Custom normalization rule file in TSV format.
* https://github.com/google/sentencepiece/blob/master/doc/normalization.md
* This field is only used in SentencePieceTrainer::Train() method, which
* compiles the rule into the binary rule stored in `precompiled_charsmap`.
*
*
* optional string normalization_rule_tsv = 6;
* @return Whether the normalizationRuleTsv field is set.
*/
boolean hasNormalizationRuleTsv();
/**
*
* Custom normalization rule file in TSV format.
* https://github.com/google/sentencepiece/blob/master/doc/normalization.md
* This field is only used in SentencePieceTrainer::Train() method, which
* compiles the rule into the binary rule stored in `precompiled_charsmap`.
*
*
* optional string normalization_rule_tsv = 6;
* @return The normalizationRuleTsv.
*/
java.lang.String getNormalizationRuleTsv();
/**
*
* Custom normalization rule file in TSV format.
* https://github.com/google/sentencepiece/blob/master/doc/normalization.md
* This field is only used in SentencePieceTrainer::Train() method, which
* compiles the rule into the binary rule stored in `precompiled_charsmap`.
*
*
* optional string normalization_rule_tsv = 6;
* @return The bytes for normalizationRuleTsv.
*/
com.google.protobuf.ByteString
getNormalizationRuleTsvBytes();
}
/**
*
* NormalizerSpec encodes various parameters for string normalization.
*
*
* Protobuf type {@code sentencepiece.NormalizerSpec}
*/
public static final class NormalizerSpec extends
com.google.protobuf.GeneratedMessageV3.ExtendableMessage<
NormalizerSpec> implements
// @@protoc_insertion_point(message_implements:sentencepiece.NormalizerSpec)
NormalizerSpecOrBuilder {
private static final long serialVersionUID = 0L;
// Use NormalizerSpec.newBuilder() to construct.
private NormalizerSpec(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder builder) {
super(builder);
}
private NormalizerSpec() {
name_ = "";
precompiledCharsmap_ = com.google.protobuf.ByteString.EMPTY;
addDummyPrefix_ = true;
removeExtraWhitespaces_ = true;
escapeWhitespaces_ = true;
normalizationRuleTsv_ = "";
}
@java.lang.Override
@SuppressWarnings({"unused"})
protected java.lang.Object newInstance(
UnusedPrivateParameter unused) {
return new NormalizerSpec();
}
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internalGetFieldAccessorTable() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_fieldAccessorTable
.ensureFieldAccessorsInitialized(
sentencepiece.SentencepieceModel.NormalizerSpec.class, sentencepiece.SentencepieceModel.NormalizerSpec.Builder.class);
}
private int bitField0_;
public static final int NAME_FIELD_NUMBER = 1;
// Holds either a java.lang.String or a ByteString; the accessors below convert on demand.
@SuppressWarnings("serial")
private volatile java.lang.Object name_ = "";
/**
 * Presence check for {@code name}, the name of the normalization rule.
 *
 * {@code optional string name = 1;}
 * @return Whether the name field is set.
 */
@java.lang.Override
public boolean hasName() {
  final int presenceBit = bitField0_ & 0x00000001;
  return presenceBit != 0;
}
/**
 * Returns {@code name}, the name of the normalization rule, as a string.
 *
 * {@code optional string name = 1;}
 * @return The name.
 */
@java.lang.Override
public java.lang.String getName() {
  final java.lang.Object current = name_;
  if (!(current instanceof java.lang.String)) {
    final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
    final java.lang.String decoded = raw.toStringUtf8();
    // Keep the decoded string only when the stored bytes are valid UTF-8.
    if (raw.isValidUtf8()) {
      name_ = decoded;
    }
    return decoded;
  }
  return (java.lang.String) current;
}
/**
 * Returns {@code name}, the name of the normalization rule, as UTF-8 bytes.
 *
 * {@code optional string name = 1;}
 * @return The bytes for name.
 */
@java.lang.Override
public com.google.protobuf.ByteString getNameBytes() {
  final java.lang.Object current = name_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  // Remember the encoded form so the next call returns it directly.
  name_ = encoded;
  return encoded;
}
public static final int PRECOMPILED_CHARSMAP_FIELD_NUMBER = 2;
private com.google.protobuf.ByteString precompiledCharsmap_ = com.google.protobuf.ByteString.EMPTY;
/**
 * Presence check for {@code precompiled_charsmap}: the pre-compiled normalization rule
 * created by Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap(), usually
 * set by Builder::GetNormalizerSpec().
 *
 * {@code optional bytes precompiled_charsmap = 2;}
 * @return Whether the precompiledCharsmap field is set.
 */
@java.lang.Override
public boolean hasPrecompiledCharsmap() {
  final int presenceBit = bitField0_ & 0x00000002;
  return presenceBit != 0;
}
/**
 * Returns {@code precompiled_charsmap}: the pre-compiled normalization rule created by
 * Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap(), usually set by
 * Builder::GetNormalizerSpec().
 *
 * {@code optional bytes precompiled_charsmap = 2;}
 * @return The precompiledCharsmap.
 */
@java.lang.Override
public com.google.protobuf.ByteString getPrecompiledCharsmap() {
  return this.precompiledCharsmap_;
}
public static final int ADD_DUMMY_PREFIX_FIELD_NUMBER = 3;
private boolean addDummyPrefix_ = true;
/**
 * Presence check for {@code add_dummy_prefix}, which adds dummy whitespace at the
 * beginning of text in order to treat "world" in "world" and "hello world" the same way.
 *
 * {@code optional bool add_dummy_prefix = 3 [default = true];}
 * @return Whether the addDummyPrefix field is set.
 */
@java.lang.Override
public boolean hasAddDummyPrefix() {
  final int presenceBit = bitField0_ & 0x00000004;
  return presenceBit != 0;
}
/**
 * Returns {@code add_dummy_prefix}, which adds dummy whitespace at the beginning of
 * text in order to treat "world" in "world" and "hello world" the same way.
 *
 * {@code optional bool add_dummy_prefix = 3 [default = true];}
 * @return The addDummyPrefix.
 */
@java.lang.Override
public boolean getAddDummyPrefix() {
  return this.addDummyPrefix_;
}
public static final int REMOVE_EXTRA_WHITESPACES_FIELD_NUMBER = 4;
private boolean removeExtraWhitespaces_ = true;
/**
 * Presence check for {@code remove_extra_whitespaces}, which removes leading, trailing,
 * and duplicate internal whitespace.
 *
 * {@code optional bool remove_extra_whitespaces = 4 [default = true];}
 * @return Whether the removeExtraWhitespaces field is set.
 */
@java.lang.Override
public boolean hasRemoveExtraWhitespaces() {
  final int presenceBit = bitField0_ & 0x00000008;
  return presenceBit != 0;
}
/**
 * Returns {@code remove_extra_whitespaces}, which removes leading, trailing, and
 * duplicate internal whitespace.
 *
 * {@code optional bool remove_extra_whitespaces = 4 [default = true];}
 * @return The removeExtraWhitespaces.
 */
@java.lang.Override
public boolean getRemoveExtraWhitespaces() {
  return this.removeExtraWhitespaces_;
}
public static final int ESCAPE_WHITESPACES_FIELD_NUMBER = 5;
private boolean escapeWhitespaces_ = true;
/**
 * Presence check for {@code escape_whitespaces}, which replaces whitespace with a meta
 * symbol. This field must be true to train a sentence piece model.
 *
 * {@code optional bool escape_whitespaces = 5 [default = true];}
 * @return Whether the escapeWhitespaces field is set.
 */
@java.lang.Override
public boolean hasEscapeWhitespaces() {
  final int presenceBit = bitField0_ & 0x00000010;
  return presenceBit != 0;
}
/**
 * Returns {@code escape_whitespaces}, which replaces whitespace with a meta symbol.
 * This field must be true to train a sentence piece model.
 *
 * {@code optional bool escape_whitespaces = 5 [default = true];}
 * @return The escapeWhitespaces.
 */
@java.lang.Override
public boolean getEscapeWhitespaces() {
  return this.escapeWhitespaces_;
}
public static final int NORMALIZATION_RULE_TSV_FIELD_NUMBER = 6;
// Holds either a java.lang.String or a ByteString; the accessors below convert on demand.
@SuppressWarnings("serial")
private volatile java.lang.Object normalizationRuleTsv_ = "";
/**
 * Presence check for {@code normalization_rule_tsv}, the custom normalization rule file
 * in TSV format.
 * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
 * The field is only used in SentencePieceTrainer::Train(), which compiles the rule into
 * the binary rule stored in `precompiled_charsmap`.
 *
 * {@code optional string normalization_rule_tsv = 6;}
 * @return Whether the normalizationRuleTsv field is set.
 */
@java.lang.Override
public boolean hasNormalizationRuleTsv() {
  final int presenceBit = bitField0_ & 0x00000020;
  return presenceBit != 0;
}
/**
 * Returns {@code normalization_rule_tsv}, the custom normalization rule file in TSV
 * format, as a string.
 * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
 * The field is only used in SentencePieceTrainer::Train(), which compiles the rule into
 * the binary rule stored in `precompiled_charsmap`.
 *
 * {@code optional string normalization_rule_tsv = 6;}
 * @return The normalizationRuleTsv.
 */
@java.lang.Override
public java.lang.String getNormalizationRuleTsv() {
  final java.lang.Object current = normalizationRuleTsv_;
  if (!(current instanceof java.lang.String)) {
    final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
    final java.lang.String decoded = raw.toStringUtf8();
    // Keep the decoded string only when the stored bytes are valid UTF-8.
    if (raw.isValidUtf8()) {
      normalizationRuleTsv_ = decoded;
    }
    return decoded;
  }
  return (java.lang.String) current;
}
/**
 * Returns {@code normalization_rule_tsv}, the custom normalization rule file in TSV
 * format, as UTF-8 bytes.
 * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
 * The field is only used in SentencePieceTrainer::Train(), which compiles the rule into
 * the binary rule stored in `precompiled_charsmap`.
 *
 * {@code optional string normalization_rule_tsv = 6;}
 * @return The bytes for normalizationRuleTsv.
 */
@java.lang.Override
public com.google.protobuf.ByteString getNormalizationRuleTsvBytes() {
  final java.lang.Object current = normalizationRuleTsv_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  // Remember the encoded form so the next call returns it directly.
  normalizationRuleTsv_ = encoded;
  return encoded;
}
// Cached answer for isInitialized(): -1 = not computed yet, 0 = false, 1 = true.
private byte memoizedIsInitialized = -1;
/**
 * Reports whether this message is initialized, which here depends only on its
 * extensions. The answer is computed once and then served from the cached byte.
 *
 * @return {@code true} if the extensions are initialized
 */
@java.lang.Override
public final boolean isInitialized() {
  final byte cached = memoizedIsInitialized;
  if (cached == 1) {
    return true;
  }
  if (cached == 0) {
    return false;
  }
  final boolean extensionsOk = extensionsAreInitialized();
  memoizedIsInitialized = (byte) (extensionsOk ? 1 : 0);
  return extensionsOk;
}
/**
 * Writes this message to {@code output}: each of fields 1 through 6 whose presence bit
 * is set, then the extension fields, then the unknown fields.
 *
 * @param output the stream to write to
 * @throws java.io.IOException if a write on {@code output} fails
 */
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
    throws java.io.IOException {
  // The writer type is parameterized with this message type instead of being raw.
  com.google.protobuf.GeneratedMessageV3
      .ExtendableMessage<sentencepiece.SentencepieceModel.NormalizerSpec>.ExtensionWriter
      extensionWriter = newExtensionWriter();
  if (((bitField0_ & 0x00000001) != 0)) {
    com.google.protobuf.GeneratedMessageV3.writeString(output, 1, name_);
  }
  if (((bitField0_ & 0x00000002) != 0)) {
    output.writeBytes(2, precompiledCharsmap_);
  }
  if (((bitField0_ & 0x00000004) != 0)) {
    output.writeBool(3, addDummyPrefix_);
  }
  if (((bitField0_ & 0x00000008) != 0)) {
    output.writeBool(4, removeExtraWhitespaces_);
  }
  if (((bitField0_ & 0x00000010) != 0)) {
    output.writeBool(5, escapeWhitespaces_);
  }
  if (((bitField0_ & 0x00000020) != 0)) {
    com.google.protobuf.GeneratedMessageV3.writeString(output, 6, normalizationRuleTsv_);
  }
  // 536870912 is 2^29, the bound handed to the extension writer.
  extensionWriter.writeUntil(536870912, output);
  getUnknownFields().writeTo(output);
}
/**
 * Computes the serialized size of this message: the sizes of the fields that are set,
 * plus the extensions and the unknown fields. The result is cached in
 * {@code memoizedSize}, and a cached value other than -1 is returned as is.
 *
 * @return the serialized size
 */
@java.lang.Override
public int getSerializedSize() {
  final int cached = memoizedSize;
  if (cached != -1) {
    return cached;
  }
  int total = 0;
  if ((bitField0_ & 0x00000001) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, name_);
  }
  if ((bitField0_ & 0x00000002) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBytesSize(2, precompiledCharsmap_);
  }
  if ((bitField0_ & 0x00000004) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(3, addDummyPrefix_);
  }
  if ((bitField0_ & 0x00000008) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(4, removeExtraWhitespaces_);
  }
  if ((bitField0_ & 0x00000010) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeBoolSize(5, escapeWhitespaces_);
  }
  if ((bitField0_ & 0x00000020) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(6, normalizationRuleTsv_);
  }
  total += extensionsSerializedSize();
  total += getUnknownFields().getSerializedSize();
  memoizedSize = total;
  return total;
}
/**
 * Compares this message with {@code obj}. Two {@code NormalizerSpec} messages are equal
 * when every field has the same presence and, where present, the same value, and their
 * unknown fields and extension fields are equal. Any other kind of object is handed to
 * {@code super.equals}.
 *
 * @param obj the object to compare with
 * @return {@code true} if the two are equal
 */
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof sentencepiece.SentencepieceModel.NormalizerSpec)) {
    return super.equals(obj);
  }
  final sentencepiece.SentencepieceModel.NormalizerSpec that =
      (sentencepiece.SentencepieceModel.NormalizerSpec) obj;

  // For each field: presence must match, and a present value must match too.
  if (hasName() != that.hasName()) {
    return false;
  }
  if (hasName() && !getName().equals(that.getName())) {
    return false;
  }
  if (hasPrecompiledCharsmap() != that.hasPrecompiledCharsmap()) {
    return false;
  }
  if (hasPrecompiledCharsmap()
      && !getPrecompiledCharsmap().equals(that.getPrecompiledCharsmap())) {
    return false;
  }
  if (hasAddDummyPrefix() != that.hasAddDummyPrefix()) {
    return false;
  }
  if (hasAddDummyPrefix() && getAddDummyPrefix() != that.getAddDummyPrefix()) {
    return false;
  }
  if (hasRemoveExtraWhitespaces() != that.hasRemoveExtraWhitespaces()) {
    return false;
  }
  if (hasRemoveExtraWhitespaces()
      && getRemoveExtraWhitespaces() != that.getRemoveExtraWhitespaces()) {
    return false;
  }
  if (hasEscapeWhitespaces() != that.hasEscapeWhitespaces()) {
    return false;
  }
  if (hasEscapeWhitespaces() && getEscapeWhitespaces() != that.getEscapeWhitespaces()) {
    return false;
  }
  if (hasNormalizationRuleTsv() != that.hasNormalizationRuleTsv()) {
    return false;
  }
  if (hasNormalizationRuleTsv()
      && !getNormalizationRuleTsv().equals(that.getNormalizationRuleTsv())) {
    return false;
  }
  return getUnknownFields().equals(that.getUnknownFields())
      && getExtensionFields().equals(that.getExtensionFields());
}
/**
 * Computes a hash from the descriptor, every field that is set (its field number and its
 * value), the extension fields and the unknown fields. The result is stored in
 * {@code memoizedHashCode}, and a stored non-zero value is returned without recomputing.
 *
 * @return the hash code
 */
@java.lang.Override
public int hashCode() {
  if (memoizedHashCode != 0) {
    return memoizedHashCode;
  }
  int h = 41;
  h = (19 * h) + getDescriptor().hashCode();
  if (hasName()) {
    h = (37 * h) + NAME_FIELD_NUMBER;
    h = (53 * h) + getName().hashCode();
  }
  if (hasPrecompiledCharsmap()) {
    h = (37 * h) + PRECOMPILED_CHARSMAP_FIELD_NUMBER;
    h = (53 * h) + getPrecompiledCharsmap().hashCode();
  }
  if (hasAddDummyPrefix()) {
    h = (37 * h) + ADD_DUMMY_PREFIX_FIELD_NUMBER;
    h = (53 * h) + com.google.protobuf.Internal.hashBoolean(getAddDummyPrefix());
  }
  if (hasRemoveExtraWhitespaces()) {
    h = (37 * h) + REMOVE_EXTRA_WHITESPACES_FIELD_NUMBER;
    h = (53 * h) + com.google.protobuf.Internal.hashBoolean(getRemoveExtraWhitespaces());
  }
  if (hasEscapeWhitespaces()) {
    h = (37 * h) + ESCAPE_WHITESPACES_FIELD_NUMBER;
    h = (53 * h) + com.google.protobuf.Internal.hashBoolean(getEscapeWhitespaces());
  }
  if (hasNormalizationRuleTsv()) {
    h = (37 * h) + NORMALIZATION_RULE_TSV_FIELD_NUMBER;
    h = (53 * h) + getNormalizationRuleTsv().hashCode();
  }
  h = hashFields(h, getExtensionFields());
  h = (29 * h) + getUnknownFields().hashCode();
  memoizedHashCode = h;
  return h;
}
/** Parses a message from a {@code ByteBuffer} via {@code PARSER}. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(java.nio.ByteBuffer data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data);
}
/** Parses a message from a {@code ByteBuffer} via {@code PARSER}, using the given extension registry. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(
    java.nio.ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data, extensionRegistry);
}
/** Parses a message from a {@code ByteString} via {@code PARSER}. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(com.google.protobuf.ByteString data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data);
}
/** Parses a message from a {@code ByteString} via {@code PARSER}, using the given extension registry. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(
    com.google.protobuf.ByteString data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data, extensionRegistry);
}
/** Parses a message from a byte array via {@code PARSER}. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(byte[] data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data);
}
/** Parses a message from a byte array via {@code PARSER}, using the given extension registry. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(
    byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data, extensionRegistry);
}
/** Parses a message from an {@code InputStream} through {@code parseWithIOException}. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(java.io.InputStream input)
    throws java.io.IOException {
  return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
}
/** Parses a message from an {@code InputStream} through {@code parseWithIOException}, using the given extension registry. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(
    java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws java.io.IOException {
  return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input, extensionRegistry);
}
/** Parses a message from an {@code InputStream} through {@code parseDelimitedWithIOException}. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseDelimitedFrom(java.io.InputStream input)
    throws java.io.IOException {
  return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(PARSER, input);
}
/** Parses a message from an {@code InputStream} through {@code parseDelimitedWithIOException}, using the given extension registry. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseDelimitedFrom(
    java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws java.io.IOException {
  return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
/** Parses a message from a {@code CodedInputStream} through {@code parseWithIOException}. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(com.google.protobuf.CodedInputStream input)
    throws java.io.IOException {
  return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
}
/** Parses a message from a {@code CodedInputStream} through {@code parseWithIOException}, using the given extension registry. */
public static sentencepiece.SentencepieceModel.NormalizerSpec parseFrom(
    com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws java.io.IOException {
  return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input, extensionRegistry);
}
/** Returns a new builder for this message type; same as {@link #newBuilder()}. */
@java.lang.Override
public Builder newBuilderForType() {
  return newBuilder();
}
/** Returns a new builder obtained from the default instance. */
public static Builder newBuilder() {
  return DEFAULT_INSTANCE.toBuilder();
}
/**
 * Returns a new builder with the contents of {@code prototype} merged in.
 *
 * @param prototype the message whose contents are merged into the new builder
 * @return the new builder
 */
public static Builder newBuilder(sentencepiece.SentencepieceModel.NormalizerSpec prototype) {
  final Builder fresh = DEFAULT_INSTANCE.toBuilder();
  return fresh.mergeFrom(prototype);
}
/**
 * Returns a builder for this message: an empty one for the default instance, otherwise
 * a new builder with this message merged in.
 */
@java.lang.Override
public Builder toBuilder() {
  if (this == DEFAULT_INSTANCE) {
    return new Builder();
  }
  return new Builder().mergeFrom(this);
}
/**
 * Returns a new builder constructed with the given parent.
 *
 * @param parent the parent passed to the builder's constructor
 * @return the new builder
 */
@java.lang.Override
protected Builder newBuilderForType(com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
  return new Builder(parent);
}
/**
*
* NormalizerSpec encodes various parameters for string normalization.
*
*
* Protobuf type {@code sentencepiece.NormalizerSpec}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<
sentencepiece.SentencepieceModel.NormalizerSpec, Builder> implements
// @@protoc_insertion_point(builder_implements:sentencepiece.NormalizerSpec)
sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder {
/**
 * Returns the descriptor held in the outer class's static
 * {@code internal_static_sentencepiece_NormalizerSpec_descriptor} field.
 *
 * @return the descriptor for the message type this builder builds
 */
public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
  final com.google.protobuf.Descriptors.Descriptor descriptor =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_descriptor;
  return descriptor;
}
/**
 * Returns the field accessor table, after ensuring its accessors are initialized for
 * the message class and this builder class.
 *
 * @return the initialized field accessor table
 */
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable() {
  final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable table =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_fieldAccessorTable;
  return table.ensureFieldAccessorsInitialized(
      sentencepiece.SentencepieceModel.NormalizerSpec.class,
      sentencepiece.SentencepieceModel.NormalizerSpec.Builder.class);
}
// Construct using sentencepiece.SentencepieceModel.NormalizerSpec.newBuilder()
/** Creates a builder with no parent; all fields keep their declared initial values. */
private Builder() {
  super();
}
/**
 * Creates a builder with the given parent.
 *
 * @param parent the parent passed to the superclass constructor
 */
private Builder(com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
  super(parent);
}
/**
 * Resets the builder: clears superclass state, drops all presence bits and puts every
 * field back to its default value.
 *
 * @return this builder
 */
@java.lang.Override
public Builder clear() {
  super.clear();
  bitField0_ = 0;
  // String and bytes fields go back to empty.
  name_ = "";
  normalizationRuleTsv_ = "";
  precompiledCharsmap_ = com.google.protobuf.ByteString.EMPTY;
  // The boolean options go back to true.
  addDummyPrefix_ = true;
  removeExtraWhitespaces_ = true;
  escapeWhitespaces_ = true;
  return this;
}
/** Returns the same static descriptor field that {@code getDescriptor()} returns. */
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() {
  final com.google.protobuf.Descriptors.Descriptor descriptor =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_NormalizerSpec_descriptor;
  return descriptor;
}
/** Returns the default instance of {@code NormalizerSpec}. */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpec getDefaultInstanceForType() {
  final sentencepiece.SentencepieceModel.NormalizerSpec defaults =
      sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
  return defaults;
}
/**
 * Builds the message and verifies that it is initialized.
 *
 * @return the built message
 * @throws RuntimeException the exception produced by
 *     {@code newUninitializedMessageException} when the result is not initialized
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpec build() {
  final sentencepiece.SentencepieceModel.NormalizerSpec built = buildPartial();
  if (built.isInitialized()) {
    return built;
  }
  throw newUninitializedMessageException(built);
}
/**
 * Builds the message without checking whether it is initialized.
 *
 * @return the built message
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpec buildPartial() {
  final sentencepiece.SentencepieceModel.NormalizerSpec message =
      new sentencepiece.SentencepieceModel.NormalizerSpec(this);
  // Nothing to copy when no field has been set on this builder.
  if (bitField0_ != 0) {
    buildPartial0(message);
  }
  onBuilt();
  return message;
}
/**
 * Copies every field whose presence bit is set on this builder into {@code result} and
 * ORs the matching presence bits into {@code result.bitField0_}.
 *
 * @param result the message being populated
 */
private void buildPartial0(sentencepiece.SentencepieceModel.NormalizerSpec result) {
  final int sourceBits = bitField0_;
  int copiedBits = 0;
  if ((sourceBits & 0x00000001) != 0) {
    result.name_ = name_;
    copiedBits |= 0x00000001;
  }
  if ((sourceBits & 0x00000002) != 0) {
    result.precompiledCharsmap_ = precompiledCharsmap_;
    copiedBits |= 0x00000002;
  }
  if ((sourceBits & 0x00000004) != 0) {
    result.addDummyPrefix_ = addDummyPrefix_;
    copiedBits |= 0x00000004;
  }
  if ((sourceBits & 0x00000008) != 0) {
    result.removeExtraWhitespaces_ = removeExtraWhitespaces_;
    copiedBits |= 0x00000008;
  }
  if ((sourceBits & 0x00000010) != 0) {
    result.escapeWhitespaces_ = escapeWhitespaces_;
    copiedBits |= 0x00000010;
  }
  if ((sourceBits & 0x00000020) != 0) {
    result.normalizationRuleTsv_ = normalizationRuleTsv_;
    copiedBits |= 0x00000020;
  }
  result.bitField0_ |= copiedBits;
}
/** Delegates to {@code super.clone()}; overridden so the declared return type is {@code Builder}. */
@java.lang.Override
public Builder clone() {
  return super.clone();
}
/** Delegates to {@code super.setField}; overridden so the declared return type is {@code Builder}. */
@java.lang.Override
public Builder setField(
    com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) {
  return super.setField(field, value);
}
/** Delegates to {@code super.clearField}; overridden so the declared return type is {@code Builder}. */
@java.lang.Override
public Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field) {
  return super.clearField(field);
}
/** Delegates to {@code super.clearOneof}; overridden so the declared return type is {@code Builder}. */
@java.lang.Override
public Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof) {
  return super.clearOneof(oneof);
}
/** Delegates to {@code super.setRepeatedField}; overridden so the declared return type is {@code Builder}. */
@java.lang.Override
public Builder setRepeatedField(
    com.google.protobuf.Descriptors.FieldDescriptor field, int index, java.lang.Object value) {
  return super.setRepeatedField(field, index, value);
}
/** Delegates to {@code super.addRepeatedField}; overridden so the declared return type is {@code Builder}. */
@java.lang.Override
public Builder addRepeatedField(
    com.google.protobuf.Descriptors.FieldDescriptor field, java.lang.Object value) {
  return super.addRepeatedField(field, value);
}
/**
 * Sets a singular extension by delegating to {@code super.setExtension}.
 *
 * @param <Type> the extension's value type
 * @param extension the extension to set
 * @param value the new value
 * @return this builder
 */
@java.lang.Override
public <Type> Builder setExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.NormalizerSpec, Type> extension,
    Type value) {
  return super.setExtension(extension, value);
}
/**
 * Sets one element of a repeated extension by delegating to {@code super.setExtension}.
 *
 * @param <Type> the extension's element type
 * @param extension the repeated extension to modify
 * @param index the index of the element to replace
 * @param value the new element value
 * @return this builder
 */
@java.lang.Override
public <Type> Builder setExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.NormalizerSpec, java.util.List<Type>> extension,
    int index, Type value) {
  return super.setExtension(extension, index, value);
}
/**
 * Appends an element to a repeated extension by delegating to {@code super.addExtension}.
 *
 * @param <Type> the extension's element type
 * @param extension the repeated extension to append to
 * @param value the element to append
 * @return this builder
 */
@java.lang.Override
public <Type> Builder addExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.NormalizerSpec, java.util.List<Type>> extension,
    Type value) {
  return super.addExtension(extension, value);
}
/**
 * Clears an extension by delegating to {@code super.clearExtension}.
 *
 * @param <T> the extension's value type
 * @param extension the extension to clear
 * @return this builder
 */
@java.lang.Override
public <T> Builder clearExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.NormalizerSpec, T> extension) {
  return super.clearExtension(extension);
}
/**
 * Merges {@code other} into this builder. A {@code NormalizerSpec} goes to the typed
 * overload; any other message is handed to {@code super.mergeFrom}.
 *
 * @param other the message to merge from
 * @return this builder
 */
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
  if (!(other instanceof sentencepiece.SentencepieceModel.NormalizerSpec)) {
    super.mergeFrom(other);
    return this;
  }
  return mergeFrom((sentencepiece.SentencepieceModel.NormalizerSpec) other);
}
/**
 * Merges the fields that are set on {@code other} into this builder, then merges its
 * extension fields and unknown fields. Merging the default instance changes nothing.
 *
 * @param other the message to merge from
 * @return this builder
 */
public Builder mergeFrom(sentencepiece.SentencepieceModel.NormalizerSpec other) {
  final sentencepiece.SentencepieceModel.NormalizerSpec defaults =
      sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
  if (other == defaults) {
    return this;
  }
  if (other.hasName()) {
    // Take the stored reference directly rather than going through getName().
    name_ = other.name_;
    bitField0_ |= 0x00000001;
    onChanged();
  }
  if (other.hasPrecompiledCharsmap()) {
    setPrecompiledCharsmap(other.getPrecompiledCharsmap());
  }
  if (other.hasAddDummyPrefix()) {
    setAddDummyPrefix(other.getAddDummyPrefix());
  }
  if (other.hasRemoveExtraWhitespaces()) {
    setRemoveExtraWhitespaces(other.getRemoveExtraWhitespaces());
  }
  if (other.hasEscapeWhitespaces()) {
    setEscapeWhitespaces(other.getEscapeWhitespaces());
  }
  if (other.hasNormalizationRuleTsv()) {
    // Take the stored reference directly rather than going through the getter.
    normalizationRuleTsv_ = other.normalizationRuleTsv_;
    bitField0_ |= 0x00000020;
    onChanged();
  }
  this.mergeExtensionFields(other);
  this.mergeUnknownFields(other.getUnknownFields());
  onChanged();
  return this;
}
/**
 * Reports whether the builder is initialized, which here depends only on its extensions.
 *
 * @return the result of {@code extensionsAreInitialized()}
 */
@java.lang.Override
public final boolean isInitialized() {
  return extensionsAreInitialized();
}
/**
 * Reads tags from {@code input} until tag 0 or an end-group tag, storing each known
 * field's value and setting its presence bit. Tags not listed here are passed to
 * {@code super.parseUnknownField}. {@code onChanged()} runs once at the end, whether or
 * not parsing succeeded.
 *
 * @param input the stream to read from
 * @param extensionRegistry registry passed to {@code parseUnknownField}; must not be null
 * @return this builder
 * @throws java.io.IOException if reading fails; an
 *     {@code InvalidProtocolBufferException} is rethrown as its unwrapped IOException
 * @throws java.lang.NullPointerException if {@code extensionRegistry} is null
 */
@java.lang.Override
public Builder mergeFrom(
    com.google.protobuf.CodedInputStream input,
    com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws java.io.IOException {
  if (extensionRegistry == null) {
    throw new java.lang.NullPointerException();
  }
  try {
    boolean finished = false;
    while (!finished) {
      final int tag = input.readTag();
      switch (tag) {
        case 0:
          finished = true;
          break;
        case 10: // name
          name_ = input.readBytes();
          bitField0_ |= 0x00000001;
          break;
        case 18: // precompiled_charsmap
          precompiledCharsmap_ = input.readBytes();
          bitField0_ |= 0x00000002;
          break;
        case 24: // add_dummy_prefix
          addDummyPrefix_ = input.readBool();
          bitField0_ |= 0x00000004;
          break;
        case 32: // remove_extra_whitespaces
          removeExtraWhitespaces_ = input.readBool();
          bitField0_ |= 0x00000008;
          break;
        case 40: // escape_whitespaces
          escapeWhitespaces_ = input.readBool();
          bitField0_ |= 0x00000010;
          break;
        case 50: // normalization_rule_tsv
          normalizationRuleTsv_ = input.readBytes();
          bitField0_ |= 0x00000020;
          break;
        default:
          // A false result means the tag was an endgroup tag, which ends the loop.
          finished = !super.parseUnknownField(input, extensionRegistry, tag);
          break;
      }
    }
  } catch (com.google.protobuf.InvalidProtocolBufferException e) {
    throw e.unwrapIOException();
  } finally {
    onChanged();
  }
  return this;
}
private int bitField0_;
// Holds either a java.lang.String or a ByteString; the accessors below convert on demand.
private java.lang.Object name_ = "";
/**
 * Presence check for {@code name}, the name of the normalization rule.
 *
 * {@code optional string name = 1;}
 * @return Whether the name field is set.
 */
@java.lang.Override
public boolean hasName() {
  return ((bitField0_ & 0x00000001) != 0);
}
/**
 * Returns {@code name}, the name of the normalization rule, as a string.
 *
 * {@code optional string name = 1;}
 * @return The name.
 */
@java.lang.Override
public java.lang.String getName() {
  java.lang.Object ref = name_;
  if (!(ref instanceof java.lang.String)) {
    com.google.protobuf.ByteString bs =
        (com.google.protobuf.ByteString) ref;
    java.lang.String s = bs.toStringUtf8();
    // Keep the decoded string only when the stored bytes are valid UTF-8.
    if (bs.isValidUtf8()) {
      name_ = s;
    }
    return s;
  } else {
    return (java.lang.String) ref;
  }
}
/**
 * Returns {@code name}, the name of the normalization rule, as UTF-8 bytes.
 *
 * {@code optional string name = 1;}
 * @return The bytes for name.
 */
@java.lang.Override
public com.google.protobuf.ByteString getNameBytes() {
  java.lang.Object ref = name_;
  // Fully qualified, like every other type reference in this file.
  if (ref instanceof java.lang.String) {
    com.google.protobuf.ByteString b =
        com.google.protobuf.ByteString.copyFromUtf8(
            (java.lang.String) ref);
    name_ = b;
    return b;
  } else {
    return (com.google.protobuf.ByteString) ref;
  }
}
/**
 * Sets {@code name}, the name of the normalization rule.
 *
 * {@code optional string name = 1;}
 * @param value The name to set.
 * @return This builder for chaining.
 * @throws java.lang.NullPointerException if {@code value} is null
 */
public Builder setName(
    java.lang.String value) {
  if (value == null) { throw new java.lang.NullPointerException(); }
  name_ = value;
  bitField0_ |= 0x00000001;
  onChanged();
  return this;
}
/**
 * Clears {@code name}: restores the default instance's value and drops the presence bit.
 *
 * {@code optional string name = 1;}
 * @return This builder for chaining.
 */
public Builder clearName() {
  name_ = getDefaultInstance().getName();
  bitField0_ = (bitField0_ & ~0x00000001);
  onChanged();
  return this;
}
/**
 * Sets {@code name}, the name of the normalization rule, from bytes.
 *
 * {@code optional string name = 1;}
 * @param value The bytes for name to set.
 * @return This builder for chaining.
 * @throws java.lang.NullPointerException if {@code value} is null
 */
public Builder setNameBytes(
    com.google.protobuf.ByteString value) {
  if (value == null) { throw new java.lang.NullPointerException(); }
  name_ = value;
  bitField0_ |= 0x00000001;
  onChanged();
  return this;
}
private com.google.protobuf.ByteString precompiledCharsmap_ = com.google.protobuf.ByteString.EMPTY;
/**
 * Presence check for {@code precompiled_charsmap}: the pre-compiled normalization rule
 * created by Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap(), usually
 * set by Builder::GetNormalizerSpec().
 *
 * {@code optional bytes precompiled_charsmap = 2;}
 * @return Whether the precompiledCharsmap field is set.
 */
@java.lang.Override
public boolean hasPrecompiledCharsmap() {
  final int presenceBit = bitField0_ & 0x00000002;
  return presenceBit != 0;
}
/**
 * Returns the current {@code precompiled_charsmap} value (the pre-compiled
 * normalization rule).
 *
 * {@code optional bytes precompiled_charsmap = 2;}
 * @return The precompiledCharsmap.
 */
@java.lang.Override
public com.google.protobuf.ByteString getPrecompiledCharsmap() {
  return this.precompiledCharsmap_;
}
/**
 * Sets {@code precompiled_charsmap} (the pre-compiled normalization rule) and marks it
 * as present.
 *
 * {@code optional bytes precompiled_charsmap = 2;}
 * @param value The precompiledCharsmap to set.
 * @return This builder for chaining.
 */
public Builder setPrecompiledCharsmap(com.google.protobuf.ByteString value) {
  if (value == null) {
    throw new NullPointerException();
  }
  precompiledCharsmap_ = value;
  bitField0_ |= 0x00000002;
  onChanged();
  return this;
}
/**
 * Clears {@code precompiled_charsmap}: drops the presence bit and restores the default
 * instance's value.
 *
 * {@code optional bytes precompiled_charsmap = 2;}
 * @return This builder for chaining.
 */
public Builder clearPrecompiledCharsmap() {
  bitField0_ &= ~0x00000002;
  precompiledCharsmap_ = getDefaultInstance().getPrecompiledCharsmap();
  onChanged();
  return this;
}
private boolean addDummyPrefix_ = true;
/**
 * Presence check for {@code add_dummy_prefix}, which adds dummy whitespace at the
 * beginning of text in order to treat "world" in "world" and "hello world" the same way.
 *
 * {@code optional bool add_dummy_prefix = 3 [default = true];}
 * @return Whether the addDummyPrefix field is set.
 */
@java.lang.Override
public boolean hasAddDummyPrefix() {
  final int presenceBit = bitField0_ & 0x00000004;
  return presenceBit != 0;
}
/**
 * Returns the current {@code add_dummy_prefix} value.
 *
 * {@code optional bool add_dummy_prefix = 3 [default = true];}
 * @return The addDummyPrefix.
 */
@java.lang.Override
public boolean getAddDummyPrefix() {
  return this.addDummyPrefix_;
}
/**
 * Sets {@code add_dummy_prefix} and marks it as present.
 *
 * {@code optional bool add_dummy_prefix = 3 [default = true];}
 * @param value The addDummyPrefix to set.
 * @return This builder for chaining.
 */
public Builder setAddDummyPrefix(boolean value) {
  bitField0_ |= 0x00000004;
  addDummyPrefix_ = value;
  onChanged();
  return this;
}
/**
 * Clears {@code add_dummy_prefix}: drops the presence bit and restores the default
 * {@code true}.
 *
 * {@code optional bool add_dummy_prefix = 3 [default = true];}
 * @return This builder for chaining.
 */
public Builder clearAddDummyPrefix() {
  addDummyPrefix_ = true;
  bitField0_ &= ~0x00000004;
  onChanged();
  return this;
}
private boolean removeExtraWhitespaces_ = true;
/**
 * Presence check for {@code remove_extra_whitespaces}, which removes leading, trailing,
 * and duplicate internal whitespace.
 *
 * {@code optional bool remove_extra_whitespaces = 4 [default = true];}
 * @return Whether the removeExtraWhitespaces field is set.
 */
@java.lang.Override
public boolean hasRemoveExtraWhitespaces() {
  final int presenceBit = bitField0_ & 0x00000008;
  return presenceBit != 0;
}
/**
 * Returns the current {@code remove_extra_whitespaces} value.
 *
 * {@code optional bool remove_extra_whitespaces = 4 [default = true];}
 * @return The removeExtraWhitespaces.
 */
@java.lang.Override
public boolean getRemoveExtraWhitespaces() {
  return this.removeExtraWhitespaces_;
}
/**
 * Sets {@code remove_extra_whitespaces} and marks it as present.
 *
 * {@code optional bool remove_extra_whitespaces = 4 [default = true];}
 * @param value The removeExtraWhitespaces to set.
 * @return This builder for chaining.
 */
public Builder setRemoveExtraWhitespaces(boolean value) {
  bitField0_ |= 0x00000008;
  removeExtraWhitespaces_ = value;
  onChanged();
  return this;
}
/**
 * Clears {@code remove_extra_whitespaces}: drops the presence bit and restores the
 * default {@code true}.
 *
 * {@code optional bool remove_extra_whitespaces = 4 [default = true];}
 * @return This builder for chaining.
 */
public Builder clearRemoveExtraWhitespaces() {
  removeExtraWhitespaces_ = true;
  bitField0_ &= ~0x00000008;
  onChanged();
  return this;
}
private boolean escapeWhitespaces_ = true;
/**
 * Presence check for {@code escape_whitespaces}, which replaces whitespace with a meta
 * symbol. This field must be true to train a sentence piece model.
 *
 * {@code optional bool escape_whitespaces = 5 [default = true];}
 * @return Whether the escapeWhitespaces field is set.
 */
@java.lang.Override
public boolean hasEscapeWhitespaces() {
  final int presenceBit = bitField0_ & 0x00000010;
  return presenceBit != 0;
}
/**
 * Returns the current {@code escape_whitespaces} value.
 *
 * {@code optional bool escape_whitespaces = 5 [default = true];}
 * @return The escapeWhitespaces.
 */
@java.lang.Override
public boolean getEscapeWhitespaces() {
  return this.escapeWhitespaces_;
}
/**
 * Sets {@code escape_whitespaces} and marks it as present.
 *
 * {@code optional bool escape_whitespaces = 5 [default = true];}
 * @param value The escapeWhitespaces to set.
 * @return This builder for chaining.
 */
public Builder setEscapeWhitespaces(boolean value) {
  bitField0_ |= 0x00000010;
  escapeWhitespaces_ = value;
  onChanged();
  return this;
}
/**
 * Clears {@code escape_whitespaces}: drops the presence bit and restores the default
 * {@code true}.
 *
 * {@code optional bool escape_whitespaces = 5 [default = true];}
 * @return This builder for chaining.
 */
public Builder clearEscapeWhitespaces() {
  escapeWhitespaces_ = true;
  bitField0_ &= ~0x00000010;
  onChanged();
  return this;
}
private java.lang.Object normalizationRuleTsv_ = "";
/**
*
* Custom normalization rule file in TSV format.
* https://github.com/google/sentencepiece/blob/master/doc/normalization.md
* This field is only used in SentencePieceTrainer::Train() method, which
* compiles the rule into the binary rule stored in `precompiled_charsmap`.
*
*
* optional string normalization_rule_tsv = 6;
* @return Whether the normalizationRuleTsv field is set.
*/
public boolean hasNormalizationRuleTsv() {
return ((bitField0_ & 0x00000020) != 0);
}
/**
*
* Custom normalization rule file in TSV format.
* https://github.com/google/sentencepiece/blob/master/doc/normalization.md
* This field is only used in SentencePieceTrainer::Train() method, which
* compiles the rule into the binary rule stored in `precompiled_charsmap`.
*
*
* optional string normalization_rule_tsv = 6;
* @return The normalizationRuleTsv.
*/
public java.lang.String getNormalizationRuleTsv() {
java.lang.Object ref = normalizationRuleTsv_;
if (!(ref instanceof java.lang.String)) {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
if (bs.isValidUtf8()) {
normalizationRuleTsv_ = s;
}
return s;
} else {
return (java.lang.String) ref;
}
}
/**
*
* Custom normalization rule file in TSV format.
* https://github.com/google/sentencepiece/blob/master/doc/normalization.md
* This field is only used in SentencePieceTrainer::Train() method, which
* compiles the rule into the binary rule stored in `precompiled_charsmap`.
*
*
* optional string normalization_rule_tsv = 6;
* @return The bytes for normalizationRuleTsv.
*/
public com.google.protobuf.ByteString
getNormalizationRuleTsvBytes() {
java.lang.Object ref = normalizationRuleTsv_;
if (ref instanceof String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
normalizationRuleTsv_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
/**
 * <pre>
 * Custom normalization rule file in TSV format.
 * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
 * This field is only used in SentencePieceTrainer::Train() method, which
 * compiles the rule into the binary rule stored in `precompiled_charsmap`.
 * </pre>
 *
 * <code>optional string normalization_rule_tsv = 6;</code>
 * @param value The normalizationRuleTsv to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder setNormalizationRuleTsv(java.lang.String value) {
  if (value == null) {
    throw new NullPointerException();
  }
  normalizationRuleTsv_ = value;
  // Raise the presence bit for field 6.
  bitField0_ = bitField0_ | 0x00000020;
  onChanged();
  return this;
}
/**
 * <pre>
 * Custom normalization rule file in TSV format.
 * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
 * This field is only used in SentencePieceTrainer::Train() method, which
 * compiles the rule into the binary rule stored in `precompiled_charsmap`.
 * </pre>
 *
 * <code>optional string normalization_rule_tsv = 6;</code>
 * @return This builder for chaining.
 */
public Builder clearNormalizationRuleTsv() {
  // Reset to whatever the default instance reports, then drop the presence bit.
  final java.lang.String defaultValue = getDefaultInstance().getNormalizationRuleTsv();
  normalizationRuleTsv_ = defaultValue;
  bitField0_ &= ~0x00000020;
  onChanged();
  return this;
}
/**
 * <pre>
 * Custom normalization rule file in TSV format.
 * https://github.com/google/sentencepiece/blob/master/doc/normalization.md
 * This field is only used in SentencePieceTrainer::Train() method, which
 * compiles the rule into the binary rule stored in `precompiled_charsmap`.
 * </pre>
 *
 * <code>optional string normalization_rule_tsv = 6;</code>
 * @param value The bytes for normalizationRuleTsv to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null.
 */
public Builder setNormalizationRuleTsvBytes(com.google.protobuf.ByteString value) {
  if (value == null) {
    throw new NullPointerException();
  }
  // The bytes are stored as given; no UTF-8 validation happens in this setter.
  normalizationRuleTsv_ = value;
  bitField0_ = bitField0_ | 0x00000020;
  onChanged();
  return this;
}
// Pure delegation to the superclass; overridden here as a final method.
@java.lang.Override
public final Builder setUnknownFields(
    final com.google.protobuf.UnknownFieldSet unknownFields) {
  final Builder self = super.setUnknownFields(unknownFields);
  return self;
}
// Pure delegation to the superclass; overridden here as a final method.
@java.lang.Override
public final Builder mergeUnknownFields(
    final com.google.protobuf.UnknownFieldSet unknownFields) {
  final Builder self = super.mergeUnknownFields(unknownFields);
  return self;
}
// @@protoc_insertion_point(builder_scope:sentencepiece.NormalizerSpec)
}
// @@protoc_insertion_point(class_scope:sentencepiece.NormalizerSpec)
private static final sentencepiece.SentencepieceModel.NormalizerSpec DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.NormalizerSpec();
}
/** Returns the shared default {@code NormalizerSpec} instance. */
public static NormalizerSpec getDefaultInstance() {
  return DEFAULT_INSTANCE;
}
// Parser for NormalizerSpec. The type argument <NormalizerSpec> is restored on both
// Parser and AbstractParser: without it PARSER.parseFrom(..) is typed as Object and
// cannot be returned from the typed parseFrom(..) helpers.
// The field is marked deprecated; parser() returns this same instance.
@java.lang.Deprecated public static final com.google.protobuf.Parser<NormalizerSpec>
    PARSER = new com.google.protobuf.AbstractParser<NormalizerSpec>() {
  @java.lang.Override
  public NormalizerSpec parsePartialFrom(
      com.google.protobuf.CodedInputStream input,
      com.google.protobuf.ExtensionRegistryLite extensionRegistry)
      throws com.google.protobuf.InvalidProtocolBufferException {
    Builder builder = newBuilder();
    try {
      builder.mergeFrom(input, extensionRegistry);
    } catch (com.google.protobuf.InvalidProtocolBufferException e) {
      // Attach whatever was parsed so far before propagating.
      throw e.setUnfinishedMessage(builder.buildPartial());
    } catch (com.google.protobuf.UninitializedMessageException e) {
      throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
    } catch (java.io.IOException e) {
      // Wrap other I/O failures so the declared exception type is honoured.
      throw new com.google.protobuf.InvalidProtocolBufferException(e)
          .setUnfinishedMessage(builder.buildPartial());
    }
    return builder.buildPartial();
  }
};
/**
 * Returns the parser for {@code NormalizerSpec} messages.
 *
 * @return the {@code PARSER} instance, typed so parse results need no cast.
 */
public static com.google.protobuf.Parser<NormalizerSpec> parser() {
  return PARSER;
}
/**
 * Returns the parser for this message type.
 *
 * @return the {@code PARSER} instance, typed as {@code Parser<NormalizerSpec>}.
 */
@java.lang.Override
public com.google.protobuf.Parser<NormalizerSpec> getParserForType() {
  return PARSER;
}
/** Returns the shared default {@code NormalizerSpec} instance. */
@java.lang.Override
public NormalizerSpec getDefaultInstanceForType() {
  return DEFAULT_INSTANCE;
}
}
public interface SelfTestDataOrBuilder extends
// @@protoc_insertion_point(interface_extends:sentencepiece.SelfTestData)
com.google.protobuf.GeneratedMessageV3.
ExtendableMessageOrBuilder {
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
java.util.List
getSamplesList();
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
sentencepiece.SentencepieceModel.SelfTestData.Sample getSamples(int index);
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
int getSamplesCount();
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
java.util.List extends sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder>
getSamplesOrBuilderList();
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder getSamplesOrBuilder(
int index);
}
/**
 * <pre>
 * Proto to store samples for self-testing.
 * </pre>
 *
 * Protobuf type {@code sentencepiece.SelfTestData}
 */
public static final class SelfTestData extends
com.google.protobuf.GeneratedMessageV3.ExtendableMessage<
SelfTestData> implements
// @@protoc_insertion_point(message_implements:sentencepiece.SelfTestData)
SelfTestDataOrBuilder {
private static final long serialVersionUID = 0L;
// Use SelfTestData.newBuilder() to construct.
// The builder parameter carries its type arguments again (<SelfTestData, ?>) instead of
// the raw ExtendableBuilder, so super(builder) needs no unchecked conversion.
private SelfTestData(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<sentencepiece.SentencepieceModel.SelfTestData, ?> builder) {
  super(builder);
}
// No-arg constructor: starts with an empty samples list.
private SelfTestData() {
  this.samples_ = java.util.Collections.emptyList();
}
// Creates a fresh, empty SelfTestData; the parameter is not used.
@java.lang.Override
@SuppressWarnings({"unused"})
protected java.lang.Object newInstance(UnusedPrivateParameter unused) {
  final SelfTestData fresh = new SelfTestData();
  return fresh;
}
/** Returns the descriptor held in the outer class for {@code sentencepiece.SelfTestData}. */
public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
  return SentencepieceModel.internal_static_sentencepiece_SelfTestData_descriptor;
}
// Returns the accessor table held in the outer class after ensuring it is
// initialized for SelfTestData and its Builder.
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
    internalGetFieldAccessorTable() {
  final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable table =
      SentencepieceModel.internal_static_sentencepiece_SelfTestData_fieldAccessorTable;
  return table.ensureFieldAccessorsInitialized(
      SelfTestData.class, SelfTestData.Builder.class);
}
/**
 * Read-only accessors for {@code sentencepiece.SelfTestData.Sample}, which has
 * two optional string fields: {@code input} (field 1) and {@code expected}
 * (field 2). Each field exposes a presence check, a String getter and a
 * ByteString getter.
 */
public interface SampleOrBuilder extends
// @@protoc_insertion_point(interface_extends:sentencepiece.SelfTestData.Sample)
com.google.protobuf.MessageOrBuilder {
/**
 * <code>optional string input = 1;</code>
 * @return Whether the input field is set.
 */
boolean hasInput();
/**
 * <code>optional string input = 1;</code>
 * @return The input.
 */
java.lang.String getInput();
/**
 * <code>optional string input = 1;</code>
 * @return The bytes for input.
 */
com.google.protobuf.ByteString
getInputBytes();
/**
 * <code>optional string expected = 2;</code>
 * @return Whether the expected field is set.
 */
boolean hasExpected();
/**
 * <code>optional string expected = 2;</code>
 * @return The expected.
 */
java.lang.String getExpected();
/**
 * <code>optional string expected = 2;</code>
 * @return The bytes for expected.
 */
com.google.protobuf.ByteString
getExpectedBytes();
}
/**
* Protobuf type {@code sentencepiece.SelfTestData.Sample}
*/
public static final class Sample extends
com.google.protobuf.GeneratedMessageV3 implements
// @@protoc_insertion_point(message_implements:sentencepiece.SelfTestData.Sample)
SampleOrBuilder {
private static final long serialVersionUID = 0L;
// Use Sample.newBuilder() to construct.
private Sample(com.google.protobuf.GeneratedMessageV3.Builder> builder) {
super(builder);
}
private Sample() {
input_ = "";
expected_ = "";
}
@java.lang.Override
@SuppressWarnings({"unused"})
protected java.lang.Object newInstance(
UnusedPrivateParameter unused) {
return new Sample();
}
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internalGetFieldAccessorTable() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_fieldAccessorTable
.ensureFieldAccessorsInitialized(
sentencepiece.SentencepieceModel.SelfTestData.Sample.class, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder.class);
}
private int bitField0_;
public static final int INPUT_FIELD_NUMBER = 1;
@SuppressWarnings("serial")
private volatile java.lang.Object input_ = "";
/**
* optional string input = 1;
* @return Whether the input field is set.
*/
@java.lang.Override
public boolean hasInput() {
return ((bitField0_ & 0x00000001) != 0);
}
/**
* optional string input = 1;
* @return The input.
*/
@java.lang.Override
public java.lang.String getInput() {
java.lang.Object ref = input_;
if (ref instanceof java.lang.String) {
return (java.lang.String) ref;
} else {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
if (bs.isValidUtf8()) {
input_ = s;
}
return s;
}
}
/**
* optional string input = 1;
* @return The bytes for input.
*/
@java.lang.Override
public com.google.protobuf.ByteString
getInputBytes() {
java.lang.Object ref = input_;
if (ref instanceof java.lang.String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
input_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
public static final int EXPECTED_FIELD_NUMBER = 2;
@SuppressWarnings("serial")
private volatile java.lang.Object expected_ = "";
/**
* optional string expected = 2;
* @return Whether the expected field is set.
*/
@java.lang.Override
public boolean hasExpected() {
return ((bitField0_ & 0x00000002) != 0);
}
/**
* optional string expected = 2;
* @return The expected.
*/
@java.lang.Override
public java.lang.String getExpected() {
java.lang.Object ref = expected_;
if (ref instanceof java.lang.String) {
return (java.lang.String) ref;
} else {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
if (bs.isValidUtf8()) {
expected_ = s;
}
return s;
}
}
/**
* optional string expected = 2;
* @return The bytes for expected.
*/
@java.lang.Override
public com.google.protobuf.ByteString
getExpectedBytes() {
java.lang.Object ref = expected_;
if (ref instanceof java.lang.String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
expected_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
private byte memoizedIsInitialized = -1;
@java.lang.Override
public final boolean isInitialized() {
byte isInitialized = memoizedIsInitialized;
if (isInitialized == 1) return true;
if (isInitialized == 0) return false;
memoizedIsInitialized = 1;
return true;
}
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
if (((bitField0_ & 0x00000001) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 1, input_);
}
if (((bitField0_ & 0x00000002) != 0)) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 2, expected_);
}
getUnknownFields().writeTo(output);
}
@java.lang.Override
public int getSerializedSize() {
int size = memoizedSize;
if (size != -1) return size;
size = 0;
if (((bitField0_ & 0x00000001) != 0)) {
size += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, input_);
}
if (((bitField0_ & 0x00000002) != 0)) {
size += com.google.protobuf.GeneratedMessageV3.computeStringSize(2, expected_);
}
size += getUnknownFields().getSerializedSize();
memoizedSize = size;
return size;
}
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof sentencepiece.SentencepieceModel.SelfTestData.Sample)) {
return super.equals(obj);
}
sentencepiece.SentencepieceModel.SelfTestData.Sample other = (sentencepiece.SentencepieceModel.SelfTestData.Sample) obj;
if (hasInput() != other.hasInput()) return false;
if (hasInput()) {
if (!getInput()
.equals(other.getInput())) return false;
}
if (hasExpected() != other.hasExpected()) return false;
if (hasExpected()) {
if (!getExpected()
.equals(other.getExpected())) return false;
}
if (!getUnknownFields().equals(other.getUnknownFields())) return false;
return true;
}
@java.lang.Override
public int hashCode() {
if (memoizedHashCode != 0) {
return memoizedHashCode;
}
int hash = 41;
hash = (19 * hash) + getDescriptor().hashCode();
if (hasInput()) {
hash = (37 * hash) + INPUT_FIELD_NUMBER;
hash = (53 * hash) + getInput().hashCode();
}
if (hasExpected()) {
hash = (37 * hash) + EXPECTED_FIELD_NUMBER;
hash = (53 * hash) + getExpected().hashCode();
}
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
java.nio.ByteBuffer data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
java.nio.ByteBuffer data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
@java.lang.Override
public Builder newBuilderForType() { return newBuilder(); }
public static Builder newBuilder() {
return DEFAULT_INSTANCE.toBuilder();
}
public static Builder newBuilder(sentencepiece.SentencepieceModel.SelfTestData.Sample prototype) {
return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
}
@java.lang.Override
public Builder toBuilder() {
return this == DEFAULT_INSTANCE
? new Builder() : new Builder().mergeFrom(this);
}
@java.lang.Override
protected Builder newBuilderForType(
com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
Builder builder = new Builder(parent);
return builder;
}
/**
* Protobuf type {@code sentencepiece.SelfTestData.Sample}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessageV3.Builder implements
// @@protoc_insertion_point(builder_implements:sentencepiece.SelfTestData.Sample)
sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder {
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internalGetFieldAccessorTable() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_fieldAccessorTable
.ensureFieldAccessorsInitialized(
sentencepiece.SentencepieceModel.SelfTestData.Sample.class, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder.class);
}
// Construct using sentencepiece.SentencepieceModel.SelfTestData.Sample.newBuilder()
private Builder() {
}
private Builder(
com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
super(parent);
}
@java.lang.Override
public Builder clear() {
super.clear();
bitField0_ = 0;
input_ = "";
expected_ = "";
return this;
}
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_Sample_descriptor;
}
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData.Sample getDefaultInstanceForType() {
return sentencepiece.SentencepieceModel.SelfTestData.Sample.getDefaultInstance();
}
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData.Sample build() {
sentencepiece.SentencepieceModel.SelfTestData.Sample result = buildPartial();
if (!result.isInitialized()) {
throw newUninitializedMessageException(result);
}
return result;
}
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData.Sample buildPartial() {
sentencepiece.SentencepieceModel.SelfTestData.Sample result = new sentencepiece.SentencepieceModel.SelfTestData.Sample(this);
if (bitField0_ != 0) { buildPartial0(result); }
onBuilt();
return result;
}
private void buildPartial0(sentencepiece.SentencepieceModel.SelfTestData.Sample result) {
int from_bitField0_ = bitField0_;
int to_bitField0_ = 0;
if (((from_bitField0_ & 0x00000001) != 0)) {
result.input_ = input_;
to_bitField0_ |= 0x00000001;
}
if (((from_bitField0_ & 0x00000002) != 0)) {
result.expected_ = expected_;
to_bitField0_ |= 0x00000002;
}
result.bitField0_ |= to_bitField0_;
}
@java.lang.Override
public Builder clone() {
return super.clone();
}
@java.lang.Override
public Builder setField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.setField(field, value);
}
@java.lang.Override
public Builder clearField(
com.google.protobuf.Descriptors.FieldDescriptor field) {
return super.clearField(field);
}
@java.lang.Override
public Builder clearOneof(
com.google.protobuf.Descriptors.OneofDescriptor oneof) {
return super.clearOneof(oneof);
}
@java.lang.Override
public Builder setRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
int index, java.lang.Object value) {
return super.setRepeatedField(field, index, value);
}
@java.lang.Override
public Builder addRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.addRepeatedField(field, value);
}
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
if (other instanceof sentencepiece.SentencepieceModel.SelfTestData.Sample) {
return mergeFrom((sentencepiece.SentencepieceModel.SelfTestData.Sample)other);
} else {
super.mergeFrom(other);
return this;
}
}
public Builder mergeFrom(sentencepiece.SentencepieceModel.SelfTestData.Sample other) {
if (other == sentencepiece.SentencepieceModel.SelfTestData.Sample.getDefaultInstance()) return this;
if (other.hasInput()) {
input_ = other.input_;
bitField0_ |= 0x00000001;
onChanged();
}
if (other.hasExpected()) {
expected_ = other.expected_;
bitField0_ |= 0x00000002;
onChanged();
}
this.mergeUnknownFields(other.getUnknownFields());
onChanged();
return this;
}
@java.lang.Override
public final boolean isInitialized() {
return true;
}
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
case 0:
done = true;
break;
case 10: {
input_ = input.readBytes();
bitField0_ |= 0x00000001;
break;
} // case 10
case 18: {
expected_ = input.readBytes();
bitField0_ |= 0x00000002;
break;
} // case 18
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
onChanged();
} // finally
return this;
}
private int bitField0_;
private java.lang.Object input_ = "";
/**
* optional string input = 1;
* @return Whether the input field is set.
*/
public boolean hasInput() {
return ((bitField0_ & 0x00000001) != 0);
}
/**
* optional string input = 1;
* @return The input.
*/
public java.lang.String getInput() {
java.lang.Object ref = input_;
if (!(ref instanceof java.lang.String)) {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
if (bs.isValidUtf8()) {
input_ = s;
}
return s;
} else {
return (java.lang.String) ref;
}
}
/**
* optional string input = 1;
* @return The bytes for input.
*/
public com.google.protobuf.ByteString
getInputBytes() {
java.lang.Object ref = input_;
if (ref instanceof String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
input_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
/**
* optional string input = 1;
* @param value The input to set.
* @return This builder for chaining.
*/
public Builder setInput(
java.lang.String value) {
if (value == null) { throw new NullPointerException(); }
input_ = value;
bitField0_ |= 0x00000001;
onChanged();
return this;
}
/**
* optional string input = 1;
* @return This builder for chaining.
*/
public Builder clearInput() {
input_ = getDefaultInstance().getInput();
bitField0_ = (bitField0_ & ~0x00000001);
onChanged();
return this;
}
/**
* optional string input = 1;
* @param value The bytes for input to set.
* @return This builder for chaining.
*/
public Builder setInputBytes(
com.google.protobuf.ByteString value) {
if (value == null) { throw new NullPointerException(); }
input_ = value;
bitField0_ |= 0x00000001;
onChanged();
return this;
}
private java.lang.Object expected_ = "";
/**
* optional string expected = 2;
* @return Whether the expected field is set.
*/
public boolean hasExpected() {
return ((bitField0_ & 0x00000002) != 0);
}
/**
* optional string expected = 2;
* @return The expected.
*/
public java.lang.String getExpected() {
java.lang.Object ref = expected_;
if (!(ref instanceof java.lang.String)) {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
if (bs.isValidUtf8()) {
expected_ = s;
}
return s;
} else {
return (java.lang.String) ref;
}
}
/**
* optional string expected = 2;
* @return The bytes for expected.
*/
public com.google.protobuf.ByteString
getExpectedBytes() {
java.lang.Object ref = expected_;
if (ref instanceof String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
expected_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
/**
* optional string expected = 2;
* @param value The expected to set.
* @return This builder for chaining.
*/
public Builder setExpected(
java.lang.String value) {
if (value == null) { throw new NullPointerException(); }
expected_ = value;
bitField0_ |= 0x00000002;
onChanged();
return this;
}
/**
* optional string expected = 2;
* @return This builder for chaining.
*/
public Builder clearExpected() {
expected_ = getDefaultInstance().getExpected();
bitField0_ = (bitField0_ & ~0x00000002);
onChanged();
return this;
}
/**
* optional string expected = 2;
* @param value The bytes for expected to set.
* @return This builder for chaining.
*/
public Builder setExpectedBytes(
com.google.protobuf.ByteString value) {
if (value == null) { throw new NullPointerException(); }
expected_ = value;
bitField0_ |= 0x00000002;
onChanged();
return this;
}
@java.lang.Override
public final Builder setUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.setUnknownFields(unknownFields);
}
@java.lang.Override
public final Builder mergeUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.mergeUnknownFields(unknownFields);
}
// @@protoc_insertion_point(builder_scope:sentencepiece.SelfTestData.Sample)
}
// @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData.Sample)
private static final sentencepiece.SentencepieceModel.SelfTestData.Sample DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.SelfTestData.Sample();
}
public static sentencepiece.SentencepieceModel.SelfTestData.Sample getDefaultInstance() {
return DEFAULT_INSTANCE;
}
@java.lang.Deprecated public static final com.google.protobuf.Parser
PARSER = new com.google.protobuf.AbstractParser() {
@java.lang.Override
public Sample parsePartialFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
Builder builder = newBuilder();
try {
builder.mergeFrom(input, extensionRegistry);
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.setUnfinishedMessage(builder.buildPartial());
} catch (com.google.protobuf.UninitializedMessageException e) {
throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
} catch (java.io.IOException e) {
throw new com.google.protobuf.InvalidProtocolBufferException(e)
.setUnfinishedMessage(builder.buildPartial());
}
return builder.buildPartial();
}
};
public static com.google.protobuf.Parser parser() {
return PARSER;
}
@java.lang.Override
public com.google.protobuf.Parser getParserForType() {
return PARSER;
}
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData.Sample getDefaultInstanceForType() {
return DEFAULT_INSTANCE;
}
}
public static final int SAMPLES_FIELD_NUMBER = 1;
// Backing list for the repeated `samples` field. The element type is restored so that
// samples_.get(i) is a Sample rather than Object.
@SuppressWarnings("serial")
private java.util.List<sentencepiece.SentencepieceModel.SelfTestData.Sample> samples_;
/**
 * <code>repeated .sentencepiece.SelfTestData.Sample samples = 1;</code>
 * @return The backing list of samples (returned directly, not copied).
 */
@java.lang.Override
public java.util.List<sentencepiece.SentencepieceModel.SelfTestData.Sample> getSamplesList() {
  return samples_;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
@java.lang.Override
public java.util.List extends sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder>
getSamplesOrBuilderList() {
return samples_;
}
/**
 * <code>repeated .sentencepiece.SelfTestData.Sample samples = 1;</code>
 * @return The number of elements in the samples list.
 */
@java.lang.Override
public int getSamplesCount() {
  final int count = samples_.size();
  return count;
}
/**
 * <code>repeated .sentencepiece.SelfTestData.Sample samples = 1;</code>
 * @param index The position in the samples list.
 * @return The sample stored at {@code index}.
 */
@java.lang.Override
public Sample getSamples(int index) {
  final Sample sample = samples_.get(index);
  return sample;
}
/**
 * <code>repeated .sentencepiece.SelfTestData.Sample samples = 1;</code>
 * @param index The position in the samples list.
 * @return The element at {@code index}, typed as {@code SampleOrBuilder}.
 */
@java.lang.Override
public SampleOrBuilder getSamplesOrBuilder(int index) {
  final SampleOrBuilder element = samples_.get(index);
  return element;
}
// Cached result of isInitialized(): -1 = not computed yet, 0 = false, 1 = true.
private byte memoizedIsInitialized = -1;
/**
 * Reports whether this message is initialized, which here depends only on
 * its extensions. The answer is computed once and then served from the cache.
 */
@java.lang.Override
public final boolean isInitialized() {
  final byte cached = memoizedIsInitialized;
  if (cached == 1) {
    return true;
  }
  if (cached == 0) {
    return false;
  }
  final boolean initialized = extensionsAreInitialized();
  memoizedIsInitialized = (byte) (initialized ? 1 : 0);
  return initialized;
}
/**
 * Serializes this message: every sample as field 1, then extensions, then
 * unknown fields.
 *
 * @param output the stream to write to.
 * @throws java.io.IOException if writing to {@code output} fails.
 */
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
    throws java.io.IOException {
  // ExtensionWriter is an inner class of the generic ExtendableMessage; the
  // <SelfTestData> argument is restored so the declaration is not a raw type.
  com.google.protobuf.GeneratedMessageV3
      .ExtendableMessage<sentencepiece.SentencepieceModel.SelfTestData>.ExtensionWriter
      extensionWriter = newExtensionWriter();
  for (int i = 0; i < samples_.size(); i++) {
    output.writeMessage(1, samples_.get(i));
  }
  // Upper bound passed to the writer: 536870912 == 2^29.
  extensionWriter.writeUntil(536870912, output);
  getUnknownFields().writeTo(output);
}
/**
 * Returns the serialized size in bytes: samples, extensions and unknown
 * fields. The result is cached in {@code memoizedSize} (-1 = not computed).
 */
@java.lang.Override
public int getSerializedSize() {
  final int cached = memoizedSize;
  if (cached != -1) {
    return cached;
  }
  int total = 0;
  for (int idx = 0; idx < samples_.size(); idx++) {
    total += com.google.protobuf.CodedOutputStream.computeMessageSize(1, samples_.get(idx));
  }
  total += extensionsSerializedSize();
  total += getUnknownFields().getSerializedSize();
  memoizedSize = total;
  return total;
}
/**
 * Compares with another {@code SelfTestData} by samples list, unknown fields
 * and extension fields, in that order. Any other argument type is handed to
 * {@code super.equals}.
 */
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof SelfTestData)) {
    return super.equals(obj);
  }
  final SelfTestData that = (SelfTestData) obj;
  return getSamplesList().equals(that.getSamplesList())
      && getUnknownFields().equals(that.getUnknownFields())
      && getExtensionFields().equals(that.getExtensionFields());
}
/**
 * Hash over the descriptor, the samples list (only when non-empty), the
 * extension fields and the unknown fields. The result is cached in
 * {@code memoizedHashCode}, where 0 means "not computed yet".
 */
@java.lang.Override
public int hashCode() {
  final int cached = memoizedHashCode;
  if (cached != 0) {
    return cached;
  }
  int acc = 41;
  acc = 19 * acc + getDescriptor().hashCode();
  if (getSamplesCount() > 0) {
    acc = 37 * acc + SAMPLES_FIELD_NUMBER;
    acc = 53 * acc + getSamplesList().hashCode();
  }
  acc = hashFields(acc, getExtensionFields());
  acc = 29 * acc + getUnknownFields().hashCode();
  memoizedHashCode = acc;
  return acc;
}
// In-memory parseFrom overloads: each hands its arguments straight to PARSER.

/** Parses a {@code SelfTestData} from a {@code ByteBuffer}. */
public static SelfTestData parseFrom(java.nio.ByteBuffer data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data);
}

/** Parses a {@code SelfTestData} from a {@code ByteBuffer} using the given extension registry. */
public static SelfTestData parseFrom(
    java.nio.ByteBuffer data,
    com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data, extensionRegistry);
}

/** Parses a {@code SelfTestData} from a {@code ByteString}. */
public static SelfTestData parseFrom(com.google.protobuf.ByteString data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data);
}

/** Parses a {@code SelfTestData} from a {@code ByteString} using the given extension registry. */
public static SelfTestData parseFrom(
    com.google.protobuf.ByteString data,
    com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data, extensionRegistry);
}

/** Parses a {@code SelfTestData} from a byte array. */
public static SelfTestData parseFrom(byte[] data)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data);
}

/** Parses a {@code SelfTestData} from a byte array using the given extension registry. */
public static SelfTestData parseFrom(
    byte[] data,
    com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws com.google.protobuf.InvalidProtocolBufferException {
  return PARSER.parseFrom(data, extensionRegistry);
}
public static sentencepiece.SentencepieceModel.SelfTestData parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
public static sentencepiece.SentencepieceModel.SelfTestData parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
public static sentencepiece.SentencepieceModel.SelfTestData parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input);
}
public static sentencepiece.SentencepieceModel.SelfTestData parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
/**
 * Parses a {@code SelfTestData} from a {@code CodedInputStream} via
 * {@code GeneratedMessageV3.parseWithIOException}.
 *
 * @param input coded stream handed to the parser
 * @return the message returned by the helper
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.SelfTestData parseFrom(
    com.google.protobuf.CodedInputStream input)
    throws java.io.IOException {
  final sentencepiece.SentencepieceModel.SelfTestData parsed =
      com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
  return parsed;
}
/**
 * Parses a {@code SelfTestData} from a {@code CodedInputStream} with an extension registry via
 * {@code GeneratedMessageV3.parseWithIOException}.
 *
 * @param input coded stream handed to the parser
 * @param extensionRegistry registry forwarded to the parser
 * @return the message returned by the helper
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.SelfTestData parseFrom(
    com.google.protobuf.CodedInputStream input,
    com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    throws java.io.IOException {
  final sentencepiece.SentencepieceModel.SelfTestData parsed =
      com.google.protobuf.GeneratedMessageV3.parseWithIOException(
          PARSER, input, extensionRegistry);
  return parsed;
}
/**
 * Returns a new builder, obtained from the static {@code newBuilder()} factory.
 *
 * @return the builder returned by {@code newBuilder()}
 */
@java.lang.Override
public Builder newBuilderForType() {
  final Builder fresh = newBuilder();
  return fresh;
}
/**
 * Creates a builder by calling {@code toBuilder()} on the default instance.
 *
 * @return the builder produced by {@code DEFAULT_INSTANCE.toBuilder()}
 */
public static Builder newBuilder() {
  final sentencepiece.SentencepieceModel.SelfTestData base = DEFAULT_INSTANCE;
  return base.toBuilder();
}
/**
 * Creates a builder from the default instance and merges {@code prototype} into it.
 *
 * @param prototype message whose contents are merged into the new builder
 * @return the result of {@code mergeFrom(prototype)} on the new builder
 */
public static Builder newBuilder(sentencepiece.SentencepieceModel.SelfTestData prototype) {
  final Builder fresh = DEFAULT_INSTANCE.toBuilder();
  return fresh.mergeFrom(prototype);
}
/**
 * Returns a builder for this message: an empty one when this is the default instance,
 * otherwise a new builder with this message merged in.
 *
 * @return the new builder
 */
@java.lang.Override
public Builder toBuilder() {
  if (this == DEFAULT_INSTANCE) {
    // Nothing to copy from the default instance.
    return new Builder();
  }
  return new Builder().mergeFrom(this);
}
/**
 * Creates a builder attached to the given parent.
 *
 * @param parent parent passed to the {@code Builder(BuilderParent)} constructor
 * @return the new builder
 */
@java.lang.Override
protected Builder newBuilderForType(
    com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
  return new Builder(parent);
}
/**
*
* Proto to store samples for self-testing.
*
*
* Protobuf type {@code sentencepiece.SelfTestData}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<
sentencepiece.SentencepieceModel.SelfTestData, Builder> implements
// @@protoc_insertion_point(builder_implements:sentencepiece.SelfTestData)
sentencepiece.SentencepieceModel.SelfTestDataOrBuilder {
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internalGetFieldAccessorTable() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_fieldAccessorTable
.ensureFieldAccessorsInitialized(
sentencepiece.SentencepieceModel.SelfTestData.class, sentencepiece.SentencepieceModel.SelfTestData.Builder.class);
}
// Construct using sentencepiece.SentencepieceModel.SelfTestData.newBuilder()
private Builder() {
}
private Builder(
com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
super(parent);
}
@java.lang.Override
public Builder clear() {
super.clear();
bitField0_ = 0;
if (samplesBuilder_ == null) {
samples_ = java.util.Collections.emptyList();
} else {
samples_ = null;
samplesBuilder_.clear();
}
bitField0_ = (bitField0_ & ~0x00000001);
return this;
}
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_SelfTestData_descriptor;
}
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData getDefaultInstanceForType() {
return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
}
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData build() {
sentencepiece.SentencepieceModel.SelfTestData result = buildPartial();
if (!result.isInitialized()) {
throw newUninitializedMessageException(result);
}
return result;
}
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData buildPartial() {
sentencepiece.SentencepieceModel.SelfTestData result = new sentencepiece.SentencepieceModel.SelfTestData(this);
buildPartialRepeatedFields(result);
if (bitField0_ != 0) { buildPartial0(result); }
onBuilt();
return result;
}
private void buildPartialRepeatedFields(sentencepiece.SentencepieceModel.SelfTestData result) {
if (samplesBuilder_ == null) {
if (((bitField0_ & 0x00000001) != 0)) {
samples_ = java.util.Collections.unmodifiableList(samples_);
bitField0_ = (bitField0_ & ~0x00000001);
}
result.samples_ = samples_;
} else {
result.samples_ = samplesBuilder_.build();
}
}
private void buildPartial0(sentencepiece.SentencepieceModel.SelfTestData result) {
int from_bitField0_ = bitField0_;
}
@java.lang.Override
public Builder clone() {
return super.clone();
}
@java.lang.Override
public Builder setField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.setField(field, value);
}
@java.lang.Override
public Builder clearField(
com.google.protobuf.Descriptors.FieldDescriptor field) {
return super.clearField(field);
}
@java.lang.Override
public Builder clearOneof(
com.google.protobuf.Descriptors.OneofDescriptor oneof) {
return super.clearOneof(oneof);
}
@java.lang.Override
public Builder setRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
int index, java.lang.Object value) {
return super.setRepeatedField(field, index, value);
}
@java.lang.Override
public Builder addRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.addRepeatedField(field, value);
}
@java.lang.Override
public Builder setExtension(
com.google.protobuf.GeneratedMessage.GeneratedExtension<
sentencepiece.SentencepieceModel.SelfTestData, Type> extension,
Type value) {
return super.setExtension(extension, value);
}
@java.lang.Override
public Builder setExtension(
com.google.protobuf.GeneratedMessage.GeneratedExtension<
sentencepiece.SentencepieceModel.SelfTestData, java.util.List> extension,
int index, Type value) {
return super.setExtension(extension, index, value);
}
@java.lang.Override
public Builder addExtension(
com.google.protobuf.GeneratedMessage.GeneratedExtension<
sentencepiece.SentencepieceModel.SelfTestData, java.util.List> extension,
Type value) {
return super.addExtension(extension, value);
}
@java.lang.Override
public Builder clearExtension(
com.google.protobuf.GeneratedMessage.GeneratedExtension<
sentencepiece.SentencepieceModel.SelfTestData, T> extension) {
return super.clearExtension(extension);
}
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
if (other instanceof sentencepiece.SentencepieceModel.SelfTestData) {
return mergeFrom((sentencepiece.SentencepieceModel.SelfTestData)other);
} else {
super.mergeFrom(other);
return this;
}
}
public Builder mergeFrom(sentencepiece.SentencepieceModel.SelfTestData other) {
if (other == sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance()) return this;
if (samplesBuilder_ == null) {
if (!other.samples_.isEmpty()) {
if (samples_.isEmpty()) {
samples_ = other.samples_;
bitField0_ = (bitField0_ & ~0x00000001);
} else {
ensureSamplesIsMutable();
samples_.addAll(other.samples_);
}
onChanged();
}
} else {
if (!other.samples_.isEmpty()) {
if (samplesBuilder_.isEmpty()) {
samplesBuilder_.dispose();
samplesBuilder_ = null;
samples_ = other.samples_;
bitField0_ = (bitField0_ & ~0x00000001);
samplesBuilder_ =
com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders ?
getSamplesFieldBuilder() : null;
} else {
samplesBuilder_.addAllMessages(other.samples_);
}
}
}
this.mergeExtensionFields(other);
this.mergeUnknownFields(other.getUnknownFields());
onChanged();
return this;
}
@java.lang.Override
public final boolean isInitialized() {
if (!extensionsAreInitialized()) {
return false;
}
return true;
}
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
case 0:
done = true;
break;
case 10: {
sentencepiece.SentencepieceModel.SelfTestData.Sample m =
input.readMessage(
sentencepiece.SentencepieceModel.SelfTestData.Sample.PARSER,
extensionRegistry);
if (samplesBuilder_ == null) {
ensureSamplesIsMutable();
samples_.add(m);
} else {
samplesBuilder_.addMessage(m);
}
break;
} // case 10
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
onChanged();
} // finally
return this;
}
private int bitField0_;
private java.util.List samples_ =
java.util.Collections.emptyList();
private void ensureSamplesIsMutable() {
if (!((bitField0_ & 0x00000001) != 0)) {
samples_ = new java.util.ArrayList(samples_);
bitField0_ |= 0x00000001;
}
}
private com.google.protobuf.RepeatedFieldBuilderV3<
sentencepiece.SentencepieceModel.SelfTestData.Sample, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder, sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder> samplesBuilder_;
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public java.util.List getSamplesList() {
if (samplesBuilder_ == null) {
return java.util.Collections.unmodifiableList(samples_);
} else {
return samplesBuilder_.getMessageList();
}
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public int getSamplesCount() {
if (samplesBuilder_ == null) {
return samples_.size();
} else {
return samplesBuilder_.getCount();
}
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public sentencepiece.SentencepieceModel.SelfTestData.Sample getSamples(int index) {
if (samplesBuilder_ == null) {
return samples_.get(index);
} else {
return samplesBuilder_.getMessage(index);
}
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder setSamples(
int index, sentencepiece.SentencepieceModel.SelfTestData.Sample value) {
if (samplesBuilder_ == null) {
if (value == null) {
throw new NullPointerException();
}
ensureSamplesIsMutable();
samples_.set(index, value);
onChanged();
} else {
samplesBuilder_.setMessage(index, value);
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder setSamples(
int index, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder builderForValue) {
if (samplesBuilder_ == null) {
ensureSamplesIsMutable();
samples_.set(index, builderForValue.build());
onChanged();
} else {
samplesBuilder_.setMessage(index, builderForValue.build());
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder addSamples(sentencepiece.SentencepieceModel.SelfTestData.Sample value) {
if (samplesBuilder_ == null) {
if (value == null) {
throw new NullPointerException();
}
ensureSamplesIsMutable();
samples_.add(value);
onChanged();
} else {
samplesBuilder_.addMessage(value);
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder addSamples(
int index, sentencepiece.SentencepieceModel.SelfTestData.Sample value) {
if (samplesBuilder_ == null) {
if (value == null) {
throw new NullPointerException();
}
ensureSamplesIsMutable();
samples_.add(index, value);
onChanged();
} else {
samplesBuilder_.addMessage(index, value);
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder addSamples(
sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder builderForValue) {
if (samplesBuilder_ == null) {
ensureSamplesIsMutable();
samples_.add(builderForValue.build());
onChanged();
} else {
samplesBuilder_.addMessage(builderForValue.build());
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder addSamples(
int index, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder builderForValue) {
if (samplesBuilder_ == null) {
ensureSamplesIsMutable();
samples_.add(index, builderForValue.build());
onChanged();
} else {
samplesBuilder_.addMessage(index, builderForValue.build());
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder addAllSamples(
java.lang.Iterable extends sentencepiece.SentencepieceModel.SelfTestData.Sample> values) {
if (samplesBuilder_ == null) {
ensureSamplesIsMutable();
com.google.protobuf.AbstractMessageLite.Builder.addAll(
values, samples_);
onChanged();
} else {
samplesBuilder_.addAllMessages(values);
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder clearSamples() {
if (samplesBuilder_ == null) {
samples_ = java.util.Collections.emptyList();
bitField0_ = (bitField0_ & ~0x00000001);
onChanged();
} else {
samplesBuilder_.clear();
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public Builder removeSamples(int index) {
if (samplesBuilder_ == null) {
ensureSamplesIsMutable();
samples_.remove(index);
onChanged();
} else {
samplesBuilder_.remove(index);
}
return this;
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder getSamplesBuilder(
int index) {
return getSamplesFieldBuilder().getBuilder(index);
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder getSamplesOrBuilder(
int index) {
if (samplesBuilder_ == null) {
return samples_.get(index); } else {
return samplesBuilder_.getMessageOrBuilder(index);
}
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public java.util.List extends sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder>
getSamplesOrBuilderList() {
if (samplesBuilder_ != null) {
return samplesBuilder_.getMessageOrBuilderList();
} else {
return java.util.Collections.unmodifiableList(samples_);
}
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder addSamplesBuilder() {
return getSamplesFieldBuilder().addBuilder(
sentencepiece.SentencepieceModel.SelfTestData.Sample.getDefaultInstance());
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder addSamplesBuilder(
int index) {
return getSamplesFieldBuilder().addBuilder(
index, sentencepiece.SentencepieceModel.SelfTestData.Sample.getDefaultInstance());
}
/**
* repeated .sentencepiece.SelfTestData.Sample samples = 1;
*/
public java.util.List
getSamplesBuilderList() {
return getSamplesFieldBuilder().getBuilderList();
}
private com.google.protobuf.RepeatedFieldBuilderV3<
sentencepiece.SentencepieceModel.SelfTestData.Sample, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder, sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder>
getSamplesFieldBuilder() {
if (samplesBuilder_ == null) {
samplesBuilder_ = new com.google.protobuf.RepeatedFieldBuilderV3<
sentencepiece.SentencepieceModel.SelfTestData.Sample, sentencepiece.SentencepieceModel.SelfTestData.Sample.Builder, sentencepiece.SentencepieceModel.SelfTestData.SampleOrBuilder>(
samples_,
((bitField0_ & 0x00000001) != 0),
getParentForChildren(),
isClean());
samples_ = null;
}
return samplesBuilder_;
}
@java.lang.Override
public final Builder setUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.setUnknownFields(unknownFields);
}
@java.lang.Override
public final Builder mergeUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.mergeUnknownFields(unknownFields);
}
// @@protoc_insertion_point(builder_scope:sentencepiece.SelfTestData)
}
// @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData)
private static final sentencepiece.SentencepieceModel.SelfTestData DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.SelfTestData();
}
/**
 * Returns the shared default instance.
 *
 * @return {@code DEFAULT_INSTANCE}
 */
public static sentencepiece.SentencepieceModel.SelfTestData getDefaultInstance() {
  final sentencepiece.SentencepieceModel.SelfTestData instance = DEFAULT_INSTANCE;
  return instance;
}
/**
 * Parser for {@code SelfTestData}. It fills a fresh builder from the stream and returns
 * {@code buildPartial()}; on failure the partially built message is attached to the thrown
 * {@code InvalidProtocolBufferException}.
 *
 * @deprecated prefer {@code parser()}, which returns this same instance.
 */
@java.lang.Deprecated public static final com.google.protobuf.Parser<SelfTestData>
    PARSER = new com.google.protobuf.AbstractParser<SelfTestData>() {
  @java.lang.Override
  public SelfTestData parsePartialFrom(
      com.google.protobuf.CodedInputStream input,
      com.google.protobuf.ExtensionRegistryLite extensionRegistry)
      throws com.google.protobuf.InvalidProtocolBufferException {
    Builder builder = newBuilder();
    try {
      builder.mergeFrom(input, extensionRegistry);
    } catch (com.google.protobuf.InvalidProtocolBufferException e) {
      throw e.setUnfinishedMessage(builder.buildPartial());
    } catch (com.google.protobuf.UninitializedMessageException e) {
      throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
    } catch (java.io.IOException e) {
      // Wrap plain I/O failures so callers see one exception type with the partial message.
      throw new com.google.protobuf.InvalidProtocolBufferException(e)
          .setUnfinishedMessage(builder.buildPartial());
    }
    return builder.buildPartial();
  }
};
/**
 * Returns the parser for {@code SelfTestData}.
 *
 * @return {@code PARSER}
 */
public static com.google.protobuf.Parser<SelfTestData> parser() {
  return PARSER;
}
/**
 * Returns the parser for this message type.
 *
 * @return {@code PARSER}
 */
@java.lang.Override
public com.google.protobuf.Parser<SelfTestData> getParserForType() {
  return PARSER;
}
/**
 * Returns the default instance for this message type.
 *
 * @return {@code DEFAULT_INSTANCE}
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData getDefaultInstanceForType() {
  final sentencepiece.SentencepieceModel.SelfTestData instance = DEFAULT_INSTANCE;
  return instance;
}
}
public interface ModelProtoOrBuilder extends
// @@protoc_insertion_point(interface_extends:sentencepiece.ModelProto)
com.google.protobuf.GeneratedMessageV3.
ExtendableMessageOrBuilder {
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
java.util.List
getPiecesList();
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
sentencepiece.SentencepieceModel.ModelProto.SentencePiece getPieces(int index);
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
int getPiecesCount();
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
java.util.List extends sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder>
getPiecesOrBuilderList();
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder getPiecesOrBuilder(
int index);
/**
*
* Spec used to generate this model file.
*
*
* optional .sentencepiece.TrainerSpec trainer_spec = 2;
* @return Whether the trainerSpec field is set.
*/
boolean hasTrainerSpec();
/**
*
* Spec used to generate this model file.
*
*
* optional .sentencepiece.TrainerSpec trainer_spec = 2;
* @return The trainerSpec.
*/
sentencepiece.SentencepieceModel.TrainerSpec getTrainerSpec();
/**
*
* Spec used to generate this model file.
*
*
* optional .sentencepiece.TrainerSpec trainer_spec = 2;
*/
sentencepiece.SentencepieceModel.TrainerSpecOrBuilder getTrainerSpecOrBuilder();
/**
*
* Spec for text normalization.
*
*
* optional .sentencepiece.NormalizerSpec normalizer_spec = 3;
* @return Whether the normalizerSpec field is set.
*/
boolean hasNormalizerSpec();
/**
*
* Spec for text normalization.
*
*
* optional .sentencepiece.NormalizerSpec normalizer_spec = 3;
* @return The normalizerSpec.
*/
sentencepiece.SentencepieceModel.NormalizerSpec getNormalizerSpec();
/**
*
* Spec for text normalization.
*
*
* optional .sentencepiece.NormalizerSpec normalizer_spec = 3;
*/
sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getNormalizerSpecOrBuilder();
/**
*
* Stores sample input and its expected segmentation to verify the model.
*
*
* optional .sentencepiece.SelfTestData self_test_data = 4;
* @return Whether the selfTestData field is set.
*/
boolean hasSelfTestData();
/**
*
* Stores sample input and its expected segmentation to verify the model.
*
*
* optional .sentencepiece.SelfTestData self_test_data = 4;
* @return The selfTestData.
*/
sentencepiece.SentencepieceModel.SelfTestData getSelfTestData();
/**
*
* Stores sample input and its expected segmentation to verify the model.
*
*
* optional .sentencepiece.SelfTestData self_test_data = 4;
*/
sentencepiece.SentencepieceModel.SelfTestDataOrBuilder getSelfTestDataOrBuilder();
/**
*
* Spec for text de-normalization.
*
*
* optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;
* @return Whether the denormalizerSpec field is set.
*/
boolean hasDenormalizerSpec();
/**
*
* Spec for text de-normalization.
*
*
* optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;
* @return The denormalizerSpec.
*/
sentencepiece.SentencepieceModel.NormalizerSpec getDenormalizerSpec();
/**
*
* Spec for text de-normalization.
*
*
* optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;
*/
sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getDenormalizerSpecOrBuilder();
}
/**
 * <pre>
 * ModelProto stores model parameters.
 * SentencePieceProcessor is supposed to be self-contained.
 * All settings/parameters which may change the behavior must be encoded
 * in ModelProto.
 * </pre>
 *
 * Protobuf type {@code sentencepiece.ModelProto}
 */
public static final class ModelProto extends
com.google.protobuf.GeneratedMessageV3.ExtendableMessage<
ModelProto> implements
// @@protoc_insertion_point(message_implements:sentencepiece.ModelProto)
ModelProtoOrBuilder {
private static final long serialVersionUID = 0L;
// Use ModelProto.newBuilder() to construct.
/**
 * Creates a message from the given builder by passing it to the superclass constructor.
 *
 * @param builder builder handed to {@code super(builder)}
 */
private ModelProto(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<sentencepiece.SentencepieceModel.ModelProto, ?> builder) {
  super(builder);
}
/** Creates an empty message whose {@code pieces_} list is the shared immutable empty list. */
private ModelProto() {
  this.pieces_ = java.util.Collections.emptyList();
}
/**
 * Creates a fresh, empty {@code ModelProto}.
 *
 * @param unused ignored
 * @return a new {@code ModelProto}
 */
@java.lang.Override
@SuppressWarnings({"unused"})
protected java.lang.Object newInstance(UnusedPrivateParameter unused) {
  final ModelProto fresh = new ModelProto();
  return fresh;
}
/**
 * Returns the descriptor held in
 * {@code SentencepieceModel.internal_static_sentencepiece_ModelProto_descriptor}.
 *
 * @return the {@code ModelProto} descriptor
 */
public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
  final com.google.protobuf.Descriptors.Descriptor descriptor =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_descriptor;
  return descriptor;
}
/**
 * Returns the field accessor table for {@code ModelProto} after calling
 * {@code ensureFieldAccessorsInitialized} with the message and builder classes.
 *
 * @return the value returned by {@code ensureFieldAccessorsInitialized}
 */
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
    internalGetFieldAccessorTable() {
  final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable table =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_fieldAccessorTable;
  return table.ensureFieldAccessorsInitialized(
      sentencepiece.SentencepieceModel.ModelProto.class,
      sentencepiece.SentencepieceModel.ModelProto.Builder.class);
}
public interface SentencePieceOrBuilder extends
// @@protoc_insertion_point(interface_extends:sentencepiece.ModelProto.SentencePiece)
com.google.protobuf.GeneratedMessageV3.
ExtendableMessageOrBuilder {
/**
*
* piece must not be empty.
*
*
* optional string piece = 1;
* @return Whether the piece field is set.
*/
boolean hasPiece();
/**
*
* piece must not be empty.
*
*
* optional string piece = 1;
* @return The piece.
*/
java.lang.String getPiece();
/**
*
* piece must not be empty.
*
*
* optional string piece = 1;
* @return The bytes for piece.
*/
com.google.protobuf.ByteString
getPieceBytes();
/**
* optional float score = 2;
* @return Whether the score field is set.
*/
boolean hasScore();
/**
* optional float score = 2;
* @return The score.
*/
float getScore();
/**
* optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];
* @return Whether the type field is set.
*/
boolean hasType();
/**
* optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];
* @return The type.
*/
sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type getType();
}
/**
* Protobuf type {@code sentencepiece.ModelProto.SentencePiece}
*/
public static final class SentencePiece extends
com.google.protobuf.GeneratedMessageV3.ExtendableMessage<
SentencePiece> implements
// @@protoc_insertion_point(message_implements:sentencepiece.ModelProto.SentencePiece)
SentencePieceOrBuilder {
private static final long serialVersionUID = 0L;
// Use SentencePiece.newBuilder() to construct.
/**
 * Creates a message from the given builder by passing it to the superclass constructor.
 *
 * @param builder builder handed to {@code super(builder)}
 */
private SentencePiece(com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<sentencepiece.SentencepieceModel.ModelProto.SentencePiece, ?> builder) {
  super(builder);
}
/** Creates a message with {@code type_} set to 1 and {@code piece_} set to the empty string. */
private SentencePiece() {
  this.type_ = 1;
  this.piece_ = "";
}
/**
 * Creates a fresh {@code SentencePiece}.
 *
 * @param unused ignored
 * @return a new {@code SentencePiece}
 */
@java.lang.Override
@SuppressWarnings({"unused"})
protected java.lang.Object newInstance(UnusedPrivateParameter unused) {
  final SentencePiece fresh = new SentencePiece();
  return fresh;
}
/**
 * Returns the descriptor held in
 * {@code SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_descriptor}.
 *
 * @return the {@code SentencePiece} descriptor
 */
public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
  final com.google.protobuf.Descriptors.Descriptor descriptor =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_descriptor;
  return descriptor;
}
/**
 * Returns the field accessor table for {@code SentencePiece} after calling
 * {@code ensureFieldAccessorsInitialized} with the message and builder classes.
 *
 * @return the value returned by {@code ensureFieldAccessorsInitialized}
 */
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
    internalGetFieldAccessorTable() {
  final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable table =
      sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_fieldAccessorTable;
  return table.ensureFieldAccessorsInitialized(
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.class,
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder.class);
}
/**
* Protobuf enum {@code sentencepiece.ModelProto.SentencePiece.Type}
*/
public enum Type
implements com.google.protobuf.ProtocolMessageEnum {
/**
*
* normal symbol
*
*
* NORMAL = 1;
*/
NORMAL(1),
/**
*
* unknown symbol. only <unk> for now.
*
*
* UNKNOWN = 2;
*/
UNKNOWN(2),
/**
*
* control symbols. </s>, <s>, <2ja> etc.
*
*
* CONTROL = 3;
*/
CONTROL(3),
/**
*
* user defined symbols.
*
*
* USER_DEFINED = 4;
*/
USER_DEFINED(4),
/**
*
* Typical usage of USER_DEFINED symbol
* is placeholder.
*
*
* BYTE = 6;
*/
BYTE(6),
/**
*
* this piece is not used.
*
*
* UNUSED = 5;
*/
UNUSED(5),
;
/**
*
* normal symbol
*
*
* NORMAL = 1;
*/
public static final int NORMAL_VALUE = 1;
/**
*
* unknown symbol. only <unk> for now.
*
*
* UNKNOWN = 2;
*/
public static final int UNKNOWN_VALUE = 2;
/**
*
* control symbols. </s>, <s>, <2ja> etc.
*
*
* CONTROL = 3;
*/
public static final int CONTROL_VALUE = 3;
/**
*
* user defined symbols.
*
*
* USER_DEFINED = 4;
*/
public static final int USER_DEFINED_VALUE = 4;
/**
*
* Typical usage of USER_DEFINED symbol
* is placeholder.
*
*
* BYTE = 6;
*/
public static final int BYTE_VALUE = 6;
/**
*
* this piece is not used.
*
*
* UNUSED = 5;
*/
public static final int UNUSED_VALUE = 5;
public final int getNumber() {
return value;
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
* @deprecated Use {@link #forNumber(int)} instead.
*/
@java.lang.Deprecated
public static Type valueOf(int value) {
return forNumber(value);
}
/**
* @param value The numeric wire value of the corresponding enum entry.
* @return The enum associated with the given numeric wire value.
*/
public static Type forNumber(int value) {
switch (value) {
case 1: return NORMAL;
case 2: return UNKNOWN;
case 3: return CONTROL;
case 4: return USER_DEFINED;
case 6: return BYTE;
case 5: return UNUSED;
default: return null;
}
}
public static com.google.protobuf.Internal.EnumLiteMap
internalGetValueMap() {
return internalValueMap;
}
private static final com.google.protobuf.Internal.EnumLiteMap<
Type> internalValueMap =
new com.google.protobuf.Internal.EnumLiteMap() {
public Type findValueByNumber(int number) {
return Type.forNumber(number);
}
};
public final com.google.protobuf.Descriptors.EnumValueDescriptor
getValueDescriptor() {
return getDescriptor().getValues().get(ordinal());
}
public final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptorForType() {
return getDescriptor();
}
public static final com.google.protobuf.Descriptors.EnumDescriptor
getDescriptor() {
return sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDescriptor().getEnumTypes().get(0);
}
private static final Type[] VALUES = values();
public static Type valueOf(
com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
if (desc.getType() != getDescriptor()) {
throw new java.lang.IllegalArgumentException(
"EnumValueDescriptor is not for this type.");
}
return VALUES[desc.getIndex()];
}
private final int value;
private Type(int value) {
this.value = value;
}
// @@protoc_insertion_point(enum_scope:sentencepiece.ModelProto.SentencePiece.Type)
}
// Presence bits tested by the has*() accessors: 0x1 piece, 0x2 score, 0x4 type.
private int bitField0_;
// Field number of `piece`.
public static final int PIECE_FIELD_NUMBER = 1;
// Holds either a String or a ByteString; getPiece()/getPieceBytes() convert between the two
// and write the converted form back into this field.
@SuppressWarnings("serial")
private volatile java.lang.Object piece_ = "";
/**
 * <pre>
 * piece must not be empty.
 * </pre>
 *
 * <code>optional string piece = 1;</code>
 * @return Whether the piece field is set.
 */
@java.lang.Override
public boolean hasPiece() {
  final int pieceBit = bitField0_ & 0x00000001;
  return pieceBit != 0;
}
/**
 * <pre>
 * piece must not be empty.
 * </pre>
 *
 * <code>optional string piece = 1;</code>
 * @return The piece.
 */
@java.lang.Override
public java.lang.String getPiece() {
  // Read the volatile field once and work on the snapshot.
  final java.lang.Object current = piece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  // Cache the decoded string only when the bytes are valid UTF-8.
  if (raw.isValidUtf8()) {
    piece_ = decoded;
  }
  return decoded;
}
/**
 * <pre>
 * piece must not be empty.
 * </pre>
 *
 * <code>optional string piece = 1;</code>
 * @return The bytes for piece.
 */
@java.lang.Override
public com.google.protobuf.ByteString getPieceBytes() {
  // Read the volatile field once and work on the snapshot.
  final java.lang.Object current = piece_;
  if (!(current instanceof java.lang.String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  // Store the encoded form back into the field.
  piece_ = encoded;
  return encoded;
}
/** Field number of {@code score}. */
public static final int SCORE_FIELD_NUMBER = 2;
private float score_ = 0F;
/**
 * <code>optional float score = 2;</code>
 * @return Whether the score field is set.
 */
@java.lang.Override
public boolean hasScore() {
  final int scoreBit = bitField0_ & 0x00000002;
  return scoreBit != 0;
}
/**
 * <code>optional float score = 2;</code>
 * @return The score.
 */
@java.lang.Override
public float getScore() {
  return this.score_;
}
/** Field number of {@code type}. */
public static final int TYPE_FIELD_NUMBER = 3;
private int type_ = 1;
/**
 * <code>optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];</code>
 * @return Whether the type field is set.
 */
@java.lang.Override
public boolean hasType() {
  final int typeBit = bitField0_ & 0x00000004;
  return typeBit != 0;
}
/**
 * <code>optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];</code>
 * @return The type; {@code NORMAL} when the stored number maps to no constant.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type getType() {
  final sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type decoded =
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.forNumber(type_);
  if (decoded != null) {
    return decoded;
  }
  return sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.NORMAL;
}
/** Cached result of {@link #isInitialized()}: -1 not computed yet, 0 false, 1 true. */
private byte memoizedIsInitialized = -1;
/**
 * Reports whether the extensions are initialized, caching the answer in
 * {@code memoizedIsInitialized}.
 *
 * @return the cached or freshly computed result of {@code extensionsAreInitialized()}
 */
@java.lang.Override
public final boolean isInitialized() {
  final byte cached = memoizedIsInitialized;
  if (cached == 1) {
    return true;
  }
  if (cached == 0) {
    return false;
  }
  final boolean initialized = extensionsAreInitialized();
  memoizedIsInitialized = (byte) (initialized ? 1 : 0);
  return initialized;
}
/**
 * Writes the fields whose presence bit is set (piece, score, type), then the extensions via the
 * extension writer, then the unknown fields.
 *
 * @param output stream to write to
 * @throws java.io.IOException propagated from {@code output}
 */
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
                    throws java.io.IOException {
  com.google.protobuf.GeneratedMessageV3
    .ExtendableMessage<sentencepiece.SentencepieceModel.ModelProto.SentencePiece>.ExtensionWriter
      extensionWriter = newExtensionWriter();
  if (((bitField0_ & 0x00000001) != 0)) {
    com.google.protobuf.GeneratedMessageV3.writeString(output, 1, piece_);
  }
  if (((bitField0_ & 0x00000002) != 0)) {
    output.writeFloat(2, score_);
  }
  if (((bitField0_ & 0x00000004) != 0)) {
    output.writeEnum(3, type_);
  }
  extensionWriter.writeUntil(536870912, output);
  getUnknownFields().writeTo(output);
}
/**
 * Computes the serialized size and caches it in {@code memoizedSize}; a cached value other
 * than -1 is returned directly.
 *
 * @return the serialized size in bytes
 */
@java.lang.Override
public int getSerializedSize() {
  final int cached = memoizedSize;
  if (cached != -1) {
    return cached;
  }

  int total = 0;
  final int presence = bitField0_;
  if ((presence & 0x00000001) != 0) {
    total += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, piece_);
  }
  if ((presence & 0x00000002) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeFloatSize(2, score_);
  }
  if ((presence & 0x00000004) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeEnumSize(3, type_);
  }
  total += extensionsSerializedSize();
  total += getUnknownFields().getSerializedSize();

  memoizedSize = total;
  return total;
}
/**
 * Compares against another {@code SentencePiece}: presence and value of piece, score (by
 * {@code floatToIntBits}) and type, then unknown fields, then extension fields. Any other
 * argument is handed to {@code super.equals}.
 *
 * @param obj object to compare with
 * @return whether the two are equal
 */
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof sentencepiece.SentencepieceModel.ModelProto.SentencePiece)) {
    return super.equals(obj);
  }
  final sentencepiece.SentencepieceModel.ModelProto.SentencePiece that =
      (sentencepiece.SentencepieceModel.ModelProto.SentencePiece) obj;

  if (hasPiece() != that.hasPiece()) {
    return false;
  }
  if (hasPiece() && !getPiece().equals(that.getPiece())) {
    return false;
  }

  if (hasScore() != that.hasScore()) {
    return false;
  }
  if (hasScore()
      && java.lang.Float.floatToIntBits(getScore())
          != java.lang.Float.floatToIntBits(that.getScore())) {
    return false;
  }

  if (hasType() != that.hasType()) {
    return false;
  }
  if (hasType() && type_ != that.type_) {
    return false;
  }

  if (!getUnknownFields().equals(that.getUnknownFields())) {
    return false;
  }
  return getExtensionFields().equals(that.getExtensionFields());
}
/**
 * Hash code consistent with {@link #equals(java.lang.Object)}.
 *
 * Mixes the descriptor, every present field (field number, then value), the extension
 * fields and the unknown fields. The result is cached in {@code memoizedHashCode};
 * {@code 0} is treated as "not computed yet".
 *
 * @return the hash code of this message
 */
@java.lang.Override
public int hashCode() {
  final int cached = memoizedHashCode;
  if (cached != 0) {
    return cached;
  }
  int h = 41;
  h = 19 * h + getDescriptor().hashCode();
  if (hasPiece()) {
    h = 37 * h + PIECE_FIELD_NUMBER;
    h = 53 * h + getPiece().hashCode();
  }
  if (hasScore()) {
    h = 37 * h + SCORE_FIELD_NUMBER;
    h = 53 * h + java.lang.Float.floatToIntBits(getScore());
  }
  if (hasType()) {
    h = 37 * h + TYPE_FIELD_NUMBER;
    h = 53 * h + type_;
  }
  h = hashFields(h, getExtensionFields());
  h = 29 * h + getUnknownFields().hashCode();
  memoizedHashCode = h;
  return h;
}
/**
 * Parses a {@code SentencePiece} from a byte buffer by delegating to {@code PARSER}.
 *
 * @param data the encoded message
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
java.nio.ByteBuffer data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
/**
 * Parses a {@code SentencePiece} from a byte buffer, passing the extension registry to {@code PARSER}.
 *
 * @param data the encoded message
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
java.nio.ByteBuffer data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
/**
 * Parses a {@code SentencePiece} from a {@code ByteString} by delegating to {@code PARSER}.
 *
 * @param data the encoded message
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
/**
 * Parses a {@code SentencePiece} from a {@code ByteString}, passing the extension registry to {@code PARSER}.
 *
 * @param data the encoded message
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
/**
 * Parses a {@code SentencePiece} from a byte array by delegating to {@code PARSER}.
 *
 * @param data the encoded message
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
/**
 * Parses a {@code SentencePiece} from a byte array, passing the extension registry to {@code PARSER}.
 *
 * @param data the encoded message
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
/**
 * Parses a {@code SentencePiece} from an input stream via
 * {@code GeneratedMessageV3.parseWithIOException}.
 *
 * @param input the stream to read from
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
/**
 * Parses a {@code SentencePiece} from an input stream via
 * {@code GeneratedMessageV3.parseWithIOException}, with an extension registry.
 *
 * @param input the stream to read from
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
/**
 * Parses a {@code SentencePiece} from an input stream via
 * {@code GeneratedMessageV3.parseDelimitedWithIOException}.
 *
 * @param input the stream to read from
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input);
}
/**
 * Parses a {@code SentencePiece} from an input stream via
 * {@code GeneratedMessageV3.parseDelimitedWithIOException}, with an extension registry.
 *
 * @param input the stream to read from
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
/**
 * Parses a {@code SentencePiece} from a {@code CodedInputStream} via
 * {@code GeneratedMessageV3.parseWithIOException}.
 *
 * @param input the coded stream to read from
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
/**
 * Parses a {@code SentencePiece} from a {@code CodedInputStream} via
 * {@code GeneratedMessageV3.parseWithIOException}, with an extension registry.
 *
 * @param input the coded stream to read from
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
/**
 * Returns a fresh builder for this message type; same as {@link #newBuilder()}.
 */
@java.lang.Override
public Builder newBuilderForType() {
  return newBuilder();
}

/**
 * Returns a builder obtained from the default instance.
 */
public static Builder newBuilder() {
  return DEFAULT_INSTANCE.toBuilder();
}

/**
 * Returns a builder obtained from the default instance with {@code prototype} merged in.
 *
 * @param prototype message whose contents are merged into the new builder
 */
public static Builder newBuilder(sentencepiece.SentencepieceModel.ModelProto.SentencePiece prototype) {
  final Builder fresh = DEFAULT_INSTANCE.toBuilder();
  return fresh.mergeFrom(prototype);
}

/**
 * Returns a builder holding this message's contents. The default instance yields an
 * empty builder without going through a merge.
 */
@java.lang.Override
public Builder toBuilder() {
  final Builder fresh = new Builder();
  if (this == DEFAULT_INSTANCE) {
    return fresh;
  }
  return fresh.mergeFrom(this);
}

/**
 * Returns a new builder attached to the given parent.
 *
 * @param parent parent passed to the {@code Builder} constructor
 */
@java.lang.Override
protected Builder newBuilderForType(
    com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
  return new Builder(parent);
}
/**
* Protobuf type {@code sentencepiece.ModelProto.SentencePiece}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<
sentencepiece.SentencepieceModel.ModelProto.SentencePiece, Builder> implements
// @@protoc_insertion_point(builder_implements:sentencepiece.ModelProto.SentencePiece)
sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder {
/**
 * Returns the descriptor held in
 * {@code SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_descriptor}.
 */
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_descriptor;
}
/**
 * Returns the field accessor table for this message, after ensuring its accessors are
 * initialized for the {@code SentencePiece} message class and its {@code Builder} class.
 */
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internalGetFieldAccessorTable() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_fieldAccessorTable
.ensureFieldAccessorsInitialized(
sentencepiece.SentencepieceModel.ModelProto.SentencePiece.class, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder.class);
}
// Construct using sentencepiece.SentencepieceModel.ModelProto.SentencePiece.newBuilder()
/** Creates an empty builder; private, so callers go through the enclosing class's factory methods. */
private Builder() {
}
/**
 * Creates an empty builder attached to {@code parent}.
 *
 * @param parent parent forwarded to the superclass constructor
 */
private Builder(
com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
super(parent);
}
/**
 * Resets this builder: clears the superclass state, drops every presence bit and restores
 * the field defaults ({@code ""} for piece, {@code 0F} for score, enum number {@code 1}
 * for type).
 *
 * @return this builder for chaining
 */
@java.lang.Override
public Builder clear() {
  super.clear();
  // Restore defaults first, then forget which fields were set.
  type_ = 1;
  score_ = 0F;
  piece_ = "";
  bitField0_ = 0;
  return this;
}
/**
 * Returns the same descriptor object as the static {@code getDescriptor()} of this builder.
 */
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_SentencePiece_descriptor;
}
/**
 * Returns the shared default {@code SentencePiece} instance.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece getDefaultInstanceForType() {
return sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDefaultInstance();
}
/**
 * Builds the message and verifies that it is initialized.
 *
 * @return the built message
 * @throws RuntimeException the exception produced by
 *     {@code newUninitializedMessageException} when the result is not initialized
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece build() {
  final sentencepiece.SentencepieceModel.ModelProto.SentencePiece built = buildPartial();
  if (built.isInitialized()) {
    return built;
  }
  throw newUninitializedMessageException(built);
}

/**
 * Builds the message without checking initialization.
 *
 * @return a new message carrying the fields currently set on this builder
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece buildPartial() {
  final sentencepiece.SentencepieceModel.ModelProto.SentencePiece message =
      new sentencepiece.SentencepieceModel.ModelProto.SentencePiece(this);
  if (bitField0_ != 0) {
    buildPartial0(message);
  }
  onBuilt();
  return message;
}

/**
 * Copies every field whose presence bit is set from this builder into {@code result} and
 * marks the same bits on the result.
 */
private void buildPartial0(sentencepiece.SentencepieceModel.ModelProto.SentencePiece result) {
  final int presence = bitField0_;
  if ((presence & 0x00000001) != 0) {
    result.piece_ = piece_;
  }
  if ((presence & 0x00000002) != 0) {
    result.score_ = score_;
  }
  if ((presence & 0x00000004) != 0) {
    result.type_ = type_;
  }
  // Exactly the three field bits above are carried over.
  result.bitField0_ |= presence & (0x00000001 | 0x00000002 | 0x00000004);
}
// The overrides below only forward to the superclass; they are declared here with
// the concrete Builder return type.
/** Forwards to {@code super.clone()}. */
@java.lang.Override
public Builder clone() {
return super.clone();
}
/**
 * Forwards to {@code super.setField}.
 *
 * @param field descriptor of the field to set
 * @param value the new value
 */
@java.lang.Override
public Builder setField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.setField(field, value);
}
/**
 * Forwards to {@code super.clearField}.
 *
 * @param field descriptor of the field to clear
 */
@java.lang.Override
public Builder clearField(
com.google.protobuf.Descriptors.FieldDescriptor field) {
return super.clearField(field);
}
/**
 * Forwards to {@code super.clearOneof}.
 *
 * @param oneof descriptor of the oneof to clear
 */
@java.lang.Override
public Builder clearOneof(
com.google.protobuf.Descriptors.OneofDescriptor oneof) {
return super.clearOneof(oneof);
}
/**
 * Forwards to {@code super.setRepeatedField}.
 *
 * @param field descriptor of the repeated field
 * @param index position of the element to replace
 * @param value the new element
 */
@java.lang.Override
public Builder setRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
int index, java.lang.Object value) {
return super.setRepeatedField(field, index, value);
}
/**
 * Forwards to {@code super.addRepeatedField}.
 *
 * @param field descriptor of the repeated field
 * @param value the element to append
 */
@java.lang.Override
public Builder addRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.addRepeatedField(field, value);
}
// The four overrides below are generic methods. Their method-level type parameters
// (and the List element type) must be declared; without them "Type" would bind to the
// nested SentencePiece.Type enum, "T" would be undefined, and none of the methods
// would override the ExtendableBuilder originals.

/**
 * Sets a singular extension; forwards to {@code super.setExtension}.
 *
 * @param <Type> value type of the extension
 * @param extension the extension to set
 * @param value the new value
 * @return this builder for chaining
 */
@java.lang.Override
public <Type> Builder setExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.ModelProto.SentencePiece, Type> extension,
    Type value) {
  return super.setExtension(extension, value);
}

/**
 * Replaces one element of a repeated extension; forwards to {@code super.setExtension}.
 *
 * @param <Type> element type of the extension
 * @param extension the repeated extension
 * @param index position of the element to replace
 * @param value the new element
 * @return this builder for chaining
 */
@java.lang.Override
public <Type> Builder setExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.ModelProto.SentencePiece, java.util.List<Type>> extension,
    int index, Type value) {
  return super.setExtension(extension, index, value);
}

/**
 * Appends an element to a repeated extension; forwards to {@code super.addExtension}.
 *
 * @param <Type> element type of the extension
 * @param extension the repeated extension
 * @param value the element to append
 * @return this builder for chaining
 */
@java.lang.Override
public <Type> Builder addExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.ModelProto.SentencePiece, java.util.List<Type>> extension,
    Type value) {
  return super.addExtension(extension, value);
}

/**
 * Clears an extension; forwards to {@code super.clearExtension}.
 *
 * @param <T> value type of the extension
 * @param extension the extension to clear
 * @return this builder for chaining
 */
@java.lang.Override
public <T> Builder clearExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.ModelProto.SentencePiece, T> extension) {
  return super.clearExtension(extension);
}
/**
 * Merges an arbitrary message into this builder. A {@code SentencePiece} takes the typed
 * overload; anything else is handled by {@code super.mergeFrom}.
 *
 * @param other the message to merge in
 * @return this builder for chaining
 */
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
  if (!(other instanceof sentencepiece.SentencepieceModel.ModelProto.SentencePiece)) {
    super.mergeFrom(other);
    return this;
  }
  return mergeFrom((sentencepiece.SentencepieceModel.ModelProto.SentencePiece) other);
}

/**
 * Merges {@code other} into this builder: every field set on {@code other} overwrites the
 * value held here, then extension fields and unknown fields are merged. Merging the
 * default instance is a no-op.
 *
 * @param other the message to merge in
 * @return this builder for chaining
 */
public Builder mergeFrom(sentencepiece.SentencepieceModel.ModelProto.SentencePiece other) {
  if (other == sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDefaultInstance()) {
    return this;
  }
  if (other.hasPiece()) {
    // Copy the raw reference (String or ByteString) without forcing a conversion.
    bitField0_ |= 0x00000001;
    piece_ = other.piece_;
    onChanged();
  }
  if (other.hasScore()) {
    setScore(other.getScore());
  }
  if (other.hasType()) {
    setType(other.getType());
  }
  mergeExtensionFields(other);
  mergeUnknownFields(other.getUnknownFields());
  onChanged();
  return this;
}
/**
 * Reports whether this builder is initialized; the only condition checked here is that
 * the extensions are initialized.
 *
 * @return the result of {@code extensionsAreInitialized()}
 */
@java.lang.Override
public final boolean isInitialized() {
  return extensionsAreInitialized();
}
/**
 * Reads fields from {@code input} and merges them into this builder until tag {@code 0}
 * is read or the superclass reports an end-group tag.
 *
 * A tag is {@code (field_number << 3) | wire_type}, so the cases below are
 * 10 = field 1 (piece), 21 = field 2 (score) and 24 = field 3 (type). Every other tag goes
 * to {@code super.parseUnknownField}. {@code onChanged()} is called once in the
 * {@code finally} block, whether or not parsing succeeded.
 *
 * @param input the stream to read from
 * @param extensionRegistry registry forwarded to {@code parseUnknownField}; must not be null
 * @return this builder for chaining
 * @throws java.lang.NullPointerException if {@code extensionRegistry} is null
 * @throws java.io.IOException on read failure; an {@code InvalidProtocolBufferException}
 *     is rethrown as the result of its {@code unwrapIOException()}
 */
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
// Tag 0 ends the loop.
case 0:
done = true;
break;
case 10: {
// Stored as the raw bytes; getPiece() converts to String on demand.
piece_ = input.readBytes();
bitField0_ |= 0x00000001;
break;
} // case 10
case 21: {
score_ = input.readFloat();
bitField0_ |= 0x00000002;
break;
} // case 21
case 24: {
int tmpRaw = input.readEnum();
sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type tmpValue =
sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.forNumber(tmpRaw);
if (tmpValue == null) {
// Number not known to the Type enum: keep it as an unknown varint field
// instead of setting type_.
mergeUnknownVarintField(3, tmpRaw);
} else {
type_ = tmpRaw;
bitField0_ |= 0x00000004;
}
break;
} // case 24
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
onChanged();
} // finally
return this;
}
/** Presence bits: 0x1 = piece, 0x2 = score, 0x4 = type. */
private int bitField0_;
/** Holds either a {@code java.lang.String} or a {@code ByteString}; see the accessors. */
private java.lang.Object piece_ = "";

/**
 * piece must not be empty.
 *
 * {@code optional string piece = 1;}
 * @return Whether the piece field is set.
 */
public boolean hasPiece() {
  return (bitField0_ & 0x00000001) != 0;
}

/**
 * piece must not be empty.
 *
 * If the field currently holds a {@code ByteString}, it is decoded as UTF-8; the decoded
 * string replaces the stored bytes only when the bytes are valid UTF-8.
 *
 * {@code optional string piece = 1;}
 * @return The piece.
 */
public java.lang.String getPiece() {
  final java.lang.Object current = piece_;
  if (current instanceof java.lang.String) {
    return (java.lang.String) current;
  }
  final com.google.protobuf.ByteString raw = (com.google.protobuf.ByteString) current;
  final java.lang.String decoded = raw.toStringUtf8();
  if (raw.isValidUtf8()) {
    piece_ = decoded;
  }
  return decoded;
}

/**
 * piece must not be empty.
 *
 * If the field currently holds a {@code String}, it is encoded as UTF-8 and the resulting
 * {@code ByteString} replaces the stored string.
 *
 * {@code optional string piece = 1;}
 * @return The bytes for piece.
 */
public com.google.protobuf.ByteString
    getPieceBytes() {
  final java.lang.Object current = piece_;
  if (!(current instanceof String)) {
    return (com.google.protobuf.ByteString) current;
  }
  final com.google.protobuf.ByteString encoded =
      com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
  piece_ = encoded;
  return encoded;
}
/**
 * piece must not be empty.
 *
 * {@code optional string piece = 1;}
 * @param value The piece to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null
 */
public Builder setPiece(
    java.lang.String value) {
  if (value == null) {
    throw new NullPointerException();
  }
  bitField0_ |= 0x00000001;
  piece_ = value;
  onChanged();
  return this;
}

/**
 * piece must not be empty.
 *
 * Restores the default instance's value and drops the presence bit.
 *
 * {@code optional string piece = 1;}
 * @return This builder for chaining.
 */
public Builder clearPiece() {
  piece_ = getDefaultInstance().getPiece();
  bitField0_ &= ~0x00000001;
  onChanged();
  return this;
}

/**
 * piece must not be empty.
 *
 * {@code optional string piece = 1;}
 * @param value The bytes for piece to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null
 */
public Builder setPieceBytes(
    com.google.protobuf.ByteString value) {
  if (value == null) {
    throw new NullPointerException();
  }
  bitField0_ |= 0x00000001;
  piece_ = value;
  onChanged();
  return this;
}
/** Value of {@code optional float score = 2;}; meaningful only while bit 0x2 is set. */
private float score_;

/**
 * {@code optional float score = 2;}
 * @return Whether the score field is set.
 */
@java.lang.Override
public boolean hasScore() {
  return (bitField0_ & 0x00000002) != 0;
}

/**
 * {@code optional float score = 2;}
 * @return The score.
 */
@java.lang.Override
public float getScore() {
  return score_;
}

/**
 * {@code optional float score = 2;}
 * @param value The score to set.
 * @return This builder for chaining.
 */
public Builder setScore(float value) {
  bitField0_ |= 0x00000002;
  score_ = value;
  onChanged();
  return this;
}

/**
 * Drops the presence bit and resets the stored value to {@code 0F}.
 *
 * {@code optional float score = 2;}
 * @return This builder for chaining.
 */
public Builder clearScore() {
  score_ = 0F;
  bitField0_ &= ~0x00000002;
  onChanged();
  return this;
}
/** Enum number of the type field; {@code 1} is the value this builder resets to. */
private int type_ = 1;

/**
 * {@code optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];}
 * @return Whether the type field is set.
 */
@java.lang.Override
public boolean hasType() {
  return (bitField0_ & 0x00000004) != 0;
}

/**
 * Falls back to {@code NORMAL} when the stored number has no matching enum constant.
 *
 * {@code optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];}
 * @return The type.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type getType() {
  final sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type known =
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.forNumber(type_);
  if (known != null) {
    return known;
  }
  return sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type.NORMAL;
}

/**
 * {@code optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];}
 * @param value The type to set.
 * @return This builder for chaining.
 * @throws NullPointerException if {@code value} is null
 */
public Builder setType(sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Type value) {
  if (value == null) {
    throw new NullPointerException();
  }
  type_ = value.getNumber();
  bitField0_ |= 0x00000004;
  onChanged();
  return this;
}

/**
 * Drops the presence bit and resets the stored enum number to {@code 1}.
 *
 * {@code optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL];}
 * @return This builder for chaining.
 */
public Builder clearType() {
  type_ = 1;
  bitField0_ &= ~0x00000004;
  onChanged();
  return this;
}
/**
 * Forwards to {@code super.setUnknownFields}.
 *
 * @param unknownFields the unknown field set to install
 */
@java.lang.Override
public final Builder setUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.setUnknownFields(unknownFields);
}
/**
 * Forwards to {@code super.mergeUnknownFields}.
 *
 * @param unknownFields the unknown field set to merge in
 */
@java.lang.Override
public final Builder mergeUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.mergeUnknownFields(unknownFields);
}
// @@protoc_insertion_point(builder_scope:sentencepiece.ModelProto.SentencePiece)
}
// @@protoc_insertion_point(class_scope:sentencepiece.ModelProto.SentencePiece)
private static final sentencepiece.SentencepieceModel.ModelProto.SentencePiece DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.ModelProto.SentencePiece();
}
public static sentencepiece.SentencepieceModel.ModelProto.SentencePiece getDefaultInstance() {
return DEFAULT_INSTANCE;
}
/**
 * Parser for {@code SentencePiece}. Deprecated as a public field; use {@link #parser()}.
 *
 * The parser is parameterized with {@code SentencePiece}: with a raw {@code Parser},
 * {@code PARSER.parseFrom(...)} would return {@code Object} and the typed static
 * {@code parseFrom} wrappers of this class could not compile.
 */
@java.lang.Deprecated public static final com.google.protobuf.Parser<SentencePiece>
    PARSER = new com.google.protobuf.AbstractParser<SentencePiece>() {
  /**
   * Parses into a fresh builder and returns {@code buildPartial()}. On failure the
   * partially built message is attached to the thrown
   * {@code InvalidProtocolBufferException} through {@code setUnfinishedMessage}.
   */
  @java.lang.Override
  public SentencePiece parsePartialFrom(
      com.google.protobuf.CodedInputStream input,
      com.google.protobuf.ExtensionRegistryLite extensionRegistry)
      throws com.google.protobuf.InvalidProtocolBufferException {
    Builder builder = newBuilder();
    try {
      builder.mergeFrom(input, extensionRegistry);
    } catch (com.google.protobuf.InvalidProtocolBufferException e) {
      throw e.setUnfinishedMessage(builder.buildPartial());
    } catch (com.google.protobuf.UninitializedMessageException e) {
      throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
    } catch (java.io.IOException e) {
      // Any other I/O failure is wrapped so callers only see the protobuf exception type.
      throw new com.google.protobuf.InvalidProtocolBufferException(e)
          .setUnfinishedMessage(builder.buildPartial());
    }
    return builder.buildPartial();
  }
};

/**
 * Returns the parser for {@code SentencePiece}.
 */
public static com.google.protobuf.Parser<SentencePiece> parser() {
  return PARSER;
}

/**
 * Returns the parser for this message's type; the same object as {@link #parser()}.
 */
@java.lang.Override
public com.google.protobuf.Parser<SentencePiece> getParserForType() {
  return PARSER;
}
/**
 * Returns the shared {@code DEFAULT_INSTANCE} of {@code SentencePiece}.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece getDefaultInstanceForType() {
return DEFAULT_INSTANCE;
}
}
private int bitField0_;
public static final int PIECES_FIELD_NUMBER = 1;
@SuppressWarnings("serial")
private java.util.List pieces_;
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
@java.lang.Override
public java.util.List getPiecesList() {
return pieces_;
}
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
@java.lang.Override
public java.util.List extends sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder>
getPiecesOrBuilderList() {
return pieces_;
}
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
@java.lang.Override
public int getPiecesCount() {
return pieces_.size();
}
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece getPieces(int index) {
return pieces_.get(index);
}
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder getPiecesOrBuilder(
int index) {
return pieces_.get(index);
}
public static final int TRAINER_SPEC_FIELD_NUMBER = 2;
/** Null when the field has never been assigned; the getters substitute the default instance. */
private sentencepiece.SentencepieceModel.TrainerSpec trainerSpec_;

/**
 * Spec used to generate this model file.
 *
 * {@code optional .sentencepiece.TrainerSpec trainer_spec = 2;}
 * @return Whether the trainerSpec field is set.
 */
@java.lang.Override
public boolean hasTrainerSpec() {
  return (bitField0_ & 0x00000001) != 0;
}

/**
 * Spec used to generate this model file.
 *
 * {@code optional .sentencepiece.TrainerSpec trainer_spec = 2;}
 * @return The trainerSpec, or the {@code TrainerSpec} default instance when none is stored.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpec getTrainerSpec() {
  if (trainerSpec_ != null) {
    return trainerSpec_;
  }
  return sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance();
}

/**
 * Spec used to generate this model file.
 *
 * {@code optional .sentencepiece.TrainerSpec trainer_spec = 2;}
 * @return the same value as {@link #getTrainerSpec()}, typed as its {@code OrBuilder} interface
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.TrainerSpecOrBuilder getTrainerSpecOrBuilder() {
  if (trainerSpec_ != null) {
    return trainerSpec_;
  }
  return sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance();
}
public static final int NORMALIZER_SPEC_FIELD_NUMBER = 3;
/** Null when the field has never been assigned; the getters substitute the default instance. */
private sentencepiece.SentencepieceModel.NormalizerSpec normalizerSpec_;

/**
 * Spec for text normalization.
 *
 * {@code optional .sentencepiece.NormalizerSpec normalizer_spec = 3;}
 * @return Whether the normalizerSpec field is set.
 */
@java.lang.Override
public boolean hasNormalizerSpec() {
  return (bitField0_ & 0x00000002) != 0;
}

/**
 * Spec for text normalization.
 *
 * {@code optional .sentencepiece.NormalizerSpec normalizer_spec = 3;}
 * @return The normalizerSpec, or the {@code NormalizerSpec} default instance when none is stored.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpec getNormalizerSpec() {
  if (normalizerSpec_ != null) {
    return normalizerSpec_;
  }
  return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
}

/**
 * Spec for text normalization.
 *
 * {@code optional .sentencepiece.NormalizerSpec normalizer_spec = 3;}
 * @return the same value as {@link #getNormalizerSpec()}, typed as its {@code OrBuilder} interface
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getNormalizerSpecOrBuilder() {
  if (normalizerSpec_ != null) {
    return normalizerSpec_;
  }
  return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
}
public static final int SELF_TEST_DATA_FIELD_NUMBER = 4;
/** Null when the field has never been assigned; the getters substitute the default instance. */
private sentencepiece.SentencepieceModel.SelfTestData selfTestData_;

/**
 * Stores sample input and its expected segmentation to verify the model.
 *
 * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
 * @return Whether the selfTestData field is set.
 */
@java.lang.Override
public boolean hasSelfTestData() {
  return (bitField0_ & 0x00000004) != 0;
}

/**
 * Stores sample input and its expected segmentation to verify the model.
 *
 * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
 * @return The selfTestData, or the {@code SelfTestData} default instance when none is stored.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestData getSelfTestData() {
  if (selfTestData_ != null) {
    return selfTestData_;
  }
  return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
}

/**
 * Stores sample input and its expected segmentation to verify the model.
 *
 * {@code optional .sentencepiece.SelfTestData self_test_data = 4;}
 * @return the same value as {@link #getSelfTestData()}, typed as its {@code OrBuilder} interface
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.SelfTestDataOrBuilder getSelfTestDataOrBuilder() {
  if (selfTestData_ != null) {
    return selfTestData_;
  }
  return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
}
public static final int DENORMALIZER_SPEC_FIELD_NUMBER = 5;
/** Null when the field has never been assigned; the getters substitute the default instance. */
private sentencepiece.SentencepieceModel.NormalizerSpec denormalizerSpec_;

/**
 * Spec for text de-normalization.
 *
 * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
 * @return Whether the denormalizerSpec field is set.
 */
@java.lang.Override
public boolean hasDenormalizerSpec() {
  return (bitField0_ & 0x00000008) != 0;
}

/**
 * Spec for text de-normalization.
 *
 * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
 * @return The denormalizerSpec, or the {@code NormalizerSpec} default instance when none is stored.
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpec getDenormalizerSpec() {
  if (denormalizerSpec_ != null) {
    return denormalizerSpec_;
  }
  return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
}

/**
 * Spec for text de-normalization.
 *
 * {@code optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;}
 * @return the same value as {@link #getDenormalizerSpec()}, typed as its {@code OrBuilder} interface
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getDenormalizerSpecOrBuilder() {
  if (denormalizerSpec_ != null) {
    return denormalizerSpec_;
  }
  return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
}
/** Cached answer of {@link #isInitialized()}: -1 = unknown, 0 = false, 1 = true. */
private byte memoizedIsInitialized = -1;

/**
 * Reports whether this message is initialized: every piece, every sub-message that is
 * set, and the extensions must all be initialized. The answer is cached after the first
 * call.
 *
 * @return {@code true} if the message and everything checked inside it is initialized
 */
@java.lang.Override
public final boolean isInitialized() {
  final byte memo = memoizedIsInitialized;
  if (memo == 1) {
    return true;
  }
  if (memo == 0) {
    return false;
  }
  for (int i = 0; i < getPiecesCount(); i++) {
    if (!getPieces(i).isInitialized()) {
      memoizedIsInitialized = 0;
      return false;
    }
  }
  if (hasTrainerSpec() && !getTrainerSpec().isInitialized()) {
    memoizedIsInitialized = 0;
    return false;
  }
  if (hasNormalizerSpec() && !getNormalizerSpec().isInitialized()) {
    memoizedIsInitialized = 0;
    return false;
  }
  if (hasSelfTestData() && !getSelfTestData().isInitialized()) {
    memoizedIsInitialized = 0;
    return false;
  }
  if (hasDenormalizerSpec() && !getDenormalizerSpec().isInitialized()) {
    memoizedIsInitialized = 0;
    return false;
  }
  if (!extensionsAreInitialized()) {
    memoizedIsInitialized = 0;
    return false;
  }
  memoizedIsInitialized = 1;
  return true;
}
/**
 * Writes this message to {@code output}: all pieces (field 1), then each sub-message whose
 * presence bit is set (fields 2 to 5), then the extensions, then the unknown fields.
 *
 * @param output the stream to write to
 * @throws java.io.IOException if writing to {@code output} fails
 */
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
    throws java.io.IOException {
  // ExtensionWriter is an inner class of the generic ExtendableMessage, so the outer type
  // is parameterized with this message type rather than left raw.
  com.google.protobuf.GeneratedMessageV3
      .ExtendableMessage<sentencepiece.SentencepieceModel.ModelProto>.ExtensionWriter
      extensionWriter = newExtensionWriter();
  for (int i = 0; i < pieces_.size(); i++) {
    output.writeMessage(1, pieces_.get(i));
  }
  if (((bitField0_ & 0x00000001) != 0)) {
    output.writeMessage(2, getTrainerSpec());
  }
  if (((bitField0_ & 0x00000002) != 0)) {
    output.writeMessage(3, getNormalizerSpec());
  }
  if (((bitField0_ & 0x00000004) != 0)) {
    output.writeMessage(4, getSelfTestData());
  }
  if (((bitField0_ & 0x00000008) != 0)) {
    output.writeMessage(5, getDenormalizerSpec());
  }
  // 536870912 == 1 << 29, the exclusive upper bound passed to writeUntil.
  extensionWriter.writeUntil(536870912, output);
  getUnknownFields().writeTo(output);
}
/**
 * Returns the number of bytes needed to encode this message.
 *
 * The result is cached in {@code memoizedSize}; {@code -1} means "not computed yet".
 * The total covers every piece, each sub-message whose presence bit is set, the extension
 * fields and the unknown fields.
 *
 * @return the serialized size in bytes
 */
@java.lang.Override
public int getSerializedSize() {
  final int cached = memoizedSize;
  if (cached != -1) {
    return cached;
  }
  int total = 0;
  for (int i = 0; i < pieces_.size(); i++) {
    total += com.google.protobuf.CodedOutputStream.computeMessageSize(1, pieces_.get(i));
  }
  if ((bitField0_ & 0x00000001) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeMessageSize(2, getTrainerSpec());
  }
  if ((bitField0_ & 0x00000002) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeMessageSize(3, getNormalizerSpec());
  }
  if ((bitField0_ & 0x00000004) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeMessageSize(4, getSelfTestData());
  }
  if ((bitField0_ & 0x00000008) != 0) {
    total += com.google.protobuf.CodedOutputStream.computeMessageSize(5, getDenormalizerSpec());
  }
  total += extensionsSerializedSize();
  total += getUnknownFields().getSerializedSize();
  memoizedSize = total;
  return total;
}
/**
 * Field-by-field equality for {@code ModelProto}.
 *
 * Two messages are equal when their piece lists are equal, each optional sub-message has
 * the same presence and (if present) an equal value, and their unknown fields and
 * extension fields are equal. Objects of any other type are handed to {@code super.equals}.
 *
 * @param obj the object to compare against
 * @return {@code true} if {@code obj} is an equal message
 */
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof sentencepiece.SentencepieceModel.ModelProto)) {
    return super.equals(obj);
  }
  final sentencepiece.SentencepieceModel.ModelProto that =
      (sentencepiece.SentencepieceModel.ModelProto) obj;

  if (!getPiecesList().equals(that.getPiecesList())) {
    return false;
  }

  if (hasTrainerSpec() != that.hasTrainerSpec()) {
    return false;
  }
  if (hasTrainerSpec() && !getTrainerSpec().equals(that.getTrainerSpec())) {
    return false;
  }

  if (hasNormalizerSpec() != that.hasNormalizerSpec()) {
    return false;
  }
  if (hasNormalizerSpec() && !getNormalizerSpec().equals(that.getNormalizerSpec())) {
    return false;
  }

  if (hasSelfTestData() != that.hasSelfTestData()) {
    return false;
  }
  if (hasSelfTestData() && !getSelfTestData().equals(that.getSelfTestData())) {
    return false;
  }

  if (hasDenormalizerSpec() != that.hasDenormalizerSpec()) {
    return false;
  }
  if (hasDenormalizerSpec() && !getDenormalizerSpec().equals(that.getDenormalizerSpec())) {
    return false;
  }

  if (!getUnknownFields().equals(that.getUnknownFields())) {
    return false;
  }
  return getExtensionFields().equals(that.getExtensionFields());
}
/**
 * Hash code consistent with {@link #equals(java.lang.Object)}.
 *
 * Mixes the descriptor, the piece list (only when non-empty), every present sub-message
 * (field number, then value), the extension fields and the unknown fields. The result is
 * cached in {@code memoizedHashCode}; {@code 0} is treated as "not computed yet".
 *
 * @return the hash code of this message
 */
@java.lang.Override
public int hashCode() {
  final int cached = memoizedHashCode;
  if (cached != 0) {
    return cached;
  }
  int h = 41;
  h = 19 * h + getDescriptor().hashCode();
  if (getPiecesCount() > 0) {
    h = 37 * h + PIECES_FIELD_NUMBER;
    h = 53 * h + getPiecesList().hashCode();
  }
  if (hasTrainerSpec()) {
    h = 37 * h + TRAINER_SPEC_FIELD_NUMBER;
    h = 53 * h + getTrainerSpec().hashCode();
  }
  if (hasNormalizerSpec()) {
    h = 37 * h + NORMALIZER_SPEC_FIELD_NUMBER;
    h = 53 * h + getNormalizerSpec().hashCode();
  }
  if (hasSelfTestData()) {
    h = 37 * h + SELF_TEST_DATA_FIELD_NUMBER;
    h = 53 * h + getSelfTestData().hashCode();
  }
  if (hasDenormalizerSpec()) {
    h = 37 * h + DENORMALIZER_SPEC_FIELD_NUMBER;
    h = 53 * h + getDenormalizerSpec().hashCode();
  }
  h = hashFields(h, getExtensionFields());
  h = 29 * h + getUnknownFields().hashCode();
  memoizedHashCode = h;
  return h;
}
/**
 * Parses a {@code ModelProto} from a byte buffer by delegating to {@code PARSER}.
 *
 * @param data the encoded message
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
java.nio.ByteBuffer data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
/**
 * Parses a {@code ModelProto} from a byte buffer, passing the extension registry to {@code PARSER}.
 *
 * @param data the encoded message
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
java.nio.ByteBuffer data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
/**
 * Parses a {@code ModelProto} from a {@code ByteString} by delegating to {@code PARSER}.
 *
 * @param data the encoded message
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
/**
 * Parses a {@code ModelProto} from a {@code ByteString}, passing the extension registry to {@code PARSER}.
 *
 * @param data the encoded message
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
/**
 * Parses a {@code ModelProto} from a byte array by delegating to {@code PARSER}.
 *
 * @param data the encoded message
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
/**
 * Parses a {@code ModelProto} from a byte array, passing the extension registry to {@code PARSER}.
 *
 * @param data the encoded message
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws com.google.protobuf.InvalidProtocolBufferException if {@code PARSER} rejects the data
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
/**
 * Parses a {@code ModelProto} from an input stream via
 * {@code GeneratedMessageV3.parseWithIOException}.
 *
 * @param input the stream to read from
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
/**
 * Parses a {@code ModelProto} from an input stream via
 * {@code GeneratedMessageV3.parseWithIOException}, with an extension registry.
 *
 * @param input the stream to read from
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
/**
 * Parses a {@code ModelProto} from an input stream via
 * {@code GeneratedMessageV3.parseDelimitedWithIOException}.
 *
 * @param input the stream to read from
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input);
}
/**
 * Parses a {@code ModelProto} from an input stream via
 * {@code GeneratedMessageV3.parseDelimitedWithIOException}, with an extension registry.
 *
 * @param input the stream to read from
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
/**
 * Parses a {@code ModelProto} from a {@code CodedInputStream} via
 * {@code GeneratedMessageV3.parseWithIOException}.
 *
 * @param input the coded stream to read from
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
/**
 * Parses a {@code ModelProto} from a {@code CodedInputStream} via
 * {@code GeneratedMessageV3.parseWithIOException}, with an extension registry.
 *
 * @param input the coded stream to read from
 * @param extensionRegistry registry handed through to the parser
 * @return the parsed message
 * @throws java.io.IOException propagated from the helper
 */
public static sentencepiece.SentencepieceModel.ModelProto parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
/**
 * Returns a fresh builder for this message type; same as {@link #newBuilder()}.
 */
@java.lang.Override
public Builder newBuilderForType() {
  return newBuilder();
}

/**
 * Returns a builder obtained from the default instance.
 */
public static Builder newBuilder() {
  return DEFAULT_INSTANCE.toBuilder();
}

/**
 * Returns a builder obtained from the default instance with {@code prototype} merged in.
 *
 * @param prototype message whose contents are merged into the new builder
 */
public static Builder newBuilder(sentencepiece.SentencepieceModel.ModelProto prototype) {
  final Builder fresh = DEFAULT_INSTANCE.toBuilder();
  return fresh.mergeFrom(prototype);
}

/**
 * Returns a builder holding this message's contents. The default instance yields an
 * empty builder without going through a merge.
 */
@java.lang.Override
public Builder toBuilder() {
  final Builder fresh = new Builder();
  if (this == DEFAULT_INSTANCE) {
    return fresh;
  }
  return fresh.mergeFrom(this);
}

/**
 * Returns a new builder attached to the given parent.
 *
 * @param parent parent passed to the {@code Builder} constructor
 */
@java.lang.Override
protected Builder newBuilderForType(
    com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
  return new Builder(parent);
}
/**
*
* ModelProto stores model parameters.
* SentencePieceProcessor is supposed to be self-contained.
* All settings/parameters which may change the behavior must be encoded
* in ModelProto.
*
*
* Protobuf type {@code sentencepiece.ModelProto}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<
sentencepiece.SentencepieceModel.ModelProto, Builder> implements
// @@protoc_insertion_point(builder_implements:sentencepiece.ModelProto)
sentencepiece.SentencepieceModel.ModelProtoOrBuilder {
/**
 * Returns the descriptor held in
 * {@code SentencepieceModel.internal_static_sentencepiece_ModelProto_descriptor}.
 */
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_descriptor;
}
/**
 * Returns the field accessor table for this message, after ensuring its accessors are
 * initialized for the {@code ModelProto} message class and its {@code Builder} class.
 */
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internalGetFieldAccessorTable() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_fieldAccessorTable
.ensureFieldAccessorsInitialized(
sentencepiece.SentencepieceModel.ModelProto.class, sentencepiece.SentencepieceModel.ModelProto.Builder.class);
}
// Construct using sentencepiece.SentencepieceModel.ModelProto.newBuilder()
/**
 * Creates a builder with no parent and runs
 * {@code maybeForceBuilderInitialization()}.
 */
private Builder() {
maybeForceBuilderInitialization();
}
/**
 * Creates a builder attached to {@code parent} and runs
 * {@code maybeForceBuilderInitialization()}.
 *
 * @param parent builder parent forwarded to the superclass constructor
 */
private Builder(
com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
super(parent);
maybeForceBuilderInitialization();
}
/**
 * Creates the field builders for every message-typed field up front when
 * {@code GeneratedMessageV3.alwaysUseFieldBuilders} is set; otherwise does nothing.
 */
private void maybeForceBuilderInitialization() {
  if (!com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) {
    return;
  }
  getPiecesFieldBuilder();
  getTrainerSpecFieldBuilder();
  getNormalizerSpecFieldBuilder();
  getSelfTestDataFieldBuilder();
  getDenormalizerSpecFieldBuilder();
}
/**
 * Resets this builder: clears the superclass state, zeroes the bit field,
 * empties the pieces list (or its field builder), and drops every singular
 * message field together with its field builder.
 *
 * @return this builder
 */
@java.lang.Override
public Builder clear() {
  super.clear();
  bitField0_ = 0;

  // Repeated field: reset whichever representation is currently active.
  if (piecesBuilder_ != null) {
    pieces_ = null;
    piecesBuilder_.clear();
  } else {
    pieces_ = java.util.Collections.emptyList();
  }
  bitField0_ &= ~0x00000001;

  // Singular message fields: forget the value and dispose any field builder.
  trainerSpec_ = null;
  if (trainerSpecBuilder_ != null) {
    trainerSpecBuilder_.dispose();
    trainerSpecBuilder_ = null;
  }

  normalizerSpec_ = null;
  if (normalizerSpecBuilder_ != null) {
    normalizerSpecBuilder_.dispose();
    normalizerSpecBuilder_ = null;
  }

  selfTestData_ = null;
  if (selfTestDataBuilder_ != null) {
    selfTestDataBuilder_.dispose();
    selfTestDataBuilder_ = null;
  }

  denormalizerSpec_ = null;
  if (denormalizerSpecBuilder_ != null) {
    denormalizerSpecBuilder_.dispose();
    denormalizerSpecBuilder_ = null;
  }
  return this;
}
/**
 * Returns the descriptor held in
 * {@code internal_static_sentencepiece_ModelProto_descriptor}.
 *
 * @return the {@code ModelProto} descriptor
 */
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return sentencepiece.SentencepieceModel.internal_static_sentencepiece_ModelProto_descriptor;
}
/**
 * Returns the result of {@code ModelProto.getDefaultInstance()}.
 *
 * @return the default {@code ModelProto} instance
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto getDefaultInstanceForType() {
return sentencepiece.SentencepieceModel.ModelProto.getDefaultInstance();
}
/**
 * Builds the message via {@link #buildPartial()} and rejects it when it is
 * not initialized.
 *
 * @return the built message
 * @throws RuntimeException the exception produced by
 *     {@code newUninitializedMessageException} when the result is not initialized
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto build() {
  final sentencepiece.SentencepieceModel.ModelProto built = buildPartial();
  if (built.isInitialized()) {
    return built;
  }
  throw newUninitializedMessageException(built);
}
/**
 * Builds the message without checking initialization: copies the repeated
 * field, then the singular fields when any bit in {@code bitField0_} is set,
 * and finally calls {@code onBuilt()}.
 *
 * @return the (possibly uninitialized) message
 */
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto buildPartial() {
  sentencepiece.SentencepieceModel.ModelProto message =
      new sentencepiece.SentencepieceModel.ModelProto(this);
  buildPartialRepeatedFields(message);
  if (bitField0_ != 0) {
    buildPartial0(message);
  }
  onBuilt();
  return message;
}
/**
 * Copies the {@code pieces} repeated field into {@code result}.
 *
 * <p>With a field builder present, its built list is used. Otherwise the local
 * list is wrapped as unmodifiable (and the mutability bit cleared) if this
 * builder owned a mutable copy, and the list is then shared with the result.
 *
 * @param result the message being populated
 */
private void buildPartialRepeatedFields(sentencepiece.SentencepieceModel.ModelProto result) {
  if (piecesBuilder_ != null) {
    result.pieces_ = piecesBuilder_.build();
    return;
  }
  final boolean ownsMutableCopy = (bitField0_ & 0x00000001) != 0;
  if (ownsMutableCopy) {
    pieces_ = java.util.Collections.unmodifiableList(pieces_);
    bitField0_ &= ~0x00000001;
  }
  result.pieces_ = pieces_;
}
/**
 * Copies the singular message fields that are marked present in this builder
 * into {@code result} and ORs the matching presence bits into
 * {@code result.bitField0_}.
 *
 * <p>Builder bits 0x02/0x04/0x08/0x10 map to message bits 0x01/0x02/0x04/0x08.
 * Each value comes from the field builder when one exists, else from the
 * plain field.
 *
 * @param result the message being populated
 */
private void buildPartial0(sentencepiece.SentencepieceModel.ModelProto result) {
  final int builderBits = bitField0_;
  int messageBits = 0;

  if ((builderBits & 0x00000002) != 0) {
    if (trainerSpecBuilder_ == null) {
      result.trainerSpec_ = trainerSpec_;
    } else {
      result.trainerSpec_ = trainerSpecBuilder_.build();
    }
    messageBits |= 0x00000001;
  }
  if ((builderBits & 0x00000004) != 0) {
    if (normalizerSpecBuilder_ == null) {
      result.normalizerSpec_ = normalizerSpec_;
    } else {
      result.normalizerSpec_ = normalizerSpecBuilder_.build();
    }
    messageBits |= 0x00000002;
  }
  if ((builderBits & 0x00000008) != 0) {
    if (selfTestDataBuilder_ == null) {
      result.selfTestData_ = selfTestData_;
    } else {
      result.selfTestData_ = selfTestDataBuilder_.build();
    }
    messageBits |= 0x00000004;
  }
  if ((builderBits & 0x00000010) != 0) {
    if (denormalizerSpecBuilder_ == null) {
      result.denormalizerSpec_ = denormalizerSpec_;
    } else {
      result.denormalizerSpec_ = denormalizerSpecBuilder_.build();
    }
    messageBits |= 0x00000008;
  }
  result.bitField0_ |= messageBits;
}
/**
 * Delegates to the superclass {@code clone()}; overridden to narrow the
 * return type to {@code Builder}.
 *
 * @return the cloned builder
 */
@java.lang.Override
public Builder clone() {
return super.clone();
}
/**
 * Delegates to the superclass {@code setField}; overridden to narrow the
 * return type to {@code Builder}.
 *
 * @param field descriptor of the field to set
 * @param value the new value
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder setField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.setField(field, value);
}
/**
 * Delegates to the superclass {@code clearField}; overridden to narrow the
 * return type to {@code Builder}.
 *
 * @param field descriptor of the field to clear
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder clearField(
com.google.protobuf.Descriptors.FieldDescriptor field) {
return super.clearField(field);
}
/**
 * Delegates to the superclass {@code clearOneof}; overridden to narrow the
 * return type to {@code Builder}.
 *
 * @param oneof descriptor of the oneof to clear
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder clearOneof(
com.google.protobuf.Descriptors.OneofDescriptor oneof) {
return super.clearOneof(oneof);
}
/**
 * Delegates to the superclass {@code setRepeatedField}; overridden to narrow
 * the return type to {@code Builder}.
 *
 * @param field descriptor of the repeated field
 * @param index position of the element to replace
 * @param value the new element value
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder setRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
int index, java.lang.Object value) {
return super.setRepeatedField(field, index, value);
}
/**
 * Delegates to the superclass {@code addRepeatedField}; overridden to narrow
 * the return type to {@code Builder}.
 *
 * @param field descriptor of the repeated field
 * @param value the element to append
 * @return the builder returned by the superclass
 */
@java.lang.Override
public Builder addRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.addRepeatedField(field, value);
}
/**
 * Sets a singular extension by delegating to the superclass; overridden to
 * narrow the return type to {@code Builder}.
 *
 * <p>The method type parameter {@code <Type>} is declared here; without it the
 * name {@code Type} used in the signature does not resolve.
 *
 * @param <Type> the extension's value type
 * @param extension the extension to set
 * @param value the new value
 * @return the builder returned by the superclass
 */
@java.lang.Override
public <Type> Builder setExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.ModelProto, Type> extension,
    Type value) {
  return super.setExtension(extension, value);
}
/**
 * Replaces one element of a repeated extension by delegating to the
 * superclass; overridden to narrow the return type to {@code Builder}.
 *
 * <p>The method type parameter {@code <Type>} and the list element type are
 * declared here; without them {@code Type} does not resolve.
 *
 * @param <Type> the extension's element type
 * @param extension the repeated extension to modify
 * @param index position of the element to replace
 * @param value the new element value
 * @return the builder returned by the superclass
 */
@java.lang.Override
public <Type> Builder setExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.ModelProto, java.util.List<Type>> extension,
    int index, Type value) {
  return super.setExtension(extension, index, value);
}
/**
 * Appends an element to a repeated extension by delegating to the superclass;
 * overridden to narrow the return type to {@code Builder}.
 *
 * <p>The method type parameter {@code <Type>} and the list element type are
 * declared here; without them {@code Type} does not resolve.
 *
 * @param <Type> the extension's element type
 * @param extension the repeated extension to append to
 * @param value the element to append
 * @return the builder returned by the superclass
 */
@java.lang.Override
public <Type> Builder addExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.ModelProto, java.util.List<Type>> extension,
    Type value) {
  return super.addExtension(extension, value);
}
/**
 * Clears an extension by delegating to the superclass; overridden to narrow
 * the return type to {@code Builder}.
 *
 * <p>The method type parameter {@code <T>} is declared here; without it the
 * name {@code T} used in the signature does not resolve.
 *
 * @param <T> the extension's value type
 * @param extension the extension to clear
 * @return the builder returned by the superclass
 */
@java.lang.Override
public <T> Builder clearExtension(
    com.google.protobuf.GeneratedMessage.GeneratedExtension<
        sentencepiece.SentencepieceModel.ModelProto, T> extension) {
  return super.clearExtension(extension);
}
/**
 * Merges another message into this builder. A {@code ModelProto} is routed to
 * the typed overload; any other message goes to the superclass merge.
 *
 * @param other the message to merge from
 * @return this builder (or the result of the typed overload)
 */
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
  if (!(other instanceof sentencepiece.SentencepieceModel.ModelProto)) {
    super.mergeFrom(other);
    return this;
  }
  return mergeFrom((sentencepiece.SentencepieceModel.ModelProto) other);
}
/**
 * Merges the contents of {@code other} into this builder.
 *
 * <p>Pieces from {@code other} are appended to this builder's pieces; each
 * singular message field that {@code other} has set is merged through its
 * {@code merge*} method; extension fields and unknown fields are merged last.
 *
 * @param other the message to merge from
 * @return this builder
 */
public Builder mergeFrom(sentencepiece.SentencepieceModel.ModelProto other) {
// Merging the default instance is a no-op.
if (other == sentencepiece.SentencepieceModel.ModelProto.getDefaultInstance()) return this;
if (piecesBuilder_ == null) {
if (!other.pieces_.isEmpty()) {
if (pieces_.isEmpty()) {
// Nothing local yet: share other's list and clear the mutability bit so
// ensurePiecesIsMutable() copies it before any later modification.
pieces_ = other.pieces_;
bitField0_ = (bitField0_ & ~0x00000001);
} else {
ensurePiecesIsMutable();
pieces_.addAll(other.pieces_);
}
onChanged();
}
} else {
if (!other.pieces_.isEmpty()) {
if (piecesBuilder_.isEmpty()) {
// The field builder holds nothing: drop it and share other's list,
// re-creating the field builder only when builders are always required.
piecesBuilder_.dispose();
piecesBuilder_ = null;
pieces_ = other.pieces_;
bitField0_ = (bitField0_ & ~0x00000001);
piecesBuilder_ =
com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders ?
getPiecesFieldBuilder() : null;
} else {
piecesBuilder_.addAllMessages(other.pieces_);
}
}
}
if (other.hasTrainerSpec()) {
mergeTrainerSpec(other.getTrainerSpec());
}
if (other.hasNormalizerSpec()) {
mergeNormalizerSpec(other.getNormalizerSpec());
}
if (other.hasSelfTestData()) {
mergeSelfTestData(other.getSelfTestData());
}
if (other.hasDenormalizerSpec()) {
mergeDenormalizerSpec(other.getDenormalizerSpec());
}
this.mergeExtensionFields(other);
this.mergeUnknownFields(other.getUnknownFields());
onChanged();
return this;
}
/**
 * Reports whether this builder is fully initialized: every piece, every
 * singular message field that is set, and all extensions must be initialized.
 *
 * @return {@code true} if nothing checked here is uninitialized
 */
@java.lang.Override
public final boolean isInitialized() {
  for (int pieceIndex = 0; pieceIndex < getPiecesCount(); pieceIndex++) {
    if (!getPieces(pieceIndex).isInitialized()) {
      return false;
    }
  }
  if (hasTrainerSpec() && !getTrainerSpec().isInitialized()) {
    return false;
  }
  if (hasNormalizerSpec() && !getNormalizerSpec().isInitialized()) {
    return false;
  }
  if (hasSelfTestData() && !getSelfTestData().isInitialized()) {
    return false;
  }
  if (hasDenormalizerSpec() && !getDenormalizerSpec().isInitialized()) {
    return false;
  }
  return extensionsAreInitialized();
}
/**
 * Reads fields from {@code input} and merges them into this builder until a
 * zero tag is read or an unknown field turns out to be an end-group tag.
 *
 * <p>{@code onChanged()} is always called on exit, including when parsing fails.
 *
 * @param input the coded stream to read from
 * @param extensionRegistry registry passed to nested message parsing; must not be null
 * @return this builder
 * @throws java.lang.NullPointerException if {@code extensionRegistry} is null
 * @throws java.io.IOException the unwrapped cause of an
 *     {@code InvalidProtocolBufferException} raised while parsing
 */
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
case 0:
done = true;
break;
// Tag 10: field 1 (pieces), one SentencePiece message per occurrence.
case 10: {
sentencepiece.SentencepieceModel.ModelProto.SentencePiece m =
input.readMessage(
sentencepiece.SentencepieceModel.ModelProto.SentencePiece.PARSER,
extensionRegistry);
if (piecesBuilder_ == null) {
ensurePiecesIsMutable();
pieces_.add(m);
} else {
piecesBuilder_.addMessage(m);
}
break;
} // case 10
// Tag 18: field 2 (trainer_spec), read into its nested builder.
case 18: {
input.readMessage(
getTrainerSpecFieldBuilder().getBuilder(),
extensionRegistry);
bitField0_ |= 0x00000002;
break;
} // case 18
// Tag 26: field 3 (normalizer_spec), read into its nested builder.
case 26: {
input.readMessage(
getNormalizerSpecFieldBuilder().getBuilder(),
extensionRegistry);
bitField0_ |= 0x00000004;
break;
} // case 26
// Tag 34: field 4 (self_test_data), read into its nested builder.
case 34: {
input.readMessage(
getSelfTestDataFieldBuilder().getBuilder(),
extensionRegistry);
bitField0_ |= 0x00000008;
break;
} // case 34
// Tag 42: field 5 (denormalizer_spec), read into its nested builder.
case 42: {
input.readMessage(
getDenormalizerSpecFieldBuilder().getBuilder(),
extensionRegistry);
bitField0_ |= 0x00000010;
break;
} // case 42
default: {
if (!super.parseUnknownField(input, extensionRegistry, tag)) {
done = true; // was an endgroup tag
}
break;
} // default:
} // switch (tag)
} // while (!done)
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.unwrapIOException();
} finally {
onChanged();
} // finally
return this;
}
// Bit 0x01: pieces_ is a private mutable copy (set by ensurePiecesIsMutable()).
// Bits 0x02, 0x04, 0x08, 0x10: presence of trainerSpec, normalizerSpec,
// selfTestData and denormalizerSpec respectively.
private int bitField0_;
// Local storage for the repeated "pieces" field while no field builder is in
// use. Typed with its element type so that pieces_.get(i) yields a
// SentencePiece rather than Object.
private java.util.List<sentencepiece.SentencepieceModel.ModelProto.SentencePiece> pieces_ =
    java.util.Collections.emptyList();
/**
 * Makes {@code pieces_} safe to modify: unless bit 0x01 of {@code bitField0_}
 * is already set, the list is copied into a new {@code ArrayList} and the bit
 * is set.
 *
 * <p>The copy is created with an explicit element type instead of a raw
 * {@code ArrayList}.
 */
private void ensurePiecesIsMutable() {
  if ((bitField0_ & 0x00000001) == 0) {
    pieces_ =
        new java.util.ArrayList<sentencepiece.SentencepieceModel.ModelProto.SentencePiece>(
            pieces_);
    bitField0_ |= 0x00000001;
  }
}
// Field builder for "pieces"; null until getPiecesFieldBuilder() creates it,
// after which pieces_ is set to null and this builder is used instead.
private com.google.protobuf.RepeatedFieldBuilderV3<
sentencepiece.SentencepieceModel.ModelProto.SentencePiece, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder, sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder> piecesBuilder_;
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * <p>The return type carries its element type instead of a raw {@code List}.
 *
 * @return an unmodifiable view of the local list, or the field builder's
 *     message list when a field builder is in use
 */
public java.util.List<sentencepiece.SentencepieceModel.ModelProto.SentencePiece> getPiecesList() {
  if (piecesBuilder_ == null) {
    return java.util.Collections.unmodifiableList(pieces_);
  } else {
    return piecesBuilder_.getMessageList();
  }
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @return the number of pieces, taken from the field builder when one is in
 *     use and from the local list otherwise
 */
public int getPiecesCount() {
  return piecesBuilder_ != null ? piecesBuilder_.getCount() : pieces_.size();
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position of the piece to return
 * @return the piece at {@code index}
 */
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece getPieces(int index) {
  if (piecesBuilder_ != null) {
    return piecesBuilder_.getMessage(index);
  }
  return pieces_.get(index);
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position of the piece to replace
 * @param value the replacement piece
 * @return this builder
 * @throws NullPointerException if {@code value} is null and no field builder is in use
 */
public Builder setPieces(
    int index, sentencepiece.SentencepieceModel.ModelProto.SentencePiece value) {
  if (piecesBuilder_ != null) {
    piecesBuilder_.setMessage(index, value);
    return this;
  }
  if (value == null) {
    throw new NullPointerException();
  }
  ensurePiecesIsMutable();
  pieces_.set(index, value);
  onChanged();
  return this;
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position of the piece to replace
 * @param builderForValue builder whose built message becomes the replacement
 * @return this builder
 */
public Builder setPieces(
    int index,
    sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder builderForValue) {
  if (piecesBuilder_ != null) {
    piecesBuilder_.setMessage(index, builderForValue.build());
    return this;
  }
  ensurePiecesIsMutable();
  pieces_.set(index, builderForValue.build());
  onChanged();
  return this;
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param value the piece to append
 * @return this builder
 * @throws NullPointerException if {@code value} is null and no field builder is in use
 */
public Builder addPieces(sentencepiece.SentencepieceModel.ModelProto.SentencePiece value) {
  if (piecesBuilder_ != null) {
    piecesBuilder_.addMessage(value);
    return this;
  }
  if (value == null) {
    throw new NullPointerException();
  }
  ensurePiecesIsMutable();
  pieces_.add(value);
  onChanged();
  return this;
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position at which to insert
 * @param value the piece to insert
 * @return this builder
 * @throws NullPointerException if {@code value} is null and no field builder is in use
 */
public Builder addPieces(
    int index, sentencepiece.SentencepieceModel.ModelProto.SentencePiece value) {
  if (piecesBuilder_ != null) {
    piecesBuilder_.addMessage(index, value);
    return this;
  }
  if (value == null) {
    throw new NullPointerException();
  }
  ensurePiecesIsMutable();
  pieces_.add(index, value);
  onChanged();
  return this;
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param builderForValue builder whose built message is appended
 * @return this builder
 */
public Builder addPieces(
    sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder builderForValue) {
  if (piecesBuilder_ != null) {
    piecesBuilder_.addMessage(builderForValue.build());
    return this;
  }
  ensurePiecesIsMutable();
  pieces_.add(builderForValue.build());
  onChanged();
  return this;
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position at which to insert
 * @param builderForValue builder whose built message is inserted
 * @return this builder
 */
public Builder addPieces(
    int index,
    sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder builderForValue) {
  if (piecesBuilder_ != null) {
    piecesBuilder_.addMessage(index, builderForValue.build());
    return this;
  }
  ensurePiecesIsMutable();
  pieces_.add(index, builderForValue.build());
  onChanged();
  return this;
}
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
public Builder addAllPieces(
java.lang.Iterable extends sentencepiece.SentencepieceModel.ModelProto.SentencePiece> values) {
if (piecesBuilder_ == null) {
ensurePiecesIsMutable();
com.google.protobuf.AbstractMessageLite.Builder.addAll(
values, pieces_);
onChanged();
} else {
piecesBuilder_.addAllMessages(values);
}
return this;
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @return this builder, with all pieces removed
 */
public Builder clearPieces() {
  if (piecesBuilder_ != null) {
    piecesBuilder_.clear();
    return this;
  }
  pieces_ = java.util.Collections.emptyList();
  bitField0_ &= ~0x00000001;
  onChanged();
  return this;
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position of the piece to remove
 * @return this builder
 */
public Builder removePieces(int index) {
  if (piecesBuilder_ != null) {
    piecesBuilder_.remove(index);
    return this;
  }
  ensurePiecesIsMutable();
  pieces_.remove(index);
  onChanged();
  return this;
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position of the piece
 * @return the nested builder at {@code index}, obtained from the pieces field
 *     builder (which is created if it does not exist yet)
 */
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder getPiecesBuilder(
    int index) {
  com.google.protobuf.RepeatedFieldBuilderV3<
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece,
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder,
      sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder> fieldBuilder =
          getPiecesFieldBuilder();
  return fieldBuilder.getBuilder(index);
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position of the piece
 * @return the piece from the local list, or the field builder's
 *     message-or-builder view when a field builder is in use
 */
public sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder getPiecesOrBuilder(
    int index) {
  if (piecesBuilder_ != null) {
    return piecesBuilder_.getMessageOrBuilder(index);
  }
  return pieces_.get(index);
}
/**
*
* Sentence pieces with scores.
*
*
* repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;
*/
public java.util.List extends sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder>
getPiecesOrBuilderList() {
if (piecesBuilder_ != null) {
return piecesBuilder_.getMessageOrBuilderList();
} else {
return java.util.Collections.unmodifiableList(pieces_);
}
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @return a nested builder added through the pieces field builder, seeded
 *     with the default {@code SentencePiece} instance
 */
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder addPiecesBuilder() {
  com.google.protobuf.RepeatedFieldBuilderV3<
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece,
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder,
      sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder> fieldBuilder =
          getPiecesFieldBuilder();
  return fieldBuilder.addBuilder(
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDefaultInstance());
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * @param index position at which the new nested builder is added
 * @return a nested builder added through the pieces field builder, seeded
 *     with the default {@code SentencePiece} instance
 */
public sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder addPiecesBuilder(
    int index) {
  com.google.protobuf.RepeatedFieldBuilderV3<
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece,
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder,
      sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder> fieldBuilder =
          getPiecesFieldBuilder();
  return fieldBuilder.addBuilder(
      index, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.getDefaultInstance());
}
/**
 * <pre>
 * Sentence pieces with scores.
 * </pre>
 *
 * <code>repeated .sentencepiece.ModelProto.SentencePiece pieces = 1;</code>
 *
 * <p>The return type carries its element type instead of a raw {@code List}.
 *
 * @return the builder list of the pieces field builder (which is created if
 *     it does not exist yet)
 */
public java.util.List<sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder>
    getPiecesBuilderList() {
  return getPiecesFieldBuilder().getBuilderList();
}
/**
 * Returns the field builder for {@code pieces}, creating it on first use from
 * the current list and mutability bit; after creation {@code pieces_} is set
 * to null.
 *
 * @return the pieces field builder
 */
private com.google.protobuf.RepeatedFieldBuilderV3<
    sentencepiece.SentencepieceModel.ModelProto.SentencePiece, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder, sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder>
    getPiecesFieldBuilder() {
  if (piecesBuilder_ != null) {
    return piecesBuilder_;
  }
  final boolean listIsMutable = (bitField0_ & 0x00000001) != 0;
  piecesBuilder_ = new com.google.protobuf.RepeatedFieldBuilderV3<
      sentencepiece.SentencepieceModel.ModelProto.SentencePiece, sentencepiece.SentencepieceModel.ModelProto.SentencePiece.Builder, sentencepiece.SentencepieceModel.ModelProto.SentencePieceOrBuilder>(
          pieces_,
          listIsMutable,
          getParentForChildren(),
          isClean());
  pieces_ = null;
  return piecesBuilder_;
}
// Value of trainer_spec while no field builder is in use; set to null once
// getTrainerSpecFieldBuilder() creates the field builder.
private sentencepiece.SentencepieceModel.TrainerSpec trainerSpec_;
// Field builder for trainer_spec; null until created on demand.
private com.google.protobuf.SingleFieldBuilderV3<
sentencepiece.SentencepieceModel.TrainerSpec, sentencepiece.SentencepieceModel.TrainerSpec.Builder, sentencepiece.SentencepieceModel.TrainerSpecOrBuilder> trainerSpecBuilder_;
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 * @return Whether the trainerSpec field is set.
 */
public boolean hasTrainerSpec() {
  final int presenceBit = bitField0_ & 0x00000002;
  return presenceBit != 0;
}
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 * @return The trainerSpec: the field builder's message when one is in use,
 *     else the stored value, else the default instance.
 */
public sentencepiece.SentencepieceModel.TrainerSpec getTrainerSpec() {
  if (trainerSpecBuilder_ != null) {
    return trainerSpecBuilder_.getMessage();
  }
  if (trainerSpec_ != null) {
    return trainerSpec_;
  }
  return sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance();
}
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 *
 * @param value the new trainer spec
 * @return this builder
 * @throws NullPointerException if {@code value} is null and no field builder is in use
 */
public Builder setTrainerSpec(sentencepiece.SentencepieceModel.TrainerSpec value) {
  if (trainerSpecBuilder_ != null) {
    trainerSpecBuilder_.setMessage(value);
  } else {
    if (value == null) {
      throw new NullPointerException();
    }
    trainerSpec_ = value;
  }
  bitField0_ |= 0x00000002;
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 *
 * @param builderForValue builder whose built message becomes the new value
 * @return this builder
 */
public Builder setTrainerSpec(
    sentencepiece.SentencepieceModel.TrainerSpec.Builder builderForValue) {
  if (trainerSpecBuilder_ != null) {
    trainerSpecBuilder_.setMessage(builderForValue.build());
  } else {
    trainerSpec_ = builderForValue.build();
  }
  bitField0_ |= 0x00000002;
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 *
 * <p>Merges {@code value} into the current trainer spec. With a field builder
 * the merge goes through it. Without one, the merge goes through the nested
 * builder if a non-default value is already present; otherwise {@code value}
 * simply replaces the field. The presence bit is set and {@code onChanged()}
 * is called only if {@code trainerSpec_} is non-null afterwards.
 *
 * @param value the trainer spec to merge in
 * @return this builder
 */
public Builder mergeTrainerSpec(sentencepiece.SentencepieceModel.TrainerSpec value) {
  if (trainerSpecBuilder_ != null) {
    trainerSpecBuilder_.mergeFrom(value);
  } else if ((bitField0_ & 0x00000002) != 0
      && trainerSpec_ != null
      && trainerSpec_ != sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance()) {
    getTrainerSpecBuilder().mergeFrom(value);
  } else {
    trainerSpec_ = value;
  }
  if (trainerSpec_ != null) {
    bitField0_ |= 0x00000002;
    onChanged();
  }
  return this;
}
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 *
 * @return this builder, with the presence bit cleared, the value dropped and
 *     any field builder disposed
 */
public Builder clearTrainerSpec() {
  bitField0_ &= ~0x00000002;
  trainerSpec_ = null;
  if (trainerSpecBuilder_ != null) {
    trainerSpecBuilder_.dispose();
    trainerSpecBuilder_ = null;
  }
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 *
 * @return the nested builder for trainer_spec; calling this marks the field
 *     as present and signals a change
 */
public sentencepiece.SentencepieceModel.TrainerSpec.Builder getTrainerSpecBuilder() {
  bitField0_ = bitField0_ | 0x00000002;
  onChanged();
  com.google.protobuf.SingleFieldBuilderV3<
      sentencepiece.SentencepieceModel.TrainerSpec,
      sentencepiece.SentencepieceModel.TrainerSpec.Builder,
      sentencepiece.SentencepieceModel.TrainerSpecOrBuilder> fieldBuilder =
          getTrainerSpecFieldBuilder();
  return fieldBuilder.getBuilder();
}
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 *
 * @return the field builder's message-or-builder view when one is in use,
 *     else the stored value, else the default instance
 */
public sentencepiece.SentencepieceModel.TrainerSpecOrBuilder getTrainerSpecOrBuilder() {
  if (trainerSpecBuilder_ != null) {
    return trainerSpecBuilder_.getMessageOrBuilder();
  }
  if (trainerSpec_ != null) {
    return trainerSpec_;
  }
  return sentencepiece.SentencepieceModel.TrainerSpec.getDefaultInstance();
}
/**
 * <pre>
 * Spec used to generate this model file.
 * </pre>
 *
 * <code>optional .sentencepiece.TrainerSpec trainer_spec = 2;</code>
 *
 * <p>Returns the field builder, creating it on first use from the current
 * value; after creation {@code trainerSpec_} is set to null.
 */
private com.google.protobuf.SingleFieldBuilderV3<
    sentencepiece.SentencepieceModel.TrainerSpec, sentencepiece.SentencepieceModel.TrainerSpec.Builder, sentencepiece.SentencepieceModel.TrainerSpecOrBuilder>
    getTrainerSpecFieldBuilder() {
  if (trainerSpecBuilder_ != null) {
    return trainerSpecBuilder_;
  }
  trainerSpecBuilder_ = new com.google.protobuf.SingleFieldBuilderV3<
      sentencepiece.SentencepieceModel.TrainerSpec, sentencepiece.SentencepieceModel.TrainerSpec.Builder, sentencepiece.SentencepieceModel.TrainerSpecOrBuilder>(
          getTrainerSpec(),
          getParentForChildren(),
          isClean());
  trainerSpec_ = null;
  return trainerSpecBuilder_;
}
// Value of normalizer_spec while no field builder is in use; set to null once
// getNormalizerSpecFieldBuilder() creates the field builder.
private sentencepiece.SentencepieceModel.NormalizerSpec normalizerSpec_;
// Field builder for normalizer_spec; null until created on demand.
private com.google.protobuf.SingleFieldBuilderV3<
sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder> normalizerSpecBuilder_;
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 * @return Whether the normalizerSpec field is set.
 */
public boolean hasNormalizerSpec() {
  final int presenceBit = bitField0_ & 0x00000004;
  return presenceBit != 0;
}
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 * @return The normalizerSpec: the field builder's message when one is in use,
 *     else the stored value, else the default instance.
 */
public sentencepiece.SentencepieceModel.NormalizerSpec getNormalizerSpec() {
  if (normalizerSpecBuilder_ != null) {
    return normalizerSpecBuilder_.getMessage();
  }
  if (normalizerSpec_ != null) {
    return normalizerSpec_;
  }
  return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
}
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 *
 * @param value the new normalizer spec
 * @return this builder
 * @throws NullPointerException if {@code value} is null and no field builder is in use
 */
public Builder setNormalizerSpec(sentencepiece.SentencepieceModel.NormalizerSpec value) {
  if (normalizerSpecBuilder_ != null) {
    normalizerSpecBuilder_.setMessage(value);
  } else {
    if (value == null) {
      throw new NullPointerException();
    }
    normalizerSpec_ = value;
  }
  bitField0_ |= 0x00000004;
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 *
 * @param builderForValue builder whose built message becomes the new value
 * @return this builder
 */
public Builder setNormalizerSpec(
    sentencepiece.SentencepieceModel.NormalizerSpec.Builder builderForValue) {
  if (normalizerSpecBuilder_ != null) {
    normalizerSpecBuilder_.setMessage(builderForValue.build());
  } else {
    normalizerSpec_ = builderForValue.build();
  }
  bitField0_ |= 0x00000004;
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 *
 * <p>Merges {@code value} into the current normalizer spec. With a field
 * builder the merge goes through it. Without one, the merge goes through the
 * nested builder if a non-default value is already present; otherwise
 * {@code value} simply replaces the field. The presence bit is set and
 * {@code onChanged()} is called only if {@code normalizerSpec_} is non-null
 * afterwards.
 *
 * @param value the normalizer spec to merge in
 * @return this builder
 */
public Builder mergeNormalizerSpec(sentencepiece.SentencepieceModel.NormalizerSpec value) {
  if (normalizerSpecBuilder_ != null) {
    normalizerSpecBuilder_.mergeFrom(value);
  } else if ((bitField0_ & 0x00000004) != 0
      && normalizerSpec_ != null
      && normalizerSpec_ != sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance()) {
    getNormalizerSpecBuilder().mergeFrom(value);
  } else {
    normalizerSpec_ = value;
  }
  if (normalizerSpec_ != null) {
    bitField0_ |= 0x00000004;
    onChanged();
  }
  return this;
}
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 *
 * @return this builder, with the presence bit cleared, the value dropped and
 *     any field builder disposed
 */
public Builder clearNormalizerSpec() {
  bitField0_ &= ~0x00000004;
  normalizerSpec_ = null;
  if (normalizerSpecBuilder_ != null) {
    normalizerSpecBuilder_.dispose();
    normalizerSpecBuilder_ = null;
  }
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 *
 * @return the nested builder for normalizer_spec; calling this marks the
 *     field as present and signals a change
 */
public sentencepiece.SentencepieceModel.NormalizerSpec.Builder getNormalizerSpecBuilder() {
  bitField0_ = bitField0_ | 0x00000004;
  onChanged();
  com.google.protobuf.SingleFieldBuilderV3<
      sentencepiece.SentencepieceModel.NormalizerSpec,
      sentencepiece.SentencepieceModel.NormalizerSpec.Builder,
      sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder> fieldBuilder =
          getNormalizerSpecFieldBuilder();
  return fieldBuilder.getBuilder();
}
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 *
 * @return the field builder's message-or-builder view when one is in use,
 *     else the stored value, else the default instance
 */
public sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getNormalizerSpecOrBuilder() {
  if (normalizerSpecBuilder_ != null) {
    return normalizerSpecBuilder_.getMessageOrBuilder();
  }
  if (normalizerSpec_ != null) {
    return normalizerSpec_;
  }
  return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
}
/**
 * <pre>
 * Spec for text normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec normalizer_spec = 3;</code>
 *
 * <p>Returns the field builder, creating it on first use from the current
 * value; after creation {@code normalizerSpec_} is set to null.
 */
private com.google.protobuf.SingleFieldBuilderV3<
    sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder>
    getNormalizerSpecFieldBuilder() {
  if (normalizerSpecBuilder_ != null) {
    return normalizerSpecBuilder_;
  }
  normalizerSpecBuilder_ = new com.google.protobuf.SingleFieldBuilderV3<
      sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder>(
          getNormalizerSpec(),
          getParentForChildren(),
          isClean());
  normalizerSpec_ = null;
  return normalizerSpecBuilder_;
}
// Value of self_test_data while no field builder is in use; set to null once
// getSelfTestDataFieldBuilder() creates the field builder.
private sentencepiece.SentencepieceModel.SelfTestData selfTestData_;
// Field builder for self_test_data; null until created on demand.
private com.google.protobuf.SingleFieldBuilderV3<
sentencepiece.SentencepieceModel.SelfTestData, sentencepiece.SentencepieceModel.SelfTestData.Builder, sentencepiece.SentencepieceModel.SelfTestDataOrBuilder> selfTestDataBuilder_;
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 * @return Whether the selfTestData field is set.
 */
public boolean hasSelfTestData() {
  final int presenceBit = bitField0_ & 0x00000008;
  return presenceBit != 0;
}
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 * @return The selfTestData: the field builder's message when one is in use,
 *     else the stored value, else the default instance.
 */
public sentencepiece.SentencepieceModel.SelfTestData getSelfTestData() {
  if (selfTestDataBuilder_ != null) {
    return selfTestDataBuilder_.getMessage();
  }
  if (selfTestData_ != null) {
    return selfTestData_;
  }
  return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
}
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 *
 * @param value the new self-test data
 * @return this builder
 * @throws NullPointerException if {@code value} is null and no field builder is in use
 */
public Builder setSelfTestData(sentencepiece.SentencepieceModel.SelfTestData value) {
  if (selfTestDataBuilder_ != null) {
    selfTestDataBuilder_.setMessage(value);
  } else {
    if (value == null) {
      throw new NullPointerException();
    }
    selfTestData_ = value;
  }
  bitField0_ |= 0x00000008;
  onChanged();
  return this;
}
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 *
 * @param builderForValue builder whose built message becomes the new value
 * @return this builder
 */
public Builder setSelfTestData(
    sentencepiece.SentencepieceModel.SelfTestData.Builder builderForValue) {
  if (selfTestDataBuilder_ != null) {
    selfTestDataBuilder_.setMessage(builderForValue.build());
  } else {
    selfTestData_ = builderForValue.build();
  }
  bitField0_ |= 0x00000008;
  onChanged();
  return this;
}
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 *
 * <p>Merges {@code value} into the current self-test data. With a field
 * builder the merge goes through it. Without one, the merge goes through the
 * nested builder if a non-default value is already present; otherwise
 * {@code value} simply replaces the field. The presence bit is set and
 * {@code onChanged()} is called only if {@code selfTestData_} is non-null
 * afterwards.
 *
 * @param value the self-test data to merge in
 * @return this builder
 */
public Builder mergeSelfTestData(sentencepiece.SentencepieceModel.SelfTestData value) {
  if (selfTestDataBuilder_ != null) {
    selfTestDataBuilder_.mergeFrom(value);
  } else if ((bitField0_ & 0x00000008) != 0
      && selfTestData_ != null
      && selfTestData_ != sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance()) {
    getSelfTestDataBuilder().mergeFrom(value);
  } else {
    selfTestData_ = value;
  }
  if (selfTestData_ != null) {
    bitField0_ |= 0x00000008;
    onChanged();
  }
  return this;
}
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 *
 * @return this builder, with the presence bit cleared, the value dropped and
 *     any field builder disposed
 */
public Builder clearSelfTestData() {
  bitField0_ &= ~0x00000008;
  selfTestData_ = null;
  if (selfTestDataBuilder_ != null) {
    selfTestDataBuilder_.dispose();
    selfTestDataBuilder_ = null;
  }
  onChanged();
  return this;
}
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 *
 * @return the nested builder for self_test_data; calling this marks the field
 *     as present and signals a change
 */
public sentencepiece.SentencepieceModel.SelfTestData.Builder getSelfTestDataBuilder() {
  bitField0_ = bitField0_ | 0x00000008;
  onChanged();
  com.google.protobuf.SingleFieldBuilderV3<
      sentencepiece.SentencepieceModel.SelfTestData,
      sentencepiece.SentencepieceModel.SelfTestData.Builder,
      sentencepiece.SentencepieceModel.SelfTestDataOrBuilder> fieldBuilder =
          getSelfTestDataFieldBuilder();
  return fieldBuilder.getBuilder();
}
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 *
 * @return the field builder's message-or-builder view when one is in use,
 *     else the stored value, else the default instance
 */
public sentencepiece.SentencepieceModel.SelfTestDataOrBuilder getSelfTestDataOrBuilder() {
  if (selfTestDataBuilder_ != null) {
    return selfTestDataBuilder_.getMessageOrBuilder();
  }
  if (selfTestData_ != null) {
    return selfTestData_;
  }
  return sentencepiece.SentencepieceModel.SelfTestData.getDefaultInstance();
}
/**
 * <pre>
 * Stores sample input and its expected segmentation to verify the model.
 * </pre>
 *
 * <code>optional .sentencepiece.SelfTestData self_test_data = 4;</code>
 *
 * <p>Returns the field builder, creating it on first use from the current
 * value; after creation {@code selfTestData_} is set to null.
 */
private com.google.protobuf.SingleFieldBuilderV3<
    sentencepiece.SentencepieceModel.SelfTestData, sentencepiece.SentencepieceModel.SelfTestData.Builder, sentencepiece.SentencepieceModel.SelfTestDataOrBuilder>
    getSelfTestDataFieldBuilder() {
  if (selfTestDataBuilder_ != null) {
    return selfTestDataBuilder_;
  }
  selfTestDataBuilder_ = new com.google.protobuf.SingleFieldBuilderV3<
      sentencepiece.SentencepieceModel.SelfTestData, sentencepiece.SentencepieceModel.SelfTestData.Builder, sentencepiece.SentencepieceModel.SelfTestDataOrBuilder>(
          getSelfTestData(),
          getParentForChildren(),
          isClean());
  selfTestData_ = null;
  return selfTestDataBuilder_;
}
// Value of denormalizer_spec while no field builder is in use; set to null
// once getDenormalizerSpecFieldBuilder() creates the field builder.
private sentencepiece.SentencepieceModel.NormalizerSpec denormalizerSpec_;
// Field builder for denormalizer_spec; null until created on demand.
private com.google.protobuf.SingleFieldBuilderV3<
sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder> denormalizerSpecBuilder_;
/**
 * <pre>
 * Spec for text de-normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
 * @return Whether the denormalizerSpec field is set.
 */
public boolean hasDenormalizerSpec() {
  final int presenceBit = bitField0_ & 0x00000010;
  return presenceBit != 0;
}
/**
 * <pre>
 * Spec for text de-normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
 * @return The denormalizerSpec: the field builder's message when one is in
 *     use, else the stored value, else the default instance.
 */
public sentencepiece.SentencepieceModel.NormalizerSpec getDenormalizerSpec() {
  if (denormalizerSpecBuilder_ != null) {
    return denormalizerSpecBuilder_.getMessage();
  }
  if (denormalizerSpec_ != null) {
    return denormalizerSpec_;
  }
  return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
}
/**
 * <pre>
 * Spec for text de-normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
 *
 * @param value the new denormalizer spec
 * @return this builder
 * @throws NullPointerException if {@code value} is null and no field builder is in use
 */
public Builder setDenormalizerSpec(sentencepiece.SentencepieceModel.NormalizerSpec value) {
  if (denormalizerSpecBuilder_ != null) {
    denormalizerSpecBuilder_.setMessage(value);
  } else {
    if (value == null) {
      throw new NullPointerException();
    }
    denormalizerSpec_ = value;
  }
  bitField0_ |= 0x00000010;
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec for text de-normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
 *
 * @param builderForValue builder whose built message becomes the new value
 * @return this builder
 */
public Builder setDenormalizerSpec(
    sentencepiece.SentencepieceModel.NormalizerSpec.Builder builderForValue) {
  if (denormalizerSpecBuilder_ != null) {
    denormalizerSpecBuilder_.setMessage(builderForValue.build());
  } else {
    denormalizerSpec_ = builderForValue.build();
  }
  bitField0_ |= 0x00000010;
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec for text de-normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
 *
 * <p>Merges {@code value} into the current denormalizer spec. With a field
 * builder the merge goes through it. Without one, the merge goes through the
 * nested builder if a non-default value is already present; otherwise
 * {@code value} simply replaces the field. The presence bit is set and
 * {@code onChanged()} is called only if {@code denormalizerSpec_} is non-null
 * afterwards.
 *
 * @param value the denormalizer spec to merge in
 * @return this builder
 */
public Builder mergeDenormalizerSpec(sentencepiece.SentencepieceModel.NormalizerSpec value) {
  if (denormalizerSpecBuilder_ != null) {
    denormalizerSpecBuilder_.mergeFrom(value);
  } else if ((bitField0_ & 0x00000010) != 0
      && denormalizerSpec_ != null
      && denormalizerSpec_ != sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance()) {
    getDenormalizerSpecBuilder().mergeFrom(value);
  } else {
    denormalizerSpec_ = value;
  }
  if (denormalizerSpec_ != null) {
    bitField0_ |= 0x00000010;
    onChanged();
  }
  return this;
}
/**
*
* Spec for text de-normalization.
*
*
* optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;
*/
public Builder clearDenormalizerSpec() {
  // Clear the 0x10 bit and drop the stored message.
  bitField0_ &= ~0x00000010;
  denormalizerSpec_ = null;
  // Dispose of and forget the nested field builder, if one was created.
  final boolean hadFieldBuilder = denormalizerSpecBuilder_ != null;
  if (hadFieldBuilder) {
    denormalizerSpecBuilder_.dispose();
    denormalizerSpecBuilder_ = null;
  }
  onChanged();
  return this;
}
/**
 * <pre>
 * Spec for text de-normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
 */
public sentencepiece.SentencepieceModel.NormalizerSpec.Builder getDenormalizerSpecBuilder() {
// Requesting the nested builder sets the 0x10 bit in bitField0_ (the same bit
// the set/merge/clear methods for denormalizer_spec manipulate).
bitField0_ |= 0x00000010;
// NOTE(review): onChanged() runs before the field builder is obtained, and
// getDenormalizerSpecFieldBuilder() passes isClean() into a newly created
// SingleFieldBuilderV3 -- presumably the order matters; confirm before reordering.
onChanged();
// Returns the Builder held by the (lazily created) single-field builder.
return getDenormalizerSpecFieldBuilder().getBuilder();
}
/**
 * <pre>
 * Spec for text de-normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
 */
public sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder getDenormalizerSpecOrBuilder() {
  // Without a nested field builder, expose the stored message (or the default
  // instance when nothing is stored).
  if (denormalizerSpecBuilder_ == null) {
    if (denormalizerSpec_ == null) {
      return sentencepiece.SentencepieceModel.NormalizerSpec.getDefaultInstance();
    }
    return denormalizerSpec_;
  }
  // Otherwise defer to the field builder's message-or-builder view.
  return denormalizerSpecBuilder_.getMessageOrBuilder();
}
/**
 * <pre>
 * Spec for text de-normalization.
 * </pre>
 *
 * <code>optional .sentencepiece.NormalizerSpec denormalizer_spec = 5;</code>
 */
private com.google.protobuf.SingleFieldBuilderV3<
    sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder>
    getDenormalizerSpecFieldBuilder() {
  // Already created: reuse it.
  if (denormalizerSpecBuilder_ != null) {
    return denormalizerSpecBuilder_;
  }
  // First request: create the field builder from the current value, the parent
  // for children, and the current clean flag (arguments evaluated in that order).
  denormalizerSpecBuilder_ =
      new com.google.protobuf.SingleFieldBuilderV3<
          sentencepiece.SentencepieceModel.NormalizerSpec, sentencepiece.SentencepieceModel.NormalizerSpec.Builder, sentencepiece.SentencepieceModel.NormalizerSpecOrBuilder>(
          getDenormalizerSpec(),
          getParentForChildren(),
          isClean());
  // The field builder now holds the value, so the direct reference is dropped.
  denormalizerSpec_ = null;
  return denormalizerSpecBuilder_;
}
// Final override that forwards the given UnknownFieldSet to the superclass
// implementation unchanged and returns its result.
@java.lang.Override
public final Builder setUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.setUnknownFields(unknownFields);
}
// Final override that forwards the given UnknownFieldSet to the superclass
// merge implementation unchanged and returns its result.
@java.lang.Override
public final Builder mergeUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.mergeUnknownFields(unknownFields);
}
// @@protoc_insertion_point(builder_scope:sentencepiece.ModelProto)
}
// @@protoc_insertion_point(class_scope:sentencepiece.ModelProto)
private static final sentencepiece.SentencepieceModel.ModelProto DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new sentencepiece.SentencepieceModel.ModelProto();
}
// Returns the shared DEFAULT_INSTANCE of ModelProto.
public static sentencepiece.SentencepieceModel.ModelProto getDefaultInstance() {
return DEFAULT_INSTANCE;
}
// Parser for ModelProto. The type argument <ModelProto> is restored here; the
// raw Parser/AbstractParser forms compiled only with unchecked warnings and
// forced callers to cast parse results. The erasure (and therefore the binary
// interface) is unchanged.
//
// The field is annotated @Deprecated; parser() and getParserForType() below
// return this same instance.
@java.lang.Deprecated public static final com.google.protobuf.Parser<ModelProto>
    PARSER = new com.google.protobuf.AbstractParser<ModelProto>() {
  // Parses from the stream into a fresh Builder and returns buildPartial().
  // Every failure is rethrown as InvalidProtocolBufferException carrying the
  // partially built message via setUnfinishedMessage.
  @java.lang.Override
  public ModelProto parsePartialFrom(
      com.google.protobuf.CodedInputStream input,
      com.google.protobuf.ExtensionRegistryLite extensionRegistry)
      throws com.google.protobuf.InvalidProtocolBufferException {
    Builder builder = newBuilder();
    try {
      builder.mergeFrom(input, extensionRegistry);
    } catch (com.google.protobuf.InvalidProtocolBufferException e) {
      throw e.setUnfinishedMessage(builder.buildPartial());
    } catch (com.google.protobuf.UninitializedMessageException e) {
      throw e.asInvalidProtocolBufferException().setUnfinishedMessage(builder.buildPartial());
    } catch (java.io.IOException e) {
      // Wrap plain I/O failures so callers see a single exception type.
      throw new com.google.protobuf.InvalidProtocolBufferException(e)
          .setUnfinishedMessage(builder.buildPartial());
    }
    return builder.buildPartial();
  }
};
// Static accessor for the ModelProto parser.
public static com.google.protobuf.Parser<ModelProto> parser() {
  return PARSER;
}
// Instance-level accessor returning the same parser.
@java.lang.Override
public com.google.protobuf.Parser<ModelProto> getParserForType() {
  return PARSER;
}
// Instance-level accessor returning the shared DEFAULT_INSTANCE of ModelProto.
@java.lang.Override
public sentencepiece.SentencepieceModel.ModelProto getDefaultInstanceForType() {
return DEFAULT_INSTANCE;
}
}
// Descriptor / FieldAccessorTable pairs, one pair per message type declared in
// this file (TrainerSpec, NormalizerSpec, SelfTestData, SelfTestData.Sample,
// ModelProto, ModelProto.SentencePiece). They are declared blank-final here and
// assigned exactly once in the static initializer that follows, after the file
// descriptor has been built.
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_sentencepiece_TrainerSpec_descriptor;
private static final
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internal_static_sentencepiece_TrainerSpec_fieldAccessorTable;
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_sentencepiece_NormalizerSpec_descriptor;
private static final
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internal_static_sentencepiece_NormalizerSpec_fieldAccessorTable;
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_sentencepiece_SelfTestData_descriptor;
private static final
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internal_static_sentencepiece_SelfTestData_fieldAccessorTable;
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_sentencepiece_SelfTestData_Sample_descriptor;
private static final
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internal_static_sentencepiece_SelfTestData_Sample_fieldAccessorTable;
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_sentencepiece_ModelProto_descriptor;
private static final
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internal_static_sentencepiece_ModelProto_fieldAccessorTable;
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_sentencepiece_ModelProto_SentencePiece_descriptor;
private static final
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internal_static_sentencepiece_ModelProto_SentencePiece_fieldAccessorTable;
// Returns the FileDescriptor stored in the static `descriptor` field, which is
// assigned by the static initializer of this class.
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
// File-level descriptor; non-final, assigned in the static initializer below
// from the embedded descriptorData strings.
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
// Builds the file descriptor from its embedded serialized form and wires up the
// per-message Descriptor / FieldAccessorTable fields declared above.
//
// descriptorData is a serialized FileDescriptorProto split into 40-byte chunks.
// Every byte matters: each nested record is length-prefixed, so a missing byte
// shifts everything after it. Bytes that had been lost from the literals are
// restored here:
//   * the second space in "\030  \001" for vocabulary_output_piece_score, whose
//     field number 32 is encoded as the byte ' ' (0x20);
//   * the default values "<unk>", "<s>", "</s>" and "<pad>" that follow the
//     length prefixes \005, \003, \004 and \005 of unk_piece, bos_piece,
//     eos_piece and pad_piece.
static {
  java.lang.String[] descriptorData = {
    "\n\031sentencepiece_model.proto\022\rsentencepie" +
    "ce\"\316\n\n\013TrainerSpec\022\r\n\005input\030\001 \003(\t\022\024\n\014inp" +
    "ut_format\030\007 \001(\t\022\024\n\014model_prefix\030\002 \001(\t\022A\n" +
    "\nmodel_type\030\003 \001(\0162$.sentencepiece.Traine" +
    "rSpec.ModelType:\007UNIGRAM\022\030\n\nvocab_size\030\004" +
    " \001(\005:\0048000\022\027\n\017accept_language\030\005 \003(\t\022 \n\025s" +
    "elf_test_sample_size\030\006 \001(\005:\0010\022\"\n\022charact" +
    "er_coverage\030\n \001(\002:\0060.9995\022\036\n\023input_sente" +
    "nce_size\030\013 \001(\004:\0010\022$\n\026shuffle_input_sente" +
    "nce\030\023 \001(\010:\004true\022 \n\024mining_sentence_size\030" +
    "\014 \001(\005B\002\030\001\022\"\n\026training_sentence_size\030\r \001(" +
    "\005B\002\030\001\022(\n\027seed_sentencepiece_size\030\016 \001(\005:\007" +
    "1000000\022\036\n\020shrinking_factor\030\017 \001(\002:\0040.75\022" +
    "!\n\023max_sentence_length\030\022 \001(\005:\0044192\022\027\n\013nu" +
    "m_threads\030\020 \001(\005:\00216\022\035\n\022num_sub_iteration" +
    "s\030\021 \001(\005:\0012\022$\n\030max_sentencepiece_length\030\024" +
    " \001(\005:\00216\022%\n\027split_by_unicode_script\030\025 \001(" +
    "\010:\004true\022\035\n\017split_by_number\030\027 \001(\010:\004true\022!" +
    "\n\023split_by_whitespace\030\026 \001(\010:\004true\022)\n\032tre" +
    "at_whitespace_as_suffix\030\030 \001(\010:\005false\022+\n\034" +
    "allow_whitespace_only_pieces\030\032 \001(\010:\005fals" +
    "e\022\033\n\014split_digits\030\031 \001(\010:\005false\022\027\n\017contro" +
    "l_symbols\030\036 \003(\t\022\034\n\024user_defined_symbols\030" +
    "\037 \003(\t\022\026\n\016required_chars\030$ \001(\t\022\034\n\rbyte_fa" +
    "llback\030# \001(\010:\005false\022+\n\035vocabulary_output" +
    // Field number 32 is the byte ' ', immediately followed by the ' ' tag byte.
    "_piece_score\030  \001(\010:\004true\022\036\n\020hard_vocab_l" +
    "imit\030! \001(\010:\004true\022\034\n\ruse_all_vocab\030\" \001(\010:" +
    "\005false\022\021\n\006unk_id\030( \001(\005:\0010\022\021\n\006bos_id\030) \001(" +
    "\005:\0011\022\021\n\006eos_id\030* \001(\005:\0012\022\022\n\006pad_id\030+ \001(\005:" +
    // Default piece strings follow their one-byte length prefixes.
    "\002-1\022\030\n\tunk_piece\030- \001(\t:\005<unk>\022\026\n\tbos_pie" +
    "ce\030. \001(\t:\003<s>\022\027\n\teos_piece\030/ \001(\t:\004</s>\022\030" +
    "\n\tpad_piece\0300 \001(\t:\005<pad>\022\032\n\013unk_surface\030" +
    ", \001(\t:\005 \342\201\207 \022+\n\034train_extremely_large_co" +
    "rpus\0301 \001(\010:\005false\"5\n\tModelType\022\013\n\007UNIGRA" +
    "M\020\001\022\007\n\003BPE\020\002\022\010\n\004WORD\020\003\022\010\n\004CHAR\020\004*\t\010\310\001\020\200\200" +
    "\200\200\002\"\321\001\n\016NormalizerSpec\022\014\n\004name\030\001 \001(\t\022\034\n\024" +
    "precompiled_charsmap\030\002 \001(\014\022\036\n\020add_dummy_" +
    "prefix\030\003 \001(\010:\004true\022&\n\030remove_extra_white" +
    "spaces\030\004 \001(\010:\004true\022 \n\022escape_whitespaces" +
    "\030\005 \001(\010:\004true\022\036\n\026normalization_rule_tsv\030\006" +
    " \001(\t*\t\010\310\001\020\200\200\200\200\002\"y\n\014SelfTestData\0223\n\007sampl" +
    "es\030\001 \003(\0132\".sentencepiece.SelfTestData.Sa" +
    "mple\032)\n\006Sample\022\r\n\005input\030\001 \001(\t\022\020\n\010expecte" +
    "d\030\002 \001(\t*\t\010\310\001\020\200\200\200\200\002\"\376\003\n\nModelProto\0227\n\006pie" +
    "ces\030\001 \003(\0132\'.sentencepiece.ModelProto.Sen" +
    "tencePiece\0220\n\014trainer_spec\030\002 \001(\0132\032.sente" +
    "ncepiece.TrainerSpec\0226\n\017normalizer_spec\030" +
    "\003 \001(\0132\035.sentencepiece.NormalizerSpec\0223\n\016" +
    "self_test_data\030\004 \001(\0132\033.sentencepiece.Sel" +
    "fTestData\0228\n\021denormalizer_spec\030\005 \001(\0132\035.s" +
    "entencepiece.NormalizerSpec\032\322\001\n\rSentence" +
    "Piece\022\r\n\005piece\030\001 \001(\t\022\r\n\005score\030\002 \001(\002\022B\n\004t" +
    "ype\030\003 \001(\0162,.sentencepiece.ModelProto.Sen" +
    "tencePiece.Type:\006NORMAL\"T\n\004Type\022\n\n\006NORMA" +
    "L\020\001\022\013\n\007UNKNOWN\020\002\022\013\n\007CONTROL\020\003\022\020\n\014USER_DE" +
    "FINED\020\004\022\010\n\004BYTE\020\006\022\n\n\006UNUSED\020\005*\t\010\310\001\020\200\200\200\200\002" +
    "*\t\010\310\001\020\200\200\200\200\002B\002H\003"
  };
  // This file has no dependencies, hence the empty dependency array.
  descriptor = com.google.protobuf.Descriptors.FileDescriptor
    .internalBuildGeneratedFileFrom(descriptorData,
      new com.google.protobuf.Descriptors.FileDescriptor[] {
      });
  // Top-level message types are looked up by index in declaration order;
  // nested types are looked up on their parent descriptor.
  internal_static_sentencepiece_TrainerSpec_descriptor =
    getDescriptor().getMessageTypes().get(0);
  internal_static_sentencepiece_TrainerSpec_fieldAccessorTable = new
    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
      internal_static_sentencepiece_TrainerSpec_descriptor,
      new java.lang.String[] { "Input", "InputFormat", "ModelPrefix", "ModelType", "VocabSize", "AcceptLanguage", "SelfTestSampleSize", "CharacterCoverage", "InputSentenceSize", "ShuffleInputSentence", "MiningSentenceSize", "TrainingSentenceSize", "SeedSentencepieceSize", "ShrinkingFactor", "MaxSentenceLength", "NumThreads", "NumSubIterations", "MaxSentencepieceLength", "SplitByUnicodeScript", "SplitByNumber", "SplitByWhitespace", "TreatWhitespaceAsSuffix", "AllowWhitespaceOnlyPieces", "SplitDigits", "ControlSymbols", "UserDefinedSymbols", "RequiredChars", "ByteFallback", "VocabularyOutputPieceScore", "HardVocabLimit", "UseAllVocab", "UnkId", "BosId", "EosId", "PadId", "UnkPiece", "BosPiece", "EosPiece", "PadPiece", "UnkSurface", "TrainExtremelyLargeCorpus", });
  internal_static_sentencepiece_NormalizerSpec_descriptor =
    getDescriptor().getMessageTypes().get(1);
  internal_static_sentencepiece_NormalizerSpec_fieldAccessorTable = new
    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
      internal_static_sentencepiece_NormalizerSpec_descriptor,
      new java.lang.String[] { "Name", "PrecompiledCharsmap", "AddDummyPrefix", "RemoveExtraWhitespaces", "EscapeWhitespaces", "NormalizationRuleTsv", });
  internal_static_sentencepiece_SelfTestData_descriptor =
    getDescriptor().getMessageTypes().get(2);
  internal_static_sentencepiece_SelfTestData_fieldAccessorTable = new
    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
      internal_static_sentencepiece_SelfTestData_descriptor,
      new java.lang.String[] { "Samples", });
  internal_static_sentencepiece_SelfTestData_Sample_descriptor =
    internal_static_sentencepiece_SelfTestData_descriptor.getNestedTypes().get(0);
  internal_static_sentencepiece_SelfTestData_Sample_fieldAccessorTable = new
    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
      internal_static_sentencepiece_SelfTestData_Sample_descriptor,
      new java.lang.String[] { "Input", "Expected", });
  internal_static_sentencepiece_ModelProto_descriptor =
    getDescriptor().getMessageTypes().get(3);
  internal_static_sentencepiece_ModelProto_fieldAccessorTable = new
    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
      internal_static_sentencepiece_ModelProto_descriptor,
      new java.lang.String[] { "Pieces", "TrainerSpec", "NormalizerSpec", "SelfTestData", "DenormalizerSpec", });
  internal_static_sentencepiece_ModelProto_SentencePiece_descriptor =
    internal_static_sentencepiece_ModelProto_descriptor.getNestedTypes().get(0);
  internal_static_sentencepiece_ModelProto_SentencePiece_fieldAccessorTable = new
    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
      internal_static_sentencepiece_ModelProto_SentencePiece_descriptor,
      new java.lang.String[] { "Piece", "Score", "Type", });
}
// @@protoc_insertion_point(outer_class_scope)
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy