
ai.idylnlp.dl4j.IdylNLPTokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of idylnlp-dl4j Show documentation
Idyl NLP for DeepLearning4j
The newest version!
/*******************************************************************************
* Copyright 2018 Mountain Fog, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy
* of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
******************************************************************************/
package ai.idylnlp.dl4j;
import java.util.Arrays;
import java.util.List;
import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;
/**
 * Implements {@link Tokenizer} to wrap Idyl NLP's tokenizer
 * for use with DeepLearning4j.
 *
 * <p>The input string is tokenized once, in the constructor, and the
 * resulting tokens are then handed out one at a time through
 * {@link #hasMoreTokens()} and {@link #nextToken()}.
 *
 * @author Mountain Fog, Inc.
 *
 */
public class IdylNLPTokenizer implements Tokenizer {

  /** The tokens produced from the input string at construction time. */
  private final String[] tokens;

  /** Position in {@link #tokens} of the next token to be returned. */
  private int index = 0;

  /** Optional preprocessor applied to each token returned by {@link #nextToken()}; may be {@code null}. */
  private TokenPreProcess preProcessor;

  /**
   * Creates a new tokenizer without a token preprocessor.
   * @param tokenizer An Idyl NLP {@link ai.idylnlp.model.nlp.Tokenizer}.
   * @param toTokenize The string to tokenize.
   */
  public IdylNLPTokenizer(ai.idylnlp.model.nlp.Tokenizer tokenizer, String toTokenize) {
    this(tokenizer, null, toTokenize);
  }

  /**
   * Creates a new tokenizer.
   * @param tokenizer An Idyl NLP {@link ai.idylnlp.model.nlp.Tokenizer}.
   * @param preProcessor A token {@link TokenPreProcess preprocessor}.
   * @param toTokenize The string to tokenize.
   */
  public IdylNLPTokenizer(ai.idylnlp.model.nlp.Tokenizer tokenizer, TokenPreProcess preProcessor, String toTokenize) {
    this.preProcessor = preProcessor;
    this.tokens = tokenizer.tokenize(toTokenize);
  }

  /**
   * Reports whether at least one token has not yet been returned by
   * {@link #nextToken()}.
   * @return {@code true} if a call to {@link #nextToken()} will succeed.
   */
  @Override
  public boolean hasMoreTokens() {
    // Valid indices are 0 .. length - 1, so the last token is still pending
    // while index == length - 1.
    return index < tokens.length;
  }

  /**
   * Returns the total number of tokens, regardless of how many have
   * already been consumed.
   * @return The number of tokens produced from the input string.
   */
  @Override
  public int countTokens() {
    return tokens.length;
  }

  /**
   * Returns the next token and advances the cursor. If a preprocessor is
   * set, the token is passed through it before being returned.
   * @return The next (optionally preprocessed) token.
   * @throws IndexOutOfBoundsException if all tokens have been consumed.
   */
  @Override
  public String nextToken() {
    // index == length is already past the end of the array.
    if (index >= tokens.length) {
      throw new IndexOutOfBoundsException("No more tokens.");
    }
    final String token = tokens[index++];
    return (preProcessor != null) ? preProcessor.preProcess(token) : token;
  }

  /**
   * Returns all tokens as a fixed-size list. The cursor is not advanced
   * and the preprocessor is not applied.
   * @return A list of every token produced from the input string.
   */
  @Override
  public List<String> getTokens() {
    return Arrays.asList(tokens);
  }

  /**
   * Sets the preprocessor applied to tokens returned by {@link #nextToken()}.
   * @param tokenPreProcessor The preprocessor to use, or {@code null} for none.
   */
  @Override
  public void setTokenPreProcessor(TokenPreProcess tokenPreProcessor) {
    this.preProcessor = tokenPreProcessor;
  }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy