![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.tokenizer.PTBLineLexer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
/**
*
*/
package edu.berkeley.nlp.tokenizer;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import edu.berkeley.nlp.util.IOUtils;
import edu.berkeley.nlp.util.Iterators;
import edu.berkeley.nlp.util.StringUtils;
/**
* Similar to PTBLexer, but instead of reading from a Reader this class is given a single line
* of text and returns the list of tokenized Strings.
* @author petrov
*
*/
public class PTBLineLexer extends PTBLexer implements LineTokenizer {
/**
 * Creates a line lexer that is not attached to any Reader; the text to
 * tokenize is passed to the individual tokenizing methods instead.
 */
public PTBLineLexer() {
    // The cast gives the null argument the static type Reader, so the
    // Reader-taking superclass constructor is the one that gets invoked.
    super((java.io.Reader) null);
}
/**
 * Tokenizes a line by running a {@code PTBTokenizer} over a
 * {@link StringReader} wrapping the given text.
 *
 * @param line the text to tokenize
 * @return a new list containing the {@code toString()} form of every element
 *         the tokenizer produced, in the order it produced them
 */
public List<String> tokenize(String line) {
    PTBTokenizer toker = new PTBTokenizer(new StringReader(line), true);
    // Wildcard element type: each element is only ever converted with toString(),
    // so the concrete token type does not matter here.
    List<?> elems = toker.tokenize();
    List<String> toks = new ArrayList<String>(elems.size());
    for (Object o : elems) {
        toks.add(o.toString());
    }
    return toks;
}
/**
 * Tokenizes a single line by loading its characters into the lexer buffer
 * ({@code yy_buffer}, not declared in this class; presumably inherited from
 * PTBLexer, confirm) instead of reading them from a Reader.
 *
 * NOTE(review): this method is incomplete in this copy of the file. Everything
 * between the commented-out copy loop and the statements that use a variable
 * named {@code tokenizeLine} is missing: no token loop and no return statement
 * are visible, and the closing braces and {@code catch} below have no matching
 * opening brace or {@code try}. The trailing statements look like the end of a
 * separate driver method, not part of this one. Restore the original source
 * before relying on this code.
 *
 * @param line the text to tokenize
 * @return not visible in this copy (see note above)
 * @throws IOException declared by the signature; the call that can raise it is
 *         not visible in this copy
 */
public List tokenizeLine(String line) throws IOException{
LinkedList tokenized = new LinkedList();
int nEl = line.length();
char[] array = line.toCharArray();
// Hand the lexer a fresh character array holding exactly the line's characters.
yy_buffer = line.toCharArray();//new char[nEl+1];
// NOTE(review): the next line is a commented-out loop header fused with an
// unrelated assignment; the source text that originally sat between them is lost.
//for(int i=0;i tokenizeLine = tokenizer.tokenizeLine(line);
// Drop a trailing null entry, if there is one, before printing.
if (tokenizeLine.get(tokenizeLine.size() - 1) == null) tokenizeLine.remove(tokenizeLine.size() - 1);
// Print the tokens joined by StringUtils.join.
System.out.println(StringUtils.join(tokenizeLine));
}
}
// NOTE(review): the try block this catch belongs to is not visible in this copy.
catch (IOException e)
{
// TODO Auto-generated catch block
// Rethrow as unchecked, keeping the original exception as the cause.
throw new RuntimeException(e);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy