// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download or modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.dictionary.serializer;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.StringList;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * Reads and writes dictionaries of all kinds.
 *
 * The XML layout handled here is a <code>dictionary</code> root element that
 * contains <code>entry</code> elements; each entry carries its attributes as
 * XML attributes and its tokens as nested <code>token</code> elements.
 */
public class DictionarySerializer {

  // TODO: should check for invalid format, make it safe
  /**
   * SAX handler that assembles one {@link Entry} per <code>entry</code>
   * element and hands it to the {@link EntryInserter}.
   */
  private static class DictionaryContenthandler implements ContentHandler {

    private final EntryInserter mInserter;

    /** True only between the start and the end of a token element. */
    private boolean mIsInsideTokenElement;

    /** Tokens collected for the entry that is currently being parsed. */
    private List<String> mTokenList = new LinkedList<String>();

    /** Character data of the token that is currently being parsed. */
    private StringBuilder token = new StringBuilder();

    /** Attributes of the entry that is currently being parsed. */
    private Attributes mAttributes;

    private DictionaryContenthandler(EntryInserter inserter) {
      mInserter = inserter;
    }

    /**
     * Not implemented.
     */
    @Override
    public void processingInstruction(String target, String data)
        throws SAXException {
    }

    /**
     * Not implemented.
     */
    @Override
    public void startDocument() throws SAXException {
    }

    /**
     * Copies the XML attributes of an entry element into a fresh
     * {@link Attributes} object, or marks the start of a token element.
     * All other elements are ignored.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
        org.xml.sax.Attributes atts) throws SAXException {
      if (ENTRY_ELEMENT.equals(localName)) {
        mAttributes = new Attributes();

        for (int i = 0; i < atts.getLength(); i++) {
          mAttributes.setValue(atts.getLocalName(i), atts.getValue(i));
        }
      }
      else if (TOKEN_ELEMENT.equals(localName)) {
        mIsInsideTokenElement = true;
      }
    }

    /**
     * Accumulates character data, but only while inside a token element.
     * The parser may deliver the text of one token in several chunks.
     */
    @Override
    public void characters(char[] ch, int start, int length)
        throws SAXException {
      if (mIsInsideTokenElement) {
        token.append(ch, start, length);
      }
    }

    /**
     * Finishes the current token or the current entry.
     *
     * At the end of a token element the trimmed token text is stored and the
     * inside-token flag is cleared. At the end of an entry element an
     * {@link Entry} is built from the collected tokens and attributes and
     * passed to the {@link EntryInserter}.
     *
     * @throws SAXException if the inserter rejects the entry
     */
    @Override
    public void endElement(String uri, String localName, String qName)
        throws SAXException {
      if (TOKEN_ELEMENT.equals(localName)) {
        mTokenList.add(token.toString().trim());
        token.setLength(0);

        // The flag must be cleared here; otherwise text outside of token
        // elements would be appended to the buffer of the next token.
        mIsInsideTokenElement = false;
      }
      else if (ENTRY_ELEMENT.equals(localName)) {
        String[] tokens = mTokenList.toArray(
            new String[mTokenList.size()]);

        Entry entry = new Entry(new StringList(tokens), mAttributes);

        try {
          mInserter.insert(entry);
        } catch (InvalidFormatException e) {
          throw new SAXException("Invalid dictionary format!", e);
        }

        // Reset the per-entry state for the next entry element.
        mTokenList.clear();
        mAttributes = null;
      }
    }

    /**
     * Not implemented.
     */
    @Override
    public void endDocument() throws SAXException {
    }

    /**
     * Not implemented.
     */
    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /**
     * Not implemented.
     */
    @Override
    public void ignorableWhitespace(char[] ch, int start, int length)
        throws SAXException {
    }

    /**
     * Not implemented.
     */
    @Override
    public void setDocumentLocator(Locator locator) {
    }

    /**
     * Not implemented.
     */
    @Override
    public void skippedEntity(String name) throws SAXException {
    }

    /**
     * Not implemented.
     */
    @Override
    public void startPrefixMapping(String prefix, String uri)
        throws SAXException {
    }
  }

  private static final String CHARSET = "UTF-8";

  private static final String DICTIONARY_ELEMENT = "dictionary";
  private static final String ENTRY_ELEMENT = "entry";
  private static final String TOKEN_ELEMENT = "token";

  /**
   * Creates {@link Entry}s from the given {@link InputStream} and
   * forwards these {@link Entry}s to the {@link EntryInserter}.
   *
   * This method does not close the stream itself.
   * NOTE(review): earlier documentation stated that the stream is closed
   * after creation is finished; that would have to be done by the SAX
   * parser as part of its end-of-parse cleanup - confirm before relying
   * on it.
   *
   * NOTE(review): the reader is used with its default configuration; if
   * dictionaries can come from untrusted sources, consider disabling DTDs
   * and external entities.
   *
   * @param in the stream to read the XML dictionary from
   * @param inserter receives every entry that was parsed
   *
   * @throws IOException if reading from the stream fails
   * @throws InvalidFormatException if the parser reports a SAX error,
   *     including an entry rejected by the inserter
   */
  public static void create(InputStream in, EntryInserter inserter)
      throws IOException, InvalidFormatException {

    DictionaryContenthandler profileContentHandler =
        new DictionaryContenthandler(inserter);

    XMLReader xmlReader;
    try {
      xmlReader = XMLReaderFactory.createXMLReader();
      xmlReader.setContentHandler(profileContentHandler);
      xmlReader.parse(new InputSource(in));
    }
    catch (SAXException e) {
      throw new InvalidFormatException("The profile data stream has " +
          "an invalid format!", e);
    }
  }

  /**
   * Serializes the given entries to the given {@link OutputStream}.
   *
   * The output is UTF-8 encoded, indented XML. This method does not close
   * the provided {@link OutputStream}.
   *
   * @param out the stream the XML is written to
   * @param entries the entries to write, in iteration order
   *
   * @throws IOException If an I/O error occurs
   */
  public static void serialize(OutputStream out, Iterator<Entry> entries)
      throws IOException {
    StreamResult streamResult = new StreamResult(out);
    SAXTransformerFactory tf = (SAXTransformerFactory)
        SAXTransformerFactory.newInstance();

    TransformerHandler hd;
    try {
      hd = tf.newTransformerHandler();
    } catch (TransformerConfigurationException e1) {
      // No custom configuration is supplied, so this is treated as a
      // programming error; the cause is kept for diagnosis.
      throw new AssertionError("The Tranformer configuration must be valid!",
          e1);
    }

    Transformer serializer = hd.getTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, CHARSET);
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");

    hd.setResult(streamResult);

    try {
      hd.startDocument();

      hd.startElement("", "", DICTIONARY_ELEMENT, new AttributesImpl());

      while (entries.hasNext()) {
        Entry entry = entries.next();

        serializeEntry(hd, entry);
      }

      hd.endElement("", "", DICTIONARY_ELEMENT);

      hd.endDocument();
    }
    catch (SAXException e) {
      // Keep the SAX exception as cause so the real failure is not lost.
      throw new IOException("There was an error during serialization!", e);
    }
  }

  /**
   * Writes one entry element with its attributes and one token element per
   * token of the entry.
   *
   * @param hd the handler that receives the SAX events
   * @param entry the entry to write
   *
   * @throws SAXException if the handler fails to process an event
   */
  private static void serializeEntry(TransformerHandler hd, Entry entry)
      throws SAXException {

    AttributesImpl entryAttributes = new AttributesImpl();

    Attributes attributes = entry.getAttributes();

    for (Iterator<String> it = attributes.iterator(); it.hasNext();) {
      String key = it.next();

      entryAttributes.addAttribute("", "", key,
          "", attributes.getValue(key));
    }

    hd.startElement("", "", ENTRY_ELEMENT, entryAttributes);

    StringList tokens = entry.getTokens();

    for (Iterator<String> it = tokens.iterator(); it.hasNext(); ) {

      hd.startElement("", "", TOKEN_ELEMENT, new AttributesImpl());

      String token = it.next();

      hd.characters(token.toCharArray(),
          0, token.length());

      hd.endElement("", "", TOKEN_ELEMENT);
    }

    hd.endElement("", "", ENTRY_ELEMENT);
  }
}