All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.tudarmstadt.ukp.wikipedia.mwdumper.importer.XmlWriter Maven / Gradle / Ivy

package de.tudarmstadt.ukp.wikipedia.mwdumper.importer;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;

/**
 * Quickie little class for sending properly encoded, prettily
 * indented XML output to a stream. There is no namespace support,
 * so prefixes and xmlns attributes must be managed manually.
 */
public class XmlWriter {
	OutputStream stream;
	String encoding;
	ArrayList stack;
	BufferedWriter writer;
	
	public XmlWriter(OutputStream stream) throws IOException {
		this.stream = stream;
		encoding = "utf-8";
		stack = new ArrayList();
		writer = new BufferedWriter(new OutputStreamWriter(stream, "UTF8"));
	}
	
	/**
	 * @throws IOException 
	 */
	public void close() throws IOException {
		writer.flush();
		writer.close();
	}
	
	
	/**
	 * Write the  header.
	 * @throws IOException 
	 */
	public void openXml() throws IOException {
		writeRaw("\n");
	}
	
	/**
	 * In theory we might close out open elements or such.
	 */
	public void closeXml() {
	}
	
	
	/**
	 * Write an empty element, such as , on a standalone line.
	 * Takes an optional dictionary of attributes.
	 * @throws IOException 
	 */
	public void emptyElement(String element) throws IOException {
		emptyElement(element, null);
	}
	
	public void emptyElement(String element, String[][] attributes) throws IOException {
		startElement(element, attributes, "/>\n");
		deindent();
	}
	
	/**
	 * Write an element open tag, such as , on a standalone line.
	 * Takes an optional dictionary of attributes.
	 * @throws IOException 
	 */
	public void openElement(String element) throws IOException {
		openElement(element, null);
	}
	
	public void openElement(String element, String[][] attributes) throws IOException {
		startElement(element, attributes, ">\n");
	}
	
	/**
	 * Write an element close tag, such as , on a standalone line.
	 * If indent=False is passed, indentation will not be added.
	 * @throws IOException 
	 */
	public void closeElement() throws IOException {
		closeElement(true);
	}
	
	public void closeElement(boolean indent) throws IOException {
		String[] bits = deindent();
		String element = bits[0];
		String space = bits[1];
		if (indent)
			writeRaw(space + "\n");
		else
			writeRaw("\n");
	}
	
	/**
	 * Write an element with a text node included, such as foo,
	 * on a standalone line. If the text is empty, an empty element will
	 * be output as . Takes an optional list of tuples with attribute
	 * names and values.
	 * @throws IOException 
	 */
	public void textElement(String element, String text) throws IOException {
		textElement(element, text, null);
	}
	
	public void textElement(String element, String text, String[][] attributes) throws IOException {
		if (text==null || text.length() == 0) {
			emptyElement(element, attributes);
		} else {
			startElement(element, attributes, ">");
			writeEscaped(text);
			closeElement(false);
		}
	}
	
	void startElement(String element, String[][] attributes, String terminator) throws IOException {
		writeRaw(indent(element));
		writeRaw('<');
		writeRaw(element);
		if (attributes != null) {
			for (int i = 0; i < attributes.length; i++) {
				writeRaw(' ');
				writeRaw(attributes[i][0]);
				writeRaw("=\"");
				writeEscaped(attributes[i][1]);
				writeRaw('"');
			}
		}
		writeRaw(terminator);
	}
	
	/**Send an encoded Unicode string to the output stream.
	 * @throws IOException */
	void writeRaw(String data) throws IOException {
		writer.write(data);
	}
	
	void writeRaw(char c) throws IOException {
		writer.write(c);
	}
	
	void writeEscaped(String data) throws IOException {
		int end = data.length();
		for (int i = 0; i < end; i++) {
			char c = data.charAt(i);
			switch (c) {
			case '&':
				writer.write("&");
				break;
			case '<':
				writer.write("<");
				break;
			case '>':
				writer.write(">");
				break;
			case '"':
				writer.write(""");
				break;
			default:
				writer.write(c);
			}
		}
	}
	
	private String indent(String element) {
		int level = stack.size();
		stack.add(element);
		return spaces(level);
	}
	
	private String[] deindent() {
		String element = (String)stack.remove(stack.size() - 1);
		String space = spaces(stack.size());
		return new String[] {element, space};
	}
	
	private String spaces(int level) {
		StringBuffer buffer = new StringBuffer();
		for (int i = 0; i < level * 2; i++)
			buffer.append(' ');
		return buffer.toString();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy