All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.marklogic.client.example.handle.HTMLCleanerHandle Maven / Gradle / Ivy

/*
 * Copyright 2012-2015 MarkLogic Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.marklogic.client.example.handle;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;

import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.CompactXmlSerializer;
import org.htmlcleaner.DefaultTagProvider;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.ITagInfoProvider;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XmlSerializer;

import com.marklogic.client.io.Format;
import com.marklogic.client.io.BaseHandle;
import com.marklogic.client.io.OutputStreamSender;
import com.marklogic.client.io.marker.OperationNotSupported;
import com.marklogic.client.io.marker.XMLWriteHandle;

/**
 * An HTMLCleaner Handle writes an HTML document to the database
 * as an XHTML document.
 */
public class HTMLCleanerHandle
	extends BaseHandle
	implements OutputStreamSender, XMLWriteHandle
{
	private CleanerProperties configuration;
	private ITagInfoProvider  rulesProvider;
	private HtmlCleaner       parser;
	private XmlSerializer     serializer;
	private TagNode           content;

	public HTMLCleanerHandle() {
		super();
		super.setFormat(Format.XML);
   		setResendable(true);
	}
	public HTMLCleanerHandle(TagNode content) {
		this();
    	set(content);
	}

	public CleanerProperties getConfiguration() {
		if (configuration == null)
			configuration = makeConfiguration();
		return configuration;
	}
	public void setConfiguration(CleanerProperties configuration) {
		this.configuration = configuration;
	}
	protected CleanerProperties makeConfiguration() {
		return new CleanerProperties();
	}

	public ITagInfoProvider getRulesProvider() {
		if (rulesProvider == null)
			rulesProvider = makeRulesProvider();
		return rulesProvider;
	}
	public void setRulesProvider(ITagInfoProvider rulesProvider) {
		this.rulesProvider = rulesProvider;
	}
	protected ITagInfoProvider makeRulesProvider() {
		return new DefaultTagProvider();
	}

	public HtmlCleaner getParser() {
		if (parser == null)
			parser = makeParser();
		return parser;
	}
	public void setParser(HtmlCleaner parser) {
		this.parser = parser;
	}
	protected HtmlCleaner makeParser() {
		return new HtmlCleaner(getRulesProvider(), getConfiguration());
	}

	public XmlSerializer getSerializer() {
		if (serializer == null)
			serializer = makeSerializer();
		return serializer;
	}
	public void setSerializer(XmlSerializer serializer) {
		this.serializer = serializer;
	}
	protected XmlSerializer makeSerializer() {
		CleanerProperties configuration = getConfiguration();
		return new CompactXmlSerializer(
				(configuration != null) ?
						configuration : getParser().getProperties()
				);
	}

	public TagNode get() {
		return content;
	}
    public void set(TagNode content) {
    	this.content = content;
    }
    public HTMLCleanerHandle with(TagNode content) {
    	set(content);
    	return this;
    }

    public void set(File content, String charset) throws IOException {
    	set(getParser().clean(content, charset));
    }
    public HTMLCleanerHandle with(File content, String charset)
    throws IOException {
    	set(content, charset);
    	return this;
    }

    public void set(InputStream content, String charset) throws IOException {
    	set(getParser().clean(content, charset));
    }
    public HTMLCleanerHandle with(InputStream content, String charset)
    throws IOException {
    	set(content, charset);
    	return this;
    }

    public void set(Reader content) throws IOException {
    	set(getParser().clean(content));
    }
    public HTMLCleanerHandle with(Reader content) throws IOException {
    	set(content);
    	return this;
    }

    public void set(String content) throws IOException {
    	set(getParser().clean(content));
    }
    public HTMLCleanerHandle with(String content) throws IOException {
    	set(content);
    	return this;
    }

	public void setFormat(Format format) {
		if (format != Format.XML)
			throw new IllegalArgumentException(
					"HTMLCleanerHandle supports the XML format only");
	}

	@Override
	protected OutputStreamSender sendContent() {
		if (content == null) {
			throw new IllegalStateException("No content to write");
		}

		return this;
	}
	@Override
	public void write(OutputStream out) throws IOException {
		getSerializer().writeToStream(content, out, "UTF-8");
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy