All downloads are free. The search and download features use the official Maven repository.

org.openfuxml.transform.XhtmlTransformer Maven / Gradle / Ivy

There is a newer version: 1.3.4
Show newest version
package org.openfuxml.transform;

import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.text.MessageFormat;

import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Entities;
import org.openfuxml.addon.wiki.FormattingXMLStreamWriter;
import org.openfuxml.addon.wiki.WikiTemplates;
import org.openfuxml.addon.wiki.processor.ofx.OfxHtmlContentHandler;
import org.openfuxml.addon.wiki.processor.util.AbstractWikiProcessor;
import org.openfuxml.addon.wiki.util.IgnoreDtdEntityResolver;
import org.openfuxml.content.ofx.Section;
import org.openfuxml.trancoder.XhtmlSpecialChars;
import org.openfuxml.xml.OfxNsPrefixMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import net.sf.exlp.util.xml.JaxbUtil;

/**
 * Converts an HTML fragment into an OpenFuXML {@link Section}.
 *
 * <p>The pipeline is: jsoup normalises the HTML into XHTML, the XHTML is wrapped
 * in the {@code TransformerTemplates.html} template and parsed with SAX, an
 * {@link OfxHtmlContentHandler} writes the OFX representation of the SAX events
 * to an {@link XMLStreamWriter}, namespace declarations are added to the root
 * element of that output, and finally the result is unmarshalled with
 * {@link JaxbUtil}.</p>
 */
public class XhtmlTransformer extends AbstractWikiProcessor
{
	final static Logger logger = LoggerFactory.getLogger(XhtmlTransformer.class);

	/** Creates a transformer and initialises the wiki templates via {@link WikiTemplates#init()}. */
	public XhtmlTransformer()
	{
		WikiTemplates.init();
	}

	/**
	 * Transforms the given HTML into a {@link Section}.
	 *
	 * @param htmlContent HTML markup; it is parsed leniently by jsoup and only the
	 *                    content of the resulting {@code <body>} is transformed
	 * @return the unmarshalled section, or {@code null} if the transformation failed
	 *         with an I/O, parser-configuration, stream or SAX error (the error is logged)
	 */
	public Section process(String htmlContent)
	{
		org.jsoup.nodes.Document sDoc = Jsoup.parse(htmlContent);
		// The xhtml escape mode restricts jsoup to the minimal XML entity set when serialising
		sDoc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
		String xhtmlContent = sDoc.body().html();

		try
		{
			String xml = xhtml2Ofx(xhtmlContent);
			logger.trace(xml);

			return JaxbUtil.load(xml.getBytes("UTF-8"),Section.class);
		}
		catch (IOException e) {logger.error("",e);}
		catch (ParserConfigurationException e) {logger.error("",e);}
		catch (XMLStreamException e) {logger.error("",e);}
		catch (SAXException e)
		{
			// Keep the offending markup next to the stack trace instead of printing it to stdout
			logger.error("Unable to parse XHTML content:\n"+xhtmlContent,e);
		}
		return null;
	}

	/**
	 * Parses the XHTML fragment with SAX and returns the OFX XML produced by
	 * {@link OfxHtmlContentHandler}, with namespace declarations added to its root element.
	 *
	 * @param xhtmlContent XHTML fragment (the inner markup of a body element)
	 * @return OFX XML as a string
	 * @throws SAXException if the wrapped fragment cannot be parsed or the generated
	 *                      output contains no root element
	 */
	private String xhtml2Ofx(String xhtmlContent) throws IOException, ParserConfigurationException, XMLStreamException, SAXException
	{
		// NOTE(review): presumably substitutes characters/entities the XML parser would reject - see XhtmlSpecialChars
		xhtmlContent = XhtmlSpecialChars.replace(xhtmlContent);

		// The template is a MessageFormat pattern; the fragment is inserted as argument {0}
		String html = MessageFormat.format(TransformerTemplates.html, xhtmlContent);
		logger.trace(html);

		InputSource inputSource = new InputSource(new StringReader(html));

		SAXParserFactory factory = SAXParserFactory.newInstance();
		factory.setNamespaceAware(true);
		factory.setValidating(false);
		SAXParser saxParser = factory.newSAXParser();

		XMLReader xmlReader = saxParser.getXMLReader();
		// NOTE(review): the resolver name suggests DTD references are not fetched - confirm in IgnoreDtdEntityResolver
		xmlReader.setEntityResolver(IgnoreDtdEntityResolver.getInstance());

		StringWriter out = new StringWriter();
		XMLStreamWriter writer = createXMLStreamWriter(out);

		logger.trace("Using dummy String injectionDir");
		OfxHtmlContentHandler contentHandler = new OfxHtmlContentHandler(writer,".");

		xmlReader.setContentHandler(contentHandler);
		xmlReader.parse(inputSource);

		writer.close();

		return addNS(out.toString());
	}

	/**
	 * Inserts the OpenFuXML namespace declarations into the start tag of the root element.
	 *
	 * <p>The root element is located by skipping the prolog (XML declaration, processing
	 * instructions, comments, DOCTYPE). The declarations are placed directly before the
	 * closing {@code >} of the start tag, or before {@code />} if the root is self-closing.</p>
	 *
	 * @param xml XML document as text
	 * @return the document with the additional {@code xmlns:*} attributes on its root element
	 * @throws SAXException if no complete root start tag can be found
	 */
	private String addNS(String xml) throws SAXException
	{
		int rootStart = indexOfRootElement(xml);
		int tagEnd = rootStart<0 ? -1 : indexOfTagEnd(xml,rootStart);
		if(tagEnd<0){throw new SAXException("No root element found in transformed content");}

		// tagEnd > rootStart always holds, so looking one character back is safe
		int insertAt = (xml.charAt(tagEnd-1)=='/') ? tagEnd-1 : tagEnd;

		StringBuilder sb = new StringBuilder(xml.length()+256);
		sb.append(xml,0,insertAt);
		sb.append(" xmlns:ofx=\"http://www.openfuxml.org\"");
		sb.append(" xmlns:").append(OfxNsPrefixMapper.prefix(OfxNsPrefixMapper.NS.text)).append("=\"").append(OfxNsPrefixMapper.namespace(OfxNsPrefixMapper.NS.text)).append("\"");
		sb.append(" xmlns:list=\"http://www.openfuxml.org/list\"");
		sb.append(" xmlns:table=\"http://www.openfuxml.org/table\"");
		sb.append(" xmlns:layout=\"http://www.openfuxml.org/layout\"");
		sb.append(" xmlns:wiki=\"http://www.openfuxml.org/wiki\"");
		sb.append(xml,insertAt,xml.length());

		return sb.toString();
	}

	/**
	 * Returns the index of the {@code <} that opens the first element, skipping
	 * processing instructions, comments and other {@code <!...>} declarations.
	 *
	 * @return the index, or -1 if there is no element or the prolog is unterminated
	 */
	private static int indexOfRootElement(String xml)
	{
		int pos = xml.indexOf('<');
		while(pos>=0)
		{
			String terminator;
			if(xml.startsWith("<?",pos)){terminator = "?>";}
			else if(xml.startsWith("<!--",pos)){terminator = "-->";}
			else if(xml.startsWith("<!",pos)){terminator = ">";}
			else {return pos;}

			int end = xml.indexOf(terminator,pos+2);
			if(end<0){return -1;}
			pos = xml.indexOf('<',end+terminator.length());
		}
		return -1;
	}

	/**
	 * Returns the index of the {@code >} closing the tag that starts at {@code tagStart}.
	 * A {@code >} inside a quoted attribute value is not treated as the end of the tag.
	 *
	 * @return the index, or -1 if the tag is not closed
	 */
	private static int indexOfTagEnd(String xml, int tagStart)
	{
		char quote = 0;
		for(int i=tagStart+1;i<xml.length();i++)
		{
			char c = xml.charAt(i);
			if(quote!=0)
			{
				if(c==quote){quote = 0;}
			}
			else if(c=='"' || c=='\''){quote = c;}
			else if(c=='>'){return i;}
		}
		return -1;
	}

	/**
	 * Creates the stream writer that receives the generated OFX output.
	 *
	 * @param out destination of the generated XML
	 * @return a {@link FormattingXMLStreamWriter} wrapping the platform's default stream writer
	 * @throws IllegalStateException if no stream writer can be created
	 */
	protected XMLStreamWriter createXMLStreamWriter(Writer out)
	{
		XMLStreamWriter writer;
		try
		{
			writer = XMLOutputFactory.newInstance().createXMLStreamWriter(out);
		}
		catch (XMLStreamException e1) {throw new IllegalStateException(e1);}
		// XMLOutputFactory reports configuration problems with the javax.xml.stream error type,
		// not the javax.xml.parsers one that this file imports
		catch (javax.xml.stream.FactoryConfigurationError e1) {throw new IllegalStateException(e1);}
		return new FormattingXMLStreamWriter(writer);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy