com.openhtmltopdf.bidi.ParagraphSplitter Maven / Gradle / Ivy

Go to download
package com.openhtmltopdf.bidi;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;

import com.openhtmltopdf.css.style.CalculatedStyle;
import com.openhtmltopdf.layout.LayoutContext;

/**
 * This class aims to split text into paragraphs where they can be passed to the
 * BidiSplitter. Each text node in the document is attached to the closest element with display: block
 * which we assume paragraphs do not cross.
 */
public class ParagraphSplitter {
	
	public static class Paragraph {
        private final StringBuilder builder = new StringBuilder();
        private final List textNodes = new ArrayList();
        
        private final TreeMap splitPoints = new TreeMap();
        
        private Paragraph() { }
        
        private void add(String text, Text textNode) {
            builder.append(text);
            textNodes.add(textNode);
        }
        
        private void runBidiSplitter(BidiSplitter splitter, LayoutContext c) {
        	splitter.setParagraph(builder.toString(), c.getDefaultTextDirection());
        	copySplitPointsFromBidiSplitter(splitter);
        }
         
        /**
         * @param text
         * @return the first index of text from a Text node.
         */
        public int getFirstCharIndexInParagraph(Text text) {
        	
        	int position = 0;
        	
            for (Text t : textNodes) {
            	if (text == t) {
            		return position;
            	}
            
            	position += t.getLength();
            }
        	
            assert(false);
        	return -1;
        }
        
        private void copySplitPointsFromBidiSplitter(BidiSplitter splitter) {
        	int length = splitter.countTextRuns();
        	
        	for (int i = 0; i < length; i++) {
        		BidiTextRun run = splitter.getVisualRun(i);
        		splitPoints.put(run.getStart(), run);
        	}
        }
        
        /**
         * @param startIndexInParagraph
         * @return the BidiTextRun that starts at or above startIndexInPararagraph.
         */
        public BidiTextRun nextSplit(int startIndexInParagraph) {
            Map.Entry entry = splitPoints.ceilingEntry(startIndexInParagraph);
        	
        	if (entry != null)
        		return entry.getValue();
        	else
        		return null;
        }

        /**
         * @param startIndexInParagraph
         * @return the BidiTextRun that starts at or before startIndexInParagraph.
         */
        public BidiTextRun prevSplit(int startIndexInParagraph) {
            Map.Entry entry = splitPoints.floorEntry(startIndexInParagraph);
        	
        	if (entry != null)
        		return entry.getValue();
        	else
        		return null;
        }
	}
	
    private final Map paragraphs = new HashMap();
    
    public Paragraph lookupParagraph(Text node) {
    	return paragraphs.get(node);
    }
    
    public void splitRoot(LayoutContext c, Document doc) {
        Paragraph parent = new Paragraph();
        splitParagraphs(c, doc, parent);
	}

    /**
     * Run bidi splitting on the document's paragraphs.
     * @param c
     */
    public void runBidiOnParagraphs(LayoutContext c) {
    	for (Paragraph p : paragraphs.values())
    	{
    		p.runBidiSplitter(c.getBidiSplitterFactory().createBidiSplitter(), c);
    	}
    }
    
    private void splitParagraphs(LayoutContext c, Node parent, Paragraph nearestBlock) {
        Node node = parent.getFirstChild();
        
        if (node == null) {
            return;
        }
        
    	do {
            if (node.getNodeType() == Node.TEXT_NODE) {
                String text = ((Text) node).getData();
            	nearestBlock.add(text, (Text) node);
            	paragraphs.put((Text) node, nearestBlock);
            }
            else if (node.getNodeType() == Node.ELEMENT_NODE) {
            	Element element = (Element) node;
                CalculatedStyle style = c.getSharedContext().getStyle(element);
            	
                if (style.isSpecifiedAsBlock()) {
                    splitParagraphs(c, element, new Paragraph());
                }
                else {
                    splitParagraphs(c, element, nearestBlock);
                }
            }
        } while ((node = node.getNextSibling()) != null);
    }
}