All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.shef.dcs.sti.parser.table.BrowsableHelper Maven / Gradle / Ivy

The newest version!
package uk.ac.shef.dcs.sti.parser.table;

import org.apache.any23.extractor.html.TagSoupParser;
import org.apache.commons.io.FileUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import uk.ac.shef.dcs.sti.STIException;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.ArrayList;
import java.util.List;

import static org.joox.JOOX.$;

/**
 * Created by - on 26/07/2016.
 */
public class BrowsableHelper {
    static List createBrowsableElements(List tables, Document doc){
        int count=1;
        List xpaths = new ArrayList<>();
        for(Node tableNode: tables){
            String xpath = $(tableNode).xpath();
            Node prevTableParent = tableNode.getParentNode();
            Node prevTableNextSib=tableNode.getNextSibling();
            Node newTableParent=doc.createElement("div");
            if(prevTableParent!=null) {
                Element checkbox = doc.createElement("input");
                checkbox.setAttribute("type","checkbox");
                checkbox.setAttribute("name","table"+count);
                checkbox.setAttribute("class","targetTables");
                checkbox.setAttribute("checked","true");
                prevTableParent.insertBefore(checkbox, tableNode);

                Element span =doc.createElement("span");
                span.setAttribute("style","background-color:red");
                span.setTextContent("check this box to annotate table#"+count);
                prevTableParent.insertBefore(span, tableNode);

                prevTableParent.removeChild(tableNode);
                newTableParent.appendChild(tableNode);
                if(prevTableNextSib==null)
                    prevTableParent.appendChild(newTableParent);
                else
                    prevTableParent.insertBefore(newTableParent, prevTableNextSib);
            }

            int lastSlash=xpath.lastIndexOf("/TABLE");
            if(lastSlash!=-1){
                String lastSuffix=xpath.substring(lastSlash);
                xpath=xpath.substring(0, lastSlash);
                xpath+="/DIV["+count+"]";//+lastSuffix;
            }

            xpaths.add(xpath);
            count++;
        }
        return xpaths;
    }

    static void output(String inFile, String outputFolder, Document doc) throws STIException {
        File in = new File(inFile);
        File outFile = new File(outputFolder+ File.separator+in.getName());
        if(in.toString().equals(outFile.toString())){
            //rename input file
            in.renameTo(new File(in.toString()+".original"));
        }
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            StreamResult result = new StreamResult(new StringWriter());
            DOMSource source = new DOMSource(doc);
            transformer.transform(source, result);
            String value=result.getWriter().toString();
            PrintWriter p = new PrintWriter(outFile);
            p.println(value);
            p.close();
        }catch (Exception ioe){
            throw new STIException(ioe);
        }
    }

    static Document createDocument(TagSoupParser parser, String inFile, String sourceId) throws STIException{
        String input;
        try {
            input = FileUtils.readFileToString(new File(inFile));
        } catch (IOException e) {
            throw new STIException(e);
        }
        parser = new TagSoupParser(new ByteArrayInputStream(input.getBytes()), sourceId,"UTF-8");
        Document doc = null;
        try {
            doc = parser.getDOM();
        } catch (IOException e) {
        }

        return doc;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy