All downloads are free. The search and download features use the official Maven repository.

com.xmlcalabash.extensions.RDFa Maven / Gradle / Ivy

package com.xmlcalabash.extensions;

import com.xmlcalabash.core.XProcConstants;
import com.xmlcalabash.core.XProcException;
import com.xmlcalabash.core.XProcRuntime;
import com.xmlcalabash.io.ReadablePipe;
import com.xmlcalabash.io.WritablePipe;
import com.xmlcalabash.library.DefaultStep;
import com.xmlcalabash.runtime.XAtomicStep;
import com.xmlcalabash.util.S9apiUtils;
import com.xmlcalabash.util.TreeWriter;
import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmSequenceIterator;
import org.semarglproject.rdf.ParseException;
import org.semarglproject.rdf.rdfa.RdfaParser;
import org.semarglproject.sink.TripleSink;
import org.semarglproject.source.StreamProcessor;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.util.Calendar;
import java.util.Random;
import java.util.Vector;

/**
 * XProc extension step that extracts RDFa triples from a document.
 *
 * <p>The document read from {@code source} is serialized and fed through the Semargl
 * RDFa parser. Every triple the parser reports is written as a {@code sem_triple}
 * element (with {@code sem_subject}, {@code sem_predicate} and {@code sem_object}
 * children) inside a {@code sem_triples} wrapper document. A wrapper document is written
 * to {@code result} as soon as it holds {@code limit} triples; any remaining triples are
 * written when the parser signals the end of the stream.
 *
 * <p>Created Oct 8, 2008.
 *
 * @author ndw
 */

public class RDFa extends RDFStep {
    /** Number of triples written to the wrapper document currently being built. */
    private long count = 0;

    public RDFa(XProcRuntime runtime, XAtomicStep step) {
        super(runtime,step);
    }

    /**
     * Reads one document from {@code source}, serializes it, and parses the serialized
     * bytes as RDFa; the resulting triples are emitted by {@link Sink}.
     *
     * @throws SaxonApiException if the superclass run or the serialization fails
     * @throws XProcException if serialization I/O or RDFa parsing fails (wraps the cause)
     */
    public void run() throws SaxonApiException {
        super.run();

        XdmNode doc = source.read();

        try {
            Sink sink = new Sink();
            StreamProcessor sp = new StreamProcessor(RdfaParser.connect(sink));

            // HACK!!!
            // FIXME: set serializer properties appropriately!
            // The parser consumes a byte stream, so the tree is round-tripped through
            // its serialized form instead of being walked directly.
            Serializer serializer = makeSerializer();
            StringWriter writer = new StringWriter();
            serializer.setOutputWriter(writer);
            S9apiUtils.serialize(runtime, doc, serializer);
            writer.close();

            ByteArrayInputStream bais = new ByteArrayInputStream(writer.toString().getBytes("UTF-8"));
            // NOTE(review): getBaseURI() may be null for documents without a base URI,
            // which would fail here with a NullPointerException -- confirm against callers.
            sp.process(bais, doc.getBaseURI().toASCIIString());
        } catch (IOException e) {
            throw new XProcException(e);
        } catch (ParseException e) {
            throw new XProcException(e);
        }
    }

    /**
     * Receives triples from the RDFa parser and writes them, in chunks of at most
     * {@code limit} triples, as wrapper documents on {@code result}.
     */
    private class Sink implements TripleSink {
        /** Datatype recorded for literals that carry neither a language nor a datatype. */
        private static final String XSD_STRING = "http://www.w3.org/2001/XMLSchema#string";
        /** Namespace under which parser-local blank node labels are rewritten. */
        private static final String BLANK_NODE_NS = "http://marklogic.com/semantics/blank/";

        /**
         * Prefix prepended to every blank node label seen by this sink. It is derived
         * once from the creation time and a random value, so all blank nodes of one
         * parse share it while different parses get different prefixes.
         */
        private final String blankNodePrefix;
        private TreeWriter tree = null;
        private String baseURI = null;

        public Sink() {
            long randomValue = new Random().nextLong();
            long milliSecs = System.currentTimeMillis();
            // Computed once instead of per blank node: the inputs never change for the
            // lifetime of the sink, so every call produced the same prefix anyway.
            blankNodePrefix = BLANK_NODE_NS
                    + Long.toHexString(fuse(scramble(milliSecs), randomValue))
                    + "/";
        }

        /** Writes a triple whose object is a resource (URI or blank node). */
        @Override
        public void addNonLiteral(String subj, String pred, String obj) {
            openTriple(subj, pred);
            tree.addStartElement(sem_object);
            tree.startContent();
            tree.addText(patchURI(obj));
            closeTriple();
        }

        /**
         * Writes a triple whose object is a plain literal. A non-empty {@code lang}
         * becomes an {@code xml:lang} attribute; otherwise the object is marked with
         * the XSD string datatype.
         */
        @Override
        public void addPlainLiteral(String subj, String pred, String obj, String lang) {
            openTriple(subj, pred);
            tree.addStartElement(sem_object);

            if (lang == null || lang.isEmpty()) {
                tree.addAttribute(_datatype, XSD_STRING);
            } else {
                tree.addAttribute(XProcConstants.xml_lang,lang);
            }

            tree.startContent();
            tree.addText(obj);
            closeTriple();
        }

        /**
         * Writes a triple whose object is a typed literal; a {@code null} datatype is
         * treated as the XSD string datatype.
         */
        @Override
        public void addTypedLiteral(String subj, String pred, String obj, String datatype) {
            openTriple(subj, pred);
            tree.addStartElement(sem_object);
            tree.addAttribute(_datatype, datatype == null ? XSD_STRING : datatype);
            tree.startContent();
            tree.addText(obj);
            closeTriple();
        }

        @Override
        public void setBaseUri(String s) {
            baseURI = s;
        }

        @Override
        public void startStream() throws ParseException {
            // The counter lives on the enclosing step; reset it so a previous parse on
            // the same instance cannot leak a stale count into this one.
            count = 0;
            openDocument();
        }

        @Override
        public void endStream() throws ParseException {
            closeDocument();
            // Only emit the trailing chunk if it actually holds triples.
            if (count > 0) {
                result.write(tree.getResult());
            }
        }

        @Override
        public boolean setProperty(String key, Object value) {
            return false;
        }

        /** Starts a fresh wrapper document and opens its root element. */
        private void openDocument() {
            tree = new TreeWriter(runtime);
            tree.startDocument(step.getNode().getBaseURI());
            tree.addStartElement(sem_triples);
            tree.startContent();
        }

        /** Closes the root element and finishes the current wrapper document. */
        private void closeDocument() {
            tree.addEndElement();
            tree.endDocument();
        }

        /** Opens a triple element and writes its subject and predicate children. */
        private void openTriple(String subj, String pred) {
            tree.addStartElement(sem_triple);
            tree.startContent();

            tree.addStartElement(sem_subject);
            tree.startContent();
            tree.addText(patchURI(subj));
            tree.addEndElement();

            tree.addStartElement(sem_predicate);
            tree.startContent();
            tree.addText(patchURI(pred));
            tree.addEndElement();
        }

        /** Closes the open object element and its triple, then accounts for the triple. */
        private void closeTriple() {
            tree.addEndElement();
            tree.addEndElement();
            nextFile();
        }

        /**
         * Counts one written triple and, once the current document holds {@code limit}
         * triples, writes it to {@code result} and starts a new one.
         */
        private void nextFile() {
            count += 1;
            if (count >= limit) {
                closeDocument();
                result.write(tree.getResult());

                openDocument();
                count = 0;
            }
        }

        /**
         * Rewrites a blank node label ({@code _:...}) into a URI under this sink's
         * prefix; any other value is returned unchanged.
         */
        private String patchURI(String uri) {
            return uri.startsWith("_:") ? blankNodePrefix + uri : uri;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy