All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.kitesdk.morphline.saxon.XQueryBuilder Maven / Gradle / Ivy

/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.morphline.saxon;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import javax.xml.stream.XMLStreamException;

import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryEvaluator;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmNodeKind;
import net.sf.saxon.s9api.XdmSequenceIterator;
import net.sf.saxon.s9api.XdmValue;
import net.sf.saxon.trace.XQueryTraceListener;
import net.sf.saxon.value.UntypedAtomicValue;

import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineCompilationException;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Configs;

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

/**
 * Command that parses an InputStream that contains an XML document and runs the given XQuery over
 * the XML document. For each item in the query result sequence, the command emits a morphline record
 * containing the item's name-value pairs.
 */
public final class XQueryBuilder implements CommandBuilder {

  @Override
  public Collection getNames() {
    return Collections.singletonList("xquery");
  }

  @Override
  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
    try {
      return new XQuery(this, config, parent, child, context);
    } catch (SaxonApiException e) {
      throw new MorphlineCompilationException("Cannot compile", config, e);
    } catch (IOException e) {
      throw new MorphlineCompilationException("Cannot compile", config, e);
    } catch (XMLStreamException e) {
      throw new MorphlineCompilationException("Cannot compile", config, e);
    }
  }
  
  
  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  private static final class XQuery extends SaxonCommand {
    
    /*
     * TODO: Add option to support streaming via fragmentPath.
     * 
     * TODO: Add option to support serializing each item in the result sequence according to the XML
     * Output Method of the 
     * W3C XQuery/XSLT2 Serialization Spec, with sequence normalization as defined therein.
     */
    
    private final List fragments = new ArrayList();
  
    public XQuery(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SaxonApiException, IOException, XMLStreamException {
      super(builder, config, parent, child, context);
      
      List fragmentConfigs = getConfigs().getConfigList(config, "fragments");
      if (fragmentConfigs.size() == 0) {
        throw new MorphlineCompilationException("At least one fragment must be defined", config);
      }
      if (fragmentConfigs.size() > 1) {
        throw new MorphlineCompilationException("More than one fragment is not yet supported", config);
      }
      for (Config fragment : fragmentConfigs) {
        String fragmentPath = getConfigs().getString(fragment, "fragmentPath");
        if (!fragmentPath.equals("/")) {
          throw new MorphlineCompilationException("Non-root fragment paths are not yet supported", config);
        }  
        
        XQueryCompiler compiler = processor.newXQueryCompiler();
        compiler.setErrorListener(new DefaultErrorListener());
        compiler.setCompileWithTracing(isTracing);
        compiler.setLanguageVersion(getConfigs().getString(config, "languageVersion", "1.0"));
        
        XQueryExecutable executable = null;
        String query = getConfigs().getString(fragment, "queryString", null);
        if (query != null) {
          executable = compiler.compile(query);     
        }
        String queryFile = getConfigs().getString(fragment, "queryFile", null);
        if (queryFile != null) {
          executable = compiler.compile(new File(queryFile));
        }
        if (query == null && queryFile == null) {
          throw new MorphlineCompilationException("Either query or queryFile must be defined", config);
        }
        if (query != null && queryFile != null) {
          throw new MorphlineCompilationException("Must not define both query and queryFile at the same time", config);
        }
        
        XQueryEvaluator evaluator = executable.load();
        Config variables = getConfigs().getConfig(fragment, "externalVariables", ConfigFactory.empty());
        for (Map.Entry entry : new Configs().getEntrySet(variables)) {
          XdmValue xdmValue = XdmNode.wrap(new UntypedAtomicValue(entry.getValue().toString()));
          evaluator.setExternalVariable(new QName(entry.getKey()), xdmValue);
        }
        Config fileVariables = getConfigs().getConfig(fragment, "externalFileVariables", ConfigFactory.empty());
        for (Map.Entry entry : new Configs().getEntrySet(fileVariables)) {
          File file = new File(entry.getValue().toString());
          XdmValue doc = parseXmlDocument(file);
          evaluator.setExternalVariable(new QName(entry.getKey()), doc);
        }
        if (isTracing) {
          evaluator.setTraceListener(new XQueryTraceListener()); // TODO redirect from stderr to SLF4J
        }

        fragments.add(new Fragment(fragmentPath, evaluator));
      }  
      validateArguments();
    }
  
    @Override
    protected boolean doProcess2(Record inputRecord, InputStream stream) throws SaxonApiException, XMLStreamException {
      incrementNumRecords();      
      for (Fragment fragment : fragments) {
        Record template = inputRecord.copy();
        removeAttachments(template);
        XdmNode document = parseXmlDocument(stream);
        LOG.trace("XQuery input document: {}", document);
        XQueryEvaluator evaluator = fragment.xQueryEvaluator;
        evaluator.setContextItem(document);
        
        int i = 0;
        for (XdmItem item : evaluator) {
          i++;
          if (LOG.isTraceEnabled()) {
            LOG.trace("XQuery result sequence item #{} is of class: {} with value: {}", new Object[] { i,
                item.getUnderlyingValue().getClass().getName(), item });
          }
          if (item.isAtomicValue()) {
            LOG.debug("Ignoring atomic value in result sequence: {}", item);
            continue;
          }
          XdmNode node = (XdmNode) item;
          Record outputRecord = template.copy();
          boolean isNonEmpty = addRecordValues(node, Axis.SELF, XdmNodeKind.ATTRIBUTE, outputRecord);
          isNonEmpty = addRecordValues(node, Axis.ATTRIBUTE, XdmNodeKind.ATTRIBUTE, outputRecord) || isNonEmpty;
          isNonEmpty = addRecordValues(node, Axis.CHILD, XdmNodeKind.ELEMENT, outputRecord) || isNonEmpty;
          if (isNonEmpty) { // pass record to next command in chain   
            if (!getChild().process(outputRecord)) { 
              return false;
            }
          }
        }
      }      
      return true;
    }

    // extract fields from query result sequence
    private boolean addRecordValues(XdmNode node, Axis axis, XdmNodeKind nodeTest, Record record) {
      boolean isEmpty = true;
      XdmSequenceIterator iter = node.axisIterator(axis); 
      while (iter.hasNext()) {
        XdmNode child = (XdmNode) iter.next();
        if (child.getNodeKind() == nodeTest) { 
          String strValue = child.getStringValue();
          if (strValue.length() > 0) {
            record.put(child.getNodeName().getLocalName(), strValue);
            isEmpty = false;
          }
        }
      }
      return !isEmpty;
    }
    

    ///////////////////////////////////////////////////////////////////////////////
    // Nested classes:
    ///////////////////////////////////////////////////////////////////////////////    
    private static final class Fragment {
      
      private final String fragmentPath;     
      private final XQueryEvaluator xQueryEvaluator;     
      
      public Fragment(String fragmentPath, XQueryEvaluator xQueryEvaluator) {
        this.fragmentPath = fragmentPath;
        this.xQueryEvaluator = xQueryEvaluator;
      }
    }
    
  }  
  
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy