// All downloads are free. Search and download functionality uses the official Maven repository.
//
// com.sequoiadb.hive.SdbSplit Maven / Gradle / Ivy
//
// The newest version!
package com.sequoiadb.hive;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.bson.BSONObject;
import org.bson.BasicBSONObject;
import org.bson.types.BasicBSONList;

import com.sequoiadb.base.DBCollection;
import com.sequoiadb.base.DBCursor;
import com.sequoiadb.base.Node;
import com.sequoiadb.base.ReplicaGroup;
import com.sequoiadb.base.Sequoiadb;
import com.sequoiadb.exception.BaseException;

/**
 * An input split that identifies one SequoiaDB node (host and port) together
 * with the collection space and collection to read from it.
 *
 * <p>The class extends {@link FileSplit} and is always constructed with a
 * zero start offset, zero length and no file locations; the path passed to
 * the superclass is only a placeholder. The node address and the collection
 * names are serialized after the {@code FileSplit} fields in
 * {@link #write(DataOutput)} and restored in {@link #readFields(DataInput)}.
 */
public class SdbSplit extends FileSplit implements InputSplit {
    private String collectionSpaceName;
    private String collectionName;
    public static final Log LOG = LogFactory.getLog(SdbSplit.class.getName());

    private static final String[] EMPTY_ARRAY = new String[] {};
    private SdbConnAddr sdbAddr;

    /**
     * Creates an empty split whose state is expected to be filled in by
     * {@link #readFields(DataInput)}.
     */
    public SdbSplit() {
        super((Path) null, 0, 0, EMPTY_ARRAY);
    }

    /**
     * Creates a split for one node and one collection.
     *
     * @param host                host name of the node
     * @param port                port of the node
     * @param collectionSpaceName collection space to read
     * @param collectionName      collection to read
     * @param dummyPath           path handed to the {@code FileSplit}
     *                            superclass
     */
    public SdbSplit(String host, int port, String collectionSpaceName,
                    String collectionName, Path dummyPath) {
        super(dummyPath, 0, 0, EMPTY_ARRAY);
        this.sdbAddr = new SdbConnAddr(host, port);
        this.collectionSpaceName = collectionSpaceName;
        this.collectionName = collectionName;
    }

    /**
     * Restores the split from {@code input}: first the {@code FileSplit}
     * fields, then host, port, collection space name and collection name,
     * in the same order {@link #write(DataOutput)} emits them.
     */
    @Override
    public void readFields(final DataInput input) throws IOException {
        super.readFields(input);
        this.sdbAddr = new SdbConnAddr();
        sdbAddr.setHost(input.readUTF());
        sdbAddr.setPort(input.readInt());
        this.collectionSpaceName = input.readUTF();
        this.collectionName = input.readUTF();
    }

    /**
     * Serializes the split to {@code output}: first the {@code FileSplit}
     * fields, then host, port, collection space name and collection name.
     * The node address must have been set (via the five-argument constructor
     * or {@link #readFields(DataInput)}) before this is called.
     */
    @Override
    public void write(final DataOutput output) throws IOException {
        super.write(output);
        output.writeUTF(this.sdbAddr.getHost());
        output.writeInt(this.sdbAddr.getPort());
        output.writeUTF(this.collectionSpaceName);
        output.writeUTF(this.collectionName);
    }

    /**
     * @return the address of the node this split reads from, or {@code null}
     *         if the split was default-constructed and not yet populated
     */
    public SdbConnAddr getSdbAddr() {
        return this.sdbAddr;
    }

    @Override
    public long getLength() {
        return super.getLength();
    }

    /**
     * Reports the host of the node this split reads from as its only
     * location.
     *
     * @return a one-element array holding the node's host, or an empty array
     *         when no node address has been set yet
     */
    @Override
    public String[] getLocations() throws IOException {
        if (sdbAddr == null) {
            return EMPTY_ARRAY;
        }
        return new String[] { sdbAddr.getHost() };
    }

    @Override
    public String toString() {
        return "SdbSplit [collectionSpaceName=" + collectionSpaceName
                + ", collectionName=" + collectionName + ", sdbAddr=" + sdbAddr
                + "]";
    }

    /**
     * Builds the splits for the collection configured in {@code conf}.
     *
     * <p>Connects to the first reachable address from the configured address
     * list, asks the collection for its access plan via {@code explain}
     * (with option {@code Run=false}) and creates splits from every returned
     * record. The connection is always released before returning.
     *
     * @param conf      job configuration holding the database address list,
     *                  the collection space / collection names and the input
     *                  paths
     * @param numSplits not used by this implementation
     * @return the splits; an empty array when the collection space or the
     *         collection lookup yields {@code null}
     * @throws IllegalArgumentException if no database address is configured
     * @throws BaseException            if none of the configured addresses
     *                                  accepts a connection (the last
     *                                  connection failure is rethrown)
     */
    public static InputSplit[] getSplits(JobConf conf, int numSplits) {
        LOG.debug("Entry getSplits function");

        SdbConnAddr[] sdbAddrList = ConfigurationUtil
                .getAddrList(ConfigurationUtil.getDBAddr(conf));

        if (sdbAddrList == null || sdbAddrList.length == 0) {
            throw new IllegalArgumentException("The argument "
                    + ConfigurationUtil.DB_ADDR + " must be set.");
        }

        final Path[] tablePaths = FileInputFormat.getInputPaths(conf);

        Sequoiadb sdb = connectToFirstAvailable(sdbAddrList);
        try {
            String spaceName;
            String clName;
            // Only when neither the cs nor the cl name is set do we fall
            // back to the space/collection name settings.
            if (ConfigurationUtil.getCsName(conf) == null
                    && ConfigurationUtil.getClName(conf) == null) {
                spaceName = ConfigurationUtil.getSpaceName(conf);
                clName = ConfigurationUtil.getCollectionName(conf);
            } else {
                spaceName = ConfigurationUtil.getCsName(conf);
                clName = ConfigurationUtil.getClName(conf);
            }

            List<InputSplit> splits = collectSplits(sdb, spaceName, clName,
                    tablePaths);
            return splits.toArray(new InputSplit[splits.size()]);
        } finally {
            // Release the connection on every exit path, including the
            // early "nothing found" returns and failures while explaining.
            sdb.disconnect();
        }
    }

    /**
     * Tries each address in order and returns the first connection that can
     * be established. The caller guarantees {@code sdbAddrList} is non-empty.
     *
     * @throws BaseException the failure of the last attempted address when
     *                       no address could be connected
     */
    private static Sequoiadb connectToFirstAvailable(SdbConnAddr[] sdbAddrList) {
        BaseException lastException = null;
        for (SdbConnAddr addr : sdbAddrList) {
            try {
                return new Sequoiadb(addr.getHost(), addr.getPort(), null, null);
            } catch (BaseException e) {
                LOG.info("Connect coords error:" + addr, e);
                lastException = e;
            }
        }
        LOG.info("Connect coords error:" + lastException);
        throw lastException;
    }

    /**
     * Runs {@code explain} on the given collection and turns every returned
     * record into splits. Returns an empty list when the collection space or
     * the collection lookup yields {@code null}.
     */
    private static List<InputSplit> collectSplits(Sequoiadb sdb,
            String spaceName, String clName, Path[] tablePaths) {
        List<InputSplit> splits = new LinkedList<InputSplit>();
        if (sdb.getCollectionSpace(spaceName) == null) {
            return splits;
        }
        DBCollection collection = sdb.getCollectionSpace(spaceName)
                .getCollection(clName);
        if (collection == null) {
            return splits;
        }

        DBCursor explainCursor = collection.explain(null, null, null, null, 0,
                0, 0, new BasicBSONObject("Run", false));
        while (explainCursor.hasNext()) {
            BSONObject explainBson = explainCursor.getNext();
            // "NodeName" is parsed as "<host>:<port>".
            String nodeName = (String) explainBson.get("NodeName");
            String[] hostAndPort = nodeName.split(":");
            String hostName = hostAndPort[0];
            int port = Integer.parseInt(hostAndPort[1]);

            Object subCollections = explainBson.get("SubCollections");
            if (!(subCollections instanceof List)) {
                // No sub-collection list in this record: read the configured
                // collection directly from this node.
                splits.add(new SdbSplit(hostName, port, spaceName, clName,
                        tablePaths[0]));
                continue;
            }

            try {
                for (Object entry : (List<?>) subCollections) {
                    // Each entry's "Name" is "<collectionSpace>.<collection>".
                    String totalName = (String) ((BSONObject) entry).get("Name");
                    String[] items = totalName.split("\\.");
                    splits.add(new SdbSplit(hostName, port, items[0], items[1],
                            tablePaths[0]));
                }
            } catch (RuntimeException e) {
                // Best effort: a malformed sub-collection entry must not
                // abort split generation, so fall back to the configured
                // collection for this node.
                LOG.warn("Unexpected SubCollections entry for node " + nodeName
                        + ", falling back to " + spaceName + "." + clName, e);
                splits.add(new SdbSplit(hostName, port, spaceName, clName,
                        tablePaths[0]));
            }
        }
        return splits;
    }

    public String getCollectionSpaceName() {
        return collectionSpaceName;
    }

    public void setCollectionSpaceName(String collectionSpaceName) {
        this.collectionSpaceName = collectionSpaceName;
    }

    public String getCollectionName() {
        return collectionName;
    }

    public void setCollectionName(String collectionName) {
        this.collectionName = collectionName;
    }

}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy