All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.thinkaurelius.titan.hadoop.FaunusSerializer Maven / Gradle / Ivy

package com.thinkaurelius.titan.hadoop;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.*;
import com.thinkaurelius.titan.diskstorage.ReadBuffer;
import com.thinkaurelius.titan.diskstorage.StaticBuffer;
import com.thinkaurelius.titan.diskstorage.configuration.Configuration;
import com.thinkaurelius.titan.diskstorage.util.ReadArrayBuffer;
import com.thinkaurelius.titan.graphdb.database.serialize.Serializer;
import com.thinkaurelius.titan.graphdb.database.serialize.StandardSerializer;
import com.thinkaurelius.titan.hadoop.FaunusPathElement.MicroElement;
import com.thinkaurelius.titan.hadoop.config.TitanHadoopConfiguration;
import com.thinkaurelius.titan.util.datastructures.IterablesUtil;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.util.ExceptionFactory;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import static com.thinkaurelius.titan.hadoop.config.TitanHadoopConfiguration.KRYO_MAX_OUTPUT_SIZE;

/**
 * @author Matthias Broecheler ([email protected])
 */
public class FaunusSerializer {

    // This is volatile to support double-checked locking
    private static volatile Serializer standardSerializer;

    private final FaunusSchemaManager types;
    private final boolean trackState;
    private final boolean trackPaths;
    private final Configuration configuration;

    private static final Logger log =
            LoggerFactory.getLogger(FaunusSerializer.class);

    public FaunusSerializer(final Configuration configuration) {
        Preconditions.checkNotNull(configuration);
        this.types = FaunusSchemaManager.getTypeManager(configuration);
        this.configuration = configuration;
        this.trackState = configuration.get(TitanHadoopConfiguration.PIPELINE_TRACK_STATE);
        this.trackPaths = configuration.get(TitanHadoopConfiguration.PIPELINE_TRACK_PATHS);
    }

    public void writeVertex(final FaunusVertex vertex, final DataOutput out) throws IOException {
        //Need to write the id up front for the comparator
        WritableUtils.writeVLong(out, vertex.id);
        Schema schema = new Schema();
        vertex.updateSchema(schema);
        schema.writeSchema(out);
        writePathElement(vertex, schema, out);
        writeEdges(vertex, vertex.inAdjacency, out, Direction.IN, schema);
        FaunusVertexLabel vl = (FaunusVertexLabel)vertex.getVertexLabel();
        out.writeUTF(vl.isDefault()?"":vl.getName());
    }

    public void readVertex(final FaunusVertex vertex, final DataInput in) throws IOException {
        WritableUtils.readVLong(in);
        Schema schema = readSchema(in);
        readPathElement(vertex, schema, in);
        vertex.inAdjacency = readEdges(vertex, in, Direction.IN, schema);
        String labelName = in.readUTF();
        vertex.setVertexLabel(StringUtils.isBlank(labelName)?FaunusVertexLabel.DEFAULT_VERTEXLABEL:
                                    types.getVertexLabel(labelName));
    }

    public void writeEdge(final StandardFaunusEdge edge, final DataOutput out) throws IOException {
        writePathElement(edge, out);
        WritableUtils.writeVLong(out, edge.inVertex);
        WritableUtils.writeVLong(out, edge.outVertex);
        writeFaunusType(edge.getType(), out);
    }

    public void readEdge(final StandardFaunusEdge edge, final DataInput in) throws IOException {
        readPathElement(edge, in);
        edge.inVertex = WritableUtils.readVLong(in);
        edge.outVertex = WritableUtils.readVLong(in);
        edge.setLabel((FaunusEdgeLabel)readFaunusType(in));
    }

    public void writeProperty(final StandardFaunusProperty property, final DataOutput out) throws IOException {
        writePathElement(property, out);
        WritableUtils.writeVLong(out, property.vertexid);
        serializeObject(out,property.getValue());
        writeFaunusType(property.getType(), out);
    }

    public void readProperty(final StandardFaunusProperty property, final DataInput in) throws IOException {
        readPathElement(property, in);
        property.vertexid = WritableUtils.readVLong(in);
        property.value = deserializeObject(in);
        property.setKey((FaunusPropertyKey)readFaunusType(in));
    }

    private void readPathElement(final FaunusPathElement element, final DataInput in) throws IOException {
        readPathElement(element, null, in);
    }

    private void writePathElement(final FaunusPathElement element, final DataOutput out) throws IOException {
        writePathElement(element, null, out);
    }

    private void readPathElement(final FaunusPathElement element, Schema schema, final DataInput in) throws IOException {
        readElement(element, schema, in);
        if (trackPaths) {
            List> paths = readElementPaths(in);
            element.tracker = new FaunusPathElement.Tracker(paths,
                    (element instanceof FaunusVertex) ? new FaunusVertex.MicroVertex(element.id) : new StandardFaunusEdge.MicroEdge(element.id));

            log.trace("readPathElement element={} paths={}", element, paths);
        } else {
            element.pathCounter = WritableUtils.readVLong(in);
            element.tracker = FaunusPathElement.DEFAULT_TRACK;
        }
    }

    private void writePathElement(final FaunusPathElement element, final Schema schema, final DataOutput out) throws IOException {
        writeElement(element, schema, out);
        if (trackPaths)
            writeElementPaths(element.tracker.paths, out);
        else
            WritableUtils.writeVLong(out, element.pathCounter);
    }

    private void readElement(final FaunusElement element, Schema schema, final DataInput in) throws IOException {
        element.id = WritableUtils.readVLong(in);
        if (trackState) element.setLifeCycle(in.readByte());
        element.outAdjacency = readEdges(element,in,Direction.OUT,schema);
    }

    private void writeElement(final FaunusElement element, final Schema schema, final DataOutput out) throws IOException {
        Preconditions.checkArgument(trackState || !element.isRemoved());
        WritableUtils.writeVLong(out, element.id);
        if (trackState) out.writeByte(element.getLifeCycle());
        writeEdges(element, element.outAdjacency, out, Direction.OUT, schema);
    }



    private void serializeObject(final DataOutput out, Object value) throws IOException {
        final com.thinkaurelius.titan.graphdb.database.serialize.DataOutput o = getStandardSerializer().getDataOutput(40);
        o.writeClassAndObject(value);
        final StaticBuffer buffer = o.getStaticBuffer();
        WritableUtils.writeVInt(out, buffer.length());
        out.write(buffer.as(StaticBuffer.ARRAY_FACTORY));
    }

    private Object deserializeObject(final DataInput in) throws IOException {
        int byteLength = WritableUtils.readVInt(in);
        byte[] bytes = new byte[byteLength];
        in.readFully(bytes);
        final ReadBuffer buffer = new ReadArrayBuffer(bytes);
        return getStandardSerializer().readClassAndObject(buffer);
    }

    /**
     * Return the StandardSerializer singleton shared between all instances of FaunusSerializer.
     *
     * If it has not yet been initialized, then the singleton is created using the maximum
     * Kryo buffer size configured in the calling FaunusSerializer.
     *
     * @return
     */
    private Serializer getStandardSerializer() {
        if (null == standardSerializer) { // N.B. standardSerializer is volatile
            synchronized (FaunusSerializer.class) {
                if (null == standardSerializer) {
                    int maxOutputBufSize = configuration.get(KRYO_MAX_OUTPUT_SIZE);
                    standardSerializer = new StandardSerializer(true, maxOutputBufSize);
                }
            }
        }
        // TODO consider checking whether actual output buffer size matches config, create new StandardSerializer if mismatched?  Might not be worth it
        return standardSerializer;
    }

    private  Iterable filterDeletedRelations(Iterable elements) {
        if (trackState) return elements;
        else return Iterables.filter(elements, new Predicate() {
            @Override
            public boolean apply(@Nullable T element) {
                return !element.isRemoved();
            }
        });
    }

    private SetMultimap readEdges(final FaunusElement element, final DataInput in, final Direction direction, final Schema schema) throws IOException {
        final SetMultimap adjacency = HashMultimap.create();
        int numTypes = WritableUtils.readVInt(in);
        for (int i = 0; i < numTypes; i++) {
            FaunusRelationType type;
            if (schema == null) type = readFaunusType(in);
            else type = schema.getType(WritableUtils.readVLong(in));
            final int size = WritableUtils.readVInt(in);
            for (int j = 0; j < size; j++) {
                FaunusRelation relation;
                if (element instanceof FaunusVertex) {
                    if (type.isEdgeLabel()) {
                        final StandardFaunusEdge edge = new StandardFaunusEdge(configuration);
                        edge.setLabel((FaunusEdgeLabel)type);
                        readPathElement(edge, schema, in);
                        long otherId = WritableUtils.readVLong(in);
                        switch (direction) {
                            case IN:
                                edge.inVertex = element.getLongId();
                                edge.outVertex = otherId;
                                break;
                            case OUT:
                                edge.outVertex = element.getLongId();
                                edge.inVertex = otherId;
                                break;
                            default:
                                throw ExceptionFactory.bothIsNotSupported();
                        }
                        relation = edge;
                        log.trace("readEdges edge={} paths={}", edge, edge.tracker.paths);
                    } else {
                        assert type.isPropertyKey() && direction==Direction.OUT;
                        final StandardFaunusProperty property = new StandardFaunusProperty(configuration);
                        property.setKey((FaunusPropertyKey) type);
                        readPathElement(property, schema, in);
                        property.value = deserializeObject(in);
                        relation = property;
                    }
                } else {
                    byte lifecycle = trackState?in.readByte():-1;
                    if (type.isEdgeLabel()) {
                        relation = new SimpleFaunusEdge((FaunusEdgeLabel)type,new FaunusVertex(configuration,WritableUtils.readVLong(in)));
                    } else {
                        assert type.isPropertyKey() && direction==Direction.OUT;
                        relation = new SimpleFaunusProperty((FaunusPropertyKey)type,deserializeObject(in));
                    }
                    if (trackState) relation.setLifeCycle(lifecycle);
                }
                adjacency.put(type, relation);
            }
        }
        if (adjacency.isEmpty()) return FaunusElement.EMPTY_ADJACENCY;
        return adjacency;
    }

    private void writeEdges(final FaunusElement element, final SetMultimap edges, final DataOutput out, final Direction direction, final Schema schema) throws IOException {
        Map counts = Maps.newHashMap();
        int typeCount = 0;
        for (FaunusRelationType type : edges.keySet()) {
            int count = IterablesUtil.size(filterDeletedRelations(edges.get(type)));
            counts.put(type, count);
            if (count > 0) typeCount++;
        }

        WritableUtils.writeVInt(out, typeCount);
        for (FaunusRelationType type : edges.keySet()) {
            if (counts.get(type) == 0) continue;

            if (schema == null) writeFaunusType(type, out);
            else WritableUtils.writeVLong(out, schema.getTypeId(type));

            WritableUtils.writeVInt(out, counts.get(type));
            Iterable subset = filterDeletedRelations(edges.get(type));
            for (final FaunusRelation rel : subset) {
                if (element instanceof FaunusVertex) {
                    assert rel instanceof StandardFaunusRelation;
                    writePathElement((StandardFaunusRelation)rel,schema,out);
                } else {
                    assert rel instanceof SimpleFaunusRelation;
                    if (trackState) out.writeByte(((SimpleFaunusRelation)rel).getLifeCycle());
                }
                if (rel.isEdge()) {
                    WritableUtils.writeVLong(out, ((FaunusEdge)rel).getVertexId(direction.opposite()));
                } else {
                    serializeObject(out,((FaunusProperty)rel).getValue());
                }
            }
        }
    }

    private void writeElementPaths(final List> paths, final DataOutput out) throws IOException {
        if (null == paths) {
            WritableUtils.writeVInt(out, 0);
        } else {
            WritableUtils.writeVInt(out, paths.size());
            for (final List path : paths) {
                WritableUtils.writeVInt(out, path.size());
                for (MicroElement element : path) {
                    if (element instanceof FaunusVertex.MicroVertex)
                        out.writeChar('v');
                    else
                        out.writeChar('e');
                    WritableUtils.writeVLong(out, element.getId());
                }
            }
        }
    }

    private List> readElementPaths(final DataInput in) throws IOException {
        int pathsSize = WritableUtils.readVInt(in);
        if (pathsSize == 0)
            return new ArrayList>();
        else {
            final List> paths = new ArrayList>(pathsSize);
            for (int i = 0; i < pathsSize; i++) {
                int pathSize = WritableUtils.readVInt(in);
                final List path = new ArrayList(pathSize);
                for (int j = 0; j < pathSize; j++) {
                    char type = in.readChar();
                    if (type == 'v')
                        path.add(new FaunusVertex.MicroVertex(WritableUtils.readVLong(in)));
                    else
                        path.add(new StandardFaunusEdge.MicroEdge(WritableUtils.readVLong(in)));
                }
                paths.add(path);
            }
            return paths;
        }
    }

    private void writeFaunusType(final FaunusRelationType type, final DataOutput out) throws IOException {
        out.writeByte(type.isPropertyKey()?0:1);
        out.writeUTF(type.getName());
    }

    private FaunusRelationType readFaunusType(final DataInput in) throws IOException {
        int type = in.readByte();
        String typeName = in.readUTF();
        assert type==0 || type==1;
        if (type==0) return types.getOrCreatePropertyKey(typeName);
        else return types.getOrCreateEdgeLabel(typeName);
    }


    class Schema {

        private final BiMap localTypes;
        private long count = 1;

        private Schema() {
            this(8);
        }

        private Schema(int size) {
            this.localTypes = HashBiMap.create(size);
        }

        void add(String type) {
            this.add(types.getRelationType(type));
        }

        void add(FaunusRelationType type) {
            if (!localTypes.containsKey(type)) localTypes.put(type, count++);
        }

        void addAll(Iterable types) {
            for (FaunusRelationType type : types) add(type);
        }

        long getTypeId(FaunusRelationType type) {
            Long id = localTypes.get(type);
            Preconditions.checkArgument(id != null, "Type is not part of the schema: " + type);
            return id;
        }

        FaunusRelationType getType(long id) {
            FaunusRelationType type = localTypes.inverse().get(id);
            Preconditions.checkArgument(type != null, "Type is not part of the schema: " + id);
            return type;
        }

        private void add(FaunusRelationType type, long index) {
            Preconditions.checkArgument(!localTypes.containsValue(index));
            localTypes.put(type, index);
            count = index + 1;
        }

        private void writeSchema(final DataOutput out) throws IOException {
            WritableUtils.writeVInt(out, localTypes.size());
            for (Map.Entry entry : localTypes.entrySet()) {
                writeFaunusType(entry.getKey(), out);
                WritableUtils.writeVLong(out, entry.getValue());
            }
        }

    }

    private Schema readSchema(final DataInput in) throws IOException {
        int size = WritableUtils.readVInt(in);
        Schema schema = new Schema(size);
        for (int i = 0; i < size; i++) {
            schema.add(readFaunusType(in), WritableUtils.readVLong(in));
        }
        return schema;
    }

    static {
        WritableComparator.define(FaunusPathElement.class, new Comparator());
    }

    public static class Comparator extends WritableComparator {
        public Comparator() {
            super(FaunusPathElement.class);
        }

        @Override
        public int compare(final byte[] element1, final int start1, final int length1, final byte[] element2, final int start2, final int length2) {
            try {
                return Long.valueOf(readVLong(element1, start1)).compareTo(readVLong(element2, start2));
            } catch (IOException e) {
                return -1;
            }
        }

        @Override
        public int compare(final WritableComparable a, final WritableComparable b) {
            if (a instanceof FaunusElement && b instanceof FaunusElement)
                return ((Long) (((FaunusElement) a).getLongId())).compareTo(((FaunusElement) b).getLongId());
            else
                return super.compare(a, b);
        }
    }

    //################################################
    // Serialization for vanilla Blueprints
    //################################################


    /**
     * All graph element identifiers must be of the long data type.  Implementations of this
     * interface makes it possible to control the conversion of the identifier in the
     * VertexToHadoopBinary utility class.
     *
     * @author Stephen Mallette (http://stephen.genoprime.com)
     */
//    public static interface ElementIdHandler {
//        long convertIdentifier(final Element element);
//    }
//
//    public void writeVertex(final Vertex vertex, final ElementIdHandler elementIdHandler, final DataOutput out) throws IOException {
//        Schema schema = new Schema();
//        //Convert properties and update schema
//        Multimap properties = getProperties(vertex);
//        for (HadoopType type : properties.keySet()) schema.add(type);
//        for (Edge edge : vertex.getEdges(Direction.BOTH)) {
//            schema.add(edge.getLabel());
//            for (String key : edge.getPropertyKeys()) schema.add(key);
//        }
//
//        WritableUtils.writeVLong(out, elementIdHandler.convertIdentifier(vertex));
//        schema.writeSchema(out);
//        WritableUtils.writeVLong(out, elementIdHandler.convertIdentifier(vertex));
//        if (trackState) out.writeByte(ElementState.NEW.getByteValue());
//        writeProperties(properties, schema, out);
//        out.writeBoolean(false);
//        WritableUtils.writeVLong(out, 0);
//        writeEdges(vertex, Direction.IN, elementIdHandler, schema, out);
//        writeEdges(vertex, Direction.OUT, elementIdHandler, schema, out);
//
//    }
//
//    private Multimap getProperties(Element element) {
//        Multimap properties = HashMultimap.create();
//        for (String key : element.getPropertyKeys()) {
//            HadoopType type = types.get(key);
//            properties.put(type, new FaunusProperty(type, element.getProperty(key)));
//        }
//        return properties;
//    }
//
//    private void writeEdges(final Vertex vertex, final Direction direction, final ElementIdHandler elementIdHandler,
//                            final Schema schema, final DataOutput out) throws IOException {
//        final Multiset labelCount = HashMultiset.create();
//        for (final Edge edge : vertex.getEdges(direction)) {
//            labelCount.add(edge.getLabel());
//        }
//        WritableUtils.writeVInt(out, labelCount.elementSet().size());
//        for (String label : labelCount.elementSet()) {
//            HadoopType type = types.get(label);
//            WritableUtils.writeVLong(out, schema.getTypeId(type));
//            WritableUtils.writeVInt(out, labelCount.count(label));
//            for (final Edge edge : vertex.getEdges(direction, label)) {
//                WritableUtils.writeVLong(out, elementIdHandler.convertIdentifier(edge));
//                if (trackState) out.writeByte(ElementState.NEW.getByteValue());
//                writeProperties(getProperties(edge), schema, out);
//                out.writeBoolean(false);
//                WritableUtils.writeVLong(out, 0);
//                WritableUtils.writeVLong(out, elementIdHandler.convertIdentifier(edge.getVertex(direction.opposite())));
//            }
//        }
//    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy