All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.mindmaps.graql.internal.analytics.MedianVertexProgram Maven / Gradle / Ivy

/*
 * MindmapsDB - A Distributed Semantic Database
 * Copyright (C) 2016  Mindmaps Research Ltd
 *
 * MindmapsDB is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * MindmapsDB is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with MindmapsDB. If not, see <http://www.gnu.org/licenses/>.
 */

package io.mindmaps.graql.internal.analytics;

import com.google.common.collect.Sets;
import io.mindmaps.concept.ResourceType;
import io.mindmaps.util.Schema;
import org.apache.commons.configuration.Configuration;
import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
import org.apache.tinkerpop.gremlin.process.computer.Memory;
import org.apache.tinkerpop.gremlin.process.computer.MessageScope;
import org.apache.tinkerpop.gremlin.process.computer.Messenger;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

/**
 * This class implements quick select algorithm to find the median.
 */

public class MedianVertexProgram extends MindmapsVertexProgram {

    private final MessageScope.Local countMessageScopeIn = MessageScope.Local.of(__::inE);
    private final MessageScope.Local countMessageScopeOut = MessageScope.Local.of(__::outE);

    public static final int MAX_ITERATION = 20;
    private static final String RESOURCE_DATA_TYPE = "medianVertexProgram.resourceDataType";
    private static final String RESOURCE_TYPE = "medianVertexProgram.statisticsResourceType";

    // element key
    public static final String DEGREE = "medianVertexProgram.degree";
    public static final String LABEL = "medianVertexProgram.label";

    // memory key
    public static final String COUNT = "medianVertexProgram.count";
    public static final String INDEX_START = "medianVertexProgram.indexStart";
    public static final String INDEX_END = "medianVertexProgram.indexEnd";
    public static final String INDEX_MEDIAN = "medianVertexProgram.indexMedian";
    public static final String MEDIAN = "medianVertexProgram.median";
    public static final String PIVOT = "medianVertexProgram.pivot";
    public static final String PIVOT_POSITIVE = "medianVertexProgram.pivotPositive";
    public static final String PIVOT_NEGATIVE = "medianVertexProgram.pivotNegative";
    public static final String POSITIVE_COUNT = "medianVertexProgram.positiveCount";
    public static final String NEGATIVE_COUNT = "medianVertexProgram.negativeCount";
    public static final String FOUND = "medianVertexProgram.found";
    public static final String LABEL_SELECTED = "medianVertexProgram.labelSelected";

    private static final Set ELEMENT_COMPUTE_KEYS = Sets.newHashSet(DEGREE, LABEL);
    private static final Set MEMORY_COMPUTE_KEYS = Sets.newHashSet(COUNT, MEDIAN, FOUND,
            INDEX_START, INDEX_END, INDEX_MEDIAN, PIVOT, PIVOT_POSITIVE, PIVOT_NEGATIVE,
            POSITIVE_COUNT, NEGATIVE_COUNT, LABEL_SELECTED);

    private Set statisticsResourceTypes = new HashSet<>();

    public MedianVertexProgram() {
    }

    public MedianVertexProgram(Set selectedTypes,
                               Set statisticsResourceTypes, String resourceDataType) {
        this.selectedTypes = selectedTypes;
        this.statisticsResourceTypes = statisticsResourceTypes;

        String resourceDataTypeValue = resourceDataType.equals(ResourceType.DataType.LONG.getName()) ?
                Schema.ConceptProperty.VALUE_LONG.name() : Schema.ConceptProperty.VALUE_DOUBLE.name();
        persistentProperties.put(RESOURCE_DATA_TYPE, resourceDataTypeValue);
    }

    @Override
    public GraphComputer.Persist getPreferredPersist() {
        return GraphComputer.Persist.NOTHING;
    }

    @Override
    public GraphComputer.ResultGraph getPreferredResultGraph() {
        return GraphComputer.ResultGraph.ORIGINAL;
    }

    @Override
    public Set getElementComputeKeys() {
        return ELEMENT_COMPUTE_KEYS;
    }

    @Override
    public Set getMemoryComputeKeys() {
        return MEMORY_COMPUTE_KEYS;
    }

    @Override
    public Set getMessageScopes(final Memory memory) {
        final Set set = new HashSet<>();
        if (memory.getIteration() < 4) {
            set.add(this.countMessageScopeOut);
            set.add(this.countMessageScopeIn);
        }
        return set;
    }

    @Override
    public void storeState(final Configuration configuration) {
        super.storeState(configuration);
        statisticsResourceTypes.forEach(
                typeId -> configuration.addProperty(RESOURCE_TYPE + "." + typeId, typeId));
    }

    @Override
    public void loadState(final Graph graph, final Configuration configuration) {
        super.loadState(graph, configuration);
        configuration.subset(RESOURCE_TYPE).getKeys().forEachRemaining(key ->
                statisticsResourceTypes.add((String) configuration.getProperty(RESOURCE_TYPE + "." + key)));
    }

    @Override
    public void setup(final Memory memory) {
        LOGGER.debug("MedianVertexProgram Started !!!!!!!!");
        memory.set(COUNT, 0L);
        memory.set(LABEL_SELECTED, memory.getIteration());
        memory.set(NEGATIVE_COUNT, 0L);
        memory.set(POSITIVE_COUNT, 0L);
        memory.set(FOUND, false);
        if (persistentProperties.get(RESOURCE_DATA_TYPE).equals(Schema.ConceptProperty.VALUE_LONG.name())) {
            memory.set(MEDIAN, 0L);
            memory.set(PIVOT, 0L);
            memory.set(PIVOT_NEGATIVE, 0L);
            memory.set(PIVOT_POSITIVE, 0L);
        } else {
            memory.set(MEDIAN, 0D);
            memory.set(PIVOT, 0D);
            memory.set(PIVOT_NEGATIVE, 0D);
            memory.set(PIVOT_POSITIVE, 0D);
        }
    }

    @Override
    public void safeExecute(final Vertex vertex, Messenger messenger, final Memory memory) {
        switch (memory.getIteration()) {
            case 0:
                if (selectedTypes.contains(Utility.getVertexType(vertex))) {
                    String type = vertex.value(Schema.ConceptProperty.BASE_TYPE.name());
                    if (type.equals(Schema.BaseType.ENTITY.name()) || type.equals(Schema.BaseType.RESOURCE.name())) {
                        messenger.sendMessage(this.countMessageScopeIn, 1L);
                    } else if (type.equals(Schema.BaseType.RELATION.name())) {
                        messenger.sendMessage(this.countMessageScopeIn, 1L);
                        messenger.sendMessage(this.countMessageScopeOut, -1L);
                    }
                }
                break;
            case 1:
                String type = vertex.value(Schema.ConceptProperty.BASE_TYPE.name());
                if (type.equals(Schema.BaseType.CASTING.name())) {
                    boolean hasRolePlayer = false;
                    long assertionCount = 0;
                    Iterator iterator = messenger.receiveMessages();
                    while (iterator.hasNext()) {
                        long message = iterator.next();
                        if (message < 0) assertionCount++;
                        else hasRolePlayer = true;
                    }
                    if (hasRolePlayer) {
                        messenger.sendMessage(this.countMessageScopeIn, 1L);
                        messenger.sendMessage(this.countMessageScopeOut, assertionCount);
                    }
                }
                break;
            case 2:
                if (statisticsResourceTypes.contains(Utility.getVertexType(vertex))) {
                    // put degree
                    long edgeCount = IteratorUtils.reduce(messenger.receiveMessages(), 0L, (a, b) -> a + b);
                    vertex.property(DEGREE, edgeCount);
                    //TODO: select three values in each iteration, pick the median of the three as pivot
                    // select pivot randomly
                    if (edgeCount > 0) {
                        memory.set(PIVOT,
                                vertex.value((String) persistentProperties.get(RESOURCE_DATA_TYPE)));
                        memory.incr(COUNT, edgeCount);
                    }
                }
                break;
            case 3:
                if (statisticsResourceTypes.contains(Utility.getVertexType(vertex)) &&
                        (long) vertex.value(DEGREE) > 0) {
                    Number value = vertex.value((String) persistentProperties.get(RESOURCE_DATA_TYPE));
                    if (value.doubleValue() < memory.get(PIVOT).doubleValue()) {
                        vertex.property(LABEL, -memory.getIteration());
                        memory.incr(NEGATIVE_COUNT, vertex.value(DEGREE));
                        memory.set(PIVOT_NEGATIVE, value);
                    } else if (value.doubleValue() > memory.get(PIVOT).doubleValue()) {
                        vertex.property(LABEL, memory.getIteration());
                        memory.incr(POSITIVE_COUNT, vertex.value(DEGREE));
                        memory.set(PIVOT_POSITIVE, value);
                    } else {
                        // also assign a label to pivot, so all the selected resources have LABEL
                        vertex.property(LABEL, 0);
                    }
                }
                break;

            // default case is almost the same as case 3, except that in case 3 no vertex has LABEL
            default:
                if (statisticsResourceTypes.contains(Utility.getVertexType(vertex)) &&
                        (long) vertex.value(DEGREE) > 0 &&
                        (int) vertex.value(LABEL) == memory.get(LABEL_SELECTED)) {
                    Number value = vertex.value((String) persistentProperties.get(RESOURCE_DATA_TYPE));
                    if (value.doubleValue() < memory.get(PIVOT).doubleValue()) {
                        vertex.property(LABEL, -memory.getIteration());
                        memory.incr(NEGATIVE_COUNT, vertex.value(DEGREE));
                        memory.set(PIVOT_NEGATIVE, value);
                    } else if (value.doubleValue() > memory.get(PIVOT).doubleValue()) {
                        vertex.property(LABEL, memory.getIteration());
                        memory.incr(POSITIVE_COUNT, vertex.value(DEGREE));
                        memory.set(PIVOT_POSITIVE, value);
                    }
                }
                break;
        }
    }

    @Override
    public boolean terminate(final Memory memory) {
        LOGGER.debug("Iteration: " + memory.getIteration());

        if (memory.getIteration() == 2) {
            memory.set(INDEX_START, 0L);
            memory.set(INDEX_END, memory.get(COUNT) - 1L);
            memory.set(INDEX_MEDIAN, (memory.get(COUNT) - 1L) / 2L);

            LOGGER.debug("count: " + memory.get(COUNT));
            LOGGER.debug("first pivot: " + memory.get(PIVOT));

        } else if (memory.getIteration() > 2) {

            long indexNegativeEnd = memory.get(INDEX_START) + memory.get(NEGATIVE_COUNT) - 1;
            long indexPositiveStart = memory.get(INDEX_END) - memory.get(POSITIVE_COUNT) + 1;

            LOGGER.debug("pivot: " + memory.get(PIVOT));

            LOGGER.debug(memory.get(INDEX_START) + ", " + indexNegativeEnd);
            LOGGER.debug(indexPositiveStart + ", " + memory.get(INDEX_END));

            LOGGER.debug("negative count: " + memory.get(NEGATIVE_COUNT));
            LOGGER.debug("positive count: " + memory.get(POSITIVE_COUNT));

            LOGGER.debug("negative pivot: " + memory.get(PIVOT_NEGATIVE));
            LOGGER.debug("positive pivot: " + memory.get(PIVOT_POSITIVE));

            if (indexNegativeEnd < memory.get(INDEX_MEDIAN)) {
                if (indexPositiveStart > memory.get(INDEX_MEDIAN)) {
                    memory.set(FOUND, true);
                    LOGGER.debug("FOUND IT!!!");
                } else {
                    memory.set(INDEX_START, indexPositiveStart);
                    memory.set(PIVOT, memory.get(PIVOT_POSITIVE));
                    memory.set(LABEL_SELECTED, memory.getIteration());
                    LOGGER.debug("new pivot: " + memory.get(PIVOT));
                }
            } else {
                memory.set(INDEX_END, indexNegativeEnd);
                memory.set(PIVOT, memory.get(PIVOT_NEGATIVE));
                memory.set(LABEL_SELECTED, -memory.getIteration());
                LOGGER.debug("new pivot: " + memory.get(PIVOT));
            }
            memory.set(MEDIAN, memory.get(PIVOT));

            memory.set(POSITIVE_COUNT, 0L);
            memory.set(NEGATIVE_COUNT, 0L);
        }

        return memory.get(FOUND) || memory.getIteration() >= MAX_ITERATION;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy