All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.graphaware.relcount.full.internal.cache.AverageCardinalityGeneralizationStrategy Maven / Gradle / Ivy

/*
 * Copyright (c) 2013 GraphAware
 *
 * This file is part of GraphAware.
 *
 * GraphAware is free software: you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation, either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details. You should have received a copy of
 * the GNU General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

package com.graphaware.relcount.full.internal.cache;

import com.graphaware.propertycontainer.dto.common.relationship.HasType;
import com.graphaware.propertycontainer.dto.common.relationship.HasTypeAndDirection;
import com.graphaware.propertycontainer.dto.common.relationship.Type;
import com.graphaware.propertycontainer.dto.common.relationship.TypeAndDirection;
import com.graphaware.relcount.full.internal.dto.property.CompactiblePropertiesImpl;
import com.graphaware.relcount.full.internal.dto.relationship.CompactibleRelationship;

import java.util.*;

/**
 * A {@link GeneralizationStrategy} with an "average property cardinality" heuristic.
 * 

* A human-friendly explanation of what this strategy is trying to achieve is getting rid of (generalizing) properties with * frequently changing values (like timestamp on a relationship), whilst keeping the ones that change less frequently, * thus providing more value (like strength of a friendship). */ public class AverageCardinalityGeneralizationStrategy implements GeneralizationStrategy { /** * All possible property keys of relationships with the given type and direction. */ private final Map> keysByTypeAndDirection = new HashMap<>(); /** * All possible property keys of relationships with the given type. */ private final Map> keysByType = new HashMap<>(); /** * Number of a relationships with the given type. */ private final Map degreeByType = new HashMap<>(); /** * All possible values of a property of relationships with the given type. Null is also a possible value. * Wildcard isn't a possible value for this Map, see below. */ private final Map>> valuesByTypeAndKey = new HashMap<>(); /** * Maximum possible number of different values of a property of relationships with the given type, judged * by looking at wildcards in the cached counts. For example, if there is a cached count for relationship * FRIEND_OF#OUTGOING#TIMESTAMP#_ANY_ = 5, then 5 different timestamp values are assumed. */ private final Map> wildcardsByTypeAndKey = new HashMap<>(); /** * {@inheritDoc} */ @Override public List produceGeneralizations(Map cachedCounts) { populateKeysAndDegree(cachedCounts); populateValuesAndWildcards(cachedCounts); List result = new LinkedList<>(); for (ScoredCompactibleRelationship scoredGeneralization : produceScoredGeneralizations(cachedCounts)) { result.add(scoredGeneralization.getCompactibleRelationship()); } return result; } /** * Pass through the cached counts and populate {@link #keysByTypeAndDirection} and {@link #keysByType}. * * @param cachedCounts to analyze. 
*/ private void populateKeysAndDegree(Map cachedCounts) { for (Map.Entry cachedCountEntry : cachedCounts.entrySet()) { CompactibleRelationship cachedCount = cachedCountEntry.getKey(); HasTypeAndDirection typeAndDirection = new TypeAndDirection(cachedCount); if (!keysByTypeAndDirection.containsKey(typeAndDirection)) { keysByTypeAndDirection.put(typeAndDirection, new HashSet()); } keysByTypeAndDirection.get(typeAndDirection).addAll(cachedCount.getProperties().keySet()); HasType type = new Type(cachedCount); if (!keysByType.containsKey(type)) { keysByType.put(type, new HashSet()); } keysByType.get(type).addAll(cachedCount.getProperties().keySet()); if (!degreeByType.containsKey(type)) { degreeByType.put(type, 0); } degreeByType.put(type, degreeByType.get(type) + cachedCountEntry.getValue()); } } /** * Pass through the cached counts and populate {@link #valuesByTypeAndKey} and {@link #wildcardsByTypeAndKey}. * * @param cachedCounts to analyze. */ private void populateValuesAndWildcards(Map cachedCounts) { for (Map.Entry cachedCountEntry : cachedCounts.entrySet()) { CompactibleRelationship cachedCount = cachedCountEntry.getKey(); Type cachedCountType = new Type(cachedCount); if (!valuesByTypeAndKey.containsKey(cachedCountType)) { valuesByTypeAndKey.put(cachedCountType, new HashMap>()); } if (!wildcardsByTypeAndKey.containsKey(cachedCountType)) { wildcardsByTypeAndKey.put(cachedCountType, new HashMap()); } for (String key : keysByType.get(cachedCountType)) { Map> valuesByKey = valuesByTypeAndKey.get(cachedCountType); Map wildcardsByKey = wildcardsByTypeAndKey.get(cachedCountType); if (!valuesByKey.containsKey(key)) { valuesByKey.put(key, new HashSet()); } if (!wildcardsByKey.containsKey(key)) { wildcardsByKey.put(key, 0); } if (!cachedCount.getProperties().containsKey(key)) { valuesByKey.get(key).add(null); continue; } if (cachedCount.getProperties().get(key).equals(CompactiblePropertiesImpl.ANY_VALUE)) { wildcardsByKey.put(key, wildcardsByKey.get(key) + 
cachedCountEntry.getValue()); continue; } valuesByKey.get(key).add(cachedCount.getProperties().get(key)); } } } /** * Produce all generalizations of the cached counts and score them. * * @param cachedCounts to generalize. * @return scored generalizations sorted by descending score. */ private Set produceScoredGeneralizations(Map cachedCounts) { Set result = new TreeSet<>(); //Generate all possible generalizations Set generalizations = new HashSet<>(); for (CompactibleRelationship cached : cachedCounts.keySet()) { generalizations.addAll(cached.generateAllMoreGeneral(keysByTypeAndDirection.get(new TypeAndDirection(cached)))); } for (CompactibleRelationship generalization : generalizations) { double score = 1.0; HasType type = new Type(generalization); Map wildcardsByKey = wildcardsByTypeAndKey.get(type); Map> valuesByKey = valuesByTypeAndKey.get(type); Collection keys = keysByType.get(type); for (String key : keys) { if (!generalization.getProperties().containsKey(key) || !CompactiblePropertiesImpl.ANY_VALUE.equals(generalization.getProperties().get(key))) { score = score + 1; } else { int totalCount = degreeByType.get(type); int noWildcardValues = wildcardsByKey.get(key); //maximum number of values "contained" by the wildcard int noConcreteValues = valuesByKey.get(key).size(); score = score + (((double) (noWildcardValues + noConcreteValues)) / (totalCount + 1)); } } score = score / (keys.size() + 1); result.add(new ScoredCompactibleRelationship(generalization, score)); } return result; } /** * Encapsulation of a {@link CompactibleRelationship} (a generalization) and its score. Comparable so that objects * with the highest score come first. 
*/ private class ScoredCompactibleRelationship implements Comparable { private final CompactibleRelationship compactibleRelationship; private final double score; private ScoredCompactibleRelationship(CompactibleRelationship compactibleRelationship, double score) { this.compactibleRelationship = compactibleRelationship; this.score = score; } public CompactibleRelationship getCompactibleRelationship() { return compactibleRelationship; } @Override public int compareTo(ScoredCompactibleRelationship o) { if (score > o.score) { return -1; } if (score < o.score) { return 1; } return compactibleRelationship.compareTo(o.compactibleRelationship); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy