All downloads are free. The search and download functionality uses the official Maven repository.

com.aliyun.odps.graph.RemoveDuplicatesLoadingResolver Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.graph;

import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import com.aliyun.odps.io.Writable;
import com.aliyun.odps.io.WritableComparable;

/**
 * RemoveDuplicatesLoadingResolver 是解决 {@link GraphLoader} 载入图数据时引入的点冲突的一种实现.
 *
 * 

* 在图载入阶段,用户可以调用 {@link MutationContext} 的接口向图中添加、删除点或边,由此引入的冲突默认由此类解决, 用户也可以通过 * {@link GraphJob} 提供的 * {@linkplain JobConf#setLoadingVertexResolverClass(Class) * setLoadingVertexResolverClass} 方法指定自己的实现。 *

* *

* 对于同一个点ID,RemoveDuplicatesLoadingResolver 解决该点在图载入阶段的冲突是按照以下顺序进行的: *

    *
  1. 解决 {@linkplain MutationContext#addVertexRequest(Vertex) addVertexRequest} * 引起的冲突: 添加点时,选择第一个添加的点。 *
  2. 解决 {@linkplain MutationContext#addEdgeRequest(WritableComparable, Edge) * addEdgeRequest} 引起的冲突: 添加边时,首先删除点中已有的重复边(终点相同),然后添加不重复的边。 *
  3. * 忽略 * {@linkplain MutationContext#removeEdgeRequest(WritableComparable, WritableComparable) * removeEdgeRequest} 以及 {@linkplain MutationContext#removeVertexRequest(WritableComparable) * removeVertexRequest}。 *
*

* * @param * Vertex ID 类型 * @param * Vertex Value 类型 * @param * Edge Value 类型 * @param * Message 类型 * @see JobConf#setLoadingVertexResolverClass(Class) */ @SuppressWarnings("rawtypes") public class RemoveDuplicatesLoadingResolver extends LoadingVertexResolver { /** * 提供图载入时的一种去重冲突处理方法. * * *

* 首先处理添加点请求,然后处理添加边的请求,详细处理规则见:{@linkplain RemoveDuplicatesLoadingResolver * 本类说明} *

* * @param vertexId * 冲突点的ID * @param vertexChanges * 关于该点的添加和删除请求 */ @Override public Vertex resolve(I vertexId, VertexChanges vertexChanges) throws IOException { /** * 1. If creation of vertex desired, pick first vertex. */ Vertex vertex = addVertexIfDesired(vertexId, vertexChanges); if (vertex != null) { /** 2. If edge addition, add the unique edges */ addEdges(vertexId, vertex, vertexChanges); } else { System.err.println("Ignore all addEdgeRequests for vertex#" + vertexId); } return vertex; } /** * 图载入阶段,处理添加点的请求. * * @param vertexId * 请求添加的点的ID * @param vertexChanges * 包含请求添加的点 * @return 第一个请求添加的点,或者没有请求添加点时,返回null */ protected Vertex addVertexIfDesired(I vertexId, VertexChanges vertexChanges) { Vertex vertex = null; if (hasVertexAdditions(vertexChanges)) { vertex = vertexChanges.getAddedVertexList().get(0); } return vertex; } /** * 图载入阶段,处理添加边的请求. * * @param vertexId * 请求添加的边所在的点的ID * @param vertex * 请求点的边所在的点 * @param vertexChanges * 包含请求添加的边 * @throws IOException * 去除点本身拥有的重复边,以及请求添加的边中的重复边 */ protected void addEdges(I vertexId, Vertex vertex, VertexChanges vertexChanges) throws IOException { // I. Remove duplicate edges from vertex's edge list. Set destVertexId = new HashSet(); if (vertex.hasEdges()) { List> edgeList = vertex.getEdges(); for (Iterator> edges = edgeList.iterator(); edges.hasNext(); ) { Edge edge = edges.next(); if (destVertexId.contains(edge.getDestVertexId())) { edges.remove(); } else { destVertexId.add(edge.getDestVertexId()); } } } if (hasEdgeAdditions(vertexChanges)) { // II. 
Ignore duplicate edge request for (Edge edge : vertexChanges.getAddedEdgeList()) { if (destVertexId.contains(edge.getDestVertexId())) { continue; } destVertexId.add(edge.getDestVertexId()); vertex.addEdge(edge.getDestVertexId(), edge.getValue()); } } } /** * 检查是否存在添加点的请求。 * * @param changes * 待检查的点变化的集合 * @return 集合中包含添加点的请求,返回true,否则返回false */ protected boolean hasVertexAdditions(VertexChanges changes) { return changes != null && changes.getAddedVertexList() != null && !changes.getAddedVertexList().isEmpty(); } /** * 检查是否存在添加边的请求。 * * @param changes * 待检查的点变化的集合 * @return 集合中包含添加边的请求,则返回true,否则返回false */ protected boolean hasEdgeAdditions(VertexChanges changes) { return changes != null && changes.getAddedEdgeList() != null && !changes.getAddedEdgeList().isEmpty(); } }