/*
* RQManager.java - A node of a range-queriable overlay.
*
* Copyright (c) 2015 Kota Abe / PIAX development team
*
* You can redistribute it and/or modify it under either the terms of
* the AGPLv3 or PIAX binary code license. See the file COPYING
* included in the PIAX package for more detail.
*
* $Id: Link.java 1172 2015-05-18 14:31:59Z teranisi $
*/
package org.piax.gtrans.ov.ring.rq;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;
import org.piax.ayame.ov.rq.DKRangeRValue;
import org.piax.common.DdllKey;
import org.piax.common.Endpoint;
import org.piax.common.Id;
import org.piax.common.ObjectId;
import org.piax.common.PeerId;
import org.piax.common.TransportId;
import org.piax.common.subspace.Range;
import org.piax.gtrans.ChannelTransport;
import org.piax.gtrans.IdConflictException;
import org.piax.gtrans.RPCException;
import org.piax.gtrans.RemoteValue;
import org.piax.gtrans.TransOptions;
import org.piax.gtrans.TransOptions.DeliveryMode;
import org.piax.gtrans.TransOptions.ResponseType;
import org.piax.gtrans.impl.NestedMessage;
import org.piax.gtrans.ov.Link;
import org.piax.gtrans.ov.ddll.Node;
import org.piax.gtrans.ov.ddll.Node.InsertPoint;
import org.piax.gtrans.ov.ddll.Node.Mode;
import org.piax.gtrans.ov.ddll.NodeManagerIf;
import org.piax.gtrans.ov.ring.NoSuchKeyException;
import org.piax.gtrans.ov.ring.RingManager;
import org.piax.gtrans.ov.ring.RingVNode;
import org.piax.gtrans.ov.ring.RingVNode.VNodeMode;
import org.piax.gtrans.ov.ring.TemporaryIOException;
import org.piax.gtrans.ov.ring.UnavailableException;
import org.piax.gtrans.ov.sg.SkipGraph;
import org.piax.util.StrictMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/*
* - Usual Case
*
* +------------------------+
* | RQMessage (received) |
* +------------------------+
* |rqRet ^
* v |parentMsg
* FrameWork +------------------------+[rqDisseminate]
* +--------+ | RQReturn |
* |msgStore| +------------------------+
* | | |childMsgs* ^
* | | v |rqRet
* | | id* +------------------------+[RQMessage#newChildInstance]
* | |------->| RQMessage (to child) |
* +--------+ +------------------------+
*
*
* - Fast Retransmission
*
* In fast retransmission, rqDisseminate() is called with the RQMessage
* (to child) whose ACK has timed out.  The retransmitted RQMessage is
* added to childMsgs of the RQReturn.
* Because a response to the first RQMessage may still arrive after the
* retransmission, RQMessage (to child 1) is not removed here.
* It is removed when RQReturn#dispose is called.
*
* +------------------------+
* | RQMessage (received) |
* +------------------------+
* |rqRet ^
* v |parentMsg
* FrameWork +------------------------+
* +--------+ | RQReturn | [rqDisseminate]
* |msgStore| +------------------------+
* | | |childMsgs ^ ^
* | | | | |rqRet
* | | id* | | +---------------------+[RQMessage#newChildInstance]
* | |----------)|(--------)|(>|RQMessage(to child 1)| (timed-out instance)
* | | | | +---------------------+
* | | v |rqRet
* | | id* +----------------------+
* | |------->|RQMessage (to child 2)| (retransmit instance)
* +--------+ +----------------------+
*
* When the query expires, RQReturn#dispose is called, and at that point the
* RQMessages are removed from childMsgs.
*
*
* - Slow Retransmission Case
*
* +------------------------+
* | RQMessage (received) |
* +------------------------+
* |rqRet ^
* | | +-------------------+ [RQReturn#retransmit()]
* | | |RQMessage (retrans)|
* | | +-------------------+
* | | |rqRet
* v |parentMsg v
* FrameWork +------------------------+
* +--------+ | RQReturn | [rqDisseminate]
* |msgStore| +------------------------+
* | | |childMsgs* ^
* | | v |rqRet
* | | id* +------------------------+ [RQMessage#newChildInstance]
* | |------->| RQMessage (to child) |
* +--------+ +------------------------+
*
* In RQMessage (retrans), only the gap portions are specified in subranges.
*/
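/*
 * A minimal sketch of the dissemination flow in the figures above
 * (illustrative pseudo-Java; splitByDelegate() and linkOf() are
 * hypothetical helpers, not part of this class):
 *
 *   void disseminate(RQMessage msg) {
 *       RQReturn ret = msg.rqRet;                        // collects results
 *       for (List<SubRange> sub : splitByDelegate(msg.subRanges)) {
 *           RQMessage child = msg.newChildInstance(sub); // child.parentMsg == msg
 *           ret.sendChildMessage(linkOf(sub), child);    // tracked in ret.childMsgs
 *       }
 *   }
 */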
/**
* This class adds range query functionality to the Ring network.
*
* @param <E> the type of Endpoint in the underlying network.
*
*/
public class RQManager<E extends Endpoint> extends RingManager<E> implements
RQIf<E> {
/*--- logger ---*/
static final Logger logger = LoggerFactory.getLogger(RQManager.class);
/*
* every QID_EXPIRATION_TASK_PERIOD, all QueryId entries older than
* QID_EXPIRE milliseconds are removed.
*/
/** expiration time for purging stale QueryIDs */
public static int QID_EXPIRE = 120 * 1000; // 2min
/** period for executing a task for purging stale QueryIDs */
public static int QID_EXPIRATION_TASK_PERIOD = 20 * 1000; // 20sec
/**
* the number of recent nodes that a range query traversal can back-track
* to when a communication error occurs
*/
public static int RQ_NRECENT = 10;
/** the period for flushing partial results in intermediate nodes */
public static int RQ_FLUSH_PERIOD = 2000;
/** additional grace time before removing RQReturn in intermediate nodes */
public static int RQ_EXPIRATION_GRACE = 5 * 1000;
/** range query retransmission period */
public static int RQ_RETRANS_PERIOD = 10 * 1000;
/** used as the query string for finding insert points.
* see {@link SkipGraph#find(Endpoint, DdllKey, boolean)} */
public final static String QUERY_INSERT_POINT_SPECIAL =
"*InsertPointSpecial*";
public final static String QUERY_KEY_SPECIAL = "*QueryKeySpecial*";
public final static ObjectId RQ_QUERY_AT_FIND = new ObjectId("*QueryAtFind*");
/** timeout for {@link #find(Endpoint, DdllKey, boolean, Object, TransOptions)} */
public static int FIND_INSERT_POINT_TIMEOUT = 30 * 1000;
/** pseudo PeerID used by {@link #rqDisseminate(RQMessage, NavigableMap)} */
protected final static PeerId FIXPEERID = PeerId.PLUS_INFINITY;
/** pseudo Link instance that represents the link should be fixed */
public/*protected*/final Link FIXLEFT;
public final static boolean NEWALGORITHM = true;
protected final RQExecQueryCallback execQueryCallback;
// the algorithm used for locating insertion positions
private RQAlgorithm stdRQAlgo;
public RQManager(TransportId transId, ChannelTransport<E> trans,
RQExecQueryCallback execQueryCallback)
throws IdConflictException, IOException {
super(transId, trans);
this.execQueryCallback = execQueryCallback;
FIXLEFT = new Link(myLocator, new DdllKey(0, FIXPEERID));
schedule(new PurgeTask(),
(long) (Math.random() * QID_EXPIRATION_TASK_PERIOD),
QID_EXPIRATION_TASK_PERIOD);
}
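/*
 * Construction sketch (illustrative; how the transport and the callback
 * are obtained is application-specific and not shown in this file):
 *
 *   ChannelTransport<E> trans = ...;   // lower-layer transport
 *   RQExecQueryCallback cb = ...;      // executes queries on local keys
 *   RQManager<E> rq =
 *       new RQManager<E>(new TransportId("rq"), trans, cb);
 *   // a subclass is expected to select the range query algorithm by
 *   // calling setRQAlgorithm() before queries are issued.
 */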
protected boolean preferDelegateNodeLeftSide() {
return true;
}
@Override
public RQVNode<E> getVNode(Comparable<?> rawkey) {
return (RQVNode<E>) keyHash.get(rawkey);
}
protected void setRQAlgorithm(RQAlgorithm algo) {
this.stdRQAlgo = algo;
}
@SuppressWarnings("unchecked")
@Override
public RQIf<E> getStub(E addr, int rpcTimeout) {
return (RQIf<E>) super.getStub(addr, rpcTimeout);
}
@SuppressWarnings("unchecked")
@Override
public RQIf<E> getStub(Endpoint dst) {
return (RQIf<E>) super.getStub(dst);
}
/**
* find a location to insert `key'.
*
* @param introducer
* the node to communicate with.
* @param key
* the query key
* @param query the object for query.
* @param opts the transport options.
* @return the insertion point for `key'
* @throws UnavailableException
*             no key is registered at the local node or at the seed
* @throws IOException
* communication error
*/
@Override
public InsertPoint findImmedNeighbors(E introducer, DdllKey key, Object query, TransOptions opts)
throws UnavailableException, IOException {
logger.debug("introducer={}, key={}", introducer, key);
NavigableMap<DdllKey, Link> links;
if (introducer == null) {
// use the local routing table
rtLockR();
links = getAvailableLinks();
rtUnlockR();
if (links.size() == 0) {
return null;
//throw new UnavailableException(
// "no key is available at local node");
}
} else { // ask the introducer
RQIf<E> stub = getStub(introducer);
if (!NEWALGORITHM) {
Link[] remoteLinks;
try {
remoteLinks = stub.getLocalLinks();
} catch (RPCException e) {
logger.debug("", e);
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new IOException(e.getCause());
}
if (remoteLinks.length == 0) {
throw new UnavailableException(
"no key is available at remote node: " + introducer);
}
logger.debug("find: remoteLinks = {}",
Arrays.toString(remoteLinks));
links = new ConcurrentSkipListMap<DdllKey, Link>();
links.put(remoteLinks[0].key, remoteLinks[0]);
} else { // NEWALGORITHM
InsertPoint ip;
try {
ip = stub.findImmedNeighbors(null, key, query, opts);
} catch (RPCException e) {
logger.debug("", e);
if (e.getCause() instanceof IOException) {
throw (IOException) e.getCause();
}
throw new IOException(e.getCause());
}
return ip;
}
}
SubRange range = new SubRange(key, true, key, true);
TransOptions newOpts;
if (opts == null) {
newOpts = new TransOptions(FIND_INSERT_POINT_TIMEOUT, ResponseType.DIRECT);
}
else {
newOpts = opts;
}
long timeout = newOpts.getTimeout();
RQReturn rqRet =
rqStartKeyRange(Collections.<SubRange> singleton(range),
query == null ? QUERY_INSERT_POINT_SPECIAL : query,
newOpts,
RQ_RETRANS_PERIOD, links, stdRQAlgo);
logger.debug("find: waiting {}", rqRet);
try {
Collection<RemoteValue<?>> col = rqRet.get(timeout);
// the case where the key has been inserted
logger.debug("find: col = {}", col);
} catch (InterruptedException e) {
throw new IOException("range query timeout");
}
logger.debug("rqRet = {}", rqRet);
// ideally, insp should be obtained from col, but col carries no aux
// information, so we have no choice but to take it directly from rqRet.rvals.
for (DKRangeRValue<?> kr : rqRet.rvals.values()) {
if (kr.getRemoteValue().getOption() != null) {
InsertPoint insp = (InsertPoint) kr.getRemoteValue().getOption();
logger.debug("find: insert point = {}, {}", insp, kr);
return insp;
}
}
if (TransOptions.responseType(opts) == ResponseType.NO_RESPONSE) {
return null;
}
/*
* the insert point could not be obtained within FIND_INSERT_POINT_TIMEOUT.
* this may happen, e.g., while the skip graph is under repair.
* a TemporaryIOException is thrown so that the caller can retry.
*/
throw new TemporaryIOException("could not find insert point @" + getEndpoint() + " for " + key);
}
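/*
 * Usage sketch (illustrative; `mgr' and `seed' are assumptions):
 *
 *   DdllKey key = ...;   // the key to be inserted
 *   InsertPoint ip = mgr.findImmedNeighbors(seed, key, null, null);
 *   // ip identifies the neighbors between which `key' should be inserted.
 *   // a TemporaryIOException signals a transient failure (e.g., the
 *   // skip graph is under repair) and callers are expected to retry.
 */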
// professional version
public RQResults scalableRangeQueryPro(
Collection<? extends Range<?>> ranges, Object query, TransOptions opts) {
if (ranges.size() == 0) {
return new RQResults();
}
RQReturn rqRet =
rqStartRawRange(ranges, query, opts, RQ_RETRANS_PERIOD, null, stdRQAlgo);
return rqRet.results;
}
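/*
 * Usage sketch (illustrative; assumes Range offers a
 * (from, fromInclusive, to, toInclusive) constructor and that `mgr' and
 * `query' are defined elsewhere):
 *
 *   Collection<Range<Integer>> ranges =
 *       Collections.singleton(new Range<Integer>(30, true, 70, true));
 *   RQResults results =
 *       mgr.scalableRangeQueryPro(ranges, query, new TransOptions());
 *   // results gives access to the values collected from the nodes
 *   // whose keys fall within [30, 70].
 */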
/**
* perform a range query (internal).
*
* @param ranges
* ranges for the range query
* @param query
* the query object
* @param opts
* transmission option
* @param retransPeriod
* slow retransmission period (in msec)
* @param allLinks
* all links to split the ranges.
* @param rqAlgo
* range query algorithm
* @return RQReturn
*/
protected RQReturn rqStartRawRange(Collection<? extends Range<?>> ranges,
Object query, TransOptions opts, int retransPeriod,
NavigableMap<DdllKey, Link> allLinks,
RQAlgorithm rqAlgo) {
// convert ranges of Comparable<?> into ranges of DdllKey.
Collection<SubRange> subRanges = new ArrayList<SubRange>();
for (Range<? extends Comparable<?>> range : ranges) {
SubRange keyRange = convertToSubRange(range);
keyRange.assignId(); // root id
subRanges.add(keyRange);
}
return rqStartKeyRange(subRanges, query, opts, retransPeriod,
allLinks, rqAlgo);
}
public static SubRange convertToSubRange(
Range<? extends Comparable<?>> range) {
SubRange keyRange =
new SubRange(
new DdllKey(range.from, range.fromInclusive
? PeerId.MINUS_INFINITY : PeerId.PLUS_INFINITY),
true,
new DdllKey(range.to, range.toInclusive
? PeerId.PLUS_INFINITY : PeerId.MINUS_INFINITY),
false);
return keyRange;
}
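/*
 * Example: a raw range [30, 70] (both ends inclusive) is converted into
 * the DdllKey range [(30, -infinity), (70, +infinity)).  Using the
 * infinity PeerIds as tie-breakers turns endpoint inclusiveness into
 * half-open DdllKey bounds that cover the keys (30, p) .. (70, q) of
 * every peer p and q.
 */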
private RQReturn rqStartKeyRange(Collection<SubRange> ranges, Object query,
TransOptions opts, int retransPeriod,
NavigableMap<DdllKey, Link> allLinks, RQAlgorithm rqAlgo) {
QueryId qid = new QueryId(peerId, rand.nextLong());
if (opts == null) {
opts = new TransOptions(); // use default
}
RQMessage msg = rqAlgo.newRQMessage4Root(msgframe, ranges, qid, query,
opts);
rqDisseminate(msg, allLinks);
return msg.rqRet;
}
public void rqDisseminate(RQMessage msg) {
rqDisseminate(msg, null);
}
// this method is overridden by ChordSharp
public void rqDisseminate(RQMessage msg,
NavigableMap<DdllKey, Link> allLinks) {
rtLockW();
try {
rqDisseminate0(msg, allLinks);
} finally {
rtUnlockW();
}
}
/*
* How a range query is processed:
*
* 10----------------->60------------->100
* 10--------->40----->60----->80----->100
* 10->20->30->40->50->60->70->80->90->100
*
* when node 10 issues a range query for [30, 70]:
*
* node 10:
* sends RQMessage [30, 60) to node 40 and
* RQMessage [60, 70] to node 60.
*
* node 40 (which has received [30, 60)):
* sends RQMessage [30, 40) to node 30 and
* RQMessage [50, 60) to node 50.
*
* the leftmost range needs special care.
* the range [30, 40) received by node 30 is actually interpreted as
* ((30, -infinity), (40, +infinity)).  since the subrange
* ((30, -infinity), (30, ???)) remains, the query is forwarded to the
* node on the left of (30, -infinity) (node 20 in this example).
* node 20 and its level-0 right link (node 30) together cover
* ((30, -infinity), (30, ???)), so this subrange is regarded as
* processed and discarded.
*/
private void rqDisseminate0(RQMessage msg,
NavigableMap<DdllKey, Link> allLinks) {
final String h = "rqDiss(id=" + msg.msgId + ")";
logger.debug("{}: msg = {}", h, msg);
if (allLinks == null) {
//rtLockR();
allLinks = getAvailableLinks();
//rtUnlockR();
}
if (allLinks.isEmpty()) {
// the query was forwarded to a node whose insertion has not completed.
// (this can also happen when a node is forcibly terminated and
// reinserted immediately; nodes unaware of the reinsertion may forward
// queries to it.)
// here we simply ignore such a query.
logger.warn("routing table is empty!: {}", this);
return;
}
/*
* split ranges into subranges and assign a delegate node for them.
* also aggregate each subranges by destination peerIds.
*/
StrictMap<Id, List<SubRange>> map =
new StrictMap<Id, List<SubRange>>(
new HashMap<Id, List<SubRange>>());
List<DKRangeRValue<?>> rvals =
new ArrayList<DKRangeRValue<?>>();
for (SubRange subRange : msg.subRanges) {
List<SubRange> subsubRanges =
rqSplit(msg.query, subRange, allLinks, msg.failedLinks,
rvals, msg.getRangeQueryAlgorithm());
if (subsubRanges == null) {
continue;
}
logger.debug("subsubRanges = {}", subsubRanges);
for (SubRange kr : subsubRanges) {
PeerId pid =
(kr.getLink() == FIXLEFT ? FIXPEERID
: kr.getLink().key.getPeerId());
List<SubRange> list = map.get(pid);
if (list == null) {
list = new ArrayList<SubRange>();
map.put(pid, list);
}
list.add(kr);
}
}
logger.debug("{}: aggregated: {}", h, map);
logger.debug("{}: msg = {}", h, msg.toString());
/*
* prepare RQReturn for catching results from children
*/
if (msg.rqRet == null) {
msg.rqRet = new RQReturn(this, msg, msg.opts, msg.isRoot);
}
RQReturn rqRet = msg.rqRet;
if (TransOptions.inspect(msg.opts)) {
rqRet.updateHops(msg.hops);
}
Collection<MessagePath> paths = new HashSet<MessagePath>();
/*
* send the aggregated requests to the children.
* also gather the failed ranges that should be retransmitted.
*
* ranges whose delegate node is FIXLEFT (FIXPEERID) are collected into
* failedRanges.
*/
List<Range<DdllKey>> failedRanges = new ArrayList<>();
//synchronized (rqRet) {
for (Map.Entry<Id, List<SubRange>> ent : map.entrySet()) {
Id p = ent.getKey();
if (p.equals(FIXPEERID)) {
failedRanges.addAll(ent.getValue());
} else if (!p.equals(peerId)) {
List<SubRange> subRanges = ent.getValue();
logger.debug("{}: forward {}, {}", h, p, subRanges);
RQMessage m = msg.newChildInstance(subRanges);
Link l = subRanges.get(0).getLink();
/*rqRet.childMsgs.put(l, m);
m.send(l);*/
rqRet.sendChildMessage(l, m);
if (TransOptions.inspect(msg.opts)) {
// generate MessagePath
DdllKey from = keyHash.firstEntry().getValue().getKey();
// the hop count is incremented by 1 because the MessagePath is generated on the sending side
MessagePath mp =
new MessagePath(msg.hops + 1, from, l.key,
subRanges);
logger.debug("mp={}", mp);
paths.add(mp);
}
}
}
//}
// N----------C
// N------B---C
// N-->A--B---C
// RQ [r1)[r2)
// [--r3--)
// if A and B have failed, failedRanges contains r1 and r2.
// they are merged into r3.  this merging is not mandatory.
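// e.g., r1 = [30, 40) and r2 = [40, 50) are concatenated into r3 = [30, 50).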
failedRanges = RangeUtils.concatAdjacentRanges(failedRanges);
logger.debug(h + ": merged failedRanges = " + failedRanges);
/*
* fix the local routing table
* XXX: think!
*/
if (!msg.failedLinks.isEmpty()) {
/*rtLockR();
try {
for (RingVNode<E> sgnode : keyHash.values()) {
for (Endpoint link : msg.failedLinks) {
sgnode.fixLeftLinks(link, msg.failedLinks, msg,
failedRanges);
}
}
} finally {
rtUnlockR();
}*/
}
/*
* execute range query for locally resolvable range.
*/
msg.getRangeQueryAlgorithm()
.rqExecuteLocal(msg, map.get(peerId), rvals);
logger.debug("rqDisseminate: rvals = {}", rvals);
/*
* store the results of execQuery into rqRet.
* note that addRemoteValue() may send a reply internally.
*/
//synchronized (rqRet) {
if (TransOptions.inspect(msg.opts)) {
rqRet.addMessagePaths(paths);
}
rqRet.addRemoteValues(rvals);
//}
//if ((TransOptions.responseType(msg.opts) == ResponseType.DIRECT) && !msg.isRoot) {
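// with DIRECT responses, a non-root node sends its results straight to
// the query root; with NO_RESPONSE, no results are returned at all.
// in both cases the RQReturn can be flushed and disposed of immediately
// instead of waiting for the query to expire.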
ResponseType rtype = TransOptions.responseType(msg.opts);
if (((rtype == ResponseType.DIRECT) && !msg.isRoot) ||
(rtype == ResponseType.NO_RESPONSE)) {
rqRet.flush();
rqRet.dispose();
}
logger.debug("rqDisseminate finished");
}
/**
* Split a range into subranges using the keys in allLinks, and assign an
* appropriate remote delegate node to each subrange.
*
* @param query the query object.
* @param range0 the range to be split
* @param allLinks all links to split ranges.
* @param failedLinks the failed links.
* @param rvals return values for each range.
* @param rqAlgo the algorithm for the range query.
* @return the list of subranges.
*/
protected List<SubRange> rqSplit(Object query, final SubRange range0,
final NavigableMap<DdllKey, Link> allLinks,
Collection<Endpoint> failedLinks,
List<DKRangeRValue<?>> rvals, RQAlgorithm rqAlgo) {
String h = "rqSplit";
if (failedLinks == null) {
failedLinks = Collections.emptySet();
}
/*
* the situation may have changed since failedLinks was built, so
* failedLinks may contain this node itself.
*/
for (Iterator<Endpoint> fi = failedLinks.iterator(); fi.hasNext();) {
Endpoint f = fi.next();
// remove the entry if it is this node itself
if (getEndpoint().equals(f)) {
fi.remove();
}
/*if (f.key.getUniqId().equals(new UniqId(peerId))) {
failedLinks.remove(f);
}*/
}
// for a Range [---------------], when the left end of the range falls
// between this node (N) and its right node (A) at level 0, as in
// N-----A (L0)
// the range is shrunk as follows.
// Range ...... [------------]
// Shrunk [--)
// the removed interval (Shrunk) contains no value.  to indicate this,
// a dummy null value is added to rvals.
SubRange range = range0;
rtLockR();
try {
for (RingVNode<E> node : keyHash.values()) {
if (node.getMode() != VNodeMode.INSERTED) {
continue;
}
/*Tile t = node.getTile(0);
if (t == null || t.mode != LvState.INSERTED) {
continue;
}*/
Link right = node.getSuccessor();
logger.debug("{}, node = {}, range = {}, right = {}", h, node,
range, right);
assert right != null;
if (!range.contains(node.getKey())) {
// add the information about the shrunk area to the rvals.
// this information is essential for the requesting peer to
// determine whether all results have been received.
Range<DdllKey> removed =
RangeUtils.removedRange(range, node.getKey(),
right.key);
if (removed == null) {
continue;
}
// add a dummy value for the shrunk range
RemoteValue