com.inin.analytics.elasticsearch.index.routing.ElasticsearchRoutingStrategyV1 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch-lambda Show documentation
Show all versions of elasticsearch-lambda Show documentation
Framework For Lambda Architecture on Elasticsearch
package com.inin.analytics.elasticsearch.index.routing;
import java.util.HashMap;
import java.util.Map;
import com.google.common.base.Preconditions;
import com.inin.analytics.elasticsearch.index.rotation.ElasticSearchIndexMetadata;
import com.inin.analytics.elasticsearch.util.MurmurHash;
/**
* This is a routing strategy for elasticsearch. Read up on what routing does here
* http://www.elasticsearch.org/blog/customizing-your-document-routing/
*
* Perhaps you have 10 shards per index and you don't wish to query every shard
* every time you do a search against an org. A simple sharding strategy would
* put all the data for 1 org on 1 shard using consistent hashing on orgId. However that
* has the potential to hotspot some shards if an org with a lot of data comes
* through.
*
* This attempts to alleviate that by making the size of the per-org subset of shards
* configurable. E.g. with numShards = 10 and numShardsPerOrg = 3, each of an org's documents
* is routed to one of 3 shards. Which one of the 3 is determined by hashing the conversationId.
*
* Note: DO NOT CHANGE THIS CLASS. It's immutable once it's been used to generate ES indexes
* so changing it affects data routing and will make data appear unavailable because it's looking
* in the wrong shard. The correct thing to do is to make a newer version of this class,
* say ElasticsearchRoutingStrategyV2 and see to it that the hadoop jobs to rebuild
* the ES indexes not only use it, but update zookeeper with which implementation
* indexes were built with. That way you can evolve the routing strategy without breaking
* anything.
*
* @author drew
*
*/
public class ElasticsearchRoutingStrategyV1 implements ElasticsearchRoutingStrategy, java.io.Serializable {
    private static final long serialVersionUID = 1L;

    /** Number of consecutive shards (wrapping around) that one org's documents are spread over. */
    private int numShardsPerOrg;

    /** Total number of shards in the index. */
    private int numShards;

    /**
     * Shard number -> the smallest non-negative integer whose decimal string
     * {@link #hash(String)}es to that shard. Rebuilt by {@link #init()}.
     */
    private Map<Integer, Integer> shardToRout = new HashMap<>();

    public ElasticsearchRoutingStrategyV1() {
    }

    /**
     * Adapted from DjbHashFunction & PlainOperationRouting in Elasticsearch. This is the default hashing
     * algorithm for doc routing. We need this to reverse engineer routing strings that rout to
     * the shard we want.
     *
     * @param value the routing string to hash
     * @return the shard number in {@code [0, numShards)} that {@code value} hashes to
     * @throws ArithmeticException if {@code numShards} is 0 (strategy not configured)
     */
    public int hash(String value) {
        long hash = 5381;
        for (int i = 0; i < value.length(); i++) {
            hash = ((hash << 5) + hash) + value.charAt(i);
        }
        // The cast binds tighter than %, so the hash is truncated to int BEFORE the modulo.
        // The remainder's magnitude is always < |numShards|, so Math.abs cannot overflow here.
        return Math.abs((int) hash % numShards);
    }

    /**
     * Builds the shard -> routing value table. Candidate routing values 0, 1, 2, ... are hashed
     * with {@link #hash(String)} and the first candidate that lands on each shard is recorded,
     * until every one of the {@code numShards} shards has a routing value.
     *
     * The table is cleared first so that re-configuring with a different shard count does not
     * leave entries computed against the previous shard count.
     */
    public void init() {
        shardToRout.clear();
        int candidate = 0;
        while (shardToRout.size() < numShards) {
            Integer shard = hash(Integer.toString(candidate));
            // Key by the shard the candidate hashes to (not by the candidate itself); otherwise
            // the table would map routing value -> shard, which is the inverse of what the
            // lookups in getRoutingHash / getPossibleRoutingHashes expect.
            if (!shardToRout.containsKey(shard)) {
                shardToRout.put(shard, candidate);
            }
            candidate++;
        }
    }

    /**
     * Reads the shard counts from the index metadata and rebuilds the routing table.
     *
     * @param rotatedIndexMetadata metadata supplying numShards and numShardsPerOrg; both must be non-null
     */
    @Override
    public void configure(ElasticSearchIndexMetadata rotatedIndexMetadata) {
        Preconditions.checkNotNull(rotatedIndexMetadata.getNumShardsPerOrg(), "Num shards per org must not be null with " + this.getClass().getSimpleName());
        Preconditions.checkNotNull(rotatedIndexMetadata.getNumShards(), "Num shards must not be null with " + this.getClass().getSimpleName());
        this.numShardsPerOrg = rotatedIndexMetadata.getNumShardsPerOrg();
        this.numShards = rotatedIndexMetadata.getNumShards();
        init();
    }

    @Override
    public int getNumShardsPerOrg() {
        return numShardsPerOrg;
    }

    @Override
    public int getNumShards() {
        return numShards;
    }

    /**
     * @return the live (not copied) shard -> routing value table
     */
    public Map<Integer, Integer> getShardToRout() {
        return shardToRout;
    }

    /**
     * For an orgId & convId, get the routing value for a document.
     *
     * The org hash picks a starting shard, the conversation hash picks an offset in
     * {@code [0, numShardsPerOrg)}, and the sum wraps around {@code numShards}. The value
     * returned is not the shard number itself but the routing string recorded for that shard
     * by {@link #init()}, i.e. a string that {@link #hash(String)} maps onto the shard.
     *
     * @param orgId organization id; selects the first shard of the org's shard subset
     * @param convId conversation id; selects which shard of the subset is used
     * @return the routing string for the selected shard
     * @throws IllegalArgumentException if numShards < numShardsPerOrg
     * @throws IllegalStateException if the routing table has no entry for the shard (not configured)
     */
    @Override
    public String getRoutingHash(String orgId, String convId) {
        Preconditions.checkArgument(numShards >= numShardsPerOrg, "Misconfigured, numShards must be >= numShardsPerOrg");
        int orgIdHash = getOrgIdHash(orgId, numShards);
        // NOTE(review): getBytes() uses the platform default charset, so non-ASCII ids could hash
        // differently on differently-configured JVMs. Left as-is to keep existing routing stable.
        int convIdHash = nonNegativeMod(MurmurHash.getInstance().hash(convId.getBytes(), 0), numShardsPerOrg);
        return routingValueFor(orgIdHash + convIdHash);
    }

    /**
     * When searching data for an Org, you may desire to only search the shards
     * which hold data for that Org. This gives you the list of possible routing values.
     *
     * @param orgId organization id
     * @return {@code numShardsPerOrg} routing strings, one per shard in the org's subset,
     *         starting at the org's first shard
     * @throws IllegalStateException if the routing table has no entry for a shard (not configured)
     */
    @Override
    public String[] getPossibleRoutingHashes(String orgId) {
        int orgIdHash = getOrgIdHash(orgId, numShards);
        String[] possibleShards = new String[numShardsPerOrg];
        for (int offset = 0; offset < numShardsPerOrg; offset++) {
            possibleShards[offset] = routingValueFor(orgIdHash + offset);
        }
        return possibleShards;
    }

    /** Hashes the org id onto a starting shard in {@code [0, numShards)}. */
    private int getOrgIdHash(String orgId, int numShards) {
        // NOTE(review): platform default charset, see getRoutingHash.
        return nonNegativeMod(MurmurHash.getInstance().hash(orgId.getBytes(), 0), numShards);
    }

    /**
     * Computes {@code Math.abs(hash) % modulus}, guarding the one input for which that expression
     * is negative: {@code Math.abs(Integer.MIN_VALUE)} is still {@code Integer.MIN_VALUE}.
     * Math.floorMod is deliberately NOT used; it yields different buckets for negative hashes
     * and would re-route existing data.
     */
    private static int nonNegativeMod(int hash, int modulus) {
        int bucket = Math.abs(hash) % modulus;
        return bucket < 0 ? -bucket : bucket;
    }

    /** Wraps a non-negative shard index around {@code numShards} and returns its routing string. */
    private String routingValueFor(int shardIndex) {
        int shard = shardIndex % numShards;
        Integer routingValue = shardToRout.get(shard);
        Preconditions.checkState(routingValue != null, "No routing value for shard " + shard + "; configure() must be called first");
        return routingValue.toString();
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + numShards;
        result = prime * result + numShardsPerOrg;
        return result;
    }

    /**
     * Two strategies are equal when they are of the same class and have the same shard counts;
     * the routing table is derived from those counts and is not compared.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        ElasticsearchRoutingStrategyV1 other = (ElasticsearchRoutingStrategyV1) obj;
        return numShards == other.numShards && numShardsPerOrg == other.numShardsPerOrg;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy