All downloads are free. The search and download functionality uses the official Maven repository.

org.gridgain.grid.ggfs.GridGgfsGroupDataBlocksKeyMapper Maven / Gradle / Ivy

/* 
 Copyright (C) GridGain Systems. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */

/*  _________        _____ __________________        _____
 *  __  ____/___________(_)______  /__  ____/______ ____(_)_______
 *  _  / __  __  ___/__  / _  __  / _  / __  _  __ `/__  / __  __ \
 *  / /_/ /  _  /    _  /  / /_/ /  / /_/ /  / /_/ / _  /  _  / / /
 *  \____/   /_/     /_/   \_,__/   \____/   \__,_/  /_/   /_/ /_/
 */

package org.gridgain.grid.ggfs;

import org.gridgain.grid.kernal.processors.cache.*;
import org.gridgain.grid.kernal.processors.ggfs.*;
import org.gridgain.grid.util.typedef.internal.*;

/**
 * Affinity key mapper for the {@code GGFS} data cache that groups a file's data blocks together
 * on one node. All blocks within the same group are guaranteed to be cached together on the same
 * node. The group size parameter controls how many sequential blocks fall into one group.
 * <p>
 * For example, if block size is {@code 64kb} and group size is {@code 256}, then each group will
 * contain {@code 64kb * 256 = 16Mb}. Larger group sizes reduce the number of splits required to
 * run map-reduce tasks, but increase the inequality of data size stored on different nodes.
 * <p>
 * Note that the {@link #groupSize()} parameter must correlate to the Hadoop split size parameter
 * defined in Hadoop via the {@code mapred.max.split.size} property. Ideally all blocks accessed
 * within one split are mapped to {@code 1} group, so they can be located on the same grid node.
 * For example, the default Hadoop split size is {@code 64mb} and the default {@code GGFS} block
 * size is {@code 64kb}. To make sure that each split goes only through blocks on the same node
 * (without hopping between nodes over the network), the {@link #groupSize()} value has to be
 * equal to {@code 64mb / 64kb = 1024}.
 * <p>
 * It is required for the {@code GGFS} data cache to be configured with this mapper. Here is an
 * example of how it can be specified in XML configuration:
 * <pre>
 * &lt;bean id="cacheCfgBase" class="org.gridgain.grid.cache.GridCacheConfiguration" abstract="true"&gt;
 *     ...
 *     &lt;property name="affinityMapper"&gt;
 *         &lt;bean class="org.gridgain.grid.ggfs.GridGgfsGroupDataBlocksKeyMapper"&gt;
 *             &lt;!-- How many sequential blocks will be stored on the same node. --&gt;
 *             &lt;constructor-arg value="512"/&gt;
 *         &lt;/bean&gt;
 *     &lt;/property&gt;
 *     ...
 * &lt;/bean&gt;
 * </pre>
 */
public class GridGgfsGroupDataBlocksKeyMapper extends GridCacheDefaultAffinityKeyMapper {
    /** Serialization version. */
    private static final long serialVersionUID = 0L;

    /** Number of sequential blocks that form one group. */
    private final int grpSize;

    /**
     * Creates an affinity mapper that assigns the same affinity key to every block of a group.
     *
     * @param grpSize Size of the group in blocks; checked to be at least {@code 1}.
     */
    public GridGgfsGroupDataBlocksKeyMapper(int grpSize) {
        A.ensure(grpSize >= 1, "grpSize >= 1");

        this.grpSize = grpSize;
    }

    /**
     * Resolves the affinity key for a cache key.
     * <p>
     * Keys whose class is exactly {@code GridGgfsBlockKey} are handled here: an affinity key
     * already present on the block key is returned as is; otherwise the key is derived from the
     * file ID hash code and the index of the group the block belongs to. Every other key
     * (including {@code null}) is passed to the parent mapper.
     *
     * @param key Cache key to map.
     * @return Affinity key for the given cache key.
     */
    @Override public Object affinityKey(Object key) {
        // Exact class comparison on purpose: anything that is not precisely a block key
        // is delegated to the default mapping.
        if (key == null || !GridGgfsBlockKey.class.equals(key.getClass()))
            return super.affinityKey(key);

        GridGgfsBlockKey blockKey = (GridGgfsBlockKey)key;

        // An affinity key carried by the block key takes precedence over grouping.
        if (blockKey.affinityKey() != null)
            return blockKey.affinityKey();

        // Integer division: all blocks with IDs in the same grpSize-wide range share one group.
        long grpId = blockKey.getBlockId() / grpSize;

        int fileHash = blockKey.getFileId().hashCode();

        // Fold the 64-bit group index into 32 bits.
        int grpHash = (int)(grpId ^ (grpId >>> 32));

        return fileHash + grpHash;
    }

    /**
     * Gets the number of sequential blocks mapped to the same affinity key.
     *
     * @return Size of the group.
     */
    public int groupSize() {
        return grpSize;
    }

    /** {@inheritDoc} */
    @Override public String toString() {
        return S.toString(GridGgfsGroupDataBlocksKeyMapper.class, this);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy