// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.janusgraph.diskstorage;


import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import org.janusgraph.diskstorage.configuration.BasicConfiguration;
import org.janusgraph.diskstorage.configuration.ConfigNamespace;
import org.janusgraph.diskstorage.configuration.ConfigOption;
import org.janusgraph.diskstorage.configuration.Configuration;
import org.janusgraph.diskstorage.configuration.ModifiableConfiguration;
import org.janusgraph.diskstorage.configuration.backend.CommonsConfiguration;
import org.janusgraph.diskstorage.keycolumnvalue.SliceQuery;
import org.janusgraph.diskstorage.keycolumnvalue.scan.ScanJob;
import org.janusgraph.diskstorage.keycolumnvalue.scan.ScanMetrics;
import org.janusgraph.diskstorage.util.BufferUtil;
import org.janusgraph.diskstorage.util.Hex;
import org.janusgraph.diskstorage.util.StaticArrayBuffer;
import org.janusgraph.util.system.ConfigurationUtil;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.function.Predicate;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

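/**
 * Test {@link ScanJob} that counts the keys and entries seen during a scan and reports
 * them through custom {@link ScanMetrics} counters. The slice queries and an optional
 * key filter can be supplied directly or read from the job configuration via
 * {@link #HEX_QUERIES}, {@link #KEY_FILTER_ID_MODULUS} and {@link #KEY_FILTER_ID_MODULAR_VALUE}.
 */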
public class SimpleScanJob implements ScanJob {

    public static final String TOTAL_COUNT = "total";
    public static final String KEY_COUNT = "keys";
    public static final String SETUP_COUNT = "setup";
    public static final String TEARDOWN_COUNT = "teardown";

    public static final ConfigNamespace ROOT_NS = new ConfigNamespace(null, "simplescan", "testing job");

    public static final ConfigOption<String> HEX_QUERIES =
            new ConfigOption<>(ROOT_NS, "queries", "comma-delimited, hex-encoded queries",
                    ConfigOption.Type.LOCAL, String.class);
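    // Wire format (see encodeQueries): one "start/end/limit" token per slice query, with
    // start and end hex-encoded and limit in decimal, tokens joined by ':'. A limit of -1
    // means the query is unlimited. Illustrative (hypothetical) value: "00/8f/5:a0/ff/-1".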

    public static final ConfigOption<Long> KEY_FILTER_ID_MODULUS =
            new ConfigOption<>(ROOT_NS, "id-modulus",
                    "ID extracted from key must be divisible by this to pass the key filter",
                    ConfigOption.Type.LOCAL, Long.class);

    public static final ConfigOption<Long> KEY_FILTER_ID_MODULAR_VALUE =
            new ConfigOption<>(ROOT_NS, "id-modular-value",
                    "ID in modular arithmetic",
                    ConfigOption.Type.LOCAL, Long.class);
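    // Together these options build the key filter "KeyValueStoreUtil.getID(key) % id-modulus == id-modular-value";
    // e.g. (illustrative) id-modulus=2 with id-modular-value=1 keeps only keys whose extracted ID is odd,
    // while omitting id-modular-value keeps keys whose ID is divisible by id-modulus.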

    private List<SliceQuery> qs;
    private Predicate<StaticBuffer> keyFilter;

    public SimpleScanJob() {
        qs = null;
        keyFilter = k -> true;
    }

    public SimpleScanJob(List<SliceQuery> qs, Predicate<StaticBuffer> keyFilter) {
        this.qs = qs;
        this.keyFilter = keyFilter;
    }

    @Override
    public SimpleScanJob clone() {
        return new SimpleScanJob(qs,keyFilter);
    }

    public SimpleScanJob(SliceQuery q) {
        this(ImmutableList.of(q),k -> true);
    }

    @Override
    public void workerIterationStart(Configuration config, Configuration graphConfig, ScanMetrics metrics) {
        assertNotNull(config);
        metrics.incrementCustom(SETUP_COUNT);

        if (config.has(HEX_QUERIES)) {
            String[] queryStrings = config.get(HEX_QUERIES).split(":");
            List<SliceQuery> queries = new LinkedList<>();
            for (String qString : queryStrings) {
                String[] queryTokens = qString.split("/");
                StaticBuffer start = StaticArrayBuffer.of(Hex.hexToBytes(queryTokens[0]));
                StaticBuffer end = StaticArrayBuffer.of(Hex.hexToBytes(queryTokens[1]));
                SliceQuery query = new SliceQuery(start, end);
                int limit = Integer.parseInt(queryTokens[2]);
                if (0 <= limit) {
                    query.setLimit(limit);
                }
                queries.add(query);
            }
            qs = queries;
        }

        if (config.has(KEY_FILTER_ID_MODULUS)) {
            final long mod = config.get(KEY_FILTER_ID_MODULUS);
            final long modVal;
            if (config.has(KEY_FILTER_ID_MODULAR_VALUE)) {
                modVal = config.get(KEY_FILTER_ID_MODULAR_VALUE);
            } else {
                modVal = 0;
            }
            keyFilter = k -> KeyValueStoreUtil.getID(k) % mod == modVal;
        }
    }

    @Override
    public void workerIterationEnd(ScanMetrics metrics) {
        metrics.incrementCustom(TEARDOWN_COUNT);
    }


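    // Increments KEY_COUNT once per processed key and adds the number of entries returned for
    // each configured query to TOTAL_COUNT; a query may be absent from the entries map when it
    // matched nothing for this key.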
    @Override
    public void process(StaticBuffer key, Map<SliceQuery,EntryList> entries, ScanMetrics metrics) {
        assertNotNull(key);
        assertTrue(keyFilter.test(key));
        metrics.incrementCustom(KEY_COUNT);
        assertNotNull(entries);
        assertTrue(qs.size() >= entries.size());
        for (SliceQuery q : qs) {
            if (!entries.containsKey(q)) {
                continue;
            }
            EntryList result = entries.get(q);
            metrics.incrementCustom(TOTAL_COUNT,result.size());
        }
    }

    @Override
    public List<SliceQuery> getQueries() {
        return qs;
    }

    @Override
    public Predicate<StaticBuffer> getKeyFilter() {
        return keyFilter;
    }

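    // Inverse of the HEX_QUERIES parsing in workerIterationStart; see the format note on HEX_QUERIES.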
    private static String encodeQueries(List<SliceQuery> queries) {

        List<String> queryStrings = new ArrayList<>(queries.size());

        for (SliceQuery query : queries) {
            String start = Hex.bytesToHex(query.getSliceStart().as(StaticBuffer.ARRAY_FACTORY));
            String end = Hex.bytesToHex(query.getSliceEnd().as(StaticBuffer.ARRAY_FACTORY));

            final int limit;
            if (query.hasLimit()) {
                limit = query.getLimit();
            } else {
                limit = -1;
            }

            queryStrings.add(String.format("%s/%s/%d", start, end, limit));
        }

        return Joiner.on(":").join(queryStrings);
    }

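    /**
     * Runs a series of scans against a store assumed to have been populated by the caller with
     * {@code keys} keys of {@code columns} columns each, and checks the resulting {@link ScanMetrics}.
     * Illustrative call (hypothetical fixture sizes): {@code SimpleScanJob.runBasicTests(1000, 40, runner);}
     */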
    public static void runBasicTests(int keys, int columns, SimpleScanJobRunner runner)
            throws InterruptedException, ExecutionException, BackendException, IOException {

        Configuration conf1 = getJobConf(
                ImmutableList.of(new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(128))));
        ScanMetrics result1 = runner.run(new SimpleScanJob(), conf1, SimpleScanJob.class.getName() + "#ROOT_NS");
        assertEquals(keys,result1.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys*columns/4*3,result1.getCustom(SimpleScanJob.TOTAL_COUNT));
        /* These assertions are not valid on Hadoop.  The Hadoop implementation uses
         * Hadoop Counters to store ScanMetrics.  These Counters are shared
         * clusterwide.  Hence there will be as many setups and teardowns as there
         * are input splits -- generally more than one.  So these don't apply:
         *
         *  assertEquals(1, result1.getCustom(SimpleScanJob.SETUP_COUNT));
         *  assertEquals(1, result1.getCustom(SimpleScanJob.TEARDOWN_COUNT));
         *
         * However, even on Hadoop, we can expect both of the following to hold:
         * 1. The number of setups must equal the number of teardowns
         * 2. The number of setups (teardowns) must be positive
         */
        assertEquals(result1.getCustom(SimpleScanJob.SETUP_COUNT), result1.getCustom(SimpleScanJob.TEARDOWN_COUNT),
            "Number of ScanJob setup calls must equal number of ScanJob teardown calls");
        assertTrue(0 < result1.getCustom(SimpleScanJob.SETUP_COUNT),
            "Number of ScanJob setup/teardown calls must be positive");

        Configuration conf2 = getJobConf(ImmutableList.of(new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(128)).setLimit(5)));
        ScanMetrics result2 = runner.run(new SimpleScanJob(), conf2, SimpleScanJob.class.getName() + "#ROOT_NS");
        assertEquals(keys,result2.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys*5,result2.getCustom(SimpleScanJob.TOTAL_COUNT));

        Configuration conf3 = getJobConf(ImmutableList.of(new SliceQuery(KeyValueStoreUtil.getBuffer(0), KeyValueStoreUtil.getBuffer(5))));
        ScanMetrics result3 = runner.run(new SimpleScanJob(), conf3, SimpleScanJob.class.getName() + "#ROOT_NS");
        assertEquals(keys,result3.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys*5,result3.getCustom(SimpleScanJob.TOTAL_COUNT));

        Configuration conf4 = getJobConf(ImmutableList.of(
                new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(128)).setLimit(1),
                new SliceQuery(KeyValueStoreUtil.getBuffer(0), KeyValueStoreUtil.getBuffer(5))));
        ScanMetrics result4 = runner.run(new SimpleScanJob(), conf4, SimpleScanJob.class.getName() + "#ROOT_NS");
        assertEquals(keys,result4.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys*6,result4.getCustom(SimpleScanJob.TOTAL_COUNT));

        Configuration conf5 = getJobConf(ImmutableList.of(
                new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(128)).setLimit(1),
                new SliceQuery(KeyValueStoreUtil.getBuffer(2), KeyValueStoreUtil.getBuffer(4)),
                new SliceQuery(KeyValueStoreUtil.getBuffer(6), KeyValueStoreUtil.getBuffer(8)),
                new SliceQuery(KeyValueStoreUtil.getBuffer(10), KeyValueStoreUtil.getBuffer(20)).setLimit(4)));

        ScanMetrics result5 = runner.run(new SimpleScanJob(), conf5, SimpleScanJob.class.getName() + "#ROOT_NS");
        assertEquals(keys, result5.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys*9,result5.getCustom(SimpleScanJob.TOTAL_COUNT));

        Configuration conf6 = getJobConf(
                ImmutableList.of(new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(128)).setLimit(5)), 2L);
        ScanMetrics result6 = runner.run(new SimpleScanJob(), conf6, SimpleScanJob.class.getName() + "#ROOT_NS");
        assertEquals(keys/2,result6.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys/2*5,result6.getCustom(SimpleScanJob.TOTAL_COUNT));


        Configuration conf7 = getJobConf(ImmutableList.of(
                new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(128)).setLimit(1),
                new SliceQuery(KeyValueStoreUtil.getBuffer(2), KeyValueStoreUtil.getBuffer(4)),
                new SliceQuery(KeyValueStoreUtil.getBuffer(31), KeyValueStoreUtil.getBuffer(35)),
                new SliceQuery(KeyValueStoreUtil.getBuffer(36), KeyValueStoreUtil.getBuffer(40)).setLimit(1)));
        ScanMetrics result7 = runner.run(new SimpleScanJob(), conf7, SimpleScanJob.class.getName() + "#ROOT_NS");
        assertEquals(keys,result7.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys*3+keys/2*5,result7.getCustom(SimpleScanJob.TOTAL_COUNT));

        Configuration conf8 = getJobConf(ImmutableList.of(
                new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(128)).setLimit(1),
                new SliceQuery(KeyValueStoreUtil.getBuffer(31), KeyValueStoreUtil.getBuffer(35))), 2L, 1L);
        ScanMetrics result8 = runner.run(new SimpleScanJob(), conf8, SimpleScanJob.class.getName() + "#ROOT_NS");
//                        k -> KeyValueStoreUtil.getID(k) % 2 == 1));
        assertEquals(keys/2,result8.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys/2*5,result8.getCustom(SimpleScanJob.TOTAL_COUNT));

        Configuration conf9 = getJobConf(ImmutableList.of(
                    new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(128)).setLimit(1),
                    new SliceQuery(KeyValueStoreUtil.getBuffer(31), KeyValueStoreUtil.getBuffer(35))), 2L);
//                        k -> KeyValueStoreUtil.getID(k) % 2 == 0));
        ScanMetrics result9 = runner.run(new SimpleScanJob(), conf9, SimpleScanJob.class.getName() + "#ROOT_NS");
        assertEquals(keys/2,result9.getCustom(SimpleScanJob.KEY_COUNT));
        assertEquals(keys/2,result9.getCustom(SimpleScanJob.TOTAL_COUNT));

        try {
            Configuration conf10 = getJobConf(ImmutableList.of(
                    new SliceQuery(StaticArrayBuffer.of(new byte[]{(byte) 2}), BufferUtil.oneBuffer(1)),
                    new SliceQuery(BufferUtil.zeroBuffer(1), BufferUtil.oneBuffer(1))));
            runner.run(new SimpleScanJob(), conf10, SimpleScanJob.class.getName() + "#ROOT_NS");
            fail();
        } catch (Exception e) {
            //assertTrue(e instanceof ExecutionException && e.getCause() instanceof IllegalArgumentException);
        }
    }

    public static Configuration getJobConf(List<SliceQuery> queries) {
        return getJobConf(queries, null, null);
    }

    public static Configuration getJobConf(List<SliceQuery> queries, Long modulus) {
        return getJobConf(queries, modulus, null);
    }

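    /**
     * Builds a job configuration rooted at {@link #ROOT_NS}, setting only the options whose
     * arguments are non-null. Illustrative example (hypothetical values):
     * {@code getJobConf(queries, 2L, 0L)} scans {@code queries} and keeps only keys whose
     * extracted ID is even.
     */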
    public static Configuration getJobConf(List<SliceQuery> queries, Long modulus, Long modVal) {
        ModifiableConfiguration conf2 =
                new ModifiableConfiguration(SimpleScanJob.ROOT_NS,
                        new CommonsConfiguration(ConfigurationUtil.createBaseConfiguration()), BasicConfiguration.Restriction.NONE);
        if (null != queries)
            conf2.set(HEX_QUERIES, encodeQueries(queries));
        if (null != modulus)
            conf2.set(KEY_FILTER_ID_MODULUS, modulus);
        if (null != modVal)
            conf2.set(KEY_FILTER_ID_MODULAR_VALUE, modVal);
        return conf2;
    }
}



