
com.graphaware.importer.BatchImporter Maven / Gradle / Ivy


Importer: high-performance, multi-threaded initial data load into Neo4j from CSV/SQL with custom logic.

/*
 * Copyright (c) 2013-2016 GraphAware
 *
 * This file is part of the GraphAware Framework.
 *
 * GraphAware Framework is free software: you can redistribute it and/or modify it under the terms
 * of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details. You should have received a copy of the
 * GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package com.graphaware.importer;

import com.graphaware.importer.cache.Caches;
import com.graphaware.importer.cache.MapDBCaches;
import com.graphaware.importer.cli.CommandLineParser;
import com.graphaware.importer.config.ImportConfig;
import com.graphaware.importer.context.ImportContext;
import com.graphaware.importer.context.SimpleImportContext;
import com.graphaware.importer.data.Data;
import com.graphaware.importer.data.location.DataLocator;
import com.graphaware.importer.data.location.FileLocator;
import com.graphaware.importer.importer.Importer;
import com.graphaware.importer.plan.DefaultExecutionPlan;
import com.graphaware.importer.plan.ExecutionPlan;
import com.graphaware.importer.stats.LoggingStatisticsCollector;
import com.graphaware.importer.stats.StatisticsCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.Map;
import java.util.Set;

/**
 * Abstract base class for batch importers. In order to implement one, extend this class, implement at least the mandatory
 * methods, and create a main method that will call onto {@link #run(String[])}, like this:
 * <pre>
 * public static void main(String[] args) {
 *     new MyBatchImporter().run(args);
 * }
 * </pre>
 *
 * @param <T> type of the import config used by this importer.
 */
public abstract class BatchImporter<T extends ImportConfig> {

    private static final Logger LOG = LoggerFactory.getLogger(BatchImporter.class);

    private final Object monitor = new Object();
    private Set<Importer> importers;

    /**
     * Run this importer.
     *
     * @param args command-line arguments.
     */
    public void run(String[] args) {
        try {
            LOG.info("Creating import config...");
            T config = commandLineParser().parseArgs(args);

            if (config == null) {
                return;
            }

            LOG.info("Creating import context...");
            ImportContext context = createContext(config);

            LOG.info("Fully bootstrapping context...");
            context.fullBootstrap();

            LOG.info("Checking context...");
            context.check();

            LOG.info("Creating statistics and starting timing...");
            StatisticsCollector stats = createStats(context);
            stats.startTiming();

            LOG.info("Creating importers...");
            Set<Importer> importers = importers();

            LOG.info("Creating execution plan...");
            ExecutionPlan plan = new DefaultExecutionPlan(importers, context);

            LOG.info("Performing import...");
            performImport(context, plan);

            LOG.info("Creating indices...");
            createIndices(plan);

            LOG.info("Shutting down context...");
            context.shutdown();
            LOG.info("Context shut down.");

            stats.printTiming();

            LOG.info("IMPORT SUCCESSFUL");
        } catch (Throwable throwable) {
            LOG.error("An exception occurred: ", throwable);
            LOG.info("IMPORT FAILED");
        }
    }

    /**
     * Create a command-line parser.
     *
     * @return parser.
     */
    protected abstract CommandLineParser<T> commandLineParser();

    /**
     * Create an import context.
     *
     * @param config configuration built from command-line arguments.
     * @return context.
     */
    protected ImportContext createContext(T config) {
        return new SimpleImportContext(config, createCaches(), createInputDataLocator(config), createOutputDataLocator(config));
    }

    /**
     * Create caches used throughout the import.
     *
     * @return caches.
     */
    protected Caches createCaches() {
        return new MapDBCaches();
    }

    /**
     * Create a locator for import input data.
     *
     * @param config import config.
     * @return data locator.
     */
    protected abstract DataLocator createInputDataLocator(T config);

    /**
     * Create a locator for import output data.
     *
     * @param config import config.
     * @return data locator.
     */
    protected DataLocator createOutputDataLocator(T config) {
        return new FileLocator(config.getOutputDir(), output());
    }

    /**
     * Create a mapping between output data and their logical names. Returns an empty map by default.
     *
     * @return output mapping.
     */
    protected Map<Data, String> output() {
        return Collections.emptyMap();
    }

    /**
     * Create a statistics collector for the entire import.
     *
     * @param context context.
     * @return stats collector.
     */
    protected StatisticsCollector createStats(ImportContext context) {
        return new LoggingStatisticsCollector("IMPORT");
    }

    /**
     * Get the set of importers that will perform the import.
     *
     * @return importers.
     */
    protected final Set<Importer> importers() {
        if (importers == null) {
            importers = createImporters();
        }
        return importers;
    }

    /**
     * Create the set of importers that will perform the import.
     *
     * @return importers.
     */
    protected abstract Set<Importer> createImporters();

    /**
     * Perform a multi-threaded import.
     *
     * @param context       import context.
     * @param executionPlan import execution plan.
     */
    private void performImport(ImportContext context, final ExecutionPlan executionPlan) {
        for (Importer importer : executionPlan.getOrderedImporters()) {
            LOG.info("Preparing " + importer.name() + "...");
            importer.prepare(context);
        }

        for (final Importer importer : executionPlan.getOrderedImporters()) {
            new Thread(new Runnable() {
                @Override
                public void run() {
                    //wait until the execution plan allows this importer to start
                    while (!executionPlan.canRun(importer)) {
                        synchronized (monitor) {
                            try {
                                monitor.wait();
                            } catch (InterruptedException e) {
                                //ok
                            }
                        }
                    }

                    try {
                        importer.performImport();
                    } finally {
                        //free caches that are no longer needed and wake up waiting importers
                        synchronized (monitor) {
                            executionPlan.clearCaches();
                            monitor.notifyAll();
                        }
                    }
                }
            }, "IMPORTER - " + importer.name()).start();
        }

        //wait for all importer threads to finish
        while (!executionPlan.allFinished()) {
            synchronized (monitor) {
                try {
                    monitor.wait(10000);
                } catch (InterruptedException e) {
                    //ok
                }
            }
        }

        LOG.info("Destroying caches...");
        context.caches().destroy();
        LOG.info("Caches destroyed.");
    }

    private void createIndices(ExecutionPlan executionPlan) {
        for (Importer batchImporter : executionPlan.getOrderedImporters()) {
            batchImporter.createIndices();
        }
    }
}
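
For orientation, below is a minimal sketch of what a concrete subclass might look like. Only the BatchImporter contract shown above is taken from this file; MyConfig, MyConfigParser, MyInputLocator and PersonImporter are hypothetical helper types standing in for project-specific configuration, argument parsing, file lookup and import logic.

import com.graphaware.importer.BatchImporter;
import com.graphaware.importer.cli.CommandLineParser;
import com.graphaware.importer.data.location.DataLocator;
import com.graphaware.importer.importer.Importer;

import java.util.Collections;
import java.util.Set;

// Minimal sketch, not a definitive implementation: MyConfig is assumed to implement
// ImportConfig, and MyConfigParser / MyInputLocator / PersonImporter are hypothetical
// classes a real project would supply.
public class MyBatchImporter extends BatchImporter<MyConfig> {

    public static void main(String[] args) {
        // run(args) parses the config, bootstraps the context, executes all importers
        // on separate threads and creates indices, as implemented in BatchImporter above.
        new MyBatchImporter().run(args);
    }

    @Override
    protected CommandLineParser<MyConfig> commandLineParser() {
        return new MyConfigParser();                         // hypothetical: builds MyConfig from args
    }

    @Override
    protected DataLocator createInputDataLocator(MyConfig config) {
        return new MyInputLocator(config);                   // hypothetical: maps logical data to input files
    }

    @Override
    protected Set<Importer> createImporters() {
        return Collections.singleton(new PersonImporter());  // hypothetical importer with the custom load logic
    }
}

Once such a subclass exists, run() drives the whole lifecycle: the DefaultExecutionPlan orders the importers, each importer runs on its own thread as soon as the plan allows it, and indices are created after all importers have finished.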




