
// org.apache.jena.tdb2.loader.DataLoader Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.tdb2.loader;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import org.apache.jena.dboe.base.block.FileMode;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.system.StreamRDF;
/**
* Bulk loaders improve the loading of data into datasets. Each bulk loader has
* consequences in achieving its improvements, including in some cases locking out all
* other access to the dataset while the loading is underway.
*
* Finding the best loader to use takes experimentation.
* Loading speed depends on hardware, particularly for the parallel bulk loader.
*
* Giving a loader more heap space does not improve performance, and will likely decrease it.
* All loaders use OS file system caching, not in-JVM caches (except when run in
* {@link FileMode#direct direct file mode} for special circumstances).
*
*
* <h2>basic</h2>
* The basic loader is fully transactional and good for incrementally adding data, up to a few million triples/quads,
* to large datasets. It does not max out the hardware, so it is suitable for runtime operation at larger scales.
*
* <h2>sequential</h2>
* A fully transactional loader that loads the primary indexes then does multiple passes to load the secondary indexes.
* This maximises RAM file system caching effects.
* It can be useful when hardware is restricted and I/O is slow (spinning disks rather than solid-state storage such as SSDs).
*
* <h2>phased</h2>
* The phased loader uses multiple threads to process data and to index the {@code DatasetGraph}.
* It proceeds by loading data into the primary indexes, then, separately, builds the other indexes.
* Loading is not fully transaction-safe in the presence of persistent
* storage problems or a JVM/machine crash when finishing writing.
* Otherwise it is transactional.
*
* <h2>parallel</h2>
* The parallel loader uses multiple threads to process data and to index the {@code DatasetGraph}.
* Loading is not fully transaction-safe in the presence of persistent
* storage problems or a JVM/machine crash when finishing writing.
* Otherwise it is transactional.
* Because it uses many threads to write to persistent storage,
* it can interfere with the performance of other applications on the machine it is run on.
*
* <h2>{@code DataLoader} API</h2>
* <p>
* To use a {@code DataLoader}:
*
* <pre>
*  loader.startBulk();
*  try {
*      send data ...
*          use stream()
*          or load(files)
*          or a mixture.
*      loader.finishBulk();
*  } catch (RuntimeException ex) {
*      loader.finishException(ex);
*      .. optionally rethrow exception ..
*  }
* </pre>
*
* @see LoaderFactory LoaderFactory for creating DataLoaders.
* @see Loader Loader for convenience operations to invoke the default loader.
*/
public interface DataLoader {
    /** Start bulk loading. Call this before sending any data to the loader. */
    public void startBulk();

    /**
     * Finish bulk loading. This operation waits until the loading process has completed
     * and all changes have been committed.
     */
    public void finishBulk();

    /**
     * Alternative finish in case something went wrong.
     * This operation attempts to clear up and abort the changes.
     * If there was a file system problem with the {@code DatasetGraph} being
     * loaded, then recovery may not have been possible.
     * The ability of loaders to clean up is implementation specific.
     *
     * @param ex the exception that interrupted the load
     */
    public void finishException(Exception ex);

    /**
     * Load files with syntax given by the file name extension,
     * or URLs, with content negotiation.
     *
     * @param filenames file names or URLs of the data to load
     */
    public void load(List<String> filenames);

    /**
     * Load from an {@link InputStream} with the given syntax.
     *
     * @param label  label for the progress monitor
     * @param input  stream to read the data from
     * @param syntax syntax of the data in {@code input}
     */
    public void loadFromInputStream(String label, InputStream input, Lang syntax);

    /**
     * Load files with syntax given by the file name extension,
     * or URLs, with content negotiation.
     * <p>
     * Varargs convenience form: the arguments are wrapped as a list and
     * passed to {@link #load(List)}.
     *
     * @param filenames file names or URLs of the data to load
     */
    public default void load(String... filenames) {
        load(Arrays.asList(filenames));
    }

    /**
     * Send data to the loader by {@link StreamRDF}.
     *
     * @return the {@link StreamRDF} through which data is sent to this loader
     */
    public StreamRDF stream();

    /**
     * Return count of triples sent to the loader and added.
     * This is not a count of unique triples.
     *
     * @return the number of triples sent to the loader and added
     */
    public long countTriples();

    /**
     * Return count of quads sent to the loader and added.
     * This is not a count of unique quads.
     *
     * @return the number of quads sent to the loader and added
     */
    public long countQuads();
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy