All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ac.simons.neo4j.migrations.formats.csv.AbstractLoadCSVMigration Maven / Gradle / Ivy

/*
 * Copyright 2020-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ac.simons.neo4j.migrations.formats.csv;

import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.CRC32;

import org.neo4j.driver.Query;
import org.neo4j.driver.Session;

import ac.simons.neo4j.migrations.core.JavaBasedMigration;
import ac.simons.neo4j.migrations.core.MigrationContext;
import ac.simons.neo4j.migrations.core.Migrations;

/**
 * This base class can be inherited inside applications that want to grab some data from a URI, for example to trigger
 * a {@code LOAD CSV} statement inside Neo4j. Migrations inheriting from it will always be repeatable (when adhering to
 * the repeatable versioning scheme, such as {@code R000__LoadBaseData.java}) but also compute a CRC32 checksum from the
 * content served by the source URI, so that the migration is only repeated when the source actually has changed.
 * <p>
 * The checksum is computed lazily on the first call to {@link #getChecksum()} and cached for the lifetime of the
 * instance afterwards, regardless of whether the computation succeeded.
 *
 * @author Michael J. Simons
 * @since 2.0.1
 */
public abstract class AbstractLoadCSVMigration implements JavaBasedMigration {

	static final Logger LOGGER = Logger.getLogger(AbstractLoadCSVMigration.class.getName());

	/**
	 * Client used for downloading the source when computing the checksum. It follows redirects according to
	 * {@link HttpClient.Redirect#NORMAL}.
	 */
	private final HttpClient httpClient = HttpClient.newBuilder()
		.followRedirects(HttpClient.Redirect.NORMAL)
		.build();

	/**
	 * The location of the CSV data. It is both downloaded for the checksum and formatted into the Cypher statement.
	 */
	private final URI csvSource;

	private final boolean repeatable;

	/**
	 * Lazily initialized checksum. {@code null} means "not computed yet", an empty optional means that the
	 * computation has been attempted but the source could not be retrieved.
	 */
	@SuppressWarnings({"OptionalUsedAsFieldOrParameterType", "squid:S3077"})
	private volatile Optional<String> checksum;

	/**
	 * You need to call this constructor, but your implementation must have a default, no-arg constructor to be loadable
	 * like any other Java based migration.
	 * @param csvSource The source to load data from
	 * @param repeatable Whether this is repeatable or not
	 */
	protected AbstractLoadCSVMigration(URI csvSource, boolean repeatable) {
		this.csvSource = csvSource;
		this.repeatable = repeatable;
	}

	/**
	 * Formats the CSV source into the text of the query returned by {@link #getQuery()}, runs the resulting statement
	 * in a session obtained from the context and logs the resulting counters on level {@link Level#FINE}.
	 *
	 * @param context The context providing the session in which the statement is run
	 */
	@Override
	public final void apply(MigrationContext context) {

		var originalQuery = getQuery();
		try (Session session = context.getSession()) {
			// Only the text is formatted, parameters of the original query are passed on untouched.
			var summary = session
				.run(new Query(originalQuery.text().formatted(csvSource), originalQuery.parameters()))
				.consume();
			LOGGER.log(Level.FINE, () -> String.format("Loaded CSV from %s resulting in %s", csvSource, summary.counters()));
		}
	}

	/**
	 * The statement returned by this method should have exactly one Java format specifier {@code %s} which will be used
	 * by us to insert the CSV source passed to the constructor. Any other parameters included with the query object
	 * will just be used as is.
	 *
	 * @return The Cypher statement to be used to load the CSV file.
	 */
	public abstract Query getQuery();

	/**
	 * Overwrite this method and apply all customization to the {@link HttpRequest.Builder builder} that you might need,
	 * such as authentication, additional headers and cookies. Everybody loves cookies.
	 *
	 * @param builder Pre-initialized with the target source
	 * @return The usable request
	 */
	public HttpRequest customizeRequest(HttpRequest.Builder builder) {
		return builder.build();
	}

	@Override
	public final boolean isRepeatable() {
		return repeatable;
	}

	/**
	 * Returns the CRC32 checksum of the content retrieved from the CSV source as a decimal string. The value is
	 * computed on first access using double-checked locking on the volatile {@code checksum} field and cached
	 * afterwards.
	 *
	 * @return The checksum, or an empty optional if the source could not be retrieved
	 */
	// Having the value in a lazily initialized optional is the point here.
	@SuppressWarnings({"OptionalAssignedToNull", "squid:S2789"})
	@Override
	public final Optional<String> getChecksum() {
		Optional<String> availableChecksum = this.checksum;
		if (availableChecksum == null) {
			synchronized (this) {
				// Re-read under the lock, another thread might have finished the computation in the meantime.
				availableChecksum = this.checksum;
				if (availableChecksum == null) {
					availableChecksum = Optional.ofNullable(computeChecksum());
					this.checksum = availableChecksum;
				}
			}
		}
		return availableChecksum;
	}

	/**
	 * Issues a {@code GET} request against the CSV source and feeds all received bytes into a {@link CRC32}.
	 *
	 * @return The decimal representation of the CRC32 value, or {@code null} if the request failed with an
	 * {@link IOException} or the thread was interrupted while waiting
	 */
	private String computeChecksum() {
		try {
			var crc32 = new CRC32();
			var request = customizeRequest(HttpRequest
				.newBuilder(csvSource)
				.header("User-Agent", Migrations.getUserAgent())
				.GET());
			// The body is consumed chunk by chunk, so it never needs to be held in memory as a whole.
			// NOTE(review): the status code of the response is not inspected, so the body of an error response
			// would be checksummed as well - confirm whether that is intended.
			httpClient.send(request, HttpResponse.BodyHandlers.ofByteArrayConsumer(optionalBytes -> optionalBytes.ifPresent(crc32::update)));
			return Long.toString(crc32.getValue());
		} catch (IOException | InterruptedException e) {
			LOGGER.log(Level.WARNING, e, () -> String.format("Could not retrieve %s, checksum won't be available until next migration attempt.", csvSource));
			if (e instanceof InterruptedException) {
				// Restore interrupted state...
				Thread.currentThread().interrupt();
			}
			return null;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy