All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.icij.extract.report.SQLReportCodec Maven / Gradle / Ivy

There is a newer version: 7.4.0
Show newest version
package org.icij.extract.report;

import org.icij.extract.document.DocumentFactory;
import org.icij.extract.document.TikaDocument;
import org.icij.extract.extractor.ExtractionStatus;
import org.icij.extract.mysql.SQLMapCodec;
import org.icij.task.Options;
import org.icij.task.annotation.Option;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Map;

@Option(name = "reportIdKey", description = "For reports tables that have an ID column, specify the column name. This" +
		" will then be used as the unique key.", parameter = "name")
@Option(name = "reportForeignIdKey", description = "For reports that have a foreign ID column, specify the column " +
		"name.", parameter = "name")
@Option(name = "reportPathKey", description = "The report table key for storing the document path.", parameter = "name")
@Option(name = "reportStatusKey", description = "The table key for storing the report status.", parameter = "name")
@Option(name = "reportExceptionKey", description = "The table key for storing processing exceptions.", parameter =
		"name")
@Option(name = "reportSuccessStatus", description = "The status for a successfully extracted file.", parameter =
		"value")
@Option(name = "reportFailureStatus", description = "A general failure status value to use instead of the more " +
		"specific values.", parameter = "value")
public class SQLReportCodec implements SQLMapCodec {

	private final DocumentFactory factory;
	private final String idKey;
	private final String foreignIdKey;
	private final String pathKey;
	private final String statusKey;
	private final String exceptionKey;
	private final String successStatus;
	private final String failureStatus;

	SQLReportCodec(final DocumentFactory factory, final Options options) {
		this.factory = factory;
		this.idKey = options.get("reportIdKey").value().orElse(null);
		this.foreignIdKey = options.get("reportForeignIdKey").value().orElse(null);
		this.pathKey = options.get("reportPathKey").value().orElse("path");
		this.statusKey = options.get("reportStatusKey").value().orElse("extraction_status");
		this.exceptionKey = options.get("reportExceptionKey").value().orElse("exception");
		this.successStatus = options.get("reportSuccessStatus").value().orElse(null);
		this.failureStatus = options.get("reportFailureStatus").value().orElse(null);
	}

	SQLReportCodec(final DocumentFactory factory) {
		this.factory = factory;
		this.idKey = null;
		this.foreignIdKey = null;
		this.pathKey = "path";
		this.statusKey = "extraction_status";
		this.exceptionKey = "exception";
		this.successStatus = null;
		this.failureStatus = null;
	}

	@Override
	public Map encodeKey(final Object o) {
		final TikaDocument tikaDocument = (TikaDocument) o;
		final Map map = new HashMap<>();

		if (null != idKey) {
			map.put(idKey, tikaDocument.getId());
		}

		if (null != foreignIdKey) {
			map.put(foreignIdKey, tikaDocument.getForeignId());
		}

		map.put(pathKey, tikaDocument.getPath().toString());
		return map;
	}

	@Override
	public Path decodeKey(final ResultSet rs) throws SQLException {
		final Path path = Paths.get(rs.getString(pathKey));
		final TikaDocument tikaDocument;

		if (null != idKey) {
			tikaDocument = factory.create(rs.getString(idKey), path);
		} else {
			tikaDocument = factory.create(path);
		}

		if (null != foreignIdKey) {
			tikaDocument.setForeignId(rs.getString(foreignIdKey));
		}

		return tikaDocument.getPath();
	}

	@Override
	public Map encodeValue(final Object o) {
		final Report report = (Report) o;
		final ExtractionStatus status = report.getStatus();
		final Map map = new HashMap<>();

		if (successStatus != null && status == ExtractionStatus.SUCCESS) {
			map.put(statusKey, successStatus);
		} else if (failureStatus != null && status.getCode() > 0) {
			map.put(statusKey, failureStatus);
		} else {
			map.put(statusKey, status.toString());
		}

		// Either encode the exception or put an explicit null, so if an old value is already set in the database then
		// this will reset it.
		if (null != exceptionKey) {
			if (report.getException().isPresent()) {
				map.put(exceptionKey, report.getException().get());
			} else {
				map.put(exceptionKey, null);
			}
		}

		return map;
	}

	@Override
	public Report decodeValue(final ResultSet rs) throws SQLException {
		final String status = rs.getString(statusKey);
		final Exception exception;

		final byte[] exceptionBytes = rs.getBytes(exceptionKey);

		if (null != exceptionBytes && exceptionBytes.length > 0) {
			try (final ObjectInputStream si = new ObjectInputStream(new ByteArrayInputStream(exceptionBytes))) {
				exception = (Exception) si.readObject();
			} catch (final IOException | ClassNotFoundException | ClassCastException e) {
				throw new IllegalArgumentException("Unable to decode exception bytes.", e);
			}
		} else {
			exception = null;
		}

		// Assume that the status is stored as an ENUM in the table, so decode as the ExtractionStatus name
		// rather than integer value. This is different to Redis codec, where integer values are stored to save
		// memory. MySQL converts ENUM strings to integers internally, so no such manual device is needed there.
		if (null == status) {
			return null;
		}

		if (null != successStatus && status.equals(successStatus)) {
			return new Report(ExtractionStatus.SUCCESS);
		}

		if (null != failureStatus && status.equals(failureStatus)) {
			return new Report(ExtractionStatus.FAILURE_UNKNOWN, exception);
		}

		// Return null if the value can't be decoded into an enum. This allows arbitrary values to be stored in
		// the status column.
		try {
			return new Report(ExtractionStatus.valueOf(status), exception);
		} catch (IllegalArgumentException e) {
			return null;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy