All downloads are free. The search and download functionality uses the official Maven repository.

org.icij.extract.tasks.SpewDumpTask Maven / Gradle / Ivy

There is a newer version: 7.4.0
Show newest version
package org.icij.extract.tasks;

import org.icij.extract.document.TikaDocument;
import org.icij.extract.extractor.ExtractionStatus;
import org.icij.extract.report.ReportMap;
import org.icij.extract.report.ReportMapFactory;
import org.icij.extract.report.Reporter;
import org.icij.extract.spewer.SolrSpewer;
import org.icij.extract.spewer.SpewerFactory;
import org.icij.spewer.Spewer;
import org.icij.task.DefaultTask;
import org.icij.task.annotation.OptionsClass;
import org.icij.task.annotation.Task;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.file.Paths;

/**
 * Task that feeds one or more previously written dump files to the configured
 * {@link Spewer} and records every document the spewer returns as
 * {@link ExtractionStatus#SUCCESS} in the configured {@link ReportMap}.
 *
 * <p>The spewer and the report map are both built from this task's options and
 * are closed when the task finishes, whether it succeeds or throws.</p>
 */
@Task("Spew a dump file.")
@OptionsClass(SpewerFactory.class)
@OptionsClass(ReportMapFactory.class)
public class SpewDumpTask extends DefaultTask {

	// Log under this task's own category (was mistakenly bound to SpewTask).
	private static final Logger logger = LoggerFactory.getLogger(SpewDumpTask.class);

	/**
	 * Rejects an invocation without arguments: this task needs at least one dump path.
	 *
	 * @return never returns normally
	 * @throws IllegalArgumentException always, because no paths were supplied
	 */
	@Override
	public Void call() throws Exception {
		throw new IllegalArgumentException("No paths supplied.");
	}

	/**
	 * Writes each of the given dump files through the spewer and reports every
	 * resulting document as successfully processed.
	 *
	 * @param arguments file-system paths of the dump files to spew, processed in order
	 * @return always {@code null}
	 * @throws Exception if creating the report map or spewer, writing a dump, saving a
	 *                   report entry, or closing either resource fails
	 */
	@Override
	public Void call(final String[] arguments) throws Exception {
		try (final ReportMap reportMap = new ReportMapFactory(options).create();
			 final Spewer spewer = SpewerFactory.createSpewer(options)) {
			final Reporter reporter = new Reporter(reportMap);

			// NOTE(review): presumably switches off the Solr spewer's own dumping so that
			// re-spewing a dump does not produce another dump -- confirm against SolrSpewer.
			if (spewer instanceof SolrSpewer) {
				((SolrSpewer) spewer).dump(false);
			}

			for (final String path : arguments) {
				logger.info("Spewing document from \"{}\".", path);

				final TikaDocument[] tikaDocuments = spewer.write(Paths.get(path));

				// Each document returned by the spewer is recorded as a success.
				for (final TikaDocument tikaDocument : tikaDocuments) {
					logger.info("Spewed \"{}\".", tikaDocument.getPath());
					reporter.save(tikaDocument.getPath(), ExtractionStatus.SUCCESS);
				}
			}
		}

		return null;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy