All downloads are free. Search and download functionality uses the official Maven repository.

com.gc.iotools.fmt.detect.droid.DroidDetectorImpl Maven / Gradle / Ivy

The newest version!
package com.gc.iotools.fmt.detect.droid;

/*
 * Copyright (c) 2008, 2014 Gabriele Contini. This source code is released
 * under the BSD License.
 */
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

import uk.gov.nationalarchives.droid.base.FileFormatHit;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;
import uk.gov.nationalarchives.droid.binFileReader.IdentificationFile;
import uk.gov.nationalarchives.droid.binFileReader.RandomAccessByteReader;
import uk.gov.nationalarchives.droid.signatureFile.FFSignatureFile;
import uk.gov.nationalarchives.droid.signatureFile.FileFormat;
import uk.gov.nationalarchives.droid.signatureFile.InternalSignature;
import uk.gov.nationalarchives.droid.xmlReader.SAXModelBuilder;

import com.gc.iotools.fmt.base.DetectionLibrary;
import com.gc.iotools.fmt.base.FormatEnum;
import com.gc.iotools.fmt.base.FormatId;
import com.gc.iotools.fmt.base.ResettableInputStream;

/**
 * Implementation of a FileDetector that relies on droid classes.
 * <p>
 * Each instance is bound to one signature file and one mapping file. Parsed
 * signature files are cached in a static map keyed by resource name, so a
 * given signature file is parsed at most once per cache entry. The mapping
 * file is a properties file whose keys are format names and whose values
 * contain the droid file format ids belonging to that format.
 * 
 * @author dvd.smnt
 */
public class DroidDetectorImpl implements DetectionLibrary {
	/**
	 * Namespace for the xml file format signatures file.
	 */
	private static final String SIGNATURE_FILE_NS = "http://www.nationalarchives.gov.uk/pronom/SignatureFile";
	/** Signature file used when none (or a blank name) is supplied. */
	private static final String SIGNATURE_FILE = "DROID_SignatureFile_V18.xml";
	/** Mapping file used by the no-argument constructor. */
	private static final String MAPPING_FILE = "mapping.properties";

	/**
	 * Cache of parsed signature files, keyed by resource name.
	 * NOTE(review): this is a plain HashMap shared by all instances and no
	 * synchronization is visible in this class - confirm that instances are
	 * not constructed concurrently.
	 */
	private static final Map<String, FFSignatureFile> CONF_MAP = new HashMap<String, FFSignatureFile>();
	private static final Logger LOG = LoggerFactory
			.getLogger(DroidDetectorImpl.class);

	/**
	 * Class used to resolve mapping keys into format constants. Checked in
	 * the constructor to be assignable to {@link FormatEnum}.
	 */
	private final Class formatEnumClass;
	/** Resource name of the signature file; key into {@link #CONF_MAP}. */
	private final String configFile;
	/** Format name to droid file format ids, loaded from the mapping file. */
	private final Properties mapping;

	/**
	 * Creates a detector using {@link FormatEnum}, the default signature
	 * file and the default mapping file.
	 */
	public DroidDetectorImpl() {
		this(FormatEnum.class, SIGNATURE_FILE, MAPPING_FILE);
	}

	/**
	 * Creates a detector for a specific signature and mapping file. Both
	 * files are looked up as resources relative to this class.
	 * 
	 * @param formatEnumClass
	 *            class used to resolve format names; <code>null</code>
	 *            selects {@link FormatEnum}.
	 * @param signatureFile
	 *            resource name of the droid signature file; a blank value
	 *            selects the default signature file.
	 * @param mappingFileStr
	 *            resource name of the properties file that maps format
	 *            names to droid file format ids.
	 * @throws IllegalArgumentException
	 *             if <code>formatEnumClass</code> is not assignable to
	 *             {@link FormatEnum}, if the signature file can not be
	 *             found, or if the mapping file is missing or unreadable.
	 * @throws IllegalStateException
	 *             if the signature file can not be parsed.
	 */
	public DroidDetectorImpl(
			final Class formatEnumClass,
			final String signatureFile, final String mappingFileStr) {
		final Class clazz = (formatEnumClass == null ? FormatEnum.class
				: formatEnumClass);
		if (!(FormatEnum.class.isAssignableFrom(clazz))) {
			throw new IllegalArgumentException(" [" + formatEnumClass
					+ "] should be an subclass of [" + FormatEnum.class + "]");
		}
		this.formatEnumClass = clazz;
		this.configFile = StringUtils.isBlank(signatureFile) ? SIGNATURE_FILE
				: signatureFile;

		if (!CONF_MAP.containsKey(this.configFile)) {
			final URL confFile = DroidDetectorImpl.class
					.getResource(this.configFile);
			// Messages report the resolved name: the raw argument may be
			// null or blank when the default signature file is in use.
			try {
				if ((confFile == null) || (confFile.openConnection() == null)) {
					throw new IllegalArgumentException("Configuration file ["
							+ this.configFile
							+ "] not found or not readable.");
				}
			} catch (final IOException e) {
				throw new IllegalArgumentException(
						"Problem reading configuration file ["
								+ this.configFile + "] url[" + confFile
								+ "]", e);
			}
			CONF_MAP.put(this.configFile, parseSigFile(confFile));
		}
		this.mapping = loadMapping(mappingFileStr);
	}

	/**
	 * {@inheritDoc}
	 * <p>
	 * The stream is reset to its beginning, then matched against the
	 * signatures of the enabled formats only. The result is the first hit
	 * whose mapped format is contained in <code>enabledFormats</code>; when
	 * there is none the format is {@link FormatEnum#UNKNOWN}.
	 */
	public FormatId detect(final FormatEnum[] enabledFormats,
			final ResettableInputStream stream) throws IOException {
		final IdentificationFile idFile = new IdentificationFile("-");
		stream.resetToBeginning();
		final ByteReader testFile = new RandomAccessByteReader(idFile, stream);
		final FFSignatureFile fsigfile = CONF_MAP.get(this.configFile);
		final FFSignatureFile reduced = reduceDetectedSequences(fsigfile,
				enabledFormats);
		reduced.runFileIdentification(testFile);
		final int n = testFile.getNumHits();
		FormatId fenumId = new FormatId(FormatEnum.UNKNOWN, null);
		final Collection<FormatEnum> enabledFormatCollection = Arrays
				.asList(enabledFormats);
		// Stop at the first hit that maps to an enabled format.
		for (int i = 0; (i < n)
				&& (FormatEnum.UNKNOWN.equals(fenumId.format)); i++) {
			final FileFormatHit ffhit = testFile.getHit(i);
			final FileFormat fileFormat = ffhit.getFileFormat();
			final FormatId tmpFid = getFormatEnum(fileFormat);
			if (FormatEnum.UNLISTED.equals(tmpFid.format)) {
				LOG.warn("Format number[" + fileFormat.getID()
						+ "] not found in configured mapping. format ["
						+ fileFormat.getName() + "] was returned as ["
						+ FormatEnum.UNLISTED + "] version["
						+ fileFormat.getName() + "]");
			}
			if (enabledFormatCollection.contains(tmpFid.format)) {
				fenumId = tmpFid;
			}
		}
		return fenumId;
	}

	/**
	 * Builds a signature file restricted to the enabled formats.
	 * 
	 * @param fsig
	 *            the complete signature file.
	 * @param enabled
	 *            formats whose signatures must be kept.
	 * @return a new signature file holding only the file formats that map
	 *         to an enabled format and have at least one internal
	 *         signature, together with the internal signatures they
	 *         reference.
	 */
	private FFSignatureFile reduceDetectedSequences(FFSignatureFile fsig,
			FormatEnum[] enabled) {
		final Collection<FileFormat> fformats = new ArrayList<FileFormat>();
		final Collection<InternalSignature> intSigs = new ArrayList<InternalSignature>();
		final Collection<FormatEnum> enabledColl = new ArrayList<FormatEnum>(
				Arrays.asList(enabled));
		// Index the internal signatures by id for the lookups below.
		final Map<Integer, InternalSignature> internalSignatureMap = new HashMap<Integer, InternalSignature>();
		for (int i = 0; i < fsig.getNumInternalSignatures(); i++) {
			final InternalSignature intSignature = fsig
					.getInternalSignature(i);
			internalSignatureMap.put(intSignature.getID(), intSignature);
		}
		final int n = fsig.getNumFileFormats();
		for (int i = 0; i < n; i++) {
			final FileFormat ff = fsig.getFileFormat(i);
			final FormatId formatEnum = getFormatEnum(ff);
			if (ff.getNumInternalSignatures() > 0
					&& enabledColl.contains(formatEnum.format)) {
				fformats.add(ff);
				for (int j = 0; j < ff.getNumInternalSignatures(); j++) {
					final int intSigId = ff.getInternalSignatureID(j);
					final InternalSignature intSig = internalSignatureMap
							.get(intSigId);
					if (intSig != null) {
						intSigs.add(intSig);
					} else {
						// Report the id that was looked up; the signature
						// itself is null in this branch.
						LOG.warn("Internal signature id[" + intSigId
								+ "] not found.");
					}
				}
			}
		}
		return new FFSignatureFile(fformats, intSigs);
	}

	/**
	 * {@inheritDoc}
	 * <p>
	 * Returns one entry per file format of the signature file whose mapped
	 * format is neither {@link FormatEnum#UNLISTED} nor
	 * {@link FormatEnum#UNKNOWN}. Duplicates are not removed.
	 */
	public FormatEnum[] getDetectedFormats() {
		final FFSignatureFile fsigfile = CONF_MAP.get(this.configFile);
		final Collection<FormatEnum> result = new ArrayList<FormatEnum>();
		for (int i = 0; i < fsigfile.getNumFileFormats(); i++) {
			final FileFormat fformat = fsigfile.getFileFormat(i);
			final FormatEnum fenum = getFormatEnum(fformat).format;
			if (!FormatEnum.UNLISTED.equals(fenum)
					&& !FormatEnum.UNKNOWN.equals(fenum)) {
				result.add(fenum);
			}
		}
		return result.toArray(new FormatEnum[result.size()]);
	}

	/**
	 * Translates a droid file format into a {@link FormatId} using the
	 * mapping file.
	 * 
	 * @param id
	 *            the droid file format to translate.
	 * @return the mapped format with the droid version, or
	 *         {@link FormatEnum#UNLISTED} carrying the droid format name
	 *         when no mapping entry matches.
	 */
	private FormatId getFormatEnum(final FileFormat id) {
		final String droidId = Integer.toString(id.getID());
		FormatEnum fenum = null;
		for (final Object key : this.mapping.keySet()) {
			final String kname = (String) key;
			final String value = this.mapping.getProperty(kname);
			// NOTE(review): this is a substring match, so id "1" also
			// matches a value such as "10,11" - confirm against the format
			// of the mapping file.
			if (value.contains(droidId)) {
				fenum = FormatEnum.getEnum(this.formatEnumClass, kname);
				break;
			}
		}
		if (fenum == null) {
			return new FormatId(FormatEnum.UNLISTED, id.getName());
		}
		return new FormatId(fenum, id.getVersion());
	}

	/**
	 * Creates a namespace aware XML reader that feeds the given model
	 * builder. Schema validation is not enabled.
	 * 
	 * @param mb
	 *            the model builder installed as content handler.
	 * @return the configured reader.
	 * @throws Exception
	 *             if the parser can not be created.
	 */
	private XMLReader getXMLReader(final SAXModelBuilder mb) throws Exception {
		final SAXParserFactory factory = SAXParserFactory.newInstance();
		factory.setNamespaceAware(true);
		final SAXParser saxParser = factory.newSAXParser();
		final XMLReader parser = saxParser.getXMLReader();
		mb.setupNamespace(SIGNATURE_FILE_NS, true);
		parser.setContentHandler(mb);
		return parser;
	}

	/**
	 * Loads the mapping properties from a resource relative to this class.
	 * 
	 * @param mappingFileStr
	 *            resource name of the mapping file.
	 * @return the loaded properties.
	 * @throws IllegalArgumentException
	 *             if the resource does not exist or can not be read.
	 */
	private static Properties loadMapping(final String mappingFileStr) {
		final InputStream mappingIs = DroidDetectorImpl.class
				.getResourceAsStream(mappingFileStr);
		if (mappingIs == null) {
			// getResourceAsStream signals a missing resource with null.
			throw new IllegalArgumentException("can't load resource["
					+ mappingFileStr + "]");
		}
		final Properties result = new Properties();
		try {
			try {
				result.load(mappingIs);
			} finally {
				mappingIs.close();
			}
		} catch (final IOException e) {
			throw new IllegalArgumentException("can't load resource["
					+ mappingFileStr + "]", e);
		}
		return result;
	}

	/**
	 * Parses a droid signature file and prepares it for identification.
	 * 
	 * @param signatureFileURL
	 *            location of the signature file.
	 * @return the parsed signature file, after <code>prepareForUse()</code>
	 *         has been called on it.
	 * @throws IllegalStateException
	 *             if the file can not be read or parsed.
	 */
	private FFSignatureFile parseSigFile(final URL signatureFileURL) {
		final SAXModelBuilder mb = new SAXModelBuilder();
		try {
			final XMLReader parser = getXMLReader(mb);
			final InputStream signatureFileStream = signatureFileURL
					.openStream();
			try {
				parser.parse(new InputSource(signatureFileStream));
			} finally {
				// Close the stream even when parsing fails.
				signatureFileStream.close();
			}
		} catch (final Exception e) {
			throw new IllegalStateException(
					"Error reading configuration file " + "["
							+ signatureFileURL + "]", e);
		}
		final FFSignatureFile fsgf = (FFSignatureFile) mb.getModel();
		fsgf.prepareForUse();
		return fsgf;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy