
// com.gc.iotools.fmt.detect.droid.DroidDetectorImpl (Maven / Gradle / Ivy)
package com.gc.iotools.fmt.detect.droid;
/*
* Copyright (c) 2008, 2009 Davide Simonetti.
* This source code is released under the BSD Software License.
*/
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import uk.gov.nationalarchives.droid.base.FileFormatHit;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;
import uk.gov.nationalarchives.droid.binFileReader.IdentificationFile;
import uk.gov.nationalarchives.droid.binFileReader.RandomAccessByteReader;
import uk.gov.nationalarchives.droid.signatureFile.FFSignatureFile;
import uk.gov.nationalarchives.droid.signatureFile.FileFormat;
import uk.gov.nationalarchives.droid.signatureFile.InternalSignature;
import uk.gov.nationalarchives.droid.xmlReader.SAXModelBuilder;
import com.gc.iotools.fmt.base.DetectionLibrary;
import com.gc.iotools.fmt.base.FormatEnum;
import com.gc.iotools.fmt.base.FormatId;
import com.gc.iotools.fmt.base.ResettableInputStream;
/**
* Implementation of a FileDetector that relies on droid classes.
*
* @author dvd.smnt
*
*/
public class DroidDetectorImpl implements DetectionLibrary {
/**
* Namespace for the xml file format signatures file.
*/
public static final String SIGNATURE_FILE_NS = "http://www.nationalarchives.gov.uk/pronom/SignatureFile";
private static final String SIGNATURE_FILE = "DROID_SignatureFile_V13.xml";
private static final String MAPPING_FILE = "mapping.properties";
private final Class> formatEnumClass;
private static final Map CONF_MAP = new HashMap();
private final String configFile;
private final Properties mapping;
private static final Logger LOG = LoggerFactory
.getLogger(DroidDetectorImpl.class);
public DroidDetectorImpl() {
this(FormatEnum.class, SIGNATURE_FILE, MAPPING_FILE);
}
public DroidDetectorImpl(final Class> formatEnumClass,
final String signatureFile, final String mappingFileStr) {
final Class> clazz = (formatEnumClass == null ? FormatEnum.class
: formatEnumClass);
if (!(FormatEnum.class.isAssignableFrom(clazz))) {
throw new IllegalArgumentException(" [" + formatEnumClass
+ "] should be an subclass of [" + FormatEnum.class + "]");
}
this.formatEnumClass = clazz;
this.configFile = StringUtils.isBlank(signatureFile) ? SIGNATURE_FILE
: signatureFile;
if (!CONF_MAP.containsKey(this.configFile)) {
final URL confFile = DroidDetectorImpl.class
.getResource(this.configFile);
try {
if ((confFile == null) || (confFile.openConnection() == null)) {
throw new IllegalArgumentException("Configuration file ["
+ signatureFile + "] not found or not readable.");
}
} catch (final IOException e) {
throw new IllegalArgumentException(
"Problem reading configuration file ["
+ signatureFile + "] url[" + confFile + "]",
e);
}
final FFSignatureFile fsigfile = parseSigFile(confFile);
CONF_MAP.put(this.configFile, fsigfile);
}
this.mapping = new Properties();
try {
this.mapping.load(DroidDetectorImpl.class
.getResourceAsStream(mappingFileStr));
} catch (final IOException e) {
throw new IllegalArgumentException("can't load resource["
+ mappingFileStr + "]", e);
}
}
/**
* {@inheritDoc}
*/
public FormatId detect(final FormatEnum[] enabledFormats,
final ResettableInputStream stream) throws IOException {
final IdentificationFile idFile = new IdentificationFile("-");
final ByteReader testFile = new RandomAccessByteReader(idFile, stream);
final FFSignatureFile fsigfile = CONF_MAP.get(this.configFile);
FFSignatureFile reduced = reduceDetectedSequences(fsigfile,
enabledFormats);
reduced.runFileIdentification(testFile);
final int n = testFile.getNumHits();
FormatId fenumId = new FormatId(FormatEnum.UNKNOWN, null);
final Collection enabledFormatCollection = Arrays
.asList(enabledFormats);
for (int i = 0; (i < n)
&& (FormatEnum.UNKNOWN.equals(fenumId.format)); i++) {
final FileFormatHit ffhit = testFile.getHit(i);
final uk.gov.nationalarchives.droid.signatureFile.FileFormat fileFormat = ffhit
.getFileFormat();
final FormatId tmpFid = getFormatEnum(fileFormat);
if (FormatEnum.UNLISTED.equals(tmpFid.format)) {
LOG.warn("Format number[" + fileFormat.getID()
+ "] not found in configured mapping. format ["
+ fileFormat.getName() + "] was returned as ["
+ FormatEnum.UNLISTED + "] version["
+ fileFormat.getName() + "]");
}
if (enabledFormatCollection.contains(tmpFid.format)) {
fenumId = tmpFid;
}
}
return fenumId;
}
private FFSignatureFile reduceDetectedSequences(FFSignatureFile fsig,
FormatEnum[] enabled) {
Collection fformats = new ArrayList();
Collection intSigs = new ArrayList();
Collection enabledColl = new ArrayList(Arrays
.asList(enabled));
Map internalSignatureMap = new HashMap();
for (int i = 0; i < fsig.getNumInternalSignatures(); i++) {
InternalSignature intSignature = fsig.getInternalSignature(i);
internalSignatureMap.put(intSignature.getID(), intSignature);
}
int n = fsig.getNumFileFormats();
for (int i = 0; i < n; i++) {
FileFormat ff = fsig.getFileFormat(i);
final FormatId formatEnum = getFormatEnum(ff);
if (ff.getNumInternalSignatures() > 0
&& enabledColl.contains(formatEnum.format)) {
fformats.add(ff);
for (int j = 0; j < ff.getNumInternalSignatures(); j++) {
int intSigId = ff.getInternalSignatureID(j);
InternalSignature intSig = internalSignatureMap
.get(intSigId);
if (intSig != null) {
intSigs.add(intSig);
} else {
LOG.warn("Internal signature id[" + intSig
+ "] not found.");
}
}
}
}
return new FFSignatureFile(fformats, intSigs);
}
/**
* {@inheritDoc}
*/
public FormatEnum[] getDetectedFormats() {
final FFSignatureFile fsigfile = CONF_MAP.get(this.configFile);
final Collection result = new ArrayList();
for (int i = 0; i < fsigfile.getNumFileFormats(); i++) {
final FileFormat fformat = fsigfile.getFileFormat(i);
final FormatId fid = getFormatEnum(fformat);
final FormatEnum fenum = fid.format;
if (!FormatEnum.UNLISTED.equals(fenum)
&& !FormatEnum.UNKNOWN.equals(fenum)) {
result.add(fenum);
}
}
return result.toArray(new FormatEnum[result.size()]);
}
private FormatId getFormatEnum(final FileFormat id) {
FormatEnum fenum = null;
for (final Object key : this.mapping.keySet()) {
final String value = this.mapping.getProperty((String) key);
if (value.contains(Integer.toString(id.getID()))) {
final String kname = (String) key;
fenum = FormatEnum.getEnum(this.formatEnumClass, kname);
break;
}
}
FormatId result;
if (fenum == null) {
result = new FormatId(FormatEnum.UNLISTED, id.getName());
} else {
result = new FormatId(fenum, id.getVersion());
}
return result;
}
private XMLReader getXMLReader(final SAXModelBuilder mb) throws Exception {
final SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
// factory.setValidating(true);
final SAXParser saxParser = factory.newSAXParser();
final XMLReader parser = saxParser.getXMLReader();
// URL url = DroidDetectorImpl.class
// .getResource("DROID_SignatureFile.xsd");
// parser.setProperty(
// "http://java.sun.com/xml/jaxp/properties/schemaSource", url);
mb.setupNamespace(SIGNATURE_FILE_NS, true);
parser.setContentHandler(mb);
return parser;
}
// private void addFiles() {
// // Process each file in array of filenames
// for (int fileNum = 0; fileNum < this.myFiles.length
// && !this.myAnalysisController.isAnalysisCancelled(); fileNum++) {
// this.logger.recordPath(this.myFiles[fileNum]);
// if (UrlByteReader.isURL(this.myFiles[fileNum])) {
// // File is a URL
// processFile(new IdentificationFile(this.myFiles[fileNum]));
// } else if (InputStreamByteReader
// .isInputStream(this.myFiles[fileNum])) {
// // File is an input stream
// processFile(new IdentificationFile(this.myFiles[fileNum]));
// } else {
// // File is local
// addFile(this.myFiles[fileNum]);
// }
// }
// }
private FFSignatureFile parseSigFile(final URL signatureFileURL) {
final SAXModelBuilder mb = new SAXModelBuilder();
try {
final XMLReader parser = getXMLReader(mb);
final InputStream signatureFileStream = signatureFileURL
.openStream();
parser.parse(new InputSource(signatureFileStream));
} catch (final Exception e) {
throw new IllegalStateException(
"Error reading configuration file " + "["
+ signatureFileURL + "]", e);
}
final FFSignatureFile fsgf = (FFSignatureFile) mb.getModel();
fsgf.prepareForUse();
return fsgf;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy