All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.medsea.mimeutil.detector.OpendesktopMimeDetector Maven / Gradle / Ivy

/*
 * Copyright 2007-2009 Medsea Business Solutions S.L.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * 

* The Opendesktop shared mime database contains glob rules and magic number * lookup information to enable applications to detect the mime types of * files. *

*

* This class uses the mime.cache file which is one of the files created by the * update-mime-database application. This file is a memory mapped file that enables * the database to be updated and copied without interrupting applications. *

*

* This implementation follows the memory mapped spec so it is not required * to restart an application using this mime detector should the underlying * mime.cache database change. *

*

* For a complete description of the information contained in this file please see: * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html *

*

* This class also follows, where possible, the RECOMENDED order of detection as detailed in this spec. * Thanks go to Mathias Clasen at Red Hat for pointing me to the original xdgmime implementation * http://svn.gnome.org/viewvc/glib/trunk/gio/xdgmime/xdgmimecache.c?revision=7784&view=markup *

* @author Steven McArdle */ package eu.medsea.mimeutil.detector; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import eu.medsea.mimeutil.MimeException; import eu.medsea.mimeutil.MimeType; import eu.medsea.mimeutil.MimeUtil; public class OpendesktopMimeDetector extends MimeDetector { private static Log log = LogFactory.getLog(OpendesktopMimeDetector.class); private static String mimeCacheFile = "/usr/share/mime/mime.cache"; private static String internalMimeCacheFile = "src/main/resources/mime.cache"; private ByteBuffer content; static { // This will cause this MimeDetector to be automatically // registerd when you create a new instance in your code // or by uncommenting the entry in the MimeDetectors file MimeUtil.addMimeDetector(new OpendesktopMimeDetector()); } public OpendesktopMimeDetector(final String mimeCacheFile) { init(mimeCacheFile); } public OpendesktopMimeDetector() { init(mimeCacheFile); } private void init(final String mimeCacheFile) { String cacheFile = mimeCacheFile; if(!new File(cacheFile).exists()) { cacheFile = internalMimeCacheFile; } // Map the mime.cache file as a memory mapped file FileChannel rCh = null; try { RandomAccessFile raf = null; raf = new RandomAccessFile(cacheFile,"r"); rCh = (raf).getChannel(); content = rCh.map(FileChannel.MapMode.READ_ONLY, 0, rCh.size()); }catch(Exception e) { throw new MimeException(e); }finally { if(rCh != null) { try { rCh.close(); }catch(Exception e) { log.error(e, e); } } } } public String getDescription() { return "Resolve mime types for files and streams using the Opendesktop shared mime.cache file. Version [" + getMajorVersion() + "." + getMinorVersion() + "]."; } /** * This method resolves mime types closely in accordance with the RECOMENDED order of detection * detailed in the Opendesktop shared mime database specification * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html * See the Recommended checking order. */ public Collection getMimeTypesFile(File file) throws UnsupportedOperationException { Collection mimeTypes = new ArrayList(); // Lookup the globbing methods first lookupMimeTypesForGlobFileName(file, mimeTypes); if(!mimeTypes.isEmpty()) { mimeTypes = normalizeWeightedMimeList((List)mimeTypes); } // We only want to do this if it's a real file and either no globs matched // or we have multiple matching globs if(file.exists() && !file.isDirectory() && (mimeTypes.isEmpty() || mimeTypes.size() > 1)) { try { // Now lookup using the magic methods // The URL method will also use the java URLConnection getContentType() if there are no matches // TODO: This uses a DEPRICTED API call and will be removed for version 2.0 Collection _mimeTypes = getMimeTypes(file.toURL().openConnection()); if(!_mimeTypes.isEmpty()) { // Check for same mime type for(Iterator it = mimeTypes.iterator(); it.hasNext();) { String mimeType = (String)it.next(); if(_mimeTypes.contains(mimeType)) { mimeTypes = new ArrayList(); mimeTypes.add(mimeType); return mimeTypes; } // Check for mime type subtype for(Iterator _it = _mimeTypes.iterator(); _it.hasNext();) { String _mimeType = (String)_it.next(); if(isMimeTypeSubclass(mimeType, _mimeType)) { mimeTypes = new ArrayList(); mimeTypes.add(mimeType); return mimeTypes; } } } } }catch(Exception e) { throw new MimeException(e); } } return mimeTypes; } /** * This method is unable to perform glob matching as no name is available. * This means that it does not follow the recommended order of detection * defined in the shared mime database spec * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html */ public Collection getMimeTypesInputStream(InputStream in) throws UnsupportedOperationException { Collection mimeTypes = new ArrayList(); lookupMimeTypesForMagicData(in, mimeTypes); return mimeTypes; } /** * This method is unable to perform glob matching as no name is available. * This means that it does not follow the recommended order of detection * defined in the shared mime database spec * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html */ public Collection getMimeTypesByteArray(byte[] data) throws UnsupportedOperationException { List mimeTypes = new ArrayList(); lookupMagicData(data, mimeTypes); return mimeTypes; } public String dump() { return "{MAJOR_VERSION=" + getMajorVersion() + " MINOR_VERSION=" + getMinorVersion() + " ALIAS_LIST_OFFSET=" + getAliasListOffset() + " PARENT_LIST_OFFSET=" + getParentListOffset() + " LITERAL_LIST_OFFSET=" + getLiteralListOffset() + " REVERSE_SUFFIX_TREE_OFFSET=" + getReverseSuffixTreeOffset() + " GLOB_LIST_OFFSET=" + getGlobListOffset() + " MAGIC_LIST_OFFSET=" + getMagicListOffset() + " NAMESPACE_LIST_OFFSET=" + getNameSpaceListOffset() + " ICONS_LIST_OFFSET=" + getIconListOffset() + " GENERIC_ICONS_LIST_OFFSET=" + getGenericIconListOffset() + "}"; } private void lookupMimeTypesForMagicData(InputStream in, Collection mimeTypes) { int offset = 0; int len = getMaxExtents(); byte [] data = new byte [len]; try { // Since an InputStream might return only some data (not all // requested), we have to read in a loop until // either EOF is reached or the desired number of bytes have been // read. int restBytesToRead = len; while (restBytesToRead > 0) { int bytesRead = in.read(data, offset, restBytesToRead); if (bytesRead < 0) break; // EOF offset += bytesRead; restBytesToRead -= bytesRead; } } catch(IOException ioe) { throw new MimeException(ioe); } lookupMagicData(data, mimeTypes); } private void lookupMagicData(byte [] data, Collection mimeTypes) { int listOffset = getMagicListOffset(); int numEntries = content.getInt(listOffset); int offset = content.getInt(listOffset + 8); for(int i = 0; i < numEntries; i++) { String mimeType = compareToMagicData(offset + (16 * i), data); if(mimeType != null) { mimeTypes.add(mimeType); } else { String nonMatch = getMimeType(content.getInt(offset + (16 * i) + 4)); mimeTypes.remove(nonMatch); } } } private String compareToMagicData(int offset, byte [] data) { int priority = content.getInt(offset); int mimeOffset = content.getInt(offset + 4); int numMatches = content.getInt(offset + 8); int matchletOffset = content.getInt(offset + 12); for(int i = 0; i < numMatches; i++) { if(matchletMagicCompare(matchletOffset + (i * 32), data)) { return getMimeType(mimeOffset); } } return null; } private boolean matchletMagicCompare(int offset, byte [] data) { int rangeStart = content.getInt(offset); int rangeLength = content.getInt(offset + 4); int dataLength = content.getInt(offset + 12); int dataOffset = content.getInt(offset + 16); int maskOffset = content.getInt(offset + 20); for(int i = rangeStart; i <= rangeStart + rangeLength; i++) { boolean validMatch = true; if(i + dataLength > data.length) { return false; } if(maskOffset != 0) { for(int j = 0; j < dataLength; j++) { if((content.get(dataOffset + j) & content.get(maskOffset + j)) != (data[j+i] & content.get(maskOffset + j))) { validMatch = false; break; } } } else { for(int j = 0; j < dataLength; j++) { if(content.get(dataOffset + j) != data[j + i]) { validMatch = false; break; } } } if(validMatch) { return true; } } return false; } private void lookupGlobLiteral(String fileName, Collection mimeTypes) { int listOffset = getLiteralListOffset(); int numEntries = content.getInt(listOffset); int min = 0; int max = numEntries -1; while(max >=min) { int mid = (min + max) / 2; String literal = getString(content.getInt((listOffset + 4) + (12 * mid))); int cmp = literal.compareTo(fileName); if(cmp < 0) { min = mid + 1; }else if(cmp > 0) { max = mid - 1; } else { String mimeType = getMimeType(content.getInt((listOffset + 4) + (12 * mid) + 4)); int weight = content.getInt((listOffset + 4) + (12 * mid) + 8); mimeTypes.add(new MimeWeight(mimeType, literal, weight)); return; } } } private void lookupGlobFileNameMatch(String fileName, Collection mimeTypes) { int listOffset = getGlobListOffset(); int numEntries = content.getInt(listOffset); for(int i = 0; i < numEntries; i++) { int offset = content.getInt((listOffset + 4) + (12 * i)); int mimeTypeOffset = content.getInt((listOffset + 4) + (12 * i) + 4); int weight = content.getInt((listOffset + 4) + (12 * i) + 8); String pattern = getString(offset, true); String mimeType = getMimeType(mimeTypeOffset); if(fileName.matches(pattern)) { mimeTypes.add(new MimeWeight(mimeType, pattern, weight)); } } } private Collection normalizeWeightedMimeList(Collection weightedMimeTypes) { Collection mimeTypes = new LinkedHashSet(); // Sort the weightedMimeTypes Collections.sort((List)weightedMimeTypes, new Comparator() { public int compare(Object obj1, Object obj2) { return ((MimeWeight)obj1).weight - ((MimeWeight)obj2).weight; } }); // Keep only globs with the biggest weight. They are in weight order at this point int weight = 0; int patternLen = 0; for(Iterator it = weightedMimeTypes.iterator(); it.hasNext();) { MimeWeight mw = (MimeWeight)it.next(); if(weight < mw.weight){ weight = mw.weight; } if(weight >= mw.weight) { if(mw.pattern.length() > patternLen) { patternLen = mw.pattern.length(); } mimeTypes.add(mw.mimeType); } } // Now keep only the longest patterns for(Iterator it = weightedMimeTypes.iterator(); it.hasNext();) { MimeWeight mw = (MimeWeight)it.next(); if(mw.pattern.length() < patternLen) { mimeTypes.remove(mw.mimeType); } } // Could possibly have multiple mimeTypes here with the same weight and // pattern length. Can even multiple entries for the same type so lets remove // any duplicates by copying entries to a HashSet that can only have a singlr instance // of each type Collection _mimeTypes = new HashSet(); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { _mimeTypes.add(it.next()); } return _mimeTypes; } private void lookupMimeTypesForGlobFileName(File fileName, Collection mimeTypes) { if(fileName == null) { return; } lookupGlobLiteral(fileName.getName(), mimeTypes); if(!mimeTypes.isEmpty()) { return; } int len = fileName.getName().length(); lookupGlobSuffix(fileName.getName(), false, len, mimeTypes); if(mimeTypes.isEmpty()) { lookupGlobSuffix(fileName.getName(), true, len, mimeTypes); } if(mimeTypes.isEmpty()) { lookupGlobFileNameMatch(fileName.getName(), mimeTypes); } } private void lookupGlobSuffix(String fileName, boolean ignoreCase, int len, Collection mimeTypes) { int listOffset = getReverseSuffixTreeOffset(); int numEntries = content.getInt(listOffset); int offset = content.getInt(listOffset + 4); lookupGlobNodeSuffix(fileName, numEntries, offset, ignoreCase, len, mimeTypes, new StringBuffer()); } private void lookupGlobNodeSuffix(String fileName, int numEntries, int offset, boolean ignoreCase, int len, Collection mimeTypes, StringBuffer pattern) { char character = ignoreCase ? fileName.toLowerCase().charAt(len - 1) : fileName.charAt(len - 1); if(character == 0) { return; } int min = 0; int max = numEntries - 1; while(max >= min && len >= 0) { int mid = (min + max) / 2; char matchChar = (char)content.getInt(offset + (12 * mid)); if(matchChar < character) { min = mid + 1; } else if(matchChar > character) { max = mid - 1; } else { len--; int numChildren = content.getInt(offset + (12 * mid) + 4); int childOffset = content.getInt(offset + (12 * mid) + 8); if(len > 0) { pattern.append(matchChar); lookupGlobNodeSuffix(fileName, numChildren, childOffset, ignoreCase, len, mimeTypes, pattern); } if(mimeTypes.isEmpty()) { for(int i = 0; i < numChildren; i++) { matchChar = (char)content.getInt(childOffset + (12 * i)); if(matchChar != 0) { break; } int mimeOffset = content.getInt(childOffset + (12 * i) + 4); int weight = content.getInt(childOffset + (12 * i) + 8); mimeTypes.add(new MimeWeight(getMimeType(mimeOffset), pattern.toString(), weight)); } } return; } } } class MimeWeight { String mimeType; String pattern; int weight; MimeWeight(String mimeType, String pattern, int weight) { this.mimeType = mimeType; this.pattern = pattern; this.weight = weight; } } private int getMaxExtents() { return content.getInt(getMagicListOffset() + 4); } private String aliasLookup(String alias) { int aliasListOffset = getAliasListOffset(); int min = 0; int max = content.getInt(aliasListOffset) - 1; while(max >= min) { int mid = (min + max) / 2; content.position((aliasListOffset + 4) + (mid * 8)); int aliasOffset = content.getInt(); int mimeOffset = content.getInt(); int cmp = getMimeType(aliasOffset).compareTo(alias); if(cmp < 0) { min = mid + 1; } else if(cmp > 0) { max = mid - 1; } else { return getMimeType(mimeOffset); } } return null; } private String unaliasMimeType(String mimeType) { String lookup = aliasLookup(mimeType); return lookup == null ? mimeType : lookup; } private boolean isMimeTypeSubclass(String mimeType, String subClass) { String umimeType = unaliasMimeType(mimeType); String usubClass = unaliasMimeType(subClass); MimeType _mimeType = new MimeType(umimeType); MimeType _subClass = new MimeType(usubClass); if(umimeType.compareTo(usubClass) == 0) { return true; } if(isSuperType(usubClass) && (_mimeType.getMediaType().equals(_subClass.getMediaType()))) { return true; } // Handle special cases text/plain and application/octet-stream if(usubClass.equals("text/plain") && _mimeType.getMediaType().equals("text")) { return true; } if(usubClass.equals("application/octet-stream")) { return true; } int parentListOffset = getParentListOffset(); int numParents = content.getInt(parentListOffset); int min = 0; int max = numParents - 1; while(max >= min) { int med = (min + max) / 2; int offset = content.getInt((parentListOffset + 4) + (8 * med)); String parentMime = getMimeType(offset); int cmp = parentMime.compareTo(umimeType); if(cmp < 0) { min = med + 1; } else if(cmp > 0) { max = med - 1; } else { offset = content.getInt((parentListOffset + 4) + (8 * med) + 4); int _numParents = content.getInt(offset); for(int i = 0 ; i < _numParents; i++) { int parentOffset = content.getInt((offset + 4) + (4 * i)); if(isMimeTypeSubclass(getMimeType(parentOffset), usubClass)) { return true; } } break; } } return false; } private boolean isSuperType(String mimeType) { String type = mimeType.substring(mimeType.length() -2); if (type.equals("/*")) { return true; } return false; } private int getGenericIconListOffset() { return content.getInt(36); } private int getIconListOffset() { return content.getInt(32); } private int getNameSpaceListOffset() { return content.getInt(28); } private int getMagicListOffset() { return content.getInt(24); } private int getGlobListOffset(){ return content.getInt(20); } private int getReverseSuffixTreeOffset(){ return content.getInt(16); } private int getLiteralListOffset(){ return content.getInt(12); } private int getParentListOffset() { return content.getInt(8); } private int getAliasListOffset() { return content.getInt(4); } private short getMinorVersion() { return content.getShort(2); } private short getMajorVersion() { return content.getShort(0); } private String getMimeType(int offset) { String mimeType = getString(offset); MimeUtil.addKnownMimeType(new MimeType(mimeType)); return mimeType; } private String getString(int offset) { return getString(offset, false); } private String getString(int offset, boolean regularExpression) { int position = content.position(); content.position(offset); StringBuffer buf = new StringBuffer(); char c = 0; while((c = (char)content.get()) != 0) { if(regularExpression){ switch(c) { case '.': buf.append("\\"); break; case '*': case '+': case '?': buf.append("."); } } buf.append(c); } // Reset position content.position(position + 4); if(regularExpression) { buf.insert(0, '^'); buf.append('$'); } return buf.toString(); } /** * CAUSION: This is a TEST ONLY method and will not run on anybody else's environment * without serious modifications to paths and file names * @param args * @throws Exception */ public static void main(String [] args) throws Exception { OpendesktopMimeDetector mimeDetector = new OpendesktopMimeDetector(); log.debug(mimeDetector.dump()); if(mimeDetector.isMimeTypeSubclass("video/x-matroska", "application/x-matroska")) { log.debug("video/x-matroska is a base type of application/x-matroska"); } else { log.debug("video/x-matroska is NOT a base type of application/x-matroska"); } if(mimeDetector.isMimeTypeSubclass("application/x-matroska", "video/x-matroska")) { log.debug("application/x-matroska is a base type of video/x-matroska"); } else { log.debug("application/x-matroska is NOT a base type of video/x-matroska"); } String fileName = "src/main/java/eu/medsea/mimeutil/detector/OpendesktopMimeDetector.java"; Collection mimeTypes = mimeDetector.getMimeTypesFile(new File(fileName)); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } fileName = "target/classes/eu/medsea/mimeutil/detector/OpendesktopMimeDetector.class"; mimeTypes = mimeDetector.getMimeTypesInputStream(new FileInputStream(fileName)); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } MimeUtil.addMimeDetector(mimeDetector); fileName = "/projects/mimeutil/src/test/resources/e-svg.img"; mimeTypes = MimeUtil.getMimeTypes(new File(fileName)); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } fileName = "/projects/mimeutil/src/test/resources/f.tar.gz"; mimeTypes = MimeUtil.getMimeTypes(new File(fileName)); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } byte [] data = new byte[2048]; FileInputStream fis = new FileInputStream(fileName); fis.read(data, 0, 1024); mimeTypes = MimeUtil.getMimeTypes(data); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } fileName = "/projects/mimeutil/src/test/resources/e.svg"; mimeTypes = MimeUtil.getMimeTypes(new File(fileName)); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } data = new byte[1024]; fis = new FileInputStream(fileName); fis.read(data, 0, 1024); mimeTypes = MimeUtil.getMimeTypes(data); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } // Deregister the default mime detectors and only use the current one MimeUtil.removeMimeDetector(MimeUtil.getMimeDetector("eu.medsea.mimeutil.detector.MagicMimeMimeDetector")); MimeUtil.removeMimeDetector(MimeUtil.getMimeDetector("eu.medsea.mimeutil.detector.ExtensionMimeDetector")); mimeTypes = MimeUtil.getMimeTypes(new File(fileName)); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } data = new byte[1024]; fis = new FileInputStream(fileName); fis.read(data, 0, 1024); mimeTypes = MimeUtil.getMimeTypes(data); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } mimeTypes = MimeUtil.getMimeTypes(new File(fileName).toURL().openConnection()); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } mimeTypes = MimeUtil.getMimeTypes(new BufferedInputStream(new FileInputStream(fileName))); for(Iterator it = mimeTypes.iterator(); it.hasNext();) { System.out.println(fileName + "=" + it.next() ); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy