// org.apache.tika.mime.MimeType (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.mime;
import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Internet media type.
 * <p>
 * An instance wraps a {@link MediaType} and carries optional metadata about
 * it: a description, an acronym, a Uniform Type Identifier, documentation
 * links, magic patterns, root-XML descriptors and known file extensions.
 * Equality, hashing and ordering are based solely on the wrapped media type.
 */
public final class MimeType implements Comparable<MimeType>, Serializable {

    /**
     * Serial version UID.
     */
    private static final long serialVersionUID = 4357830439860729201L;

    /**
     * Checks that the given string is a valid Internet media type name
     * based on rules from RFC 2045 section 5.1. For validation purposes the
     * rules can be simplified to the following:
     * <pre>
     * name := token "/" token
     * token := 1*&lt;any (US-ASCII) CHAR except SPACE, CTLs, or tspecials&gt;
     * tspecials := "(" / ")" / "&lt;" / "&gt;" / "@" / "," / ";" / ":" /
     *              "\" / &lt;"&gt; / "/" / "[" / "]" / "?" / "="
     * </pre>
     *
     * @param name name string
     * @return {@code true} if the string is a valid media type name,
     *         {@code false} otherwise
     * @throws IllegalArgumentException if {@code name} is {@code null}
     */
    public static boolean isValid(String name) {
        if (name == null) {
            throw new IllegalArgumentException("Name is missing");
        }

        boolean slash = false;
        for (int i = 0; i < name.length(); i++) {
            char ch = name.charAt(i);
            // ch <= ' ' rejects SPACE and control characters; ch >= 127
            // rejects DEL and everything outside US-ASCII.
            if (ch <= ' ' || ch >= 127 || ch == '(' || ch == ')' ||
                    ch == '<' || ch == '>' || ch == '@' || ch == ',' ||
                    ch == ';' || ch == ':' || ch == '\\' || ch == '"' ||
                    ch == '[' || ch == ']' || ch == '?' || ch == '=') {
                return false;
            } else if (ch == '/') {
                // Exactly one slash is allowed, and it may be neither the
                // first nor the last character (both tokens are non-empty).
                if (slash || i == 0 || i + 1 == name.length()) {
                    return false;
                }
                slash = true;
            }
        }
        // A name without any slash is not a "token/token" pair.
        return slash;
    }

    /**
     * The normalized media type name.
     */
    private final MediaType type;

    /**
     * The MimeType acronym.
     */
    private String acronym = "";

    /**
     * The Uniform Type Identifier, see
     * <a href="http://en.wikipedia.org/wiki/Uniform_Type_Identifier">
     * http://en.wikipedia.org/wiki/Uniform_Type_Identifier</a>.
     */
    private String uti = "";

    /**
     * Documentation links. Always an unmodifiable list; replaced as a whole
     * by {@link #addLink(URI)}.
     */
    private List<URI> links = Collections.emptyList();

    /**
     * Description of this media type.
     */
    private String description = "";

    /** The magics associated to this Mime-Type; {@code null} until one is added. */
    private List<Magic> magics = null;

    /** The root-XML associated to this Mime-Type; {@code null} until one is added. */
    private List<RootXML> rootXML = null;

    /** The minimum length of data to provide for magic analysis. */
    private int minLength = 0;

    /**
     * All known file extensions of this type, in order of preference
     * (best first); {@code null} until one is added.
     */
    private List<String> extensions = null;

    /**
     * Creates a media type with the given name and containing media type
     * registry. The name is expected to be valid and normalized to lower
     * case. This constructor should only be called by
     * {@link MimeTypes#forName(String)} to keep the media type registry
     * up to date.
     *
     * @param type normalized media type name
     * @throws IllegalArgumentException if {@code type} is {@code null}
     */
    MimeType(MediaType type) {
        if (type == null) {
            throw new IllegalArgumentException("Media type name is missing");
        }
        this.type = type;
    }

    /**
     * Returns the normalized media type name.
     *
     * @return media type
     */
    public MediaType getType() {
        return type;
    }

    /**
     * Returns the name of this media type.
     *
     * @return media type name, i.e. the string form of {@link #getType()}
     */
    public String getName() {
        return type.toString();
    }

    /**
     * Returns the description of this media type.
     *
     * @return media type description (empty string if none was set)
     */
    public String getDescription() {
        return description;
    }

    /**
     * Set the description of this media type.
     *
     * @param description media type description
     * @throws IllegalArgumentException if {@code description} is {@code null}
     */
    public void setDescription(String description) {
        if (description == null) {
            throw new IllegalArgumentException("Description is missing");
        }
        this.description = description;
    }

    /**
     * Returns an acronym for this mime type.
     *
     * @return mime type acronym (empty string if none was set)
     */
    public String getAcronym() {
        return acronym;
    }

    /**
     * Set an acronym for the mime type.
     *
     * @param v the acronym
     * @throws IllegalArgumentException if {@code v} is {@code null}
     */
    void setAcronym(String v) {
        if (v == null) {
            throw new IllegalArgumentException("Acronym is missing");
        }
        acronym = v;
    }

    /**
     * Get the UTI for this mime type.
     *
     * @see <a href="http://en.wikipedia.org/wiki/Uniform_Type_Identifier">
     *      Uniform Type Identifier</a>
     *
     * @return The Uniform Type Identifier (empty string if none was set)
     */
    public String getUniformTypeIdentifier() {
        return uti;
    }

    /**
     * Set the Uniform Type Identifier.
     *
     * @param v the Uniform Type Identifier
     * @throws IllegalArgumentException if {@code v} is {@code null}
     */
    void setUniformTypeIdentifier(String v) {
        if (v == null) {
            throw new IllegalArgumentException("Uniform Type Identifier is missing");
        }
        uti = v;
    }

    /**
     * Get a list of links to help document this mime type.
     *
     * @return an unmodifiable list of links (will never be null)
     */
    public List<URI> getLinks() {
        return links; // this is already unmodifiable
    }

    /**
     * Add a link to this mime type.
     *
     * @param link the link to add
     * @throws IllegalArgumentException if {@code link} is {@code null}
     */
    void addLink(URI link) {
        if (link == null) {
            throw new IllegalArgumentException("Missing Link");
        }
        // Build a fresh list instead of mutating in place, so that lists
        // previously handed out by getLinks() are never modified.
        List<URI> copy = new ArrayList<URI>(links.size() + 1);
        copy.addAll(links);
        copy.add(link);
        links = Collections.unmodifiableList(copy);
    }

    /**
     * Add some rootXML info to this mime-type.
     *
     * @param namespaceURI namespace URI of the root element
     * @param localName local name of the root element
     * @throws IllegalArgumentException if both arguments are null or empty
     */
    void addRootXML(String namespaceURI, String localName) {
        if (rootXML == null) {
            rootXML = new ArrayList<RootXML>();
        }
        rootXML.add(new RootXML(this, namespaceURI, localName));
    }

    /**
     * Checks whether any root-XML descriptor registered on this type
     * matches the given root element.
     *
     * @param namespaceURI namespace URI of the root element
     * @param localName local name of the root element
     * @return {@code true} if at least one descriptor matches
     */
    boolean matchesXML(String namespaceURI, String localName) {
        if (rootXML != null) {
            for (RootXML xml : rootXML) {
                if (xml.matches(namespaceURI, localName)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * @return {@code true} if at least one root-XML descriptor was added
     */
    boolean hasRootXML() {
        return rootXML != null;
    }

    /**
     * Returns the magics of this type.
     *
     * @return the internal magic list, or an empty list if none was added
     */
    List<Magic> getMagics() {
        if (magics != null) {
            return magics;
        } else {
            return Collections.emptyList();
        }
    }

    /**
     * Adds a magic to this type. A {@code null} argument is silently ignored.
     *
     * @param magic the magic to add, may be {@code null}
     */
    void addMagic(Magic magic) {
        if (magic == null) {
            return;
        }
        if (magics == null) {
            magics = new ArrayList<Magic>();
        }
        magics.add(magic);
    }

    /**
     * Returns the minimum length of data to provide for magic analysis.
     * Nothing in this class assigns the underlying field, so the value is
     * its initial {@code 0}.
     *
     * @return minimum data length
     */
    int getMinLength() {
        return minLength;
    }

    /**
     * @return {@code true} if at least one magic was added to this type
     */
    public boolean hasMagic() {
        return magics != null;
    }

    /**
     * Evaluates the magics of this type against the given data.
     *
     * @param data the bytes to test
     * @return {@code true} if any magic evaluates to {@code true},
     *         {@code false} if none does or no magic was added
     */
    public boolean matchesMagic(byte[] data) {
        if (magics == null) {
            return false;
        }
        for (Magic magic : magics) {
            if (magic.eval(data)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the given data matches this type. Currently this is
     * the same as {@link #matchesMagic(byte[])}.
     *
     * @param data the bytes to test
     * @return {@code true} if the data matches
     */
    public boolean matches(byte[] data) {
        return matchesMagic(data);
    }

    /**
     * Defines a RootXML description. RootXML is made of a localName and/or a
     * namespaceURI.
     */
    static class RootXML implements Serializable {

        /**
         * Serial version UID.
         */
        private static final long serialVersionUID = 5140496601491000730L;

        private final MimeType type;

        private final String namespaceURI;

        private final String localName;

        /**
         * Creates a root-XML descriptor.
         *
         * @param type the mime type this descriptor belongs to
         * @param namespaceURI namespace URI, may be null or empty
         * @param localName local name, may be null or empty
         * @throws IllegalArgumentException if both {@code namespaceURI} and
         *         {@code localName} are null or empty
         */
        RootXML(MimeType type, String namespaceURI, String localName) {
            if (isEmpty(namespaceURI) && isEmpty(localName)) {
                throw new IllegalArgumentException(
                        "Both namespaceURI and localName cannot be empty");
            }
            this.type = type;
            this.namespaceURI = namespaceURI;
            this.localName = localName;
        }

        /**
         * Checks whether the given root element matches this descriptor.
         * Both the namespace and the local name have to match; see
         * {@link #componentMatches(String, String)} for the per-component
         * rule.
         *
         * @param namespaceURI namespace URI of the root element
         * @param localName local name of the root element
         * @return {@code true} if both components match
         */
        boolean matches(String namespaceURI, String localName) {
            return componentMatches(this.namespaceURI, namespaceURI)
                    && componentMatches(this.localName, localName);
        }

        /**
         * Compares one component (namespace or local name) of the descriptor
         * with the provided value. A non-empty expected value must be equal
         * to the actual one; an empty expected value only matches an actual
         * value that is empty as well.
         */
        private static boolean componentMatches(String expected, String actual) {
            if (!isEmpty(expected)) {
                return expected.equals(actual);
            }
            return isEmpty(actual);
        }

        /**
         * Checks if a string is null or empty.
         */
        private static boolean isEmpty(String str) {
            return (str == null) || (str.equals(""));
        }

        MimeType getType() {
            return type;
        }

        String getNameSpaceURI() {
            return namespaceURI;
        }

        String getLocalName() {
            return localName;
        }

        @Override
        public String toString() {
            return type + ", " + namespaceURI + ", " + localName;
        }
    }

    //----------------------------------------------------------< Comparable >

    /**
     * Orders mime types by their underlying media type.
     *
     * @param mime the mime type to compare with
     * @return the result of comparing the two media types
     */
    public int compareTo(MimeType mime) {
        return type.compareTo(mime.type);
    }

    //--------------------------------------------------------------< Object >

    /**
     * Two mime types are equal when their media types are equal.
     */
    @Override
    public boolean equals(Object o) {
        if (o instanceof MimeType) {
            MimeType that = (MimeType) o;
            return this.type.equals(that.type);
        }
        return false;
    }

    @Override
    public int hashCode() {
        return type.hashCode();
    }

    /**
     * Returns the name of this media type.
     *
     * @return media type name
     */
    @Override
    public String toString() {
        return type.toString();
    }

    /**
     * Returns the preferred file extension of this type, or an empty string
     * if no extensions are known. Use the {@link #getExtensions()} method to
     * get the full list of known extensions of this type.
     *
     * @since Apache Tika 0.9
     * @return preferred file extension or empty string
     */
    public String getExtension() {
        if (extensions == null) {
            return "";
        } else {
            // A non-null list always holds at least one entry: it is only
            // ever created by addExtension with an initial element.
            return extensions.get(0);
        }
    }

    /**
     * Returns the list of all known file extensions of this media type.
     *
     * @since Apache Tika 0.10
     * @return unmodifiable list of known extensions in order of preference
     *         (best first), empty if none are known
     */
    public List<String> getExtensions() {
        if (extensions != null) {
            return Collections.unmodifiableList(extensions);
        } else {
            return Collections.emptyList();
        }
    }

    /**
     * Adds a known file extension to this type. An extension that is already
     * known is not added a second time.
     *
     * @param extension file extension
     */
    void addExtension(String extension) {
        if (extensions == null) {
            // The first extension is kept in an immutable singleton list.
            extensions = Collections.singletonList(extension);
        } else if (extensions.size() == 1) {
            // A one-element list may still be the immutable singleton, so
            // switch to a growable copy before a possible add below.
            extensions = new ArrayList<String>(extensions);
        }
        if (!extensions.contains(extension)) {
            extensions.add(extension);
        }
    }
}