All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.tika.parser.mp3.Mp3Parser Maven / Gradle / Ivy

There is a newer version: 3.0.0-BETA2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.mp3;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * The Mp3Parser extracts tag information from an MP3 file, if available.
 * It looks for ID3v2 tag blocks (major versions 2, 3 and 4) and the first
 * audio frame at the start of the stream, and for lyrics and an ID3v1 tag
 * at the end of the stream. Whatever is found is written both to the
 * {@link Metadata} object and to the XHTML output.
 *
 * @see "MP3 ID3 Version 1 specification"
 * @see "MP3 ID3 Version 2.4 Structure Specification"
 * @see "MP3 ID3 Version 2.4 Frames Specification"
 */
public class Mp3Parser implements Parser {

    /** The single media type handled by this parser: audio/mpeg. */
    private static final Set<MediaType> SUPPORTED_TYPES =
        Collections.singleton(MediaType.audio("mpeg"));

    /**
     * Returns the media types this parser supports.
     *
     * @param context the parse context (not consulted here)
     * @return a singleton set containing audio/mpeg
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses the given MP3 stream, recording tag and audio information in
     * the metadata and emitting a simple XHTML rendering of the tags.
     *
     * @param stream the MP3 data to read
     * @param handler receives the XHTML SAX events
     * @param metadata receives the content type, tag values and audio details
     * @param context the parse context (not consulted here)
     * @throws IOException if the stream cannot be read
     * @throws SAXException if the handler fails to process an event
     * @throws TikaException if the tag data cannot be parsed
     */
    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // Create handlers for the various kinds of ID3 tags
        ID3TagsAndAudio audioAndTags = getAllTagHandlers(stream, handler);

        if (audioAndTags.tags.length > 0) {
            // The composite is given all the tag sets, in order of preference
            CompositeTagHandler tag = new CompositeTagHandler(audioAndTags.tags);

            metadata.set(Metadata.TITLE, tag.getTitle());
            metadata.set(Metadata.AUTHOR, tag.getArtist());
            metadata.set(XMPDM.ARTIST, tag.getArtist());
            metadata.set(XMPDM.COMPOSER, tag.getComposer());
            metadata.set(XMPDM.ALBUM, tag.getAlbum());
            metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
            metadata.set(XMPDM.GENRE, tag.getGenre());
            metadata.set(XMPDM.LOG_COMMENT, tag.getComment());

            xhtml.element("h1", tag.getTitle());
            xhtml.element("p", tag.getArtist());

            // ID3v1.1 Track addition
            if (tag.getTrackNumber() != null) {
                xhtml.element("p", tag.getAlbum() + ", track " + tag.getTrackNumber());
                metadata.set(XMPDM.TRACK_NUMBER, tag.getTrackNumber());
            } else {
                xhtml.element("p", tag.getAlbum());
            }
            xhtml.element("p", tag.getYear());
            xhtml.element("p", tag.getComment());
            xhtml.element("p", tag.getGenre());
        }
        if (audioAndTags.audio != null) {
            // The sample rate is recorded twice: under the plain
            // "samplerate" key and under the XMPDM property.
            metadata.set("samplerate", String.valueOf(audioAndTags.audio.getSampleRate()));
            metadata.set("channels", String.valueOf(audioAndTags.audio.getChannels()));
            metadata.set("version", audioAndTags.audio.getVersion());
            metadata.set(
                    XMPDM.AUDIO_SAMPLE_RATE,
                    Integer.toString(audioAndTags.audio.getSampleRate()));
        }
        if (audioAndTags.lyrics != null && audioAndTags.lyrics.hasLyrics()) {
            xhtml.element("p", audioAndTags.lyrics.lyricsText);
        }

        xhtml.endDocument();
    }

    /**
     * Parses the given MP3 stream using a fresh, empty {@link ParseContext}.
     *
     * @param stream the MP3 data to read
     * @param handler receives the XHTML SAX events
     * @param metadata receives the content type, tag values and audio details
     * @throws IOException if the stream cannot be read
     * @throws SAXException if the handler fails to process an event
     * @throws TikaException if the tag data cannot be parsed
     * @deprecated This method will be removed in Apache Tika 1.0. Use
     *     {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}
     *     instead.
     */
    @Deprecated
    public void parse(
            InputStream stream, ContentHandler handler, Metadata metadata)
            throws IOException, SAXException, TikaException {
        parse(stream, handler, metadata, new ParseContext());
    }

    /**
     * Scans the MP3 frames for ID3 tags, and creates ID3Tag Handlers
     * for each supported set of tags.
     *
     * @param stream the MP3 data to read
     * @param handler passed on to the {@link LyricsHandler}
     * @return the tag handlers that reported tags (newest version first),
     *     the first audio frame found (or <code>null</code>), and the
     *     lyrics handler
     * @throws IOException if the stream cannot be read
     * @throws SAXException if the handler fails to process an event
     * @throws TikaException if the tag data cannot be parsed
     */
    protected static ID3TagsAndAudio getAllTagHandlers(InputStream stream, ContentHandler handler)
            throws IOException, SAXException, TikaException {
        ID3v24Handler v24 = null;
        ID3v23Handler v23 = null;
        ID3v22Handler v22 = null;
        AudioFrame firstAudio = null;

        // ID3v2 tags live at the start of the file
        // You can apparently have several different ID3 tag blocks
        // So, keep going until we don't find any more.
        // firstAudio is tested BEFORE reading the next frame, so that once
        // the first audio frame is found no further frame is read from the
        // stream only to be thrown away.
        MP3Frame f;
        while (firstAudio == null
                && (f = ID3v2Frame.createFrameIfPresent(stream)) != null) {
            if (f instanceof ID3v2Frame) {
                ID3v2Frame id3F = (ID3v2Frame) f;
                // A later block of the same major version replaces the
                // handler of an earlier one; other versions are ignored.
                if (id3F.getMajorVersion() == 4) {
                    v24 = new ID3v24Handler(id3F);
                } else if (id3F.getMajorVersion() == 3) {
                    v23 = new ID3v23Handler(id3F);
                } else if (id3F.getMajorVersion() == 2) {
                    v22 = new ID3v22Handler(id3F);
                }
            } else if (f instanceof AudioFrame) {
                firstAudio = (AudioFrame) f;
            }
        }

        // ID3v1 tags live at the end of the file
        // Lyrics live just before ID3v1, at the end of the file
        // Search for both (handlers seek to the end for us)
        LyricsHandler lyrics = new LyricsHandler(stream, handler);
        ID3v1Handler v1 = lyrics.id3v1;

        // Go in order of preference
        // Currently, that's newest to oldest
        List<ID3Tags> tags = new ArrayList<ID3Tags>();

        if (v24 != null && v24.getTagsPresent()) {
            tags.add(v24);
        }
        if (v23 != null && v23.getTagsPresent()) {
            tags.add(v23);
        }
        if (v22 != null && v22.getTagsPresent()) {
            tags.add(v22);
        }
        if (v1 != null && v1.getTagsPresent()) {
            tags.add(v1);
        }

        ID3TagsAndAudio ret = new ID3TagsAndAudio();
        ret.audio = firstAudio;
        ret.lyrics = lyrics;
        ret.tags = tags.toArray(new ID3Tags[tags.size()]);
        return ret;
    }

    /**
     * Simple holder for everything {@link #getAllTagHandlers} finds in
     * an MP3 stream.
     */
    protected static class ID3TagsAndAudio {
        /** Tag handlers that reported tags, in order of preference. */
        private ID3Tags[] tags;
        /** The first audio frame found, or null if there was none. */
        private AudioFrame audio;
        /** The lyrics handler built from the end of the stream. */
        private LyricsHandler lyrics;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy