All downloads are free. The search and download features use the official Maven repository.

edu.harvard.hul.ois.jhove.module.xml.XhtmlProcessing Maven / Gradle / Ivy

/**********************************************************************
 * Jhove - JSTOR/Harvard Object Validation Environment
 * Copyright 2004 by JSTOR and the President and Fellows of Harvard College
 **********************************************************************/

package edu.harvard.hul.ois.jhove.module.xml;

import edu.harvard.hul.ois.jhove.*;
import edu.harvard.hul.ois.jhove.module.html.HtmlMetadata;
import java.util.*;
import org.xml.sax.Attributes;

/**
 * Methods for processing XHTML elements in an XML document.
 * These are closely related in functionality to corresponding
 * methods in the XML module.
 *
 * All methods are static; each one inspects the SAX attributes of a
 * single element and records whatever it finds in the supplied
 * {@link HtmlMetadata}.  Attributes are matched by their local name.
 *
 * @author Gary McGath
 * @see  edu.harvard.hul.ois.jhove.module.html.HtmlDocDesc
 */
public final class XhtmlProcessing {

    /**
     * Processes one element to extract any available metadata.
     * Elements named html, meta, a, img, frame, script, abbr, title
     * and cite get element-specific handling; every other element is
     * only scanned for a <code>lang</code> attribute.
     *
     * @param localName      local name of the element; selects the handler
     * @param qualifiedName  qualified name of the element (not used here)
     * @param atts           attributes attached to the element
     * @param mdata          metadata object that receives the results
     */
    protected static void processElement (String localName,
                String qualifiedName,
                Attributes atts,
                HtmlMetadata mdata)
    {
        if ("html".equals (localName)) {
            processHtml (mdata, atts);
        }
        else if ("meta".equals (localName)) {
            processMeta (mdata, atts);
        }
        else if ("a".equals (localName)) {
            processA (mdata, atts);
        }
        else if ("img".equals (localName)) {
            processImg (mdata, atts);
        }
        else if ("frame".equals (localName)) {
            processFrame (mdata, atts);
        }
        else if ("script".equals (localName)) {
            processScript (mdata, atts);
        }
        else if ("abbr".equals (localName)) {
            processAbbr (mdata, atts);
        }
        else if ("title".equals (localName)) {
            processTitle (mdata, atts);
        }
        else if ("cite".equals (localName)) {
            processCite (mdata, atts);
        }

        /* Look for certain attributes in any tag.  This runs for the
         * elements handled above as well, so a lang attribute on the
         * html element is both set (by processHtml) and added here. */
        for (int i = 0; i < atts.getLength (); i++) {
            String attname = atts.getLocalName (i);
            String attval = atts.getValue (i);
            if ("lang".equals (attname) && attval != null) {
                mdata.addLanguage (attval);
            }
        }
    }


    /**
     * Processes metadata from an HTML tag: if the element carries a
     * <code>lang</code> attribute, its value is passed to
     * {@link HtmlMetadata#setLanguage}.  When several attributes share
     * that local name, the last one wins.
     */
    private static void processHtml (HtmlMetadata mdata, Attributes atts)
    {
        String lang = null;
        for (int i = 0; i < atts.getLength (); i++) {
            if ("lang".equals (atts.getLocalName (i))) {
                lang = atts.getValue (i);
            }
        }
        if (lang != null) {
            mdata.setLanguage (lang);
        }
    }


    /**
     * Processes metadata from a META tag.  The <code>name</code>,
     * <code>http-equiv</code> and <code>content</code> attributes that
     * are present become the "Name", "Httpequiv" and "Content"
     * sub-properties of a "Meta" property.  Nothing is recorded when
     * none of the three is present.
     */
    private static void processMeta (HtmlMetadata mdata, Attributes atts)
    {
        String name = null;
        String httpeq = null;
        String content = null;
        for (int i = 0; i < atts.getLength (); i++) {
            String attname = atts.getLocalName (i);
            String attval = atts.getValue (i);
            if ("name".equals (attname)) {
                name = attval;
            }
            else if ("http-equiv".equals (attname)) {
                httpeq = attval;
            }
            else if ("content".equals (attname)) {
                content = attval;
            }
        }
        if (name == null && httpeq == null && content == null) {
            return;
        }
        List plist = new ArrayList (3);
        if (name != null) {
            plist.add (new Property ("Name",
                    PropertyType.STRING,
                    name));
        }
        if (httpeq != null) {
            plist.add (new Property ("Httpequiv",
                    PropertyType.STRING,
                    httpeq));
        }
        if (content != null) {
            plist.add (new Property ("Content",
                    PropertyType.STRING,
                    content));
        }
        mdata.addMeta (new Property ("Meta",
                    PropertyType.PROPERTY,
                    PropertyArity.LIST,
                    plist));
    }

    /**
     * Processes metadata from an A element.  Only elements with an
     * HREF attribute are of interest here.  We ignore links
     * to anchors (values starting with '#') and empty values.
     * Only the first attribute whose local name is <code>href</code>
     * is examined.
     */
    private static void processA (HtmlMetadata mdata, Attributes atts)
    {
        for (int i = 0; i < atts.getLength (); i++) {
            if ("href".equals (atts.getLocalName (i))) {
                String link = atts.getValue (i);
                if (link.length () > 0 && link.charAt (0) != '#') {
                    mdata.addLink (link);
                }
                break;
            }
        }
    }

    /**
     * Processes metadata from the IMG element.  The <code>alt</code>,
     * <code>longdesc</code> and <code>src</code> attributes are
     * recorded as strings; <code>height</code> and <code>width</code>
     * are recorded as integers when they parse as non-negative
     * decimal numbers.  An "Image" property is added only if at least
     * one of these was captured.
     */
    private static void processImg (HtmlMetadata mdata, Attributes atts)
    {
        String alt = null;
        String longdesc = null;
        String src = null;
        Integer height = null;
        Integer width = null;
        for (int i = 0; i < atts.getLength (); i++) {
            String attname = atts.getLocalName (i);
            String attval = atts.getValue (i);
            if ("alt".equals (attname)) {
                alt = attval;
            }
            else if ("src".equals (attname)) {
                src = attval;
            }
            else if ("longdesc".equals (attname)) {
                longdesc = attval;
            }
            else if ("height".equals (attname)) {
                height = parseIntOrKeep (attval, height);
            }
            else if ("width".equals (attname)) {
                width = parseIntOrKeep (attval, width);
            }
        }
        List plist = new ArrayList (5);
        if (alt != null) {
            plist.add (new Property ("Alt",
                    PropertyType.STRING,
                    alt));
        }
        if (longdesc != null) {
            plist.add (new Property ("Longdesc",
                    PropertyType.STRING,
                    longdesc));
        }
        if (src != null) {
            plist.add (new Property ("Src",
                    PropertyType.STRING,
                    src));
        }
        // Negative values parse successfully but are not reported.
        if (height != null && height.intValue () >= 0) {
            plist.add (new Property ("Height",
                    PropertyType.INTEGER,
                    height));
        }
        if (width != null && width.intValue () >= 0) {
            plist.add (new Property ("Width",
                    PropertyType.INTEGER,
                    width));
        }
        if (!plist.isEmpty ()) {
            mdata.addImage (new Property ("Image",
                    PropertyType.PROPERTY,
                    PropertyArity.LIST,
                    plist));
        }
    }

    /**
     * Parses a decimal integer attribute value.
     *
     * @param text      the attribute text to parse
     * @param previous  value to keep if <code>text</code> is not a
     *                  valid integer; may be <code>null</code>
     * @return the parsed value, or <code>previous</code> on failure
     */
    private static Integer parseIntOrKeep (String text, Integer previous)
    {
        try {
            return Integer.valueOf (text);
        }
        catch (NumberFormatException ignored) {
            // Deliberately best-effort: a dimension that is not a plain
            // integer (a percentage, for instance) is left unreported
            // rather than treated as an error.
            return previous;
        }
    }


    /**
     * Processes metadata from the FRAME element.  The
     * <code>name</code>, <code>title</code>, <code>longdesc</code> and
     * <code>src</code> attributes that are present become
     * sub-properties of a "Frame" property; nothing is recorded when
     * none is present.
     */
    private static void processFrame (HtmlMetadata mdata, Attributes atts)
    {
        String name = null;
        String title = null;
        String src = null;
        String longdesc = null;
        for (int i = 0; i < atts.getLength (); i++) {
            String attname = atts.getLocalName (i);
            String attval = atts.getValue (i);
            if ("name".equals (attname)) {
                name = attval;
            }
            else if ("title".equals (attname)) {
                title = attval;
            }
            else if ("src".equals (attname)) {
                src = attval;
            }
            else if ("longdesc".equals (attname)) {
                longdesc = attval;
            }
        }
        List plist = new ArrayList (4);
        if (name != null) {
            plist.add (new Property ("Name",
                    PropertyType.STRING,
                    name));
        }
        if (title != null) {
            plist.add (new Property ("Title",
                    PropertyType.STRING,
                    title));
        }
        if (longdesc != null) {
            plist.add (new Property ("Longdesc",
                    PropertyType.STRING,
                    longdesc));
        }
        if (src != null) {
            plist.add (new Property ("Src",
                    PropertyType.STRING,
                    src));
        }
        if (!plist.isEmpty ()) {
            mdata.addFrame (new Property ("Frame",
                    PropertyType.PROPERTY,
                    PropertyArity.LIST,
                    plist));
        }
    }

    /**
     * Processes metadata from the SCRIPT element: every non-empty
     * <code>type</code> attribute value is passed to
     * {@link HtmlMetadata#addScript}.
     */
    private static void processScript (HtmlMetadata mdata, Attributes atts)
    {
        for (int i = 0; i < atts.getLength (); i++) {
            if ("type".equals (atts.getLocalName (i))) {
                String stype = atts.getValue (i);
                if (stype.length () > 0) {
                    mdata.addScript (stype);
                }
            }
        }
    }

    /**
     * Processes metadata from the ABBR element.
     * This will require the PCdata as well as the attributes,
     * so we deposit the Property temporarily in the Metadata.
     * The property is deposited even when the element has no
     * usable <code>title</code> attribute, in which case its
     * list is empty.
     */
    private static void processAbbr (HtmlMetadata mdata, Attributes atts)
    {
        List lst = new ArrayList (2);
        Property p = new Property ("abbr",
            PropertyType.PROPERTY,
            PropertyArity.LIST,
            lst);
        for (int i = 0; i < atts.getLength (); i++) {
            if ("title".equals (atts.getLocalName (i))) {
                String attval = atts.getValue (i);
                if (attval.length () > 0) {
                    lst.add (new Property ("title",
                            PropertyType.STRING,
                            attval));
                }
                // Note: The PCData should be stuck at the
                // front of the list when we get it, as
                // the "abbr" property.
            }
        }
        mdata.setPropUnderConstruction (p);
    }

    /**
     * Processes metadata from the TITLE element.
     * This will require PCData, so we deposit the Property
     * temporarily in the Metadata.  The attributes are not examined.
     */
    private static void processTitle (HtmlMetadata mdata, Attributes atts)
    {
        // The empty string is only a placeholder value.
        Property p = new Property ("title",
            PropertyType.STRING,
            "");
        mdata.setPropUnderConstruction (p);
    }

    /**
     * Processes metadata from the CITE element.
     * This will require PCData, so we deposit the Property
     * temporarily in the Metadata.  The attributes are not examined.
     */
    private static void processCite (HtmlMetadata mdata, Attributes atts)
    {
        // The empty string is only a placeholder value.
        Property p = new Property ("cite",
            PropertyType.STRING,
            "");
        mdata.setPropUnderConstruction (p);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy