All downloads are FREE. Search and download functionality uses the official Maven repository.

edu.harvard.hul.ois.jhove.module.html.JHPCData Maven / Gradle / Ivy

The newest version!
/**********************************************************************
 * Jhove - JSTOR/Harvard Object Validation Environment
 * Copyright 2004 by JSTOR and the President and Fellows of Harvard College
 *
 **********************************************************************/

package edu.harvard.hul.ois.jhove.module.html;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.PropertyArity;
import edu.harvard.hul.ois.jhove.PropertyType;
import edu.harvard.hul.ois.jhove.module.utf8.Utf8BlockMarker;
import edu.harvard.hul.ois.jhove.module.xml.HtmlMetadata;

/** Representation of parsed HTML PCDATA (character data appearing
 *  between tags).
 *
 *  Besides holding the text, this class knows how to contribute that
 *  text to an {@link HtmlMetadata} object, based on the tag that is
 *  on top of the element stack when the text is processed.
 * 
 * @author Gary McGath
 *
 */
public class JHPCData extends JHElement {

    /** The character data carried by this element. */
    public String _text;
    
    /**
     *  Constructor.
     * 
     *  @param   elements     The list of parsed elements, to which
     *                        this gets added.  May be null for a stub
     *                        element not generated by the parser.
     *  @param   text         The PCDATA text
     *  @param   line         Line number, for information reporting
     *  @param   column       Column number, for information reporting
     */
    public JHPCData (List elements, String text, int line, int column) {
        super (elements);
        _text = text;
        _line = line;
        _column = column;
    }
    
    /** Extracts metadata and entities from the PCData object 
     *  and its stack context.
     *
     *  The name of the tag on top of the stack decides where the text
     *  goes: <code>title</code> sets the document title,
     *  <code>cite</code> adds a citation, <code>dfn</code> adds a
     *  definition, and <code>abbr</code> adds an "Abbr" property
     *  holding the text plus the tag's <code>title</code> attribute
     *  if it has one.  For any other tag name no text metadata is
     *  recorded.
     *
     *  Independently of the tag, every entity found in the text is
     *  added to the metadata, and each numeric character reference,
     *  decimal (<code>&amp;#NNN;</code>) or hexadecimal
     *  (<code>&amp;#xHHH;</code>), is reported to the metadata's
     *  UTF-8 block marker.
     *
     *  @param   elementStack  The stack of currently open tags; its
     *                         top element supplies the context
     *  @param   metadata      The metadata object to be updated
     */
    protected void processPCData (HtmlStack elementStack, HtmlMetadata metadata)
    {
        JHOpenTag tag = elementStack.top ();
        String name = tag.getName();
        if ("title".equals (name)) {
            metadata.setTitle (_text);
        }
        else if ("cite".equals (name)) {
            metadata.addCitation (_text);
        }
        else if ("dfn".equals (name)) {
            metadata.addDef (_text);
        }
        else if ("abbr".equals (name)) {
            // At most two sub-properties: the text and the title.
            List abbrList = new ArrayList (2);
            abbrList.add( (new Property ("Text",
                    PropertyType.STRING,
                    _text)));
            // Each attribute is a String[] of {name, value}.  Only the
            // first "title" attribute is used.
            Iterator attIter = tag.getAttributes().iterator ();
            while (attIter.hasNext ()) {
                String[] attr = (String []) attIter.next ();
                String attname = attr[0];
                String attval = attr[1];
                if ("title".equals (attname)) {
                    abbrList.add (new Property ("Title",
                        PropertyType.STRING,
                        attval));
                    break;
                }
            }
            metadata.addAbbr (new Property ("Abbr",
                    PropertyType.PROPERTY,
                    PropertyArity.LIST,
                    abbrList));
        }
        // Extract the entities and add them to the metadata
        Iterator iter = getEntities (_text).iterator ();
        Utf8BlockMarker utf8BM = metadata.getUtf8BlockMarker ();
        while (iter.hasNext ()) {
            String ent = (String) iter.next ();
            metadata.addEntity (ent);
            // If it's a numerical entity, note which UTF8 block it's in.
            // The indexing below assumes the entity string still carries
            // its leading '&' and trailing ';' -- TODO confirm against
            // getEntities.
            try {
                if (ent.charAt (1) == '#') {
                    String digits = ent.substring (2, ent.length() - 1);
                    int entval;
                    if (digits.startsWith ("x") || digits.startsWith ("X")) {
                        // Hexadecimal character reference, e.g. &#x41;
                        entval = Integer.parseInt (digits.substring (1), 16);
                    }
                    else {
                        // Decimal character reference, e.g. &#65;
                        entval = Integer.parseInt (digits);
                    }
                    utf8BM.markBlock(entval);
                }
            }
            catch (Exception e) {
                // Deliberately best-effort: any exception (too short,
                // not a number, ...) means it's the wrong kind of
                // entity, so there is no block to mark.
            }
        }
    }
    
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy