edu.harvard.hul.ois.jhove.module.html.JHPCData Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of html-hul Show documentation
Show all versions of html-hul Show documentation
HTML module developed by Harvard University Library
The newest version!
/**********************************************************************
* Jhove - JSTOR/Harvard Object Validation Environment
* Copyright 2004 by JSTOR and the President and Fellows of Harvard College
*
**********************************************************************/
package edu.harvard.hul.ois.jhove.module.html;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.PropertyArity;
import edu.harvard.hul.ois.jhove.PropertyType;
import edu.harvard.hul.ois.jhove.module.utf8.Utf8BlockMarker;
import edu.harvard.hul.ois.jhove.module.xml.HtmlMetadata;
/**
 * Representation of parsed HTML PCDATA: a run of character data
 * together with the position at which it was found.
 *
 * @author Gary McGath
 *
 */
public class JHPCData extends JHElement {

    /** The character data carried by this element. */
    public String _text;

    /**
     *  Constructor.
     *
     *  @param   elements   The list of parsed elements, to which
     *                      this gets added.  May be null for a stub
     *                      element not generated by the parser.
     *  @param   text       The character data (PCDATA) content
     *  @param   line       Line number, for information reporting
     *  @param   column     Column number, for information reporting
     */
    public JHPCData (List elements, String text, int line, int column) {
        super (elements);
        _text = text;
        // _line and _column are not declared in this class; they are
        // inherited position fields.
        _line = line;
        _column = column;
    }

    /**
     *  Extracts metadata and entities from the PCData object
     *  and its stack context.
     *
     *  The name of the tag on top of the element stack decides what the
     *  text contributes: the title for <code>title</code>, a citation
     *  for <code>cite</code>, a definition for <code>dfn</code>, and an
     *  "Abbr" property (text plus optional title attribute) for
     *  <code>abbr</code>.  Independently of the enclosing tag, every
     *  entity found in the text is recorded in the metadata, and numeric
     *  character references are additionally reported to the UTF-8
     *  block marker.
     *
     *  @param   elementStack   The stack of currently open elements; its
     *                          top entry is dereferenced without a check,
     *                          so it is assumed to be non-empty
     *                          (NOTE(review): confirm against callers)
     *  @param   metadata       The metadata object that receives the
     *                          extracted information
     */
    protected void processPCData (HtmlStack elementStack, HtmlMetadata metadata)
    {
        JHOpenTag tag = elementStack.top ();
        String name = tag.getName ();
        if ("title".equals (name)) {
            metadata.setTitle (_text);
        }
        else if ("cite".equals (name)) {
            metadata.addCitation (_text);
        }
        else if ("dfn".equals (name)) {
            metadata.addDef (_text);
        }
        else if ("abbr".equals (name)) {
            // At most two sub-properties: the text and the title attribute.
            List abbrList = new ArrayList (2);
            abbrList.add (new Property ("Text",
                    PropertyType.STRING,
                    _text));
            Iterator attrIter = tag.getAttributes ().iterator ();
            while (attrIter.hasNext ()) {
                // Each attribute is a two-element array: name, then value.
                String[] attr = (String []) attrIter.next ();
                if ("title".equals (attr[0])) {
                    abbrList.add (new Property ("Title",
                            PropertyType.STRING,
                            attr[1]));
                    // Only the first title attribute is reported.
                    break;
                }
            }
            metadata.addAbbr (new Property ("Abbr",
                    PropertyType.PROPERTY,
                    PropertyArity.LIST,
                    abbrList));
        }

        // Extract the entities and add them to the metadata
        Iterator iter = getEntities (_text).iterator ();
        Utf8BlockMarker utf8BM = metadata.getUtf8BlockMarker ();
        while (iter.hasNext ()) {
            String ent = (String) iter.next ();
            metadata.addEntity (ent);
            // If it's a numerical entity, note which UTF8 block it's in
            markNumericEntity (ent, utf8BM);
        }
    }

    /**
     *  Reports the code point of a numeric character reference to the
     *  block marker.  Both the decimal form (<code>&amp;#65;</code>) and
     *  the hexadecimal form (<code>&amp;#x41;</code>) are understood;
     *  anything else, including named entities and malformed numbers,
     *  is silently ignored.
     *
     *  The entity is expected to include its leading '&amp;' and its
     *  one-character terminator, which is how the index arithmetic
     *  below treats it.
     *
     *  @param   ent      The entity text; may be null
     *  @param   utf8BM   The block marker to update; may be null, in
     *                    which case nothing is done
     */
    private static void markNumericEntity (String ent, Utf8BlockMarker utf8BM)
    {
        // "&#N;" (four characters) is the shortest possible numeric reference.
        if (ent == null || utf8BM == null
                || ent.length () < 4 || ent.charAt (1) != '#') {
            return;
        }
        // Drop the "&#" prefix and the trailing terminator.
        String digits = ent.substring (2, ent.length () - 1);
        int radix = 10;
        char lead = digits.charAt (0);
        if (lead == 'x' || lead == 'X') {
            // Hexadecimal character reference.
            digits = digits.substring (1);
            radix = 16;
        }
        int codePoint;
        try {
            codePoint = Integer.parseInt (digits, radix);
        }
        catch (NumberFormatException e) {
            // Not a well-formed number, so not a numeric reference.
            return;
        }
        utf8BM.markBlock (codePoint);
    }
}