au.id.jericho.lib.html.FormFields Maven / Gradle / Ivy
// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 2.4 // Copyright (C) 2007 Martin Jericho // http://jerichohtml.sourceforge.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package au.id.jericho.lib.html; import java.util.*; /** * Represents a collection of {@link FormField} objects. *
file. ** This class provides the main interface for the analysis and manipulation of {@linkplain FormControl form controls}. * A
FormFields
object is a collection of {@link FormField} objects, with each form field consisting of * a group of {@linkplain FormControl form controls} having the same {@linkplain FormControl#getName() name}. ** The functionality provided by this class can be used to accomplish two main tasks: *
*
*- * Modify the submission values of the constituent form controls * for subsequent output in an {@link OutputDocument}. *
* The methods available for this purpose are:
* {@link #getValues(String) Collection getValues(String fieldName)}
* {@link #getDataSet() Map getDataSet()}
* {@link #clearValues() void clearValues()}
* {@link #setDataSet(Map) void setDataSet(Map)}
* {@link #setValue(String,CharSequence) boolean setValue(String fieldName, CharSequence value)}
* {@link #addValue(String,CharSequence) boolean addValue(String fieldName, CharSequence value)}
** Although the {@link FormField} and {@link FormControl} classes provide methods for directly modifying * the submission values of individual form fields and controls, it is generally recommended to use the interface provided by this * (the
FormFields
) class unless there is a specific requirement for the lower level functionality. ** The display characteristics of individual controls, * such as whether the control is {@linkplain FormControl#setDisabled(boolean) disabled}, replaced with a simple * {@linkplain FormControlOutputStyle#DISPLAY_VALUE value}, or {@linkplain FormControlOutputStyle#REMOVE removed} altogether, * can only be set on the individual {@link FormControl} objects. * See below for information about retrieving a specific
FormControl
object from theFormFields
object. *- * Convert data from a form data set * (represented as a field data set) into a simple array format, * suitable for storage in a tabular format such as a database table or
.CSV
file. ** The methods available for this purpose are:
* {@link #getColumnLabels() String[] getColumnLabels()}
* {@link #getColumnValues(Map) String[] getColumnValues(Map)}
* {@link #getColumnValues() String[] getColumnValues()}
** The {@link Util} class contains a method called {@link Util#outputCSVLine(Writer,String[]) outputCSVLine(Writer,String[])} * which writes the
String[]
output of these methods to the specifiedWriter
in.CSV
format. ** The implementation of these methods makes use of certain properties * in the {@link FormField} class that describe the structure of the data in each field. * These properties can be utilised directly in the event that a * form data set is to be converted * from its normal format into some other type of data structure. *
* To access a specific {@link FormControl} from a
FormFields
object, use: **
*formFields.
{@link #get(String) get(fieldName)}.
{@link FormField#getFormControl() getFormControl()} * if the control is the only one with the specified {@linkplain FormControl#getName() name}, or *formFields.
{@link #get(String) get(fieldName)}.
{@link FormField#getFormControl(String) getFormControl(predefinedValue)} * to retrieve the control having the specified {@linkplain FormControl#getPredefinedValue() predefined value} * if it is part of a {@linkplain FormField field} containing multiple controls. ** The term field data set is used in this library to refer to a data structure consisting of * a set of names (in lower case), each mapped to one or more values. * Generally, this is represented by a
java.util.Map
with the keys (names) being of typeString
and the * values represented by either an array or collection containing one or more items of typeCharSequence
. * A field data set can be used to represent the data in an HTML * form data set. **
FormFields
instances are obtained using the {@link #FormFields(Collection formControls)} constructor * or by calling the {@link Segment#findFormFields()} method. ** The case sensitivity of form field names is determined by the * {@link Config#CurrentCompatibilityMode}
.
{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property. ** Examples: *
*
* @see FormField * @see FormControl */ public final class FormFields extends AbstractCollection { private final LinkedHashMap map=new LinkedHashMap(); private final ArrayList formControls=new ArrayList(); /** * Constructs a new- * Write the data received from in the current
ServletRequest
to a.CSV
file, * and then display the form populated with this data: ** Source source=new Source(htmlTextOfOriginatingForm); * FormFields formFields=source.findFormFields(); * * File csvOutputFile=new File("FormData.csv"); * boolean outputHeadings=!csvOutputFile.exists(); * Writer writer=new FileWriter(csvOutputFile,true); * if (outputHeadings) Util.outputCSVLine(writer,formFields.getColumnLabels()); * Util.outputCSVLine(writer,formFields.getColumnValues(servletRequest.getParameterMap())); * writer.close(); * * formFields.setDataSet(servletRequest.getParameterMap()); * OutputDocument outputDocument=new OutputDocument(source); * outputDocument.replace(formFields); * outputDocument.writeTo(servletResponse.getWriter());*See also the sample program FormFieldCSVOutput.
*- Replace the initial values of controls in the form named "MyForm" with new values: *
* Source source=new Source(htmlText); * Element myForm=null; * List formElements=source.findAllElements(Tag.FORM); * for (Iterator i=formElements.iterator(); i.hasNext();) { * Element formElement=(Element)i.next(); * String formName=formElement.getAttributes().getValue("name"); * if ("MyForm".equals(formName)) { * myForm=form; * break; * } * } * FormFields formFields=myForm.findFormFields(); * formFields.clearValues(); // clear any values that might be set in the source document * formFields.addValue("Name","Humphrey Bear"); * formFields.addValue("MailingList","A"); * formFields.addValue("MailingList","B"); * formFields.addValue("FavouriteFare","honey"); * OutputDocument outputDocument=new OutputDocument(source); * outputDocument.replace(formFields); * String newHtmlText=outputDocument.toString();*See also the sample program FormFieldSetValues.
*- Change the display characteristics of individual controls: *
* Source source=new Source(htmlText); * FormFields formFields=source.findFormFields(); * // disable some controls: * formFields.get("Password").getFormControl().setDisabled(true); * FormField mailingListFormField=formFields.get("MailingList"); * mailingListFormField.setValue("C"); * mailingListFormField.getFormControl("C").setDisabled(true); * mailingListFormField.getFormControl("D").setDisabled(true); * // remove some controls: * formFields.get("button1").getFormControl().setOutputStyle(FormControlOutputStyle.REMOVE); * FormControl rhubarbFormControl=formFields.get("FavouriteFare").getFormControl("rhubarb"); * rhubarbFormControl.setOutputStyle(FormControlOutputStyle.REMOVE); * // set some controls to display value: * formFields.setValue("Address","The Lodge\nDeakin ACT 2600\nAustralia"); * formFields.get("Address").getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE); * FormField favouriteSportsFormField=formFields.get("FavouriteSports"); * favouriteSportsFormField.setValue("BB"); * favouriteSportsFormField.addValue("AFL"); * favouriteSportsFormField.getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE); * OutputDocument outputDocument=new OutputDocument(source); * outputDocument.replace(formFields); // adds all segments necessary to effect changes * String newHtmlText=outputDocument.toString();*See also the sample program FormControlDisplayCharacteristics.
*FormFields
object consisting of the specified {@linkplain FormControl form controls}. * @param formControls a collection of {@link FormControl} objects. * @see Segment#findFormFields() */ public FormFields(final Collection formControls) { // Passing "this" as a parameter inside a constructor used to cause some strange problems back in java 1.0, // but it seems to work here and there is no explicit mention in the Java language spec about any potential problems. // The alternative is an ugly static FormFields constructFrom(List formControls) method. for (final Iterator i=formControls.iterator(); i.hasNext();) { final FormControl formControl=(FormControl)i.next(); if (formControl.getName()!=null && formControl.getName().length()!=0) { formControl.addToFormFields(this); this.formControls.add(formControl); } } } /** * Returns the number ofFormField
objects. * @return the number ofFormField
objects. */ public int getCount() { return map.size(); } /** * Returns the number ofFormField
objects. ** This is equivalent to {@link #getCount()}, * and is necessary for the implementation of the
java.util.Collection
interface. * * @return the number ofFormField
objects. */ public int size() { return getCount(); } /** * Returns theFormField
with the specified {@linkplain FormField#getName() name}. ** The case sensitivity of the
fieldName
argument is determined by the * {@link Config#CurrentCompatibilityMode}.
{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property. * * @param fieldName the name of theFormField
to get. * @return theFormField
with the specified {@linkplain FormField#getName() name}, ornull
if noFormField
with the specified name exists. */ public FormField get(String fieldName) { if (Config.CurrentCompatibilityMode.isFormFieldNameCaseInsensitive()) fieldName=fieldName.toLowerCase(); return (FormField)map.get(fieldName); } /** * Returns an iterator over the {@link FormField} objects in the collection. ** The order in which the form fields are iterated corresponds to the order of appearance * of each form field's first {@link FormControl} in the source document. *
* If this
FormFields
object has been {@linkplain #merge(FormFields) merged} with another, * the ordering is no longer defined. * * @return an iterator over the {@link FormField} objects in the collection. */ public Iterator iterator() { return map.values().iterator(); } /** * Returns a collection of the field submission values of all the specified constituent {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name}. ** All objects in the returned collection are of type
CharSequence
, with nonull
entries. ** This is equivalent to {@link #get(String) get(fieldName)}
.
{@link FormField#getValues() getValues()}, * assuming that a field with the specified name exists in this collection. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @return a collection of the field submission values of all the specified constituent {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name}, ornull
if no form field with this name exists. * @see FormField#getValues() */ public Collection getValues(final String fieldName) { final FormField formField=get(fieldName); return formField==null ? null : formField.getValues(); } /** * Returns the entire field data set represented by the {@linkplain FormField#getValues() values} of the constituent form fields. ** The values in the map returned by this method are represented as a string array, giving the map a format consistent with the *
javax.servlet.ServletRequest.getParameterMap()
* method. ** Only the {@linkplain FormField#getName() names} of form fields with at least one {@linkplain FormField#getValues() value} * are included in the map, meaning every
String[]
is guaranteed to have at least one entry. * * @return the entire field data set represented by the {@linkplain FormField#getValues() values} of the constituent form fields. * @see #setDataSet(Map) */ public Map getDataSet() { final HashMap map=new HashMap((int)(getCount()/0.7)); for (final Iterator i=iterator(); i.hasNext();) { final FormField formField=(FormField)i.next(); final Collection values=formField.getValues(); if (values.isEmpty()) continue; final String[] valuesArray=new String[values.size()]; final Iterator valuesIterator=values.iterator(); for (int x=0; xsubmission values of all the constituent {@linkplain #getFormControls() form controls}. * @see FormControl#clearValues() */ public void clearValues() { for (final Iterator i=formControls.iterator(); i.hasNext();) ((FormControl)i.next()).clearValues(); } /** * Sets the submission values of all the constituent * {@linkplain FormControl form controls} to match the data in the specified field data set. * * The map keys must be
String
{@linkplain FormField#getName() field names}, with each map value either an array or *Collection
ofCharSequence
objects containing the field's new * {@linkplain FormField#setValues(Collection) values}. ** The map returned by the *
javax.servlet.ServletRequest.getParameterMap()
* method has a suitable format for use with this method. ** All existing values are {@linkplain #clearValues() cleared} before the values from the field data set are added. *
* Any map entries with a
null
value are ignored. * * @param dataSet the field data set containing the new {@linkplain FormField#setValues(Collection) values} of the constituent form fields. * @see #getDataSet() */ public void setDataSet(final Map dataSet) { clearValues(); if (map==null) return; for (final Iterator i=dataSet.entrySet().iterator(); i.hasNext();) { final Map.Entry entry=(Map.Entry)i.next(); final String fieldName=entry.getKey().toString(); final FormField formField=get(fieldName); if (formField!=null) { if (entry.getValue() instanceof Collection) formField.addValues((Collection)entry.getValue()); else formField.addValues((CharSequence[])entry.getValue()); } } } /** * Sets the field submission values of the constituent * {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name} to the single specified value. ** This is equivalent to {@link #get(String) get(fieldName)}
.
{@link FormField#setValue(CharSequence) setValue(value)}, * assuming that a field with the specified name exists in this collection. ** The return value indicates whether the specified form field "accepted" the value. * A return value of
false
implies an error condition as either no field with the specified name exists, or * the specified value is not compatible with the specified field. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @param value the new field submission value of the specified field, ornull
to {@linkplain FormField#clearValues() clear} the field of all submission values. * @returntrue
if a field of the specified name exists in this collection and it accepts the specified value, otherwisefalse
. */ public boolean setValue(final String fieldName, final CharSequence value) { final FormField formField=get(fieldName); return formField==null ? false : formField.setValue(value); } /** * Adds the specified value to the field submission values of the constituent * {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name}. ** This is equivalent to {@link #get(String) get(fieldName)}
.
{@link FormField#addValue(CharSequence) addValue(value)}, * assuming that a field with the specified name exists in this collection. ** The return value indicates whether the specified form field "accepted" the value. * A return value of
false
implies an error condition as either no field with the specified name exists, or * the specified value is not compatible with the specified field. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @param value the new field submission value to add to the specified field, must not benull
. * @returntrue
if a field of the specified name exists in this collection and it accepts the specified value, otherwisefalse
. */ public boolean addValue(final String fieldName, final CharSequence value) { final FormField formField=get(fieldName); return formField==null ? false : formField.addValue(value); } /** * Returns a string array containing the column labels corresponding to the values from the {@link #getColumnValues(Map)} method. ** Instead of using the {@linkplain FormField#getName() name} of each constituent form field to construct the labels, * the {@linkplain FormControl#getName() name} of the first {@linkplain FormControl form control} from each form field is used. * This allows the labels to be constructed using the names with the original case from the source document rather than * unsing the all lower case names of the form fields. *
* See the documentation of the {@link #getColumnValues(Map)} method for more details. * * @return a string array containing the column labels corresponding to the values from the {@link #getColumnValues(Map)} method. * @see Util#outputCSVLine(Writer,String[]) */ public String[] getColumnLabels() { initColumns(); final String[] columnLabels=new String[columns.length]; for (int i=0; i
field data set into a simple string array, * suitable for storage in a tabular format such as a database table or .CSV
file. ** The conversion is performed in a way that allows the multiple values of certain fields to be stored in separate columns, * by analysing the possible form data sets * that can be generated from the constituent {@linkplain #getFormControls() form controls}. *
* The column labels and values are determined as follows: *
*
*
*- * For each {@linkplain FormField form field} in this collection (taken in {@linkplain #iterator() iterator} order): *
*
*- * If the form field has no {@linkplain FormField#getPredefinedValues() predefined values}, * such as a single {@linkplain FormControlType#TEXT text control}, then: *
*
*- * Add a single column: *
*
* In the unlikely event that this field contains more than one value, all values are included in this one column and * separated by the text defined in the {@link Config#ColumnMultipleValueSeparator} property. *{@linkplain #getColumnLabels() Label}: the {@linkplain FormField#getName() name} of the form field in original case * Value: the single value mapped to this field in the specified field data set. * - * Otherwise, if the form field does have {@linkplain FormField#getPredefinedValues() predefined values}, * but does not {@linkplain FormField#allowsMultipleValues() allow multiple values}, then: *
*
*- * If the form field has only one {@linkplain FormField#getPredefinedValues() predefined value}, * such as a single {@linkplain FormControlType#CHECKBOX checkbox}, then: *
*
*- * Add a single boolean column: *
*
*{@linkplain #getColumnLabels() Label}: the {@linkplain FormField#getName() name} of the form field in original case * Value: the currently configured string representation for {@linkplain Config#ColumnValueTrue true} * if a value mapped to this field in the specified field data set matches the * {@linkplain FormField#getPredefinedValues() predefined value}, otherwise {@linkplain Config#ColumnValueFalse false} * - * Otherwise, if the form field has more than one {@linkplain FormField#getPredefinedValues() predefined value}, * such as a set of {@linkplain FormControlType#RADIO radio buttons}, then: *
*
*- * Add a single column: *
*
*{@linkplain #getColumnLabels() Label}: the {@linkplain FormField#getName() name} of the form field in original case * Value: the single value mapped to this field in the specified field data set, * which in the case of a set of radio buttons should be the {@linkplain FormControl#getPredefinedValue() predefined value} * of the {@linkplain FormControl#isChecked() checked} radio button. * - * Otherwise, if the form field has {@linkplain FormField#getPredefinedValues() predefined values} * and {@linkplain FormField#allowsMultipleValues() allows multiple values}, * such as a set of {@linkplain FormControlType#CHECKBOX checkboxes}, then: *
*
*- * For each {@linkplain FormField#getPredefinedValues() predefined value} in the form field: *
*
*- * Add a boolean column: *
*
*{@linkplain #getColumnLabels() Label}: " FieldName.PredefinedValue
", * whereFieldName
is the {@linkplain FormField#getName() name} of the form field in original case, * andPredefinedValue
is the {@linkplain FormField#getPredefinedValues() predefined value}. *Value: the currently configured string representation for {@linkplain Config#ColumnValueTrue true} * if a value mapped to this field in the specified field data set matches the * {@linkplain FormField#getPredefinedValues() predefined value}, otherwise {@linkplain Config#ColumnValueFalse false} * - * In addition, if the form field can also contain user values ({@link FormField#getUserValueCount()}
>0
), then: **
*- * Add another column: *
*
*{@linkplain #getColumnLabels() Label}: the {@linkplain FormField#getName() name} of the form field in original case * Value: all values mapped to this field in the specified field data set * that do not match any of the {@linkplain FormField#getPredefinedValues() predefined values}, * separated by the text defined in the {@link Config#ColumnMultipleValueSeparator} property. * * The sample program FormFieldCSVOutput demonstrates the use of this method and its output. * * @param dataSet a field data set containing the data to convert. * @return the data values in the specified field data set in the form of a simple string array. * @see Util#outputCSVLine(Writer,String[]) * @see #getColumnLabels() * @see #getColumnValues() */ public String[] getColumnValues(final Map dataSet) { initColumns(); final String[] columnValues=new String[columns.length]; if (Config.ColumnValueFalse!=null) { // initialise all boolean columns with false string for (int i=0; i
.CSV * This is equivalent to {@link #getColumnValues(Map) getColumnValues}
(
{@link #getDataSet()})
. * * @return all the {@linkplain FormField#getValues() form submission values} of the constituent form fields in the form of a simple string array. */ public String[] getColumnValues() { return getColumnValues(getDataSet()); } private void initColumns() { if (columns!=null) return; final ArrayList columnList=new ArrayList(); for (final Iterator i=iterator(); i.hasNext();) { final FormField formField=(FormField)i.next(); formField.columnIndex=columnList.size(); if (!formField.allowsMultipleValues() || formField.getPredefinedValues().isEmpty()) { columnList.add(new Column(formField,formField.getPredefinedValues().size()==1,null)); } else { // add a column for every predefined value for (final Iterator pvi=formField.getPredefinedValues().iterator(); pvi.hasNext();) columnList.add(new Column(formField,true,(String)pvi.next())); if (formField.getUserValueCount()>0) columnList.add(new Column(formField,false,null)); // add a column for user values, must come after predefined values for algorithm in getColumnValues to work } } columns=(Column[])columnList.toArray(new Column[columnList.size()]); } private Column[] columns=null; private static class Column { public FormField formField; public boolean isBoolean; public String predefinedValue; public Column(final FormField formField, final boolean isBoolean, final String predefinedValue) { this.formField=formField; this.isBoolean=isBoolean; this.predefinedValue=predefinedValue; } } /** * Returns a list of all the {@linkplain FormField#getFormControls() constituent form controls} from all the {@linkplain FormField form fields} in this collection. * @return a list of all the {@linkplain FormField#getFormControls() constituent form controls} from all the {@linkplain FormField form fields} in this collection. */ public List getFormControls() { return formControls; } /** * Merges the specifiedFormFields
into thisFormFields
collection. * This is useful if a full collection of possible form fields is required from multiple {@linkplain Source source} documents. ** If both collections contain a
FormField
with the same {@linkplain FormField#getName() name}, * the resultingFormField
has the following properties: *
-
*
- {@link FormField#getUserValueCount() getUserValueCount()} : the maximum user value count from both form fields *
- {@link FormField#allowsMultipleValues() allowsMultipleValues()} :
true
if either form field allows multiple values
* - {@link FormField#getPredefinedValues() getPredefinedValues()} : the union of predefined values in both form fields *
- {@link FormField#getFormControls() getFormControls()} : the union of {@linkplain FormControl form controls} from both form fields *
* NOTE: Some underlying data structures may end up being shared between the two merged FormFields
collections.
*/
public void merge(final FormFields formFields) {
	for (final Iterator i=formFields.iterator(); i.hasNext();) {
		final FormField formField=(FormField)i.next();
		final FormField existingFormField=get(formField.getName());
		if (existingFormField==null) {
			// No field of this name here yet: adopt the other collection's FormField object itself,
			// which is why the two collections may end up sharing data structures.
			add(formField);
		} else {
			// A field of this name already exists: fold the other field's properties into it.
			existingFormField.merge(formField);
		}
	}
	// The column layout cached by initColumns() was computed from the fields as they were before the merge,
	// and initColumns() returns early while the cache is non-null.
	// Discard it so that getColumnLabels() and getColumnValues() rebuild the layout from the merged fields.
	columns=null;
}
/**
 * Builds a debugging string by concatenating the string form of every form field
 * in this collection, taken in {@linkplain #iterator() iterator} order.
 * @return the concatenated string representations of all form fields in this collection.
 */
public String getDebugInfo() {
	final StringBuffer debugInfo=new StringBuffer();
	final Iterator fields=iterator();
	while (fields.hasNext()) debugInfo.append(fields.next());
	return debugInfo.toString();
}
/**
 * Returns the debugging representation of this collection.
 * <p>
 * The result is exactly what {@link #getDebugInfo()} returns.
 *
 * @return a string representation of this object useful for debugging purposes.
 */
public String toString() {
	final String debugInfo=getDebugInfo();
	return debugInfo;
}
// Registers the control using its own predefined value; the field name is taken from the control itself.
void add(final FormControl formControl) {
	final String predefinedValue=formControl.getPredefinedValue();
	add(formControl,predefinedValue);
}
// Registers the control with the given predefined value under the control's own name.
void add(final FormControl formControl, final String predefinedValue) {
	final String fieldName=formControl.name;
	add(formControl,predefinedValue,fieldName);
}
// Registers the control under an explicitly supplied field name, passing null as the predefined value.
void addName(final FormControl formControl, final String fieldName) {
	final String noPredefinedValue=null;
	add(formControl,noPredefinedValue,fieldName);
}
// Attaches the control (with its predefined value) to the form field of the given name,
// creating and registering that form field first if the map does not contain it yet.
void add(final FormControl formControl, final String predefinedValue, String fieldName) {
	// When field names are case insensitive the map is keyed by the lower-cased name, matching get(String).
	final String key=Config.CurrentCompatibilityMode.isFormFieldNameCaseInsensitive()
		? fieldName.toLowerCase()
		: fieldName;
	FormField target=(FormField)map.get(key);
	if (target==null) {
		target=new FormField(key);
		add(target);
	}
	target.addFormControl(formControl,predefinedValue);
}
// Passes every form control held by this collection to the output document's replace method, in list order.
void replaceInOutputDocument(final OutputDocument outputDocument) {
	final Iterator controls=formControls.iterator();
	while (controls.hasNext()) {
		final FormControl formControl=(FormControl)controls.next();
		outputDocument.replace(formControl);
	}
}
// Stores the form field in the map, keyed by the name the field itself reports.
private void add(final FormField formField) {
	final String key=formField.getName();
	map.put(key,formField);
}
}