// src.java.net.htmlparser.jericho.FormFields (Maven / Gradle / Ivy)
// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.0 // Copyright (C) 2007 Martin Jericho // http://jerichohtml.sourceforge.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Represents a collection of {@link FormField} objects. *
file. ** This class provides the main interface for the analysis and manipulation of {@linkplain FormControl form controls}. * A
FormFields
object is a collection of {@link FormField} objects, with each form field consisting of * a group of {@linkplain FormControl form controls} having the same {@linkplain FormControl#getName() name}. ** The functionality provided by this class can be used to accomplish two main tasks: *
*
*- * Modify the submission values of the constituent form controls * for subsequent output in an {@link OutputDocument}. *
* The methods available for this purpose are:
* {@link #getValues(String) List<String> getValues(String fieldName)}
* {@link #getDataSet() Map<String,String[]> getDataSet()}
* {@link #clearValues() void clearValues()}
* {@link #setDataSet(Map) void setDataSet(Map<String,String[]>)}
* {@link #setValue(String,String) boolean setValue(String fieldName, String value)}
* {@link #addValue(String,String) boolean addValue(String fieldName, String value)}
** Although the {@link FormField} and {@link FormControl} classes provide methods for directly modifying * the submission values of individual form fields and controls, it is generally recommended to use the interface provided by this * (the
FormFields
) class unless there is a specific requirement for the lower level functionality. ** The display characteristics of individual controls, * such as whether the control is {@linkplain FormControl#setDisabled(boolean) disabled}, replaced with a simple * {@linkplain FormControlOutputStyle#DISPLAY_VALUE value}, or {@linkplain FormControlOutputStyle#REMOVE removed} altogether, * can only be set on the individual {@link FormControl} objects. * See below for information about retrieving a specific
FormControl
object from theFormFields
object. *- * Convert data from a form data set * (represented as a field data set) into a simple array format, * suitable for storage in a tabular format such as a database table or
.CSV
file. ** The methods available for this purpose are:
* {@link #getColumnLabels() String[] getColumnLabels()}
* {@link #getColumnValues(Map) String[] getColumnValues(Map)}
* {@link #getColumnValues() String[] getColumnValues()}
** The {@link Util} class contains a method called {@link Util#outputCSVLine(Writer,String[]) outputCSVLine(Writer,String[])} * which writes the
String[]
output of these methods to the specifiedWriter
in.CSV
format. ** The implementation of these methods makes use of certain properties * in the {@link FormField} class that describe the structure of the data in each field. * These properties can be utilised directly in the event that a * form data set is to be converted * from its normal format into some other type of data structure. *
* To access a specific {@link FormControl} from a
FormFields
object, use: **
*formFields.
{@link #get(String) get(fieldName)}.
{@link FormField#getFormControl() getFormControl()} * if the control is the only one with the specified {@linkplain FormControl#getName() name}, or *formFields.
{@link #get(String) get(fieldName)}.
{@link FormField#getFormControl(String) getFormControl(predefinedValue)} * to retrieve the control having the speficied {@linkplain FormControl#getPredefinedValue() predefined value} * if it is part of a {@linkplain FormField field} containing multiple controls. ** The term field data set is used in this library to refer to a data structure consisting of * a set of names (in lower case), each mapped to one or more values. * Generally, this is represented by a data type of
java.util.Map<String,String[]>
, * with the keys (names) being of typeString
and the values represented by an array containing one or more items of typeString
. * A field data set can be used to represent the data in an HTML * form data set. **
FormFields
instances are obtained using the {@link #FormFields(Collection formControls)} constructor * or by calling the {@link Segment#getFormFields()} method. ** The case sensitivity of form field names is determined by the static * {@link Config#CurrentCompatibilityMode}
.
{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property. ** Examples: *
*
* @see FormField * @see FormControl */ public final class FormFields extends AbstractCollection- * Write the data received from in the current
ServletRequest
to a.CSV
file, * and then display the form populated with this data: ** Source source=new Source(htmlTextOfOriginatingForm); * FormFields formFields=source.getFormFields(); * * File csvOutputFile=new File("FormData.csv"); * boolean outputHeadings=!csvOutputFile.exists(); * Writer writer=new FileWriter(csvOutputFile,true); * if (outputHeadings) Util.outputCSVLine(writer,formFields.getColumnLabels()); * Util.outputCSVLine(writer,formFields.getColumnValues(servletRequest.getParameterMap())); * writer.close(); * * formFields.setDataSet(servletRequest.getParameterMap()); * OutputDocument outputDocument=new OutputDocument(source); * outputDocument.replace(formFields); * outputDocument.writeTo(servletResponse.getWriter());*See also the sample program FormFieldCSVOutput.
*- Replace the initial values of controls in the form named "MyForm" with new values: *
* Source source=new Source(htmlText); * Element myForm=null; * List formElements=source.getAllElements(Tag.FORM); * for (Iterator i=formElements.iterator(); i.hasNext();) { * Element formElement=(Element)i.next(); * String formName=formElement.getAttributes().getValue("name"); * if ("MyForm".equals(formName)) { * myForm=form; * break; * } * } * FormFields formFields=myForm.getFormFields(); * formFields.clearValues(); // clear any values that might be set in the source document * formFields.addValue("Name","Humphrey Bear"); * formFields.addValue("MailingList","A"); * formFields.addValue("MailingList","B"); * formFields.addValue("FavouriteFare","honey"); * OutputDocument outputDocument=new OutputDocument(source); * outputDocument.replace(formFields); * String newHtmlText=outputDocument.toString();*See also the sample program FormFieldSetValues.
*- Change the display characteristics of individual controls: *
* Source source=new Source(htmlText); * FormFields formFields=source.getFormFields(); * // disable some controls: * formFields.get("Password").getFormControl().setDisabled(true); * FormField mailingListFormField=formFields.get("MailingList"); * mailingListFormField.setValue("C"); * mailingListFormField.getFormControl("C").setDisabled(true); * mailingListFormField.getFormControl("D").setDisabled(true); * // remove some controls: * formFields.get("button1").getFormControl().setOutputStyle(FormControlOutputStyle.REMOVE); * FormControl rhubarbFormControl=formFields.get("FavouriteFare").getFormControl("rhubarb"); * rhubarbFormControl.setOutputStyle(FormControlOutputStyle.REMOVE); * // set some controls to display value: * formFields.setValue("Address","The Lodge\nDeakin ACT 2600\nAustralia"); * formFields.get("Address").getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE); * FormField favouriteSportsFormField=formFields.get("FavouriteSports"); * favouriteSportsFormField.setValue("BB"); * favouriteSportsFormField.addValue("AFL"); * favouriteSportsFormField.getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE); * OutputDocument outputDocument=new OutputDocument(source); * outputDocument.replace(formFields); // adds all segments necessary to effect changes * String newHtmlText=outputDocument.toString();*See also the sample program FormControlDisplayCharacteristics.
*{ private final LinkedHashMap map=new LinkedHashMap (); private final ArrayList formControls=new ArrayList (); /** * Constructs a new FormFields
object consisting of the specified {@linkplain FormControl form controls}. * @param formControls a collection of {@link FormControl} objects. * @see Segment#getFormFields() */ public FormFields(final CollectionformControls) { // Passing "this" as a parameter inside a constructor used to cause some strange problems back in java 1.0, // but it seems to work here and there is no explicit mention in the Java language spec about any potential problems. // The alternative is an ugly static FormFields constructFrom(List formControls) method. for (FormControl formControl : formControls) { if (formControl.getName()!=null && formControl.getName().length()!=0) { formControl.addToFormFields(this); this.formControls.add(formControl); } } } /** * Returns the number of FormField
objects. * @return the number ofFormField
objects. */ public int getCount() { return map.size(); } /** * Returns the number ofFormField
objects. ** This is equivalent to {@link #getCount()}, * and is necessary to for the implementation of the
java.util.Collection
interface. * * @return the number ofFormField
objects. */ public int size() { return getCount(); } /** * Returns theFormField
with the specified {@linkplain FormField#getName() name}. ** The case sensitivity of the
fieldName
argument is determined by the static * {@link Config#CurrentCompatibilityMode}.
{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property. * * @param fieldName the name of theFormField
to get. * @return theFormField
with the specified {@linkplain FormField#getName() name}, ornull
if noFormField
with the specified name exists. */ public FormField get(String fieldName) { if (Config.CurrentCompatibilityMode.isFormFieldNameCaseInsensitive()) fieldName=fieldName.toLowerCase(); return map.get(fieldName); } /** * Returns an iterator over the {@link FormField} objects in the collection. ** The order in which the form fields are iterated corresponds to the order of appearance * of each form field's first {@link FormControl} in the source document. *
* If this
FormFields
object has been {@linkplain #merge(FormFields) merged} with another, * the ordering is no longer defined. * * @return an iterator over the {@link FormField} objects in the collection. */ public Iteratoriterator() { return map.values().iterator(); } /** * Returns a list of the field submission values of all the specified constituent {@linkplain FormField form fields} with the specified {@linkplain FormField#getName() name}. * * All objects in the returned list are of type
String
, with nonull
entries. ** This is equivalent to {@link #get(String) get(fieldName)}
.
{@link FormField#getValues() getValues()}, * assuming that a field with the specified name exists in this collection. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @return a list of the field submission values of all the specified constituent {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name}, ornull
if no form field with this name exists. * @see FormField#getValues() */ public ListgetValues(final String fieldName) { final FormField formField=get(fieldName); return formField==null ? null : formField.getValues(); } /** * Returns the entire field data set represented by the {@linkplain FormField#getValues() values} of the constituent form fields. * * The values in the map returned by this method are represented as a string array, giving the map a format consistent with the *
javax.servlet.ServletRequest.getParameterMap()
* method. ** Only the {@linkplain FormField#getName() names} of form fields with at least one {@linkplain FormField#getValues() value} * are included in the map, meaning every
String[]
is guaranteed to have at least one entry. ** Iterating over the map keys returns them in the order of appearance in the source document. * * @return the entire field data set represented by the {@linkplain FormField#getValues() values} of the constituent form fields. * @see #setDataSet(Map) */ public Map
getDataSet() { final LinkedHashMap map=new LinkedHashMap ((int)(getCount()/0.7)); for (FormField formField : this) { final List values=formField.getValues(); if (values.isEmpty()) continue; map.put(formField.getName(),values.toArray(new String[values.size()])); } return map; } /** * Clears the submission values of all the constituent {@linkplain #getFormControls() form controls}. * @see FormControl#clearValues() */ public void clearValues() { for (FormControl formControl : formControls) formControl.clearValues(); } /** * Sets the submission values of all the constituent * {@linkplain FormControl form controls} to match the data in the specified field data set. * * The map keys must be
String
{@linkplain FormField#getName() field names}, * with each map value an array ofString
objects containing the field's new {@linkplain FormField#setValues(Collection) values}. ** The map returned by the *
javax.servlet.ServletRequest.getParameterMap()
* method has a suitable format for use with this method. ** All existing values are {@linkplain #clearValues() cleared} before the values from the field data set are added. *
* Any map entries with a
null
value are ignored. * * @param dataSet the field data set containing the new {@linkplain FormField#setValues(Collection) values} of the constituent form fields. * @see #getDataSet() */ public void setDataSet(final MapdataSet) { clearValues(); if (map==null) return; for (Map.Entry entry : dataSet.entrySet()) { final String fieldName=entry.getKey(); final FormField formField=get(fieldName); if (formField!=null) formField.addValues(entry.getValue()); } } /** * Sets the field submission values of the constituent * {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name} to the single specified value. * * This is equivalent to {@link #get(String) get(fieldName)}
.
{@link FormField#setValue(String) setValue(value)}, * assuming that a field with the specified name exists in this collection. ** The return value indicates whether the specified form field "accepted" the value. * A return value of
false
implies an error condition as either no field with the specified name exists, or * the specified value is not compatible with the specified field. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @param value the new field submission value of the specified field, ornull
to {@linkplain FormField#clearValues() clear} the field of all submission values. * @returntrue
if a field of the specified name exists in this collection and it accepts the specified value, otherwisefalse
. */ public boolean setValue(final String fieldName, final String value) { final FormField formField=get(fieldName); return formField==null ? false : formField.setValue(value); } /** * Adds the specified value to the field submission values of the constituent * {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name}. ** This is equivalent to {@link #get(String) get(fieldName)}
.
{@link FormField#addValue(String) addValue(value)}, * assuming that a field with the specified name exists in this collection. ** The return value indicates whether the specified form field "accepted" the value. * A return value of
false
implies an error condition as either no field with the specified name exists, or * the specified value is not compatible with the specified field. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @param value the new field submission value to add to the specified field, must not benull
. * @returntrue
if a field of the specified name exists in this collection and it accepts the specified value, otherwisefalse
. */ public boolean addValue(final String fieldName, final String value) { final FormField formField=get(fieldName); return formField==null ? false : formField.addValue(value); } /** * Returns a string array containing the column labels corresponding to the values from the {@link #getColumnValues(Map)} method. ** Instead of using the {@linkplain FormField#getName() name} of each constituent form field to construct the labels, * the {@linkplain FormControl#getName() name} of the first {@linkplain FormControl form control} from each form field is used. * This allows the labels to be constructed using the names with the original case from the source document rather than * unsing the all lower case names of the form fields. *
* See the documentation of the {@link #getColumnValues(Map)} method for more details. * * @return a string array containing the column labels corresponding to the values from the {@link #getColumnValues(Map)} method. * @see Util#outputCSVLine(Writer,String[]) */ public String[] getColumnLabels() { initColumns(); final String[] columnLabels=new String[columns.length]; for (int i=0 ; i
field data set into a simple string array, * suitable for storage in a tabular format such as a database table or .CSV
file. ** The conversion is performed in a way that allows the multiple values of certain fields to be stored in separate columns, * by analysing the possible form data sets * that can be generated from the constituent {@linkplain #getFormControls() form controls}. *
* The column labels and values are determined as follows: *
*
*
*- * For each {@linkplain FormField form field} in this collection (taken in {@linkplain #iterator() iterator} order): *
*
*- * If the form field has no {@linkplain FormField#getPredefinedValues() predefined values}, * such as a single {@linkplain FormControlType#TEXT text control}, then: *
*
*- * Add a single column: *
*
* In the unlikely event that this field contains more than one value, all values are included in this one column and * separated by the text defined in the static {@link Config#ColumnMultipleValueSeparator} property. *{@linkplain #getColumnLabels() Label}: the {@linkplain FormField#getName() name} of the form field in original case * Value: the single value mapped to this field in the specified field data set. * - * Otherwise, if the form field does have {@linkplain FormField#getPredefinedValues() predefined values}, * but does not {@linkplain FormField#allowsMultipleValues() allow multiple values}, then: *
*
*- * If the form field has only one {@linkplain FormField#getPredefinedValues() predefined value}, * such as a single {@linkplain FormControlType#CHECKBOX checkbox}, then: *
*
*- * Add a single boolean column: *
*
*{@linkplain #getColumnLabels() Label}: the {@linkplain FormField#getName() name} of the form field in original case * Value: the currently configured string representation for {@linkplain Config#ColumnValueTrue true} * if a value mapped to this field in the specified field data set matches the * {@linkplain FormField#getPredefinedValues() predefined value}, otherwise {@linkplain Config#ColumnValueFalse false} * - * Otherwise, if the form field has more than one {@linkplain FormField#getPredefinedValues() predefined value}, * such as a set of {@linkplain FormControlType#RADIO radio buttons}, then: *
*
*- * Add a single column: *
*
*{@linkplain #getColumnLabels() Label}: the {@linkplain FormField#getName() name} of the form field in original case * Value: the single value mapped to this field in the specified field data set, * which in the case of a set of radio buttons should be the {@linkplain FormControl#getPredefinedValue() predefined value} * of the {@linkplain FormControl#isChecked() checked} radio button. * - * Otherwise, if the form field has {@linkplain FormField#getPredefinedValues() predefined values} * and {@linkplain FormField#allowsMultipleValues() allows multiple values}, * such as a set of {@linkplain FormControlType#CHECKBOX checkboxes}, then: *
*
*- * For each {@linkplain FormField#getPredefinedValues() predefined value} in the form field: *
*
*- * Add a boolean column: *
*
*{@linkplain #getColumnLabels() Label}: " FieldName.PredefinedValue
", * whereFieldName
is the {@linkplain FormField#getName() name} of the form field in original case, * andPredefinedValue
is the {@linkplain FormField#getPredefinedValues() predefined value}. *Value: the currently configured string representation for {@linkplain Config#ColumnValueTrue true} * if a value mapped to this field in the specified field data set matches the * {@linkplain FormField#getPredefinedValues() predefined value}, otherwise {@linkplain Config#ColumnValueFalse false} * - * In addition, if the form field can also contain user values ({@link FormField#getUserValueCount()}
>0
), then: **
*- * Add another column: *
*
*{@linkplain #getColumnLabels() Label}: the {@linkplain FormField#getName() name} of the form field in original case * Value: all values mapped to this field in the specified field data set * that do not match any of the {@linkplain FormField#getPredefinedValues() predefined values}, * separated by the text defined in the static {@link Config#ColumnMultipleValueSeparator} property. * * The sample program FormFieldCSVOutput demonstrates the use of this method and its output. * * @param dataSet a field data set containing the data to convert. * @return the data values in the specified field data set in the form of a simple string array. * @see Util#outputCSVLine(Writer,String[]) * @see #getColumnLabels() * @see #getColumnValues() */ public String[] getColumnValues(final Map
dataSet) { initColumns(); final String[] columnValues=new String[columns.length]; if (Config.ColumnValueFalse!=null) { // initialise all boolean columns with false string for (int i=0; i entry : dataSet.entrySet()) { final String fieldName=entry.getKey(); final FormField formField=get(fieldName); if (formField!=null) { final int columnIndex=formField.columnIndex; for (String value : entry.getValue()) { for (int ci=columnIndex; ci .CSV * This is equivalent to {@link #getColumnValues(Map) getColumnValues}
(
{@link #getDataSet()})
. * * @return all the {@linkplain FormField#getValues() form submission values} of the constituent form fields in the form of a simple string array. */ public String[] getColumnValues() { return getColumnValues(getDataSet()); } private void initColumns() { if (columns!=null) return; final ArrayListcolumnList=new ArrayList (); for (FormField formField : this) { formField.columnIndex=columnList.size(); if (!formField.allowsMultipleValues() || formField.getPredefinedValues().isEmpty()) { columnList.add(new Column(formField,formField.getPredefinedValues().size()==1,null)); } else { // add a column for every predefined value for (String predefinedValue : formField.getPredefinedValues()) columnList.add(new Column(formField,true,predefinedValue)); if (formField.getUserValueCount()>0) columnList.add(new Column(formField,false,null)); // add a column for user values, must come after predefined values for algorithm in getColumnValues to work } } columns=columnList.toArray(new Column[columnList.size()]); } private Column[] columns=null; private static class Column { public FormField formField; public boolean isBoolean; public String predefinedValue; public Column(final FormField formField, final boolean isBoolean, final String predefinedValue) { this.formField=formField; this.isBoolean=isBoolean; this.predefinedValue=predefinedValue; } } /** * Returns a list of all the {@linkplain FormField#getFormControls() constituent form controls} from all the {@linkplain FormField form fields} in this collection. * @return a list of all the {@linkplain FormField#getFormControls() constituent form controls} from all the {@linkplain FormField form fields} in this collection. */ public List getFormControls() { return formControls; } /** * Merges the specified FormFields
into thisFormFields
collection. * This is useful if a full collection of possible form fields is required from multiple {@linkplain Source source} documents. ** If both collections contain a
FormField
with the same {@linkplain FormField#getName() name}, * the resultingFormField
has the following properties: **
*- {@link FormField#getUserValueCount() getUserValueCount()} : the maximum user value count from both form fields
*- {@link FormField#allowsMultipleValues() allowsMultipleValues()} :
*true
if either form field allows multiple values- {@link FormField#getPredefinedValues() getPredefinedValues()} : the union of predefined values in both form fields
*- {@link FormField#getFormControls() getFormControls()} : the union of {@linkplain FormControl form controls} from both form fields
** NOTE: Some underlying data structures may end up being shared between the two merged
FormFields
collections. */ public void merge(final FormFields formFields) { for (FormField formField : formFields) { final String fieldName=formField.getName(); final FormField existingFormField=get(fieldName); if (existingFormField==null) map.put(formField.getName(),formField); else existingFormField.merge(formField); } } /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. */ public String getDebugInfo() { final StringBuilder sb=new StringBuilder(); for (FormField formField : this) sb.append(formField); return sb.toString(); } /** * Returns a string representation of this object useful for debugging purposes. ** This is equivalent to {@link #getDebugInfo()}. * * @return a string representation of this object useful for debugging purposes. */ public String toString() { return getDebugInfo(); } void add(final FormControl formControl) { add(formControl,formControl.getPredefinedValue()); } void add(final FormControl formControl, final String predefinedValue) { add(formControl,predefinedValue,formControl.name); } void addName(final FormControl formControl, final String fieldName) { add(formControl,null,fieldName); } void add(final FormControl formControl, final String predefinedValue, String fieldName) { if (Config.CurrentCompatibilityMode.isFormFieldNameCaseInsensitive()) fieldName=fieldName.toLowerCase(); FormField formField=(FormField)map.get(fieldName); if (formField==null) { formField=new FormField(fieldName); map.put(formField.getName(),formField); } formField.addFormControl(formControl,predefinedValue); } void replaceInOutputDocument(final OutputDocument outputDocument) { for (FormControl formControl : formControls) outputDocument.replace(formControl); } }