
org.dstadler.htmlunit.HtmlUnitUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-htmlunit Show documentation
Common HtmlUnit utility methods that I find useful in various projects.
The newest version!
package org.dstadler.htmlunit;
import org.htmlunit.*;
import org.htmlunit.html.*;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterators;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
public class HtmlUnitUtils {
private static final Log logger = LogFactory.getLog(HtmlUnitUtils.class);
/**
 * Private constructor, so the class cannot be instantiated from outside.
 */
private HtmlUnitUtils() {
    // intentionally empty
}
/**
 * Creates a {@link WebClient} with JavaScript enabled by delegating
 * to {@link #createWebClient(boolean)}.
 *
 * @return The newly created web client
 */
public static WebClient createWebClient() {
    final boolean enableJavaScript = true;
    return createWebClient(enableJavaScript);
}
/**
 * Creates a {@link WebClient} that emulates {@link BrowserVersion#FIREFOX_ESR}
 * by delegating to {@link #createWebClient(boolean, BrowserVersion)}.
 *
 * @param enableJavaScript Whether the client should execute JavaScript
 * @return The newly created web client
 */
public static WebClient createWebClient(boolean enableJavaScript) {
    final BrowserVersion defaultBrowser = BrowserVersion.FIREFOX_ESR;
    return createWebClient(enableJavaScript, defaultBrowser);
}
/**
 * Creates a {@link WebClient} for the given browser version and applies the
 * settings used throughout these utilities: a timeout value of 60000, CSS
 * disabled, redirects enabled, script errors not raised as exceptions, and
 * parser/incorrectness/CSS problems silenced.
 *
 * @param enableJavaScript Whether the client should execute JavaScript
 * @param browserVersion The browser that the client should emulate
 * @return The newly created and configured web client
 */
public static WebClient createWebClient(boolean enableJavaScript, BrowserVersion browserVersion) {
    logger.debug("Creating client");

    final WebClient client = new WebClient(browserVersion);
    client.waitForBackgroundJavaScriptStartingBefore(1000);

    // network and rendering related options
    client.getOptions().setTimeout(60000);
    client.getOptions().setJavaScriptEnabled(enableJavaScript);
    client.getOptions().setCssEnabled(false);
    // follow old-school HTTP 302 redirects - standard behaviour
    client.getOptions().setRedirectEnabled(true);

    // keep the client quiet: no parser listener, incorrectness reports are
    // dropped (they could be collected for optional retrieval later on)
    client.setHTMLParserListener(null);
    client.setIncorrectnessListener((msg, source) -> {
    });
    client.setCssErrorHandler(new SilentCssErrorHandler());

    // ignore script errors
    client.getOptions().setThrowExceptionOnScriptError(false);

    return client;
}
/**
 * Loads the page at the given URL via the given web client and logs its title
 * at debug level.
 *
 * @param webClient The client used to fetch the page
 * @param url The address of the page to load
 * @return The loaded page
 *
 * @throws IOException if fetching the page fails; a
 *          {@link FailingHttpStatusCodeException} is wrapped as cause of an IOException
 */
public static HtmlPage getInitialPage(final WebClient webClient, final String url) throws IOException {
    final HtmlPage initialPage;
    try {
        initialPage = webClient.getPage(url);
        logger.debug("Page title = " + initialPage.getTitleText());
        // an AJAX controller or a DOM change listener could be registered at this point if ever needed
    } catch (FailingHttpStatusCodeException httpFailure) {
        throw new IOException(httpFailure);
    }
    return initialPage;
}
/**
 * Returns the element with the given id, checked to be of the given type.
 *
 * @param page The page to look at
 * @param id The value of the id-attribute of the element
 * @param type The class that the element is expected to be an instance of
 * @param <T> The expected type of the element
 * @return The found element, not null always
 *
 * @throws NoElementFoundException if the page has no element with the given id
 * @throws WrongElementException if the element is not an instance of the given type
 */
public static <T> T getElementById(final HtmlPage page, String id, Class<T> type) throws HtmlUnitException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)
    final DomElement element = page.getElementById(id);
    if (element == null) {
        logger.warn("Page contents (" + page.getUrl() + "): " + page.asXml());
        throw new NoElementFoundException("Could not find element with id '" + id + "' on page " + page.getUrl());
    }

    if (!type.isInstance(element)) {
        logger.warn("Page contents (" + page.getUrl() + "): " + page.asXml());
        throw new WrongElementException("Expected a field with id '" + id + "' and type " + type.getName() +
                ", but had an element of type " + element.getClass() + " on page: " + page.getUrl());
    }

    // checked cast, cannot fail after the isInstance() check above
    return type.cast(element);
}
/**
 * Returns the element with the given name, checked to be of the given type.
 *
 * @param page The page to look at
 * @param name The value of the name-attribute of the element
 * @param type The class that the element is expected to be an instance of
 * @param <T> The expected type of the element
 * @return The found element, not null always
 *
 * @throws NoElementFoundException if the lookup on the page fails with an ElementNotFoundException
 * @throws WrongElementException if the element is not an instance of the given type
 */
public static <T> T getElementByName(final HtmlPage page, String name, Class<T> type) throws HtmlUnitException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)
    final HtmlElement element;
    try {
        // a missing element is reported via exception, thus no null-check is done afterwards
        element = page.getElementByName(name);
    } catch (ElementNotFoundException e) {
        throw new NoElementFoundException("Could not find element with name '" + name + "' on page " + page.getUrl() + ": " + e);
    }

    if (!type.isInstance(element)) {
        logger.warn("Page contents (" + page.getUrl() + "): " + page.asXml());
        throw new WrongElementException("Expected a field with name '" + name + "' and type " + type.getName() +
                ", but had an element of type " + element.getClass() + " on page: " + page.getUrl());
    }

    // checked cast, cannot fail after the isInstance() check above
    return type.cast(element);
}
/**
 * Returns all elements with the given tag-name where the given attribute
 * is equal to the given value.
 *
 * @param page The page to look at
 * @param tagName The tag-name of the elements to look at
 * @param attribute The name of the attribute to compare
 * @param value The value that the attribute needs to be equal to
 * @param type The class that matching elements are expected to be an instance of
 * @param <T> The expected type of the elements
 * @return A list of matching elements, empty if none match
 *
 * @throws WrongElementException if a matching element is not an instance of the given type
 */
public static <T> List<T> getElementsByAttribute(final HtmlPage page, String tagName, String attribute, String value, Class<T> type) throws HtmlUnitException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)
    final List<T> list = new ArrayList<>();
    for (DomElement element : page.getElementsByTagName(tagName)) {
        final String attValue = element.getAttribute(attribute);
        if (!attValue.equals(value)) {
            continue;
        }

        if (!type.isInstance(element)) {
            logger.warn("Page contents (" + page.getUrl() + "): " + page.asXml());
            throw new WrongElementException("Expected a field with tag '" + tagName + "', attribute '" + attribute +
                    "', value '" + value + "' and type " + type.getName() +
                    ", but had an element of type " + element.getClass() + " on page: " + page.getUrl());
        }

        // checked cast, cannot fail after the isInstance() check above
        list.add(type.cast(element));
    }

    return list;
}
/**
 * Returns all elements with the given tag-name where the given attribute
 * contains the given value as substring.
 *
 * @param page The page to look at
 * @param tagName The tag-name of the elements to look at
 * @param attribute The name of the attribute to inspect
 * @param value The text that the attribute needs to contain
 * @param type The class that matching elements are expected to be an instance of
 * @param <T> The expected type of the elements
 * @return A list of matching elements, empty if none match
 *
 * @throws WrongElementException if a matching element is not an instance of the given type
 */
public static <T> List<T> getElementsByAttributeContains(final HtmlPage page, String tagName, String attribute, String value, Class<T> type) throws WrongElementException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)
    final List<T> list = new ArrayList<>();
    for (DomElement element : page.getElementsByTagName(tagName)) {
        final String attValue = element.getAttribute(attribute);
        if (!attValue.contains(value)) {
            continue;
        }

        if (!type.isInstance(element)) {
            logger.warn("Page contents (" + page.getUrl() + "): " + page.asXml());
            throw new WrongElementException("Expected a field with tag '" + tagName + "', attribute '" + attribute +
                    "', which contains value '" + value + "' and type " + type.getName() +
                    ", but had an element of type " + element.getClass() + " on page: " + page.getUrl());
        }

        // checked cast, cannot fail after the isInstance() check above
        list.add(type.cast(element));
    }

    return list;
}
/**
 * Returns all elements with the given tag-name whose text content
 * is equal to the given text.
 *
 * @param page The page to look at
 * @param tagName The tag-name of the elements to look at
 * @param text The text that the text content of the element needs to be equal to
 * @param type The class that matching elements are expected to be an instance of
 * @param <T> The expected type of the elements
 * @return A list of matching elements, empty if none match
 *
 * @throws WrongElementException if a matching element is not an instance of the given type
 */
public static <T> List<T> getElementsByTextContents(final HtmlPage page, String tagName, String text, Class<T> type) throws WrongElementException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)
    final List<T> list = new ArrayList<>();
    for (DomElement element : page.getElementsByTagName(tagName)) {
        if (!element.getTextContent().equals(text)) {
            continue;
        }

        if (!type.isInstance(element)) {
            logger.warn("Page contents (" + page.getUrl() + "): " + page.asXml());
            throw new WrongElementException("Expected a field with tag '" + tagName + "', " +
                    "which contains text '" + text + "' and type " + type.getName() +
                    ", but had an element of type " + element.getClass() + " on page: " + page.getUrl());
        }

        // checked cast, cannot fail after the isInstance() check above
        list.add(type.cast(element));
    }

    return list;
}
/**
 * Returns the single element of the given type inside the form with
 * the given name, looking at all nesting levels of the form.
 *
 * @param page The page to look at
 * @param formName The name of the form to search in
 * @param type The class that the element needs to be an instance of
 * @param <T> The expected type of the element
 * @return The found element, not null always
 *
 * @throws NoElementFoundException if the form is not found or contains no element of the given type
 * @throws HtmlUnitException if the form contains more than one element of the given type
 */
public static <T> T getFormElementByType(HtmlPage page, String formName, Class<T> type) throws HtmlUnitException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)
    final HtmlForm form;
    try {
        form = page.getFormByName(formName);
    } catch (ElementNotFoundException e) {
        throw new NoElementFoundException("Could not find form with name '" + formName + "' on page " + page.getUrl() + ": " + e);
    }

    // use a stack to recursively walk into all sub-elements, not just the first level
    final Stack<DomElement> elements = new Stack<>();
    Iterators.addAll(elements, form.getChildElements().iterator());

    T search = null;
    while (!elements.isEmpty()) {
        final DomElement it = elements.pop();
        if (type.isInstance(it)) {
            // don't allow to find it twice
            if (search != null) {
                logger.warn("Form contents (" + page.getUrl() + '/' + formName + "): " + form.asXml());
                throw new HtmlUnitException("Did find more than one element of type " + type.getName() + " in form '" + formName + "' on page " + page.getUrl());
            }

            // checked cast, cannot fail after the isInstance() check above
            search = type.cast(it);
        }

        Iterators.addAll(elements, it.getChildElements().iterator());
    }

    if (search == null) {
        logger.warn("Form contents (" + page.getUrl() + '/' + formName + "): " + form.asXml());
        throw new NoElementFoundException("Could not find element of type " + type.getName() + " in form '" + formName + "' on page " + page.getUrl());
    }

    return search;
}
/**
 * Returns the single element with the given name inside the given form,
 * looking at all nesting levels of the form, checked to be of the given type.
 *
 * @param form The form to search in
 * @param name The value of the name-attribute of the element
 * @param type The class that the element is expected to be an instance of
 * @param <T> The expected type of the element
 * @return The found element, not null always
 *
 * @throws NoElementFoundException if the form contains no element with the given name
 * @throws WrongElementException if the element is not an instance of the given type
 * @throws HtmlUnitException if the form contains more than one element with the given name
 */
public static <T> T getFormElementByName(final HtmlForm form, String name, Class<T> type) throws HtmlUnitException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)

    // use a stack to recursively walk into all sub-elements, not just the first level
    final Stack<DomElement> elements = new Stack<>();
    Iterators.addAll(elements, form.getChildElements().iterator());

    // kept as DomElement so that a match of an unexpected class is reported
    // by the type check below and not by a failing cast
    DomElement element = null;
    while (!elements.isEmpty()) {
        final DomElement it = elements.pop();
        if (it.getAttribute("name").equals(name)) {
            // don't allow to find it twice
            if (element != null) {
                logger.warn("Form contents: " + form.asXml());
                throw new HtmlUnitException("Did find more than one element with name " + name + " and type " + type.getName() + " in form.");
            }

            element = it;
        }

        Iterators.addAll(elements, it.getChildElements().iterator());
    }

    if (element == null) {
        logger.warn("Form contents: " + form.asXml());
        throw new NoElementFoundException("Could not find element with name " + name + " of type " + type.getName() + " in form '" + form.getNameAttribute() + "'");
    }

    if (!type.isInstance(element)) {
        logger.warn("Form contents: " + form.asXml());
        throw new WrongElementException("Expected a field with name '" + name + "' and type " + type.getName() +
                ", but had an element of type " + element.getClass());
    }

    // checked cast, cannot fail after the isInstance() check above
    return type.cast(element);
}
/**
 * Returns the single element with the given name and value inside the given form,
 * looking at all nesting levels of the form, checked to be of the given type.
 *
 * @param form The form to search in
 * @param name The value of the name-attribute of the element
 * @param value The value of the value-attribute of the element
 * @param type The class that the element is expected to be an instance of
 * @param <T> The expected type of the element
 * @return The found element, not null always
 *
 * @throws NoElementFoundException if the form contains no element with the given name and value
 * @throws WrongElementException if the element is not an instance of the given type
 * @throws HtmlUnitException if the form contains more than one element with the given name and value
 */
public static <T> T getFormElementByNameAndValue(final HtmlForm form, String name, String value, Class<T> type) throws HtmlUnitException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)

    // use a stack to recursively walk into all sub-elements, not just the first level
    final Stack<DomElement> elements = new Stack<>();
    Iterators.addAll(elements, form.getChildElements().iterator());

    // kept as DomElement so that a match of an unexpected class is reported
    // by the type check below and not by a failing cast
    DomElement element = null;
    while (!elements.isEmpty()) {
        final DomElement it = elements.pop();
        if (it.getAttribute("name").equals(name) && it.getAttribute("value").equals(value)) {
            // don't allow to find it twice
            if (element != null) {
                logger.warn("Form contents: " + form.asXml());
                throw new HtmlUnitException("Did find more than one element with name " + name + ", value " + value + " and type " + type.getName() + " in form.");
            }

            element = it;
        }

        Iterators.addAll(elements, it.getChildElements().iterator());
    }

    if (element == null) {
        logger.warn("Form contents: " + form.asXml());
        throw new NoElementFoundException("Could not find element with name " + name + ", value " + value + " of type " + type.getName() + " in form '" + form.getNameAttribute() + "'");
    }

    if (!type.isInstance(element)) {
        logger.warn("Form contents: " + form.asXml());
        throw new WrongElementException("Expected a field with name '" + name + "', value '" + value + "' and type " + type.getName() +
                ", but had an element of type " + element.getClass());
    }

    // checked cast, cannot fail after the isInstance() check above
    return type.cast(element);
}
/**
 * Returns the single element of the given type inside the given form,
 * looking at all nesting levels of the form.
 *
 * @param form The form to search in
 * @param type The class that the element needs to be an instance of
 * @param <T> The expected type of the element
 * @return The found element, not null always
 *
 * @throws NoElementFoundException if the form contains no element of the given type
 * @throws HtmlUnitException if the form contains more than one element of the given type
 */
public static <T> T getFormElementByType(final HtmlForm form, Class<T> type) throws HtmlUnitException {
    // NOTE(review): the type-parameter declaration was missing in this copy of the file,
    // T is declared without a bound here - confirm if it should be restricted (e.g. to HtmlElement)

    // use a stack to recursively walk into all sub-elements, not just the first level
    final Stack<DomElement> elements = new Stack<>();
    Iterators.addAll(elements, form.getChildElements().iterator());

    T element = null;
    while (!elements.isEmpty()) {
        final DomElement it = elements.pop();
        if (type.isInstance(it)) {
            // don't allow to find it twice
            if (element != null) {
                logger.warn("Form contents: " + form.asXml());
                throw new HtmlUnitException("Did find more than one element of type " + type.getName() + " in form.");
            }

            // checked cast, cannot fail after the isInstance() check above
            element = type.cast(it);
        }

        Iterators.addAll(elements, it.getChildElements().iterator());
    }

    if (element == null) {
        logger.warn("Form contents: " + form.asXml());
        throw new NoElementFoundException("Could not find element of type " + type.getName() + " in form '" + form.getNameAttribute() + "'");
    }

    return element;
}
/**
* Returns the first form on the page with the given action-attribute.
*
* @param page The page to look at
* @param action The action-attribute that the form should have.
* @return The found form, not null always
*
* @throws NoElementFoundException if no form with the given action attribute is found.
*/
public static HtmlForm getFormByAction(HtmlPage page, String action) throws HtmlUnitException {
//
© 2015 - 2025 Weber Informatics LLC | Privacy Policy