All downloads are free. The search and download functionality uses the official Maven repository.

org.databene.html.HTMLUtil Maven / Gradle / Ivy

Go to download

'databene webdecs' is an open source software library for WEB Data Extraction, Conversion and Scripting, written by Volker Bergmann.

The newest version!
/*
 * (c) Copyright 2007-2010 by Volker Bergmann. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, is permitted under the terms of the
 * GNU General Public License.
 *
 * For redistributing this software or a derivative work under a license other
 * than the GPL-compatible Free Software License as defined by the Free
 * Software Foundation or approved by OSI, you must first obtain a commercial
 * license to this software product from Volker Bergmann.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
 * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
 * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package org.databene.html;

import org.databene.commons.CollectionUtil;
import org.databene.commons.SystemInfo;

import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.net.URI;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/**
 * Provides utility methods for HTML processing.
*
* Created: 15.06.2007 19:42:19 * @author Volker Bergmann */ public class HTMLUtil { private static final Set EMPTY_TAGS = CollectionUtil.toSet("br", "img", "meta", "link"); public static final String DOCTYPE_401 = ""; static final String[] BROWSERS = { "google-chrome", "firefox", "opera", "epiphany", "konqueror", "conkeror", "midori", "kazehakase", "mozilla", "netscape", "links", "lynx" }; public static void openBrowser(String url) { try { // On JDK 1.6+, I can use java.awt.Desktop.getDesktop().browse(). // On older Java versions, this would not compile. Thus the reflection based invocation Class desktopClass = Class.forName("java.awt.Desktop"); Method browseMethod = desktopClass.getDeclaredMethod("browse", new Class[] { URI.class }); Method getDesktopMethod = desktopClass.getDeclaredMethod("getDesktop"); Object desktop = getDesktopMethod.invoke(null); browseMethod.invoke(desktop, new Object[] { URI.create(url) }); } catch (Exception ignore) { // older JVM version or invocation failed // try invoking the browser in OS specific ways try { if (SystemInfo.isMacOsx()) { // On Mac OS X, use FileManager.openURL(url) Class fileManager = Class.forName("com.apple.eio.FileManager"); Method openUrlMethod = fileManager.getDeclaredMethod( "openURL", new Class[] { String.class }); openUrlMethod.invoke(null, new Object[] { url }); } else if (SystemInfo.isWindows()) { // On Windows, call rundll32 url.dll,FileProtocolHandler Runtime.getRuntime().exec( "rundll32 url.dll,FileProtocolHandler " + url); } else { // Otherwise, assume Unix or Linux // and try to open the browser on the command line String browser = null; for (String candidate : BROWSERS) if (browser == null && Runtime.getRuntime().exec(new String[] { "which", candidate }).getInputStream().read() != -1) { browser = candidate; Runtime.getRuntime().exec(new String[] { candidate, url }); } if (browser == null) throw new RuntimeException("No browser found"); } } catch (Exception e) { throw new RuntimeException("Error opening 
web browser with URL: " + url, e); } } } public static boolean isEmptyTag(String tagName) { return EMPTY_TAGS.contains(tagName.toLowerCase()); } public static String unescape(String text) { StringBuilder result = new StringBuilder(text.length()); int i; while ((i = text.indexOf('&')) >= 0) { HTMLEntity entity = HTMLEntity.getEntity(text, i); if (entity != null) { result.append(text.substring(0, i)); if ("nbsp".equals(entity.htmlCode)) result.append(' '); else if ("ndash".equals(entity.htmlCode)) result.append('-'); else result.append(entity.character); text = text.substring(i + entity.htmlCode.length() + 2); } else { result.append(text.substring(0, i)); result.append("&"); text = text.substring(i + 1); } } result.append(text); return result.toString(); } public static String escape(String value) { if (value == null) return ""; value = value.replace("&", "&"); // must be the first conversion value = value.replace("<", "<"); value = value.replace(">", ">"); value = value.replace("'", "'"); // IE6 and IE7 do not support ' value = value.replace("\"", """); // TODO v0.6.x replace diacritic characters return value; } public static Map parseCGIParameters(String url) { Map result = new HashMap(); int qmIndex = url.indexOf('?'); if (qmIndex >= 0) url = url.substring(qmIndex + 1); String[] nvPairs = url.split("&"); for (String nvPair : nvPairs) { String[] tokens = nvPair.split("="); result.put(tokens[0], tokens[1]); } return result; } public static String td(String text) { return td(text, null, null); } public static String td(String text, String alignment) { return td(text, alignment); } public static String td(String text, String alignment, String style) { StringBuilder builder = new StringBuilder("').append(text).append(""); return builder.toString(); } public static String a(String href, String text) { return "" + text + ""; } public static String aNewWindow(String href, String text) { return "" + text + ""; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy