org.databene.html.HTMLUtil Maven / Gradle / Ivy
Go to download
'databene webdecs' is an open source software library for
WEB Data Extraction, Conversion and Scripting, written by Volker Bergmann.
The newest version!
/*
* (c) Copyright 2007-2010 by Volker Bergmann. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted under the terms of the
* GNU General Public License.
*
* For redistributing this software or a derivative work under a license other
* than the GPL-compatible Free Software License as defined by the Free
* Software Foundation or approved by OSI, you must first obtain a commercial
* license to this software product from Volker Bergmann.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
* REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
* HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.databene.html;
import org.databene.commons.CollectionUtil;
import org.databene.commons.SystemInfo;
import java.lang.reflect.Method;
import java.net.URI;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
* Provides utility methods for HTML processing.
*
* Created: 15.06.2007 19:42:19
* @author Volker Bergmann
*/
public class HTMLUtil {
private static final Set EMPTY_TAGS = CollectionUtil.toSet("br", "img", "meta", "link");
public static final String DOCTYPE_401 = "";
static final String[] BROWSERS = {
"google-chrome", "firefox", "opera", "epiphany", "konqueror", "conkeror", "midori", "kazehakase", "mozilla", "netscape", "links", "lynx" };
public static void openBrowser(String url) {
try {
// On JDK 1.6+, I can use java.awt.Desktop.getDesktop().browse().
// On older Java versions, this would not compile. Thus the reflection based invocation
Class> desktopClass = Class.forName("java.awt.Desktop");
Method browseMethod = desktopClass.getDeclaredMethod("browse", new Class[] { URI.class });
Method getDesktopMethod = desktopClass.getDeclaredMethod("getDesktop");
Object desktop = getDesktopMethod.invoke(null);
browseMethod.invoke(desktop, new Object[] { URI.create(url) });
} catch (Exception ignore) {
// older JVM version or invocation failed
// try invoking the browser in OS specific ways
try {
if (SystemInfo.isMacOsx()) {
// On Mac OS X, use FileManager.openURL(url)
Class> fileManager = Class.forName("com.apple.eio.FileManager");
Method openUrlMethod = fileManager.getDeclaredMethod( "openURL", new Class[] { String.class });
openUrlMethod.invoke(null, new Object[] { url });
} else if (SystemInfo.isWindows()) {
// On Windows, call rundll32 url.dll,FileProtocolHandler
Runtime.getRuntime().exec( "rundll32 url.dll,FileProtocolHandler " + url);
} else {
// Otherwise, assume Unix or Linux
// and try to open the browser on the command line
String browser = null;
for (String candidate : BROWSERS)
if (browser == null && Runtime.getRuntime().exec(new String[] { "which", candidate }).getInputStream().read() != -1) {
browser = candidate;
Runtime.getRuntime().exec(new String[] { candidate, url });
}
if (browser == null)
throw new RuntimeException("No browser found");
}
} catch (Exception e) {
throw new RuntimeException("Error opening web browser with URL: " + url, e);
}
}
}
public static boolean isEmptyTag(String tagName) {
return EMPTY_TAGS.contains(tagName.toLowerCase());
}
public static String unescape(String text) {
StringBuilder result = new StringBuilder(text.length());
int i;
while ((i = text.indexOf('&')) >= 0) {
HTMLEntity entity = HTMLEntity.getEntity(text, i);
if (entity != null) {
result.append(text.substring(0, i));
if ("nbsp".equals(entity.htmlCode))
result.append(' ');
else if ("ndash".equals(entity.htmlCode))
result.append('-');
else
result.append(entity.character);
text = text.substring(i + entity.htmlCode.length() + 2);
} else {
result.append(text.substring(0, i));
result.append("&");
text = text.substring(i + 1);
}
}
result.append(text);
return result.toString();
}
public static String escape(String value) {
if (value == null)
return "";
value = value.replace("&", "&"); // must be the first conversion
value = value.replace("<", "<");
value = value.replace(">", ">");
value = value.replace("'", "'"); // IE6 and IE7 do not support '
value = value.replace("\"", """);
// TODO v0.6.x replace diacritic characters
return value;
}
public static Map parseCGIParameters(String url) {
Map result = new HashMap();
int qmIndex = url.indexOf('?');
if (qmIndex >= 0)
url = url.substring(qmIndex + 1);
String[] nvPairs = url.split("&");
for (String nvPair : nvPairs) {
String[] tokens = nvPair.split("=");
result.put(tokens[0], tokens[1]);
}
return result;
}
public static String td(String text) {
return td(text, null, null);
}
public static String td(String text, String alignment) {
return td(text, alignment);
}
public static String td(String text, String alignment, String style) {
StringBuilder builder = new StringBuilder("').append(text).append(" ");
return builder.toString();
}
public static String a(String href, String text) {
return "" + text + "";
}
public static String aNewWindow(String href, String text) {
return "" + text + "";
}
}