com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com.ibm.wala.cast.js Show documentation
Show all versions of com.ibm.wala.cast.js Show documentation
T. J. Watson Libraries for Analysis
The newest version!
/*
* Copyright (c) 2002 - 2011 IBM Corporation.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*/
package com.ibm.wala.cast.js.html.jericho;
import com.ibm.wala.cast.ir.translator.TranslatorToCAst;
import com.ibm.wala.cast.js.html.IHtmlCallback;
import com.ibm.wala.cast.js.html.IHtmlParser;
import com.ibm.wala.core.util.warnings.Warning;
import com.ibm.wala.util.collections.HashSetFactory;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import java.util.List;
import java.util.Set;
import net.htmlparser.jericho.Config;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Logger;
import net.htmlparser.jericho.LoggerProvider;
import net.htmlparser.jericho.Source;
/**
* @author danielk Uses the Jericho parser to go over the HTML
*/
public class JerichoHtmlParser implements IHtmlParser {
static Set warnings = HashSetFactory.make();
static {
class CAstLoggerProvider implements LoggerProvider {
@Override
public Logger getLogger(String arg0) {
class CAstLogger implements Logger {
@Override
public void debug(String arg0) {
// TODO Auto-generated method stub
}
@Override
public void error(final String arg0) {
warnings.add(
new Warning() {
@Override
public String getMsg() {
return arg0;
}
});
}
@Override
public void info(String arg0) {
// TODO Auto-generated method stub
}
@Override
public boolean isDebugEnabled() {
return true;
}
@Override
public boolean isErrorEnabled() {
return true;
}
@Override
public boolean isInfoEnabled() {
return true;
}
@Override
public boolean isWarnEnabled() {
return true;
}
@Override
public void warn(String arg0) {
// TODO Auto-generated method stub
}
}
return new CAstLogger();
}
}
Config.LoggerProvider = new CAstLoggerProvider();
}
@Override
public void parse(URL url, Reader reader, IHtmlCallback callback, String fileName)
throws TranslatorToCAst.Error {
warnings.clear();
Parser parser = new Parser(callback, fileName);
Source src;
try {
src = new Source(reader);
src.setLogger(Config.LoggerProvider.getLogger(fileName));
List childElements = src.getChildElements();
for (Element e : childElements) {
parser.parse(e);
}
if (!warnings.isEmpty()) {
throw new TranslatorToCAst.Error(warnings);
}
} catch (IOException e) {
System.err.println("Error parsing file: " + e.getMessage());
}
}
/**
* @author danielk Inner class does the actual traversal of the HTML using recursion
*/
private static class Parser {
private final IHtmlCallback handler;
private final String fileName;
public Parser(IHtmlCallback handler, String fileName) {
this.handler = handler;
this.fileName = fileName;
}
private void parse(Element root) {
JerichoTag tag = new JerichoTag(root, fileName);
handler.handleStartTag(tag);
handler.handleText(tag.getElementPosition(), tag.getBodyText().snd);
List childElements = root.getChildElements();
for (Element child : childElements) {
parse(child);
}
handler.handleEndTag(tag);
}
}
}