// info.bliki.api.XMLPagesParser - Maven / Gradle / Ivy
// Go to download
// Show more of this group. Show more artifacts with this name.
// Show all versions of bliki-core. Show documentation.
// This is the core project.
// The newest version!
package info.bliki.api;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Reads {@code Page} data from an XML file generated by the
 * Wikimedia API.
 *
 * <p>The parser is driven by SAX callbacks: {@link #startElement} creates the
 * current {@code Page} / {@code Revision} and copies element attributes into
 * them, {@link #endElement} attaches the collected character data and stores
 * finished pages. Text found inside a {@code warnings} element is collected
 * separately and exposed through {@link #getWarnings()}.
 *
 * <p>NOTE(review): {@code PAGE_TAG1}, {@code PAGE_TAG2}, {@code PAGE_ID},
 * {@code NS_ID}, {@code TITLE_ID}, {@code fAttributes}, {@code fData} and
 * {@code getString()} are not declared in this class; they are presumably
 * inherited from {@code AbstractXMLParser} - confirm against that class.
 */
public class XMLPagesParser extends AbstractXMLParser {
    // Element names handled by this parser.
    private static final String REV_ID = "rev";
    private static final String CATEGORY_ID = "cl";
    private static final String PL_ID = "pl";
    private static final String II_ID = "ii";
    private static final String WARNINGS = "warnings";

    // Attribute names read from the elements above.
    private static final String URL_ID = "url";
    private static final String THUMB_URL_ID = "thumburl";
    private static final String ANON_ID = "anon";
    private static final String TIMESTAMP_ID = "timestamp";
    private static final String EDIT_TOKEN_ID = "edittoken";
    private static final String PAGE_MISSING_ATTRIBUTE = "missing";
    private static final String PAGE_INVALID_ATTRIBUTE = "invalid";

    // Not referenced in this class; kept so the set of declared constants is unchanged.
    private static final String IMAGEINFO_ID = "imageinfo";

    /** Page created by the most recent page start tag; {@code null} until the first one is seen. */
    private Page fPage;

    /** Revision created by the most recent {@code rev} start tag. */
    private Revision fRevision;

    /** Pages in the order their end tags were encountered. */
    private final List<Page> pagesList;

    /** Warning texts; stays {@code null} until a {@code warnings} element is seen. */
    private List<String> warnings;

    /** {@code true} while the parser is between the start and end tag of {@code warnings}. */
    private boolean isParsingWarnings;

    /**
     * Creates a parser for the given XML text.
     *
     * @param xmlText the XML document to parse; handed unchanged to the superclass constructor
     * @throws SAXException if the superclass constructor fails
     */
    public XMLPagesParser(String xmlText) throws SAXException {
        super(xmlText);
        pagesList = new ArrayList<>();
    }

    /**
     * Handles a start tag: creates the current page or revision, or copies the
     * attributes of category, link and image-info elements into the current page.
     *
     * @param namespaceURI unused
     * @param localName    unused
     * @param qName        the qualified element name used for dispatching
     * @param atts         the attributes of the element
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) {
        fAttributes = atts;
        if (PAGE_TAG1.equals(qName) || PAGE_TAG2.equals(qName)) {
            fPage = new Page();
            fPage.setPageid(fAttributes.getValue(PAGE_ID));
            fPage.setNs(fAttributes.getValue(NS_ID));
            fPage.setTitle(fAttributes.getValue(TITLE_ID));
            fPage.setEditToken(fAttributes.getValue(EDIT_TOKEN_ID));
            // "missing" / "invalid" are flag attributes: only their presence matters.
            fPage.setMissing(fAttributes.getValue(PAGE_MISSING_ATTRIBUTE) != null);
            fPage.setInvalid(fAttributes.getValue(PAGE_INVALID_ATTRIBUTE) != null);
        } else if (REV_ID.equals(qName)) {
            fRevision = new Revision();
            fRevision.setAnon(fAttributes.getValue(ANON_ID));
            fRevision.setTimestamp(fAttributes.getValue(TIMESTAMP_ID));
            // Guard like the other branches: a "rev" element seen before any page
            // element must not raise a NullPointerException inside the SAX callback.
            if (fPage != null) {
                fPage.setCurrentRevision(fRevision);
            }
        } else if (CATEGORY_ID.equals(qName)) {
            if (fPage != null) {
                PageInfo cat = new PageInfo();
                cat.setNs(fAttributes.getValue(NS_ID));
                cat.setTitle(fAttributes.getValue(TITLE_ID));
                fPage.addCategory(cat);
            }
        } else if (PL_ID.equals(qName)) {
            if (fPage != null) {
                Link link = new Link();
                link.setNs(fAttributes.getValue(NS_ID));
                link.setTitle(fAttributes.getValue(TITLE_ID));
                fPage.addLink(link);
            }
        } else if (II_ID.equals(qName)) {
            // Image info entry: copy the image URL and thumbnail URL to the current page.
            if (fPage != null) {
                fPage.setImageUrl(fAttributes.getValue(URL_ID));
                fPage.setImageThumbUrl(fAttributes.getValue(THUMB_URL_ID));
            }
        } else if (WARNINGS.equals(qName)) {
            // Start a fresh list; every element closed until </warnings> contributes its text.
            warnings = new ArrayList<>();
            isParsingWarnings = true;
        }
        // Discard character data collected before this start tag.
        fData = null;
    }

    /**
     * Handles an end tag: stores the text of a {@code rev} element as revision
     * content, appends a finished page to the result list, or records the text
     * of an element nested in {@code warnings}.
     *
     * <p>A {@link RuntimeException} raised while doing so is printed and not
     * propagated; the per-element state is reset in either case.
     *
     * @param uri   unused
     * @param name  unused
     * @param qName the qualified element name used for dispatching
     */
    @Override
    public void endElement(String uri, String name, String qName) {
        try {
            if (REV_ID.equals(qName)) {
                if (fRevision != null) {
                    fRevision.setContent(getString());
                }
            } else if (PAGE_TAG1.equals(qName) || PAGE_TAG2.equals(qName)) {
                if (fPage != null) {
                    pagesList.add(fPage);
                }
            } else if (WARNINGS.equals(qName)) {
                isParsingWarnings = false;
            } else if (isParsingWarnings) {
                String warning = getString();
                if (warning != null) {
                    warnings.add(warning);
                }
            }
        } catch (RuntimeException re) {
            // Best effort: report the problem but keep parsing the rest of the document.
            re.printStackTrace();
        } finally {
            // Reset even after a failure so stale text or attributes of this
            // element cannot leak into the handling of the following elements.
            fData = null;
            fAttributes = null;
        }
    }

    /**
     * Returns the pages collected so far.
     *
     * @return the internal, mutable list of pages (not a copy)
     */
    public List<Page> getPagesList() {
        return pagesList;
    }

    /**
     * Returns the warning texts found in the document.
     *
     * @return the collected warnings, or an empty list if no {@code warnings}
     *         element was encountered; never {@code null}
     */
    public List<String> getWarnings() {
        if (warnings == null) {
            return Collections.emptyList();
        }
        return warnings;
    }
}