nl.siegmann.epublib.util.ToolsResourceUtil Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of epublib-tools Show documentation
Show all versions of epublib-tools Show documentation
A java library for reading/writing/manipulating epub files
The newest version!
package nl.siegmann.epublib.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.Scanner;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import nl.siegmann.epublib.Constants;
import nl.siegmann.epublib.domain.MediaType;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.epub.EpubProcessorSupport;
import nl.siegmann.epublib.service.MediatypeService;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Various resource utility methods for deriving a display title from a
 * {@link Resource}.
 *
 * @author paul
 *
 */
public class ToolsResourceUtil {

    private static final Logger log = LoggerFactory.getLogger(ToolsResourceUtil.class);

    /**
     * Matches the text of a heading tag (the part between {@code <} and
     * {@code >}), e.g. {@code h1} or {@code H2 class="x"}. Compiled once
     * instead of on every call.
     */
    private static final Pattern HEADING_TAG = Pattern.compile("^h\\d\\s*", Pattern.CASE_INSENSITIVE);

    /**
     * Returns a title for the given resource.
     *
     * @param resource the resource to get a title for, may be null
     * @return the empty string if the resource is null; the resource's href
     *         if its media type is not {@link MediatypeService#XHTML};
     *         otherwise the title found by
     *         {@link #findTitleFromXhtml(Resource)}, or the empty string if
     *         none was found
     */
    public static String getTitle(Resource resource) {
        if (resource == null) {
            return "";
        }
        if (resource.getMediaType() != MediatypeService.XHTML) {
            return resource.getHref();
        }
        String title = findTitleFromXhtml(resource);
        if (title == null) {
            title = "";
        }
        return title;
    }

    /**
     * Retrieves whatever it finds directly after an opening {@code <title>}
     * tag or a heading tag ({@code <h1>}, {@code <h2>}, ...), up to the next
     * {@code <}.
     * The first match is returned, even if it is a blank string.
     * If the resource already has a title, that title is returned without
     * reading the content. Otherwise the result of the search (including
     * null when nothing was found) is stored on the resource via
     * {@code setTitle}.
     *
     * @param resource the resource to search, may be null
     * @return the empty string if the resource is null; otherwise the
     *         trimmed, HTML-unescaped text following the first title or
     *         heading tag, or null if no such tag was found or the content
     *         could not be read
     */
    public static String findTitleFromXhtml(Resource resource) {
        if (resource == null) {
            return "";
        }
        if (resource.getTitle() != null) {
            return resource.getTitle();
        }
        String title = null;
        Scanner scanner = null;
        try {
            Reader content = resource.getReader();
            scanner = new Scanner(content);
            // Splitting on '<' yields tokens of the form "tag>text".
            scanner.useDelimiter("<");
            while (scanner.hasNext()) {
                String text = scanner.next();
                int closePos = text.indexOf('>');
                if (closePos < 0) {
                    // Not a tag: e.g. whitespace before the first '<' or a
                    // stray '<' in text. substring(0, -1) would throw here.
                    continue;
                }
                String tag = text.substring(0, closePos);
                if (tag.equalsIgnoreCase("title")
                        || HEADING_TAG.matcher(tag).find()) {
                    title = text.substring(closePos + 1).trim();
                    title = StringEscapeUtils.unescapeHtml(title);
                    break;
                }
            }
        } catch (IOException e) {
            // Pass the exception itself so the stack trace is not lost.
            log.error(e.getMessage(), e);
        } finally {
            if (scanner != null) {
                // Closing the scanner also closes the underlying reader.
                scanner.close();
            }
        }
        resource.setTitle(title);
        return title;
    }
}