
// Source listing of com.github.mertakdut.Content (Maven / Gradle / Ivy artifact).
package com.github.mertakdut;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.apache.commons.codec.binary.Base64;
import org.xml.sax.SAXException;
import com.github.mertakdut.BaseFindings.XmlItem;
import com.github.mertakdut.Package.Metadata;
import com.github.mertakdut.exception.OutOfPagesException;
import com.github.mertakdut.exception.ReadingException;
class Content {
private Logger logger;
private String zipFilePath;
private Container container;
private Package opfPackage;
private Toc toc;
private List entryNames;
private Map> entryTagPositions;
private List nonExistingHrefList;
private int playOrder;
// private int maxContentPerSection; // String length.
private BookSection lastBookSectionInfo;
/*
 * Creates an empty content holder: a logger, an empty entry name list and blank container, package and toc objects.
 */
public Content() {
    this.logger = new Logger();
    this.entryNames = new ArrayList<>();
    this.container = new Container();
    this.opfPackage = new Package();
    this.toc = new Toc();
}
// Debug
/*
 * Prints every entry name with its position, then delegates to the print methods of the container, package and toc.
 */
public void print() {
    System.out.println("Printing zipEntryNames...\n");
    int position = 0;
    for (Object entryName : entryNames) {
        System.out.println("(" + position + ")" + entryName);
        position++;
    }
    getContainer().print();
    getPackage().print();
    getToc().print();
}
// public BookSection getNextBookSection() throws ReadingException {
// NavPoint navPoint = getNavPoint(this.playOrder++);
// return prepareBookSection(navPoint, this.playOrder);
// }
//
// public BookSection getPrevBookSection() throws ReadingException {
// NavPoint navPoint = getNavPoint(this.playOrder--);
// return prepareBookSection(navPoint, this.playOrder);
// }
/*
 * Returns the book section at the given navPoint index.
 *
 * When the requested index is ahead of playOrder, earlier sections are calculated first. Real navPoints (type code 0 or 1, or any
 * navPoint when maxContentPerSection is 0) go through prepareBookSection; the remaining ones are treated as trimmed parts. playOrder is
 * incremented once per call.
 */
BookSection getBookSection(int index) throws ReadingException, OutOfPagesException {
    // Out of order. Calculate the ones before first.
    // NOTE(review): this visits (index - playOrder - 1) down to 0, not playOrder up to index - 1 — confirm that is intended.
    int remaining = index - this.playOrder;
    while (remaining > 0) {
        remaining--;
        calculateBookSection(remaining);
    }

    NavPoint navPoint = getNavPoint(index);
    boolean isRealNavPoint = Optionals.maxContentPerSection == 0 || navPoint.getTypeCode() == 0 || navPoint.getTypeCode() == 1;

    BookSection bookSection;
    if (isRealNavPoint) { // Actual file/anchor.
        bookSection = prepareBookSection(navPoint, index);
    } else { // Pseudo navPoint - trimmed file entry.
        bookSection = prepareTrimmedBookSection(navPoint, index);
    }

    this.playOrder++;
    return bookSection;
}
/*
 * Returns the navPoint at the given index of the toc's navMap.
 *
 * Throws ReadingException when the index is negative or the toc is null, and OutOfPagesException when the index is past the last navPoint.
 */
private NavPoint getNavPoint(int index) throws ReadingException, OutOfPagesException {
    if (index < 0) {
        throw new ReadingException("Index can't be less than 0");
    }

    if (getToc() == null) {
        throw new ReadingException("Term of Contents is null.");
    }

    // Typed list: with a raw List the element comes back as Object and cannot be returned as NavPoint.
    List<NavPoint> navPoints = getToc().getNavMap().getNavPoints();

    if (index >= navPoints.size()) {
        throw new OutOfPagesException("Out of bounds at position: " + index);
    }

    return navPoints.get(index);
}
// TODO: A new method for only calculating book sections. This will also be useful for pre-loading the whole book.
/*
 * Runs the same preparation as getBookSection for the given index, but discards the result and leaves playOrder untouched.
 */
private void calculateBookSection(int index) throws ReadingException, OutOfPagesException {
    NavPoint navPoint = getNavPoint(index);

    // Trimmed (pseudo) part: trimming is enabled and the type code is neither 0 nor 1.
    boolean isTrimmedPart = Optionals.maxContentPerSection != 0 && navPoint.getTypeCode() != 0 && navPoint.getTypeCode() != 1;

    if (isTrimmedPart) {
        prepareTrimmedBookSection(navPoint, index);
    } else { // Real navPoint - actual file/anchor.
        prepareBookSection(navPoint, index);
    }
}
/*
 * Builds the book section for a real navPoint (actual file or anchor inside a file).
 *
 * First visit (navPoint not calculated yet): finds the archive entry whose encoded file name matches the navPoint's href, reads it, resolves
 * the anchor interval if the href is anchored, and - when maxContentPerSection is set and the body is too long - trims the section and
 * inserts a pseudo navPoint (type code 2) right after the current index for the remainder.
 *
 * Later visits: re-reads the entry and reuses the trim positions stored on the navPoint.
 *
 * If the source file is missing the navPoint is removed from the toc, and if the anchors point into the next file nothing is produced here;
 * in both cases the method re-enters getBookSection(index).
 * NOTE(review): that re-entry increments playOrder a second time - confirm this is intended.
 */
private BookSection prepareBookSection(NavPoint navPoint, int index) throws ReadingException, OutOfPagesException {
    BookSection bookSection = new BookSection();
    int trimStartPosition = navPoint.getBodyTrimStartPosition();
    int trimEndPosition = navPoint.getBodyTrimEndPosition();
    String entryName = navPoint.getEntryName();
    String fileContentStr = null;
    String htmlBody = null;
    if (!navPoint.isCalculated()) { // Not calculated before.
        String href = navPoint.getContentSrc();
        String label = navPoint.getNavLabel();
        boolean isSourceFileFound = false;
        for (int i = 0; i < getEntryNames().size(); i++) {
            String fileName = ContextHelper.encodeToUtf8(ContextHelper.getTextAfterCharacter(getEntryNames().get(i), Constants.SLASH));
            // Either the href is the file itself, or the file followed by an encoded '#' anchor.
            if (href.equals(fileName) || (href.startsWith(fileName) && href.replace(fileName, "").startsWith("%23"))) {
                isSourceFileFound = true;
                entryName = getEntryNames().get(i);
                fileContentStr = readFileContent(entryName);
                htmlBody = getHtmlBody(fileContentStr); // This must not be changed.
                // entryTagPositions only used in either in trimming or including text content.
                if ((Optionals.maxContentPerSection != 0 && Optionals.maxContentPerSection < htmlBody.length()) || Optionals.isIncludingTextContent) {
                    // Calculate the tag positions of the current entry, if it hasn't done before.
                    if (entryTagPositions == null || !entryTagPositions.containsKey(entryName)) {
                        if (entryTagPositions == null) {
                            entryTagPositions = new HashMap<>();
                        }
                        calculateEntryTagPositions(entryName, htmlBody);
                    }
                }
                if (!href.equals(fileName)) { // Anchored, e.g. #pgepubid00058
                    // Typed pair: the raw Pair returned Object from getFirst()/getSecond(), which cannot be assigned to int.
                    Pair<Integer, Integer> bodyIntervals = getNextAvailableAnchorIndex2(index, entryName, htmlBody, href, fileName);
                    if (bodyIntervals != null) {
                        trimStartPosition = bodyIntervals.getFirst();
                        trimEndPosition = bodyIntervals.getSecond();
                    } else {
                        // Anchors lead into the next file; start over with the (possibly modified) toc.
                        return getBookSection(index);
                    }
                }
                String extension = ContextHelper.getTextAfterCharacter(fileName, Constants.DOT);
                String mediaType = getMediaType(fileName);
                // If fileContentStr is too long; crop it by the maxContentPerSection.
                // Save the fileContent and position within a new navPoint, insert it after current index.
                if (Optionals.maxContentPerSection != 0) { // maxContentPerSection is given.
                    int calculatedTrimEndPosition = calculateTrimEndPosition(entryName, htmlBody, trimStartPosition, trimEndPosition);
                    if (calculatedTrimEndPosition != -1) {
                        trimEndPosition = calculatedTrimEndPosition;
                        NavPoint nextEntryNavPoint = new NavPoint();
                        nextEntryNavPoint.setTypeCode(2);
                        nextEntryNavPoint.setEntryName(entryName);
                        nextEntryNavPoint.setBodyTrimStartPosition(trimEndPosition);
                        getToc().getNavMap().getNavPoints().add(index + 1, nextEntryNavPoint);
                        // Inserting calculated info to avoid calculating this navPoint again. In the future these data could be written to Term of Contents file.
                        getToc().getNavMap().getNavPoints().get(index).setTypeCode(2); // To indicate that, this is a trimmed part. TODO: Change these with constants.
                        // Remember the section info so the trimmed continuation parts can report the same values.
                        if (lastBookSectionInfo == null) {
                            lastBookSectionInfo = new BookSection();
                        }
                        lastBookSectionInfo.setExtension(extension);
                        lastBookSectionInfo.setLabel(label);
                        lastBookSectionInfo.setMediaType(mediaType);
                    }
                }
                bookSection.setExtension(extension);
                bookSection.setLabel(label);
                bookSection.setMediaType(mediaType);
                break;
            }
        }
        if (!isSourceFileFound) {
            logger.log(Logger.Severity.warning, "Source file not found!");
            getToc().getNavMap().getNavPoints().remove(index);
            return getBookSection(index);
        }
    } else { // Calculated before.
        fileContentStr = readFileContent(entryName);
        htmlBody = getHtmlBody(fileContentStr);
    }
    if (Optionals.isIncludingTextContent) {
        bookSection.setSectionTextContent(getOnlyTextContent(entryName, htmlBody, trimStartPosition, trimEndPosition));
    }
    if (Optionals.cssStatus == CssStatus.OMIT) {
        searchForTableTags(entryName, htmlBody, trimStartPosition, trimEndPosition);
    }
    // Swap the full body for the trimmed, tag-balanced one inside the complete file content.
    String htmlBodyToReplace = appendIncompleteTags(htmlBody, entryName, index, trimStartPosition, trimEndPosition);
    htmlBodyToReplace = replaceImgTag(htmlBodyToReplace);
    fileContentStr = fileContentStr.replace(htmlBody, htmlBodyToReplace);
    if (Optionals.cssStatus == CssStatus.DISTRIBUTE) {
        fileContentStr = dissolveStyleTag(fileContentStr);
    }
    bookSection.setSectionContent(fileContentStr);
    return bookSection;
}
/*
 * Builds the book section for a pseudo navPoint, i.e. a trimmed part of an entry.
 *
 * The trim start position comes from the navPoint. On the first visit the end position is derived from the next anchor in the same file
 * (if any) and then limited again by calculateTrimEndPosition; when another trim is needed, a further pseudo navPoint (type code 2) is
 * inserted right after the current index. Extension, label and media type are copied from lastBookSectionInfo when it is set.
 */
private BookSection prepareTrimmedBookSection(NavPoint entryNavPoint, int index) throws ReadingException, OutOfPagesException {
BookSection bookSection = new BookSection();
String entryName = entryNavPoint.getEntryName();
int bodyTrimStartPosition = entryNavPoint.getBodyTrimStartPosition();
int bodyTrimEndPosition = entryNavPoint.getBodyTrimEndPosition(); // Will be calculated on the first attempt.
// logger.log(Severity.info, "index: " + index + ", entryName: " + entryName + ", bodyTrimStartPosition: " + bodyTrimStartPosition + ", bodyTrimEndPosition: "
// + bodyTrimEndPosition + ", entryOpenedTags: " + entryOpenedTags + ", entryClosingTags: " + entryClosingTags);
String fileContent = readFileContent(entryName);
String htmlBody = getHtmlBody(fileContent);
String htmlBodyToReplace = null;
if (!entryNavPoint.isCalculated()) { // Not calculated before.
String nextAnchor = getNextAnchor(index, entryName);
if (nextAnchor != null) { // Next anchor is available in the same file. It may be the next stop for the content.
String nextAnchorHtml = convertAnchorToHtml(nextAnchor);
int anchorIndex = htmlBody.indexOf(nextAnchorHtml);
// The anchor only counts as the end when it lies at or after this part's start.
if (anchorIndex != -1 && bodyTrimStartPosition <= anchorIndex) {
while (htmlBody.charAt(anchorIndex) != Constants.TAG_OPENING) { // Getting just before anchor html.
anchorIndex--;
}
bodyTrimEndPosition = anchorIndex;
} else { // NextAnchor not found in the htmlContent. Invalidate it by removing it from navPoints and search for the next one.
bodyTrimEndPosition = getNextAvailableAnchorIndex(index, entryName, bodyTrimStartPosition, htmlBody);
// -1 signals that the following navPoint belongs to another file; start over with the modified toc.
if (bodyTrimEndPosition == -1) {
return getBookSection(index);
}
}
}
int calculatedTrimEndPosition = calculateTrimEndPosition(entryName, htmlBody, bodyTrimStartPosition, bodyTrimEndPosition);
if (calculatedTrimEndPosition != -1) { // Trimming again if needed.
bodyTrimEndPosition = calculatedTrimEndPosition;
// The remainder of the entry becomes another pseudo navPoint right after this one.
NavPoint nextEntryNavPoint = new NavPoint();
nextEntryNavPoint.setTypeCode(2);
nextEntryNavPoint.setEntryName(entryName);
nextEntryNavPoint.setBodyTrimStartPosition(bodyTrimEndPosition);
getToc().getNavMap().getNavPoints().add(index + 1, nextEntryNavPoint);
}
}
if (Optionals.cssStatus == CssStatus.OMIT) {
searchForTableTags(entryName, htmlBody, bodyTrimStartPosition, bodyTrimEndPosition);
}
htmlBodyToReplace = appendIncompleteTags(htmlBody, entryName, index, bodyTrimStartPosition, bodyTrimEndPosition);
htmlBodyToReplace = replaceImgTag(htmlBodyToReplace);
if (Optionals.isIncludingTextContent) {
bookSection.setSectionTextContent(getOnlyTextContent(entryName, htmlBody, bodyTrimStartPosition, bodyTrimEndPosition));
}
// Swap the full body for the trimmed one inside the complete file content.
fileContent = fileContent.replace(htmlBody, htmlBodyToReplace);
if (Optionals.cssStatus == CssStatus.DISTRIBUTE) {
fileContent = dissolveStyleTag(fileContent);
}
bookSection.setSectionContent(fileContent);
// Trimmed parts carry no href of their own; reuse the info saved when the real section was trimmed.
if (this.lastBookSectionInfo != null) {
bookSection.setExtension(this.lastBookSectionInfo.getExtension());
bookSection.setLabel(this.lastBookSectionInfo.getLabel());
bookSection.setMediaType(this.lastBookSectionInfo.getMediaType());
}
return bookSection;
}
/*
* This method calculates and keeps every tag indices of the given entry file. Later on, these calculations will be used when trimming the entry.
*
* e.g. If the open-close tag indices are in the same trimmed part; tag will be closed there and won't disturb the next trimmed part.
*
* If the open-close tag indices are not in the same trimmed part; tag will be closed at the end of the current trimmed part, and opened in the next trimmed part.
*/
private void calculateEntryTagPositions(String entryName, String htmlBody) {
List openedTags = null;
ListIterator listIterator = null;
boolean isPossiblyTagOpened = false;
StringBuilder possiblyTag = new StringBuilder();
Pattern pattern = Pattern.compile(Constants.HTML_TAG_PATTERN);
Matcher matcher;
for (int i = 0; i < htmlBody.length(); i++) {
if (htmlBody.charAt(i) == Constants.TAG_OPENING) { // Tag might have been opened.
isPossiblyTagOpened = true;
possiblyTag.setLength(0); // In case of double occurence of '<' start from the next found tag opening; e.g. '< '.
} else if (htmlBody.charAt(i) == Constants.TAG_CLOSING) { // Tag might have been closed.
possiblyTag.append(Constants.TAG_CLOSING);
// Warning: There may be looks to be opening tags but empty tags like
. Find a workaround for them. Or are they already skipped? Since the closing tag would never be found.
if (htmlBody.charAt(i - 1) != '/') { // Not an empty tag.
String tagStr = possiblyTag.toString();
matcher = pattern.matcher(tagStr);
if (matcher.matches()) {
if (tagStr.charAt(1) == '/') { // Closing tag. Match it with the last open tag with the same name.
String tagName = getFullTagName(tagStr, false);
listIterator = openedTags.listIterator(openedTags.size());
while (listIterator.hasPrevious()) {
Tag openedTag = listIterator.previous();
if (openedTag.getTagName().equals(tagName)) { // Found the last open tag with the same name.
addEntryTagPosition(entryName, openedTag.getFullTagName(), openedTag.getOpeningTagStartPosition(), i - tagName.length() - 1);
listIterator.remove();
break;
}
}
} else { // Opening tag.
if (openedTags == null) {
openedTags = new ArrayList<>();
}
String fullTagName = getFullTagName(tagStr, true);
String tagName = getTagName(fullTagName);
Tag tag = new Tag();
tag.setTagName(tagName);
tag.setFullTagName(fullTagName);
tag.setOpeningTagStartPosition(i - fullTagName.length());
openedTags.add(tag);
}
}
} else { // Empty tag.
String tagStr = possiblyTag.toString();
matcher = pattern.matcher(tagStr);
if (matcher.matches()) {
int closingBracletIndex = tagStr.indexOf(Constants.TAG_CLOSING);
String tagName = tagStr.substring(1, closingBracletIndex - 1);
addEntryTagPosition(entryName, tagName, i - tagName.length() - 1, i - tagName.length() - 1);
}
}
possiblyTag.setLength(0);
isPossiblyTagOpened = false;
}
if (isPossiblyTagOpened) {
possiblyTag.append(htmlBody.charAt(i));
}
}
}
/*
 * Records one tag of the given entry, keeping the entry's tag list ordered by opening position.
 *
 * The short tag name is derived from fullTagName through getTagName. The map and the per-entry list are created on demand.
 */
private void addEntryTagPosition(String entryName, String fullTagName, int openingPosition, int closingPosition) {
    Tag tag = new Tag();
    tag.setOpeningTagStartPosition(openingPosition);
    tag.setClosingTagStartPosition(closingPosition);
    tag.setFullTagName(fullTagName);
    tag.setTagName(getTagName(fullTagName));

    if (this.entryTagPositions == null) {
        this.entryTagPositions = new HashMap<>();
    }

    List<Tag> tagList = this.entryTagPositions.get(entryName);
    if (tagList == null) {
        tagList = new ArrayList<>();
        this.entryTagPositions.put(entryName, tagList);
    }

    // Walk back from the end to the first slot whose predecessor does not open after this tag.
    int index = tagList.size();
    while (index > 0 && tagList.get(index - 1).getOpeningTagStartPosition() > openingPosition) {
        index--;
    }
    tagList.add(index, tag);
}
/*
 * Returns the text between the tag's leading marker and its first closing bracket: one character is skipped for an opening tag,
 * two characters for a closing tag.
 */
private String getFullTagName(String tag, boolean isOpeningTag) {
    int nameStart = isOpeningTag ? 1 : 2;
    return tag.substring(nameStart, tag.indexOf(Constants.TAG_CLOSING));
}
/*
 * Returns the part of a full tag name (name plus attributes) that precedes the first space. A value without any space is returned as is.
 */
private String getTagName(String fullTagName) {
    if (!fullTagName.contains(" ")) {
        return fullTagName;
    }

    String trimmed = fullTagName.trim();

    // The search starts at index 1, so the cut never happens before the first character.
    int nameEnd = trimmed.indexOf(' ', 1);
    if (nameEnd == -1) {
        nameEnd = Math.max(trimmed.length(), 1);
    }

    return trimmed.substring(0, nameEnd);
}
/*
 * Resolves the body interval (start, end) for an anchored navPoint.
 *
 * When the file is read for the first time, a navPoint for the plain file is inserted at the current index, so the first interval runs
 * from the beginning of the body to the given anchor. Otherwise the interval runs from the given anchor to the next anchor of the same
 * file. Anchors that cannot be found in the body get their navPoints marked and removed from the toc, and the following navPoints are
 * searched for usable anchors.
 *
 * Returns null when the following navPoint belongs to another file; otherwise the result of getAnchorsInterval.
 * Throws ReadingException when every navPoint ended up marked for deletion.
 */
private Pair<Integer, Integer> getNextAvailableAnchorIndex2(int index, String entryName, String htmlBody, String href, String fileName) throws ReadingException, OutOfPagesException {
    List<NavPoint> navPoints = getToc().getNavMap().getNavPoints();
    boolean isNavigatingToNextFile = false;
    String currentAnchor = null;
    String nextAnchor = null;
    boolean isFileReadFirstTime = isFileReadFirstTime(index, entryName);
    if (isFileReadFirstTime) { // No previous anchor; so it should start from the beginning to the current anchor.
        NavPoint currentEntryNavPoint = new NavPoint();
        currentEntryNavPoint.setTypeCode(0);
        currentEntryNavPoint.setContentSrc(fileName); // href or fileName?
        navPoints.add(index, currentEntryNavPoint);
        nextAnchor = href.replace(fileName, "");
    } else {
        currentAnchor = href.replace(fileName, "");
        nextAnchor = getNextAnchor(index, entryName);
    }
    currentAnchor = convertAnchorToHtml(currentAnchor);
    nextAnchor = convertAnchorToHtml(nextAnchor);
    if (currentAnchor != null && nextAnchor != null) {
        int currentAnchorIndex = htmlBody.indexOf(currentAnchor);
        int nextAnchorIndex = htmlBody.indexOf(nextAnchor);
        // Abnormality in toc.ncx file. Its order is probably given wrong.
        // Warning: This may break the navPoints order if all the order is malformed.
        if (currentAnchorIndex > nextAnchorIndex) {
            int tmp = currentAnchorIndex;
            currentAnchorIndex = nextAnchorIndex;
            nextAnchorIndex = tmp;
            Collections.swap(navPoints, index, index + 1);
        }
        if (currentAnchorIndex == -1 || nextAnchorIndex == -1) {
            int tmpIndex = index;
            if (currentAnchorIndex == -1 && nextAnchorIndex == -1) { // Both of the anchors not found.
                navPoints.get(tmpIndex++).setMarkedToDelete(true); // Delete the first one (current anchor)
                navPoints.get(tmpIndex++).setMarkedToDelete(true); // Delete the second one (next anchor)
                currentAnchor = null;
                nextAnchor = null;
            } else if (currentAnchorIndex == -1) { // Current anchor not found.
                navPoints.get(tmpIndex++).setMarkedToDelete(true); // Delete the first one (current anchor)
                currentAnchor = nextAnchor;
            } else if (nextAnchorIndex == -1) { // Next anchor not found.
                navPoints.get(++tmpIndex).setMarkedToDelete(true); // Delete the second one (next anchor)
                nextAnchor = null;
            }
            int markedNavPoints = tmpIndex - index;
            // Next available anchor should be the next starting point.
            while (tmpIndex < navPoints.size()) { // Looping until next anchor is found.
                boolean isCurrentNavPointMarked = true;
                String possiblyNextEntryName = getNavPoint(tmpIndex).getContentSrc();
                if (possiblyNextEntryName.startsWith(fileName) && possiblyNextEntryName.replace(fileName, "").startsWith("%23")) {
                    String anchor = possiblyNextEntryName.replace(fileName, "");
                    anchor = convertAnchorToHtml(anchor);
                    if (htmlBody.contains(anchor)) {
                        if (currentAnchor == null) { // If current anchor is not found, first set that.
                            currentAnchor = anchor;
                            isCurrentNavPointMarked = false;
                        } else { // If current anchor is already defined set the next anchor and break.
                            nextAnchor = anchor;
                            break;
                        }
                    }
                } else { // TODO: Next content is not the same file as the current one. Anchors are broken. Navigate to the next file.
                    isNavigatingToNextFile = true;
                    break;
                }
                if (isCurrentNavPointMarked) {
                    navPoints.get(tmpIndex).setMarkedToDelete(true);
                    markedNavPoints++;
                }
                tmpIndex++;
            }
            if (markedNavPoints != 0) {
                if (markedNavPoints == navPoints.size() && markedNavPoints > 1) {
                    throw new ReadingException("There are no items left in TOC. Toc.ncx file is probably malformed.");
                }
                // Typed iterator: a raw Iterator hands back Object, which cannot be assigned to NavPoint.
                for (Iterator<NavPoint> iterator = navPoints.iterator(); iterator.hasNext();) {
                    NavPoint navPointToDelete = iterator.next();
                    if (navPointToDelete.isMarkedToDelete()) {
                        iterator.remove();
                        if (--markedNavPoints == 0) { // Every counted navPoint is removed; no need to scan further.
                            break;
                        }
                    }
                }
            }
        }
    }
    if (isNavigatingToNextFile) {
        return null;
    } else {
        return getAnchorsInterval(htmlBody, currentAnchor, nextAnchor);
    }
}
// TODO: Similar functionality happens in the prepareBookSection method. Merge them into this.
/*
 * Called when the anchor of the navPoint after 'index' was not found in the body. That navPoint is removed, then the following navPoints
 * are searched for an anchor of the same file that exists in the body at or after bodyTrimStartPosition. Navpoints skipped on the way
 * are marked and removed from the toc.
 *
 * Returns the position of the tag opening that contains the found anchor, 0 when no further anchor was found, or -1 when the following
 * navPoint belongs to another file.
 */
private int getNextAvailableAnchorIndex(int index, String entryName, int bodyTrimStartPosition, String htmlBody) throws ReadingException, OutOfPagesException {
    List<NavPoint> navPoints = getToc().getNavMap().getNavPoints();
    navPoints.remove(++index); // Removing the nextAnchor from navPoints; 'cause it's already not found.
    int markedNavPoints = 0;
    int anchorIndex = -1;
    boolean isNextAnchorFound = false;
    boolean isNavigatingToNextFile = false;
    // Next available anchor should be the next starting point.
    while (index < navPoints.size()) { // Looping until next anchor is found.
        String possiblyNextEntryName = getNavPoint(index).getContentSrc();
        String fileName = ContextHelper.encodeToUtf8(ContextHelper.getTextAfterCharacter(entryName, Constants.SLASH));
        if (possiblyNextEntryName.startsWith(fileName) && possiblyNextEntryName.replace(fileName, "").startsWith("%23")) {
            String anchor = possiblyNextEntryName.replace(fileName, "");
            String anchorHtml = convertAnchorToHtml(anchor);
            anchorIndex = htmlBody.indexOf(anchorHtml);
            if (anchorIndex != -1) {
                while (htmlBody.charAt(anchorIndex) != Constants.TAG_OPENING) { // Getting just before anchor html.
                    anchorIndex--;
                }
                if (bodyTrimStartPosition <= anchorIndex) {
                    isNextAnchorFound = true;
                    break;
                }
            }
        } else { // TODO: Next content is not the same file as the current one. Anchors are broken. Navigate to the next file.
            isNavigatingToNextFile = true;
            break;
        }
        navPoints.get(index).setMarkedToDelete(true);
        markedNavPoints++;
        index++;
    }
    if (markedNavPoints != 0) {
        // Typed iterator: a raw Iterator hands back Object, which cannot be assigned to NavPoint.
        for (Iterator<NavPoint> iterator = navPoints.iterator(); iterator.hasNext();) {
            NavPoint navPointToDelete = iterator.next();
            if (navPointToDelete.isMarkedToDelete()) {
                iterator.remove();
                if (--markedNavPoints == 0) { // Every counted navPoint is removed; no need to scan further.
                    break;
                }
            }
        }
    }
    if (isNavigatingToNextFile) {
        return -1;
    } else if (isNextAnchorFound) {
        return anchorIndex;
    } else {
        return 0;
    }
}
/*
 * Concatenates the opening markup of the given tags in list order, using each tag's full name (name plus attributes).
 */
private String prepareOpeningTags(List<Tag> openedTags) {
    StringBuilder openingTagsBuilder = new StringBuilder();
    // Typed list: with a raw List the elements are Object and getFullTagName() cannot be called on them.
    for (Tag openedTag : openedTags) {
        openingTagsBuilder.append(Constants.TAG_OPENING).append(openedTag.getFullTagName()).append(Constants.TAG_CLOSING);
    }
    return openingTagsBuilder.toString();
}
/*
 * Concatenates the closing markup of the given tags in reverse list order, so the most recently opened tag is closed first.
 */
private String prepareClosingTags(List<Tag> openedTags) {
    StringBuilder closingTagsBuilder = new StringBuilder();
    // Typed iterator: with raw types the elements are Object and getTagName() cannot be called on them.
    for (ListIterator<Tag> iterator = openedTags.listIterator(openedTags.size()); iterator.hasPrevious();) {
        closingTagsBuilder.append(Constants.TAG_START).append(iterator.previous().getTagName()).append(Constants.TAG_CLOSING);
    }
    return closingTagsBuilder.toString();
}
private int calculateTrimEndPosition(String entryName, String htmlBody, int trimStartPosition, int trimEndPos) {
int trimEndPosition = (trimEndPos != 0 && (trimEndPos - trimStartPosition) < Optionals.maxContentPerSection) ? trimEndPos : trimStartPosition + Optionals.maxContentPerSection;
int htmlBodyLength = htmlBody.length();
// Don't need to trim. HtmlBody with tags are already below limit.
if (htmlBodyLength < trimEndPosition || (trimEndPosition - trimStartPosition) < Optionals.maxContentPerSection) {
return -1;
}
List tagStartEndPositions = this.entryTagPositions.get(entryName);
int loopCount = 0;
int lastTagsLength = 0;
while (true) {
int tagsLength = 0;
for (Tag tag : tagStartEndPositions) {
if (tag.getOpeningTagStartPosition() > trimEndPosition) {
break;
}
if (tag.getOpeningTagStartPosition() == tag.getClosingTagStartPosition()) {
if (tag.getOpeningTagStartPosition() > trimStartPosition && tag.getOpeningTagStartPosition() < trimEndPosition) { // Empty Tag.
tagsLength += tag.getFullTagName().length() + 3; // < />
}
} else {
if (tag.getOpeningTagStartPosition() > trimStartPosition && tag.getOpeningTagStartPosition() < trimEndPosition) { // Opening tag.
tagsLength += tag.getFullTagName().length() + 2; // < >
}
if (tag.getClosingTagStartPosition() > trimStartPosition && tag.getClosingTagStartPosition() < trimEndPosition) { // Closing tag.
tagsLength += tag.getTagName().length() + 3; // < />
}
}
}
if (lastTagsLength == tagsLength) { // Tags length isn't greater than the last one. No need to keep going.
if (loopCount == 0) { // Returned on the first try. Don't need to trim. HtmlBody without tags are already below limit.
if (tagsLength == 0 && htmlBodyLength > trimEndPosition) { // If there are no tags in the trimmed part.
break;
}
return -1;
} else {
break;
}
}
trimEndPosition += tagsLength;
// If trimEndPosition is over the htmlBody's index; then htmlBody is already within limits. No need to trim.
if (trimEndPosition >= htmlBodyLength) {
return -1;
}
if (((trimEndPosition - trimStartPosition) - tagsLength) >= Optionals.maxContentPerSection) {
break;
}
lastTagsLength = tagsLength;
loopCount++;
}
// TODO: Regex to find table tags like: [
|>]
// TODO: This may break the maxContentPerSection rule. Check if the table content will exceed the limit.
int tableStartIndex = htmlBody.indexOf(Constants.TAG_TABLE_START, trimStartPosition);
// If interval has table, don't break the table.
if (tableStartIndex != -1 && tableStartIndex < trimEndPosition) {
int tableEndIndex = htmlBody.indexOf(Constants.TAG_TABLE_END, tableStartIndex);
if (tableEndIndex != -1) {
trimEndPosition = tableEndIndex + Constants.TAG_TABLE_END.length();
} else {
trimEndPosition = findEligibleEndPosition(tagStartEndPositions, htmlBody, trimEndPosition);
}
} else {
trimEndPosition = findEligibleEndPosition(tagStartEndPositions, htmlBody, trimEndPosition);
}
return trimEndPosition;
}
// Checks if we are in an html tag. If so, move forward or backward until the tag is over. Else, move backwards until we hit the blank.
/*
 * Inside an empty or a closing tag the position moves forward to just after the tag's '>'; inside an opening tag it moves back to the
 * tag's '<'. Outside of tags it moves back to the previous space or tag boundary so that words are not divided.
 *
 * All scans are bounded by the body: a forward scan stops at the end of the body, and when no word boundary exists before the position,
 * the given position is returned unchanged.
 */
private int findEligibleEndPosition(List<Tag> tagStartEndPositions, String htmlBody, int trimEndPosition) {
    int htmlBodyLength = htmlBody.length();
    boolean isMovedToEndOfTag = false;
    for (Tag tag : tagStartEndPositions) {
        if (tag.getOpeningTagStartPosition() > trimEndPosition) {
            break;
        }
        if (tag.getOpeningTagStartPosition() == tag.getClosingTagStartPosition()) { // Empty tag.
            // Inside an empty tag.
            if (tag.getOpeningTagStartPosition() < trimEndPosition && (tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 2) > trimEndPosition) {
                while (trimEndPosition < htmlBodyLength && htmlBody.charAt(trimEndPosition) != Constants.TAG_CLOSING) {
                    trimEndPosition++;
                }
                if (trimEndPosition < htmlBodyLength) { // Step over the '>' itself.
                    trimEndPosition++;
                }
                isMovedToEndOfTag = true;
                break;
            }
        } else {
            // Inside an opening tag.
            if (tag.getOpeningTagStartPosition() < trimEndPosition && (tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 1) > trimEndPosition) {
                while (trimEndPosition > 0 && htmlBody.charAt(trimEndPosition) != Constants.TAG_OPENING) {
                    trimEndPosition--;
                }
                // trimEndPosition--;
                isMovedToEndOfTag = true;
                break;
            }
            // Inside a closing tag.
            if (tag.getClosingTagStartPosition() < trimEndPosition && (tag.getClosingTagStartPosition() + tag.getTagName().length() + 2) > trimEndPosition) {
                while (trimEndPosition < htmlBodyLength && htmlBody.charAt(trimEndPosition) != Constants.TAG_CLOSING) {
                    trimEndPosition++;
                }
                if (trimEndPosition < htmlBodyLength) { // Step over the '>' itself.
                    trimEndPosition++;
                }
                isMovedToEndOfTag = true;
                break;
            }
        }
    }
    if (!isMovedToEndOfTag) { // To avoid dividing the words in half.
        int cursor = trimEndPosition;
        boolean isBoundaryFound = false;
        while (cursor >= 0 && cursor < htmlBodyLength) {
            if (htmlBody.charAt(cursor) == ' ') {
                isBoundaryFound = true;
                break;
            }
            if (cursor == 0) { // Reached the beginning without finding a boundary.
                break;
            }
            cursor--;
            // We may have hit a tag.
            if (htmlBody.charAt(cursor) == Constants.TAG_CLOSING) {
                cursor++;
                isBoundaryFound = true;
                break;
            } else if (htmlBody.charAt(cursor) == Constants.TAG_OPENING) {
                isBoundaryFound = true;
                break;
            }
        }
        if (isBoundaryFound) {
            trimEndPosition = cursor;
        }
    }
    return trimEndPosition;
}
/*
 * Returns the anchor part ("%23...") of the navPoint following 'index' when that navPoint is a real one (type code other than 2) and
 * points into the same file as entryName. Returns null in every other case, including when there is no following navPoint.
 */
private String getNextAnchor(int index, String entryName) throws ReadingException, OutOfPagesException {
    if (getToc().getNavMap().getNavPoints().size() <= (index + 1)) {
        return null;
    }

    NavPoint nextNavPoint = getNavPoint(index + 1);
    if (nextNavPoint.getTypeCode() == 2) { // Only real navPoints are anchored. TODO: Change these with constants.
        return null;
    }

    String nextHref = nextNavPoint.getContentSrc();
    if (nextHref == null) {
        return null;
    }

    String fileName = ContextHelper.encodeToUtf8(ContextHelper.getTextAfterCharacter(entryName, Constants.SLASH));
    if (!nextHref.startsWith(fileName)) {
        return null;
    }

    // Both anchors are in the same file when what remains after the file name is an encoded '#' anchor.
    String anchor = nextHref.replace(fileName, "");
    return anchor.startsWith("%23") ? anchor : null;
}
/*
 * Tells whether the entry is being read for the first time, judged by the previous navPoint: the answer is false when that navPoint is a
 * trimmed part (type code 2) or when its href starts with this entry's encoded file name, and true otherwise.
 */
private boolean isFileReadFirstTime(int index, String entryName) throws ReadingException, OutOfPagesException {
    if ((index - 1) < 0) { // No previous navPoint at all.
        return true;
    }

    NavPoint prevNavPoint = getNavPoint(index - 1);
    if (prevNavPoint.getTypeCode() == 2) {
        return false;
    }

    String prevHref = prevNavPoint.getContentSrc();
    if (prevHref == null) {
        return true;
    }

    String fileName = ContextHelper.encodeToUtf8(ContextHelper.getTextAfterCharacter(entryName, Constants.SLASH));

    // Same content as previous means not reading for the first time. (&& prevHref.replace(fileName, "").startsWith("%23"))
    return !prevHref.startsWith(fileName);
}
// TODO: This operation is getting expensive and expensive. fileContent could be held in cache; if the entry is same. Maybe a map with one element ->
// If map doesn't contain that entryName -> then this method can be used.
/*
 * Reads the given entry of the epub archive as UTF-8 text, joining its lines with a single space.
 *
 * Depending on the options, css links are replaced with the actual css or style tags are removed, and title tags may be removed.
 * The archive is opened and closed on every call.
 *
 * Throws ReadingException when the entry does not exist, when reading or processing fails, or when the archive cannot be closed.
 */
private String readFileContent(String entryName) throws ReadingException {
    ZipFile epubFile = null;
    try {
        epubFile = new ZipFile(zipFilePath);
        ZipEntry zipEntry = epubFile.getEntry(entryName);
        // getEntry returns null for an unknown name; fail with a clear message instead of a NullPointerException.
        if (zipEntry == null) {
            throw new ReadingException("Entry not found in the epub file: " + entryName);
        }
        InputStream inputStream = epubFile.getInputStream(zipEntry);
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
        StringBuilder fileContent = new StringBuilder();
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                fileContent.append(line).append(" ");
            }
        } finally {
            bufferedReader.close();
        }
        String fileContentStr = fileContent.toString();
        if (Optionals.cssStatus != CssStatus.OMIT) {
            fileContentStr = replaceCssLinkWithActualCss(epubFile, fileContentStr);
        } else {
            fileContentStr = removeStyleTags(fileContentStr);
        }
        if (Optionals.isOmittingTitleTag) {
            fileContentStr = removeTitleTags(fileContentStr);
        }
        return fileContentStr;
    } catch (IOException e) {
        e.printStackTrace();
        throw new ReadingException("IOException while reading content " + entryName + e.getMessage());
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
        throw new ReadingException("ParserConfigurationException while reading content " + entryName + e.getMessage());
    } catch (SAXException e) {
        e.printStackTrace();
        throw new ReadingException("SAXException while reading content " + entryName + e.getMessage());
    } catch (TransformerException e) {
        e.printStackTrace();
        throw new ReadingException("TransformerException while reading content " + entryName + e.getMessage());
    } finally {
        try {
            if (epubFile != null) {
                epubFile.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            throw new ReadingException("Error closing ZipFile: " + e.getMessage());
        }
    }
}
/*
 * Returns the text between the end of the last body start tag and the last body end tag of the given html content.
 *
 * Throws ReadingException when either body tag is missing, or when the start tag is not closed before the end tag.
 */
private String getHtmlBody(String htmlContent) throws ReadingException {
    int startOfBody = htmlContent.lastIndexOf(Constants.TAG_BODY_START);
    int endOfBody = htmlContent.lastIndexOf(Constants.TAG_BODY_END);

    // Validate before scanning: with a missing start tag the scan would begin at a meaningless offset.
    if (startOfBody == -1 || endOfBody == -1) {
        throw new ReadingException("Exception while getting book section : Html body tags not found.");
    }

    // The start tag may carry attributes; the body begins after its closing bracket.
    int bodyStartEndIndex = htmlContent.indexOf(Constants.TAG_CLOSING, startOfBody + Constants.TAG_BODY_START.length());
    if (bodyStartEndIndex == -1 || bodyStartEndIndex >= endOfBody) {
        throw new ReadingException("Exception while getting book section : Html body tags not found.");
    }

    return htmlContent.substring(bodyStartEndIndex + 1, endOfBody);
}
// Starts from current anchor, reads until the next anchor starts.
/*
 * For each of the two anchors, the resulting position is the opening bracket of the tag that contains the anchor text. The position is 0
 * when the anchor is null, empty or not present in the body. The pair holds (current anchor position, next anchor position).
 */
private Pair getAnchorsInterval(String htmlBody, String currentAnchor, String nextAnchor) throws ReadingException {
    int intervalStart = 0;
    if (currentAnchor != null && !currentAnchor.equals("")) {
        int cursor = htmlBody.indexOf(currentAnchor);
        if (cursor != -1) {
            for (; htmlBody.charAt(cursor) != Constants.TAG_OPENING; cursor--) {
                // Walking back to the beginning of the enclosing tag.
            }
            intervalStart = cursor;
        }
    }

    int intervalEnd = 0;
    if (nextAnchor != null && !nextAnchor.equals("")) {
        int cursor = htmlBody.indexOf(nextAnchor);
        if (cursor != -1) {
            for (; htmlBody.charAt(cursor) != Constants.TAG_OPENING; cursor--) {
                // Walking back to the beginning of the enclosing tag.
            }
            intervalEnd = cursor;
        }
    }

    return new Pair<>(intervalStart, intervalEnd);
}
/*
 * #Page_1 to id="Page_1" converter. The anchor is expected in its encoded form, i.e. starting with "%23".
 *
 * Returns null for a null anchor and throws ReadingException for an anchor without the "%23" prefix.
 */
private String convertAnchorToHtml(String anchor) throws ReadingException {
    if (anchor == null) {
        return null;
    }

    if (!anchor.startsWith("%23")) { // Or UTF-8 equivalent of #
        throw new ReadingException("Anchor does not start with #");
    }

    String anchorId = anchor.substring(3);
    return "id=\"" + anchorId + "\"";
}
/**
 * Looks up the media type that the manifest declares for a file.
 *
 * The first manifest item having fileName as any attribute value and also carrying a "media-type" attribute decides the result.
 *
 * @param fileName attribute value to look for in the manifest items.
 * @return value of the "media-type" attribute of the matching item, or null when no item matches.
 */
private String getMediaType(String fileName) {
    List<XmlItem> manifestItems = getPackage().getManifest().getXmlItemList();
    for (XmlItem manifestItem : manifestItems) {
        Map<String, String> attributes = manifestItem.getAttributes();
        if (attributes.containsValue(fileName) && attributes.containsKey("media-type")) {
            return attributes.get("media-type");
        }
    }
    return null;
}
// Distributing the css parts in the style tag to the belonging html tags.
private String dissolveStyleTag(String trimmedFileContent) throws ReadingException {
Pattern cssPattern = Pattern.compile("");
Matcher matcher = cssPattern.matcher(trimmedFileContent);
while (matcher.find()) { // There may be multiple style tags.
String styleTagStr = matcher.group(2);
Map cssMap = getCssMap(styleTagStr);
String htmlBody = getHtmlBody(trimmedFileContent);
String htmlBodyToReplace = putCssIntoTags(cssMap, htmlBody);
trimmedFileContent = trimmedFileContent.replace(htmlBody, htmlBodyToReplace);
trimmedFileContent = trimmedFileContent.replace("");
htmlContent = htmlContent.replace(linkTag, fileContent.toString());
break;
}
}
if (!isCssFileFound) {
logger.log(Logger.Severity.warning, "Referenced css file not found!");
if (nonExistingHrefList == null) {
nonExistingHrefList = new ArrayList<>();
}
nonExistingHrefList.add(cssHref);
htmlContent = htmlContent.replace(cssHref, "");
}
}
}
}
}
return htmlContent;
}
/**
 * Deletes every part of fileContent that matches the regex ContextHelper builds for the "style" tag.
 *
 * @param fileContent text to clean.
 * @return fileContent without the matched parts.
 */
private String removeStyleTags(String fileContent) {
    String styleTagsRegex = ContextHelper.getTagsRegex("style", false);
    return findAndRemove(fileContent, styleTagsRegex);
}
/**
 * Deletes every part of fileContent that matches the regex ContextHelper builds for the "title" tag.
 *
 * @param fileContent text to clean.
 * @return fileContent without the matched parts.
 */
private String removeTitleTags(String fileContent) {
    String titleTagsRegex = ContextHelper.getTagsRegex("title", false);
    return findAndRemove(fileContent, titleTagsRegex);
}
/**
 * Removes every match of regex from text.
 *
 * @param text text to clean.
 * @param regex regular expression whose matches are deleted.
 * @return text without the matched parts; the same content as text when nothing matches.
 */
private String findAndRemove(String text, String regex) {
    // replaceAll handles matches at any position. Deciding "was anything removed" by looking at the length of the
    // partially built result fails when the only match is at the very beginning, because nothing has been copied yet.
    return Pattern.compile(regex).matcher(text).replaceAll("");
}
/**
 * Embeds the images referenced by img tags into the html as base64 data, and drops img tags whose file is not in the epub.
 *
 * For every img tag with a src attribute, the file name (text after the last slash of the src value) is compared, in encoded
 * form, with the file names of the zip entries. On a hit the entry is read and the file name text in htmlBody is replaced
 * with "data:image/EXTENSION;base64,CONTENT". File names that were not found are remembered in nonExistingHrefList.
 *
 * @param htmlBody html text to process.
 * @return htmlBody with the replacements applied.
 * @throws ReadingException declared for the helper calls made here.
 */
private String replaceImgTag(String htmlBody) throws ReadingException {
    Matcher imgTags = Pattern.compile(ContextHelper.getTagsRegex("img", true)).matcher(htmlBody);
    Pattern srcAttribute = Pattern.compile("src=\"(.*?)\"");
    while (imgTags.find()) {
        String imgTag = imgTags.group(0);
        Matcher srcValue = srcAttribute.matcher(imgTag);
        if (!srcValue.find()) {
            continue; // Nothing to resolve without a src attribute.
        }
        String imageFileName = ContextHelper.getTextAfterCharacter(srcValue.group(1), Constants.SLASH);
        String encodedImageFileName = ContextHelper.encodeToUtf8(imageFileName);
        boolean knownMissing = nonExistingHrefList != null && nonExistingHrefList.contains(imageFileName);
        if (knownMissing) {
            // logger.log(Logger.Severity.warning, "Already not found on the first try. Skipping the search for(Img) : " + srcMatcher);
            htmlBody = htmlBody.replace(imgTag, "");
            continue;
        }
        boolean imageEntryExists = false;
        for (String entryName : getEntryNames()) {
            String entryFileName = ContextHelper.encodeToUtf8(ContextHelper.getTextAfterCharacter(entryName, Constants.SLASH));
            if (!encodedImageFileName.equals(entryFileName)) {
                continue;
            }
            imageEntryExists = true;
            ZipFile epubFile = null;
            try {
                String extension = ContextHelper.getTextAfterCharacter(entryFileName, Constants.DOT);
                epubFile = new ZipFile(this.zipFilePath);
                InputStream imageStream = epubFile.getInputStream(epubFile.getEntry(entryName));
                // Convert inputStream to Base64Binary.
                byte[] base64Bytes = Base64.encodeBase64(ContextHelper.convertIsToByteArray(imageStream));
                String dataUri = "data:image/" + extension + ";base64," + new String(base64Bytes);
                htmlBody = htmlBody.replace(imageFileName, dataUri);
                break;
            } catch (IOException e) {
                // Reading failed; keep looking at the remaining entries.
                e.printStackTrace();
            } finally {
                if (epubFile != null) {
                    try {
                        epubFile.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
        if (imageEntryExists) {
            continue;
        }
        logger.log(Logger.Severity.warning, "Referenced image file not found: " + imageFileName);
        if (nonExistingHrefList == null) {
            nonExistingHrefList = new ArrayList<>();
        }
        nonExistingHrefList.add(imageFileName);
        htmlBody = htmlBody.replace(imgTag, "");
    }
    return htmlBody;
}
/**
 * Finds the outermost, non-empty table tags that open inside the given slice of htmlBody and hands them to markTableTags.
 *
 * Warning: May devour anchors.
 *
 * @param entryName key of the entry in entryTagPositions.
 * @param htmlBody complete body text of the entry.
 * @param trimStartPosition start of the slice.
 * @param trimEndPosition end of the slice; 0 means "until the end of htmlBody".
 */
private void searchForTableTags(String entryName, String htmlBody, int trimStartPosition, int trimEndPosition) {
    // 0 is the "no upper bound" sentinel. It has to be resolved before it is compared with tag positions,
    // otherwise every tag lies beyond the bound and no table is ever collected.
    int effectiveTrimEnd = trimEndPosition == 0 ? htmlBody.length() : trimEndPosition;
    String htmlBodyToReplace = htmlBody.substring(trimStartPosition, effectiveTrimEnd);
    // NOTE(review): cheap pre-check for an opening table tag; confirm the pattern against the upstream source.
    Pattern tableTagPattern = Pattern.compile("<table.*?>", Pattern.DOTALL);
    Matcher tableTagMatcher = tableTagPattern.matcher(htmlBodyToReplace);
    if (!tableTagMatcher.find()) {
        return;
    }
    if (entryTagPositions == null) {
        entryTagPositions = new HashMap<>();
    }
    if (!entryTagPositions.containsKey(entryName)) {
        // Presumably fills entryTagPositions for entryName; the list is read back right below.
        calculateEntryTagPositions(entryName, htmlBody);
    }
    List<Tag> tagStartEndPositions = this.entryTagPositions.get(entryName);
    List<Tag> tableTagList = new ArrayList<>();
    for (Tag tag : tagStartEndPositions) {
        int openingPosition = tag.getOpeningTagStartPosition();
        if (openingPosition > effectiveTrimEnd) {
            break;
        }
        boolean isNonEmptyTable = tag.getTagName().equals("table") && openingPosition != tag.getClosingTagStartPosition();
        boolean opensWithinScope = openingPosition > trimStartPosition && openingPosition < effectiveTrimEnd;
        if (isNonEmptyTable && opensWithinScope) {
            tableTagList.add(tag);
        }
    }
    // Remove nested tables.
    List<Tag> smallerTableTagList = new ArrayList<>();
    for (int i = 0; i < tableTagList.size(); i++) {
        int tag1StartPosition = tableTagList.get(i).getOpeningTagStartPosition();
        int tag1EndPosition = tableTagList.get(i).getClosingTagStartPosition();
        for (int j = i + 1; j < tableTagList.size(); j++) {
            int tag2StartPosition = tableTagList.get(j).getOpeningTagStartPosition();
            int tag2EndPosition = tableTagList.get(j).getClosingTagStartPosition();
            if (tag1StartPosition > tag2StartPosition && tag1EndPosition < tag2EndPosition) {
                smallerTableTagList.add(tableTagList.get(i));
            } else if (tag2StartPosition > tag1StartPosition && tag2EndPosition < tag1EndPosition) {
                smallerTableTagList.add(tableTagList.get(j));
            }
        }
    }
    tableTagList.removeAll(smallerTableTagList);
    markTableTags(entryName, htmlBody, trimStartPosition, trimEndPosition, tableTagList);
}
/**
 * Flags every tag that lies inside one of the given tables as omitted.
 *
 * A tag is flagged when its opening or its closing position is strictly between the opening and the closing position of a
 * table. For an empty tag both positions are equal, so a single rule covers empty and non-empty tags.
 *
 * @param entryName key of the entry in entryTagPositions; its tag list must already be present.
 * @param htmlBody not used by this method.
 * @param trimStartPosition not used by this method.
 * @param trimEndPosition not used by this method.
 * @param tableTagPositions table tags whose content is flagged.
 */
private void markTableTags(String entryName, String htmlBody, int trimStartPosition, int trimEndPosition, List<Tag> tableTagPositions) {
    List<Tag> tagStartEndPositions = this.entryTagPositions.get(entryName);
    for (Tag tableTag : tableTagPositions) {
        int tableStartPosition = tableTag.getOpeningTagStartPosition();
        int tableEndPosition = tableTag.getClosingTagStartPosition();
        for (Tag tag : tagStartEndPositions) {
            int openingPosition = tag.getOpeningTagStartPosition();
            if (openingPosition > tableEndPosition) {
                break;
            }
            int closingPosition = tag.getClosingTagStartPosition();
            boolean opensInsideTable = openingPosition > tableStartPosition && openingPosition < tableEndPosition;
            boolean closesInsideTable = closingPosition > tableStartPosition && closingPosition < tableEndPosition;
            // Images inside table tags look corrupt, so img tags are flagged like any other tag.
            if (opensInsideTable || closesInsideTable) {
                tag.setOmitted(true);
            }
        }
    }
}
/**
 * Removes all the tags from the requested slice of htmlBody and returns the remaining text.
 *
 * Works in two passes. First, every opening/closing tag whose position is strictly inside the slice gets its first and last
 * Constants.STRING_MARKER.length() characters overwritten with the marker; the overwrite keeps the length of htmlBody
 * unchanged, so the recorded tag positions stay usable. Each marked string is remembered. Second, the slice is cut out and
 * all remembered strings are deleted from it.
 *
 * @param entryName key of the entry in entryTagPositions; the tag list for it must already be present, it is iterated without a null check.
 * @param htmlBody complete body text of the entry.
 * @param trimStartPosition start of the slice.
 * @param trimEndPosition end of the slice; 0 means "until the end of htmlBody".
 * @return the slice of htmlBody with the marked tags removed.
 */
private String getOnlyTextContent(String entryName, String htmlBody, int trimStartPosition, int trimEndPosition) {
List tagStartEndPositions = this.entryTagPositions.get(entryName);
List stringsToRemove = new ArrayList<>();
// 0 is the "no upper bound" sentinel; resolve it before comparing with tag positions.
if (trimEndPosition == 0) {
trimEndPosition = htmlBody.length();
}
for (Tag tag : tagStartEndPositions) {
// This may not work correctly.
if (tag.getOpeningTagStartPosition() > trimEndPosition) {
break;
}
if (tag.getOpeningTagStartPosition() == tag.getClosingTagStartPosition()) { // Empty Tag
if (tag.getOpeningTagStartPosition() > trimStartPosition && tag.getOpeningTagStartPosition() < trimEndPosition) {
// Marked span: from (opening position - 1) to (opening position + full tag name length + 2).
htmlBody = htmlBody.substring(0, tag.getOpeningTagStartPosition() - 1) + Constants.STRING_MARKER
+ htmlBody.substring(tag.getOpeningTagStartPosition() - 1 + Constants.STRING_MARKER.length(),
tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 2 - Constants.STRING_MARKER.length())
+ Constants.STRING_MARKER + htmlBody.substring(tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 2, htmlBody.length());
stringsToRemove.add(Constants.STRING_MARKER + htmlBody.substring(tag.getOpeningTagStartPosition() - 1 + Constants.STRING_MARKER.length(),
tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 2 - Constants.STRING_MARKER.length()) + Constants.STRING_MARKER);
}
} else {
if (tag.getOpeningTagStartPosition() > trimStartPosition && tag.getOpeningTagStartPosition() < trimEndPosition) { // Opening tag.
// Marked span: from (opening position - 1) to (opening position + full tag name length + 1).
htmlBody = htmlBody.substring(0, tag.getOpeningTagStartPosition() - 1) + Constants.STRING_MARKER
+ htmlBody.substring(tag.getOpeningTagStartPosition() - 1 + Constants.STRING_MARKER.length(),
tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 1 - Constants.STRING_MARKER.length())
+ Constants.STRING_MARKER + htmlBody.substring(tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 1, htmlBody.length());
stringsToRemove.add(Constants.STRING_MARKER + htmlBody.substring(tag.getOpeningTagStartPosition() - 1 + Constants.STRING_MARKER.length(),
tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 1 - Constants.STRING_MARKER.length()) + Constants.STRING_MARKER);
}
if (tag.getClosingTagStartPosition() > trimStartPosition && tag.getClosingTagStartPosition() < trimEndPosition) { // Closing tag.
// Marked span: from (closing position - 1) to (closing position + tag name length + 2).
htmlBody = htmlBody.substring(0, tag.getClosingTagStartPosition() - 1) + Constants.STRING_MARKER
+ htmlBody.substring(tag.getClosingTagStartPosition() - 1 + Constants.STRING_MARKER.length(),
tag.getClosingTagStartPosition() + tag.getTagName().length() + 2 - Constants.STRING_MARKER.length())
+ Constants.STRING_MARKER + htmlBody.substring(tag.getClosingTagStartPosition() + tag.getTagName().length() + 2, htmlBody.length());
stringsToRemove.add(Constants.STRING_MARKER + htmlBody.substring(tag.getClosingTagStartPosition() - 1 + Constants.STRING_MARKER.length(),
tag.getClosingTagStartPosition() + tag.getTagName().length() + 2 - Constants.STRING_MARKER.length()) + Constants.STRING_MARKER);
}
}
}
// Cut the slice only after marking, because the marking relies on positions relative to the complete body.
htmlBody = htmlBody.substring(trimStartPosition, trimEndPosition);
for (String stringToRemove : stringsToRemove) {
htmlBody = htmlBody.replace(stringToRemove, "");
}
return htmlBody;
}
// TODO: Save these in navPoints as well avoid calculating again.
// TODO: Change the method name.
/**
 * Cuts the slice [trimStartPosition, trimEndPosition) out of htmlBody and surrounds it with the tags needed to keep it balanced.
 *
 * Steps, in order:
 * - On the first call for the nav point at index, stores the trim positions and the entry name on it and flags it as calculated.
 * - Returns htmlBody untouched when both trim positions are 0.
 * - Collects the non-omitted tags that open inside the slice but close after it; their closing tags are appended.
 * - Reads the open tags stored on the nav point at index: those still closing after the slice get an opening tag prepended and
 *   a closing tag appended, the others only get an opening tag prepended.
 * - When Optionals.cssStatus is CssStatus.OMIT, lets markOmittedTags mark the omitted tags and deletes the marked strings from
 *   the slice; marked strings containing "|tr" are replaced with a separate literal instead of being deleted.
 * - Hands the tags that are still open to the next nav point, or throws when this is the last nav point and tags are still open.
 *
 * @param htmlBody complete body text of the entry.
 * @param entryName key of the entry in entryTagPositions; the tag list for it must already be present.
 * @param index index of the current nav point in the nav map.
 * @param trimStartPosition start of the slice.
 * @param trimEndPosition end of the slice; 0 means "until the end of htmlBody".
 * @return the slice with the opening tags prepended and the closing tags appended.
 * @throws ReadingException if the last nav point still has opened and not yet closed tags.
 */
private String appendIncompleteTags(String htmlBody, String entryName, int index, int trimStartPosition, int trimEndPosition) throws ReadingException {
if (!getToc().getNavMap().getNavPoints().get(index).isCalculated()) {
getToc().getNavMap().getNavPoints().get(index).setBodyTrimStartPosition(trimStartPosition);
getToc().getNavMap().getNavPoints().get(index).setBodyTrimEndPosition(trimEndPosition);
getToc().getNavMap().getNavPoints().get(index).setEntryName(entryName);
getToc().getNavMap().getNavPoints().get(index).setCalculated(true);
}
// Both positions being 0 means the complete body is requested; nothing can be cut in half then.
if (trimStartPosition == 0 && trimEndPosition == 0) {
return htmlBody;
}
String htmlBodyToReplace = null;
List prevOpenedNotClosedYetTags = new ArrayList<>(); // Previously opened in this scope and not yet closed tags. Appending opening and closing tags.
List openedNotClosedYetTags = new ArrayList<>(); // Opened in this scope and not yet closed tags. Appending only closing tags.
List prevOpenedClosedTags = new ArrayList<>(); // Previously opened and closed in this scope. Appending only opening tags.
List currentEntryTags = this.entryTagPositions.get(entryName);
trimEndPosition = trimEndPosition == 0 ? htmlBody.length() : trimEndPosition;
for (int i = 0; i < currentEntryTags.size(); i++) {
Tag tag = currentEntryTags.get(i);
// TODO: break this when it's out of possibility.
if (tag.getOpeningTagStartPosition() > trimEndPosition) {
break;
}
// Opened in the trimmed part, closed after the trimmed part.
if (!tag.isOmitted() && tag.getOpeningTagStartPosition() > trimStartPosition && tag.getOpeningTagStartPosition() < trimEndPosition && tag.getClosingTagStartPosition() > trimEndPosition) {
openedNotClosedYetTags.add(tag);
}
}
// Tags the previous call left open for this nav point (see the setOpenTags call near the end of this method).
List prevOpenedTags = getToc().getNavMap().getNavPoints().get(index).getOpenTags();
if (prevOpenedTags != null) {
for (Tag prevOpenedTag : prevOpenedTags) {
if (prevOpenedTag.getClosingTagStartPosition() > trimEndPosition) { // Previously opened and not yet closed in scope tags. Should have a place in the beginning.
prevOpenedNotClosedYetTags.add(prevOpenedTag);
} else { // Previously opened but closed in scope tags. // TODO: Find these tags a position :( Or just append them from the beginning. I don't think it would break anything, would it?
prevOpenedClosedTags.add(prevOpenedTag);
}
}
}
Pair> htmlBodyMarkingsPair = null;
if (Optionals.cssStatus == CssStatus.OMIT) { // Tag omitting only happens in replaceTableTag function when css status is given Omit.
htmlBodyMarkingsPair = markOmittedTags(currentEntryTags, htmlBody, trimStartPosition, trimEndPosition);
// markOmittedTags returns null when it did not mark anything.
if (htmlBodyMarkingsPair != null) {
htmlBody = htmlBodyMarkingsPair.getFirst();
}
}
// TODO: We shouldn't substring htmlBody before this method.
if (trimEndPosition == htmlBody.length()) {
htmlBodyToReplace = htmlBody.substring(trimStartPosition);
} else {
htmlBodyToReplace = htmlBody.substring(trimStartPosition, trimEndPosition);
}
if (htmlBodyMarkingsPair != null) {
List stringsToRemove = htmlBodyMarkingsPair.getSecond();
if (stringsToRemove != null) {
for (String stringToRemove : stringsToRemove) {
// Marked strings containing "|tr" are swapped for a replacement text, all the others are dropped.
if (stringToRemove.contains("|tr")) {
htmlBodyToReplace = htmlBodyToReplace.replace(stringToRemove, "
");
} else {
htmlBodyToReplace = htmlBodyToReplace.replace(stringToRemove, "");
}
}
}
}
String openingTags = "";
String closingTags = "";
if (!openedNotClosedYetTags.isEmpty()) {
closingTags += prepareClosingTags(openedNotClosedYetTags);
}
if (!prevOpenedNotClosedYetTags.isEmpty()) {
openingTags += prepareOpeningTags(prevOpenedNotClosedYetTags);
closingTags += prepareClosingTags(prevOpenedNotClosedYetTags);
}
if (!prevOpenedClosedTags.isEmpty()) {
openingTags += prepareOpeningTags(prevOpenedClosedTags);
}
if (!openingTags.isEmpty() || !closingTags.isEmpty()) {
htmlBodyToReplace = openingTags + htmlBodyToReplace + closingTags;
}
if (getToc().getNavMap().getNavPoints().size() > (index + 1)) { // If this is not the last page, next navPoint should start with not closed yet tags because they are not closed in this navPoint as well.
openedNotClosedYetTags.addAll(prevOpenedNotClosedYetTags);
getToc().getNavMap().getNavPoints().get(index + 1).setOpenTags(openedNotClosedYetTags.isEmpty() ? null : openedNotClosedYetTags);
} else {
openedNotClosedYetTags.addAll(prevOpenedNotClosedYetTags);
if (!openedNotClosedYetTags.isEmpty()) { // openedTags should already be null if this is the last page.
throw new ReadingException("Last Page has opened and not yet closed tags."); // For debugging purposes.
}
}
return htmlBodyToReplace;
}
/**
 * Marks the omitted tags that lie strictly inside the slice (trimStartPosition, trimEndPosition) of htmlBody.
 *
 * Marking overwrites the first and the last Constants.STRING_MARKER.length() characters of a tag with the marker, which keeps
 * the length of htmlBody unchanged, and remembers the marked string so that the caller can delete it later.
 *
 * @param currentEntryTags tags of the entry; iteration stops at the first tag opening after trimEndPosition.
 * @param htmlBody complete body text of the entry.
 * @param trimStartPosition start of the slice.
 * @param trimEndPosition end of the slice; compared as is, so the caller has to resolve a 0 sentinel beforehand.
 * @return pair of (htmlBody with the markings, list of the marked strings), or null when nothing was marked.
 */
private Pair> markOmittedTags(List currentEntryTags, String htmlBody, int trimStartPosition, int trimEndPosition) {
boolean isHtmlBodyModified = false;
List stringsToRemove = null;
for (Tag tag : currentEntryTags) {
if (tag.getOpeningTagStartPosition() > trimEndPosition) {
break;
}
// Only tags flagged as omitted are of interest.
if (!tag.isOmitted()) {
continue;
}
// Span of the tag text to mark; -1 means there is nothing to mark.
int fromIndex = -1;
int toIndex = -1;
if (tag.getOpeningTagStartPosition() == tag.getClosingTagStartPosition()) { // Empty Tag
if (tag.getOpeningTagStartPosition() > trimStartPosition && tag.getOpeningTagStartPosition() < trimEndPosition) {
fromIndex = tag.getOpeningTagStartPosition() - 1;
toIndex = tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 2;
}
} else {
if (tag.getOpeningTagStartPosition() > trimStartPosition && tag.getOpeningTagStartPosition() < trimEndPosition) { // Opening tag.
fromIndex = tag.getOpeningTagStartPosition() - 1;
toIndex = tag.getOpeningTagStartPosition() + tag.getFullTagName().length() + 1;
}
// Mark the opening tag right away, because the indexes may be overwritten for the closing tag below.
if (fromIndex != -1 && toIndex != -1) {
htmlBody = htmlBody.substring(0, fromIndex) + Constants.STRING_MARKER + htmlBody.substring(fromIndex + Constants.STRING_MARKER.length(), toIndex - Constants.STRING_MARKER.length())
+ Constants.STRING_MARKER + htmlBody.substring(toIndex, htmlBody.length());
if (stringsToRemove == null) {
stringsToRemove = new ArrayList<>();
}
stringsToRemove.add(Constants.STRING_MARKER + htmlBody.substring(fromIndex + Constants.STRING_MARKER.length(), toIndex - Constants.STRING_MARKER.length()) + Constants.STRING_MARKER);
isHtmlBodyModified = true;
}
if (tag.getClosingTagStartPosition() > trimStartPosition && tag.getClosingTagStartPosition() < trimEndPosition) { // Closing tag.
fromIndex = tag.getClosingTagStartPosition() - 1;
toIndex = tag.getClosingTagStartPosition() + tag.getTagName().length() + 2;
}
}
// Marks the empty tag, or the closing tag of a non-empty tag.
// NOTE(review): when only the opening tag of a non-empty tag is in range, the indexes still point at that opening tag, so
// the same marking is applied a second time and the same string is added to stringsToRemove again.
if (fromIndex != -1 && toIndex != -1) {
htmlBody = htmlBody.substring(0, fromIndex) + Constants.STRING_MARKER + htmlBody.substring(fromIndex + Constants.STRING_MARKER.length(), toIndex - Constants.STRING_MARKER.length())
+ Constants.STRING_MARKER + htmlBody.substring(toIndex, htmlBody.length());
if (stringsToRemove == null) {
stringsToRemove = new ArrayList<>();
}
stringsToRemove.add(Constants.STRING_MARKER + htmlBody.substring(fromIndex + Constants.STRING_MARKER.length(), toIndex - Constants.STRING_MARKER.length()) + Constants.STRING_MARKER);
isHtmlBodyModified = true;
}
}
return isHtmlBodyModified ? new Pair<>(htmlBody, stringsToRemove) : null;
}
/**
 * Reads the cover image of the book.
 *
 * The cover image id comes from the package metadata. The manifest item with that id supplies the href, and the first zip
 * entry whose name contains the href is read completely.
 *
 * @return content of the cover image file, or null when the package, the metadata, the cover image id, a matching manifest
 *         item with an href, or a matching zip entry is missing.
 * @throws ReadingException if the epub file cannot be opened, read or closed.
 */
byte[] getCoverImage() throws ReadingException {
    // Check the package before asking it for the metadata.
    if (this.opfPackage == null) {
        return null;
    }
    Metadata metadata = this.opfPackage.getMetadata();
    if (metadata == null) {
        return null;
    }
    String coverImageId = metadata.getCoverImageId();
    if (coverImageId == null || coverImageId.equals("")) {
        return null;
    }
    List<XmlItem> manifestXmlItems = this.opfPackage.getManifest().getXmlItemList();
    for (XmlItem xmlItem : manifestXmlItems) {
        // Compare from the known non-null side: a manifest item is not guaranteed to carry an id attribute.
        if (!coverImageId.equals(xmlItem.getAttributes().get("id"))) {
            continue;
        }
        String coverImageEntryName = xmlItem.getAttributes().get("href");
        if (coverImageEntryName == null || coverImageEntryName.equals("")) {
            continue;
        }
        ZipFile epubFile = null;
        try {
            try {
                epubFile = new ZipFile(this.getZipFilePath());
            } catch (IOException e) {
                e.printStackTrace();
                throw new ReadingException("Error initializing ZipFile: " + e.getMessage());
            }
            for (String entryName : this.getEntryNames()) {
                // TODO: I might have to change this contains with equals.
                if (!entryName.contains(coverImageEntryName)) {
                    continue;
                }
                ZipEntry coverImageEntry = epubFile.getEntry(entryName);
                InputStream inputStream;
                try {
                    inputStream = epubFile.getInputStream(coverImageEntry);
                } catch (IOException e) {
                    e.printStackTrace();
                    throw new ReadingException("IOException while reading " + entryName + " file: " + e.getMessage());
                }
                try {
                    return ContextHelper.convertIsToByteArray(inputStream);
                } catch (IOException e) {
                    e.printStackTrace();
                    throw new ReadingException("IOException while converting inputStream to byte array: " + e.getMessage());
                }
            }
        } finally {
            // Closing the ZipFile also closes the streams obtained from it.
            try {
                if (epubFile != null) {
                    epubFile.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
                throw new ReadingException("Error closing ZipFile: " + e.getMessage());
            }
        }
    }
    return null;
}
/**
 * @return the names of the zip entries collected through addEntryName; the internal list itself, not a copy.
 */
List<String> getEntryNames() {
    return entryNames;
}
/**
 * Appends a zip entry name to the list returned by getEntryNames.
 *
 * @param zipEntryName name of the zip entry.
 */
void addEntryName(String zipEntryName) {
    this.entryNames.add(zipEntryName);
}
/**
 * @return the Container instance held by this content.
 */
Container getContainer() {
    return this.container;
}
/**
 * @return the opf Package instance held by this content.
 */
Package getPackage() {
    return this.opfPackage;
}
/**
 * @return the Toc instance held by this content.
 */
Toc getToc() {
    return this.toc;
}
/**
 * Stores the path of the epub (zip) file that the reading methods open.
 *
 * @param zipFilePath path of the epub file.
 */
void setZipFilePath(String zipFilePath) {
    String newZipFilePath = zipFilePath;
    this.zipFilePath = newZipFilePath;
}
/**
 * @return the path of the epub (zip) file as given to setZipFilePath.
 */
String getZipFilePath() {
    return zipFilePath;
}
}