All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.daisy.dotify.formatter.impl.VolumeProvider Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
package org.daisy.dotify.formatter.impl;

import org.daisy.dotify.api.formatter.SequenceProperties.SequenceBreakBefore;
import org.daisy.dotify.common.splitter.SplitPoint;
import org.daisy.dotify.common.splitter.SplitPointCost;
import org.daisy.dotify.common.splitter.SplitPointDataSource;
import org.daisy.dotify.common.splitter.SplitPointHandler;
import org.daisy.dotify.common.splitter.SplitPointSpecification;
import org.daisy.dotify.common.splitter.StandardSplitOption;
import org.daisy.dotify.formatter.impl.core.PaginatorException;
import org.daisy.dotify.formatter.impl.page.BlockSequence;
import org.daisy.dotify.formatter.impl.page.PageImpl;
import org.daisy.dotify.formatter.impl.page.RestartPaginationException;
import org.daisy.dotify.formatter.impl.search.AnchorData;
import org.daisy.dotify.formatter.impl.search.CrossReferenceHandler;
import org.daisy.dotify.formatter.impl.search.DefaultContext;
import org.daisy.dotify.formatter.impl.search.Space;
import org.daisy.dotify.formatter.impl.search.VolumeData;
import org.daisy.dotify.formatter.impl.search.VolumeKeepPriority;
import org.daisy.dotify.formatter.impl.sheet.PageCounter;
import org.daisy.dotify.formatter.impl.sheet.SectionBuilder;
import org.daisy.dotify.formatter.impl.sheet.Sheet;
import org.daisy.dotify.formatter.impl.sheet.SheetDataSource;
import org.daisy.dotify.formatter.impl.sheet.SheetGroup;
import org.daisy.dotify.formatter.impl.sheet.SheetGroupManager;
import org.daisy.dotify.formatter.impl.sheet.SplitterLimit;
import org.daisy.dotify.formatter.impl.sheet.VolumeImpl;
import org.daisy.dotify.formatter.impl.volume.VolumeSequence;
import org.daisy.dotify.formatter.impl.volume.VolumeTemplate;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * 

Given a list of {@link BlockSequence}s, produces {@link * org.daisy.dotify.formatter.impl.common.Volume} objects one by one. The volumes are obtained * through a {@link #nextVolume() "iterator" interface}.

* *

The input is a list of {@link BlockSequence}s, which are first converted to a sequence of * "volume groups", where every group is a list of {@link BlockSequence}s, of which the first one * has a hard volume break (break-before="volume"). For every volume group a {@link * SheetDataSource} is then created, which is wrapped in a {@link SheetGroup}. All groups are * managed in a {@link SheetGroupManager}. {@link SheetDataSource}s that do not fit in a volume are * broken, using {@link SplitPointHandler}. The cost function takes into account how much the total * size of a volume deviates from the target size, the volume-break-priority of the * last sheet, and whether the {@link Sheet#isBreakable() isBreakable} constraint of the last sheet * is violated.

* *

Pre- and post-content is added to every volume based on the provided {@link * VolumeTemplate}s.

* *

One {@link PageCounter} is created for the body of the whole document, and one for every pre- * or post-content of every volume.

* * @author Joel Håkansson */ public class VolumeProvider { private static final Logger logger = Logger.getLogger(VolumeProvider.class.getCanonicalName()); private static final int DEFAULT_SPLITTER_MAX = 50; private final List blocks; private final CrossReferenceHandler crh; private SheetGroupManager groups; private final SplitPointHandler volSplitter; private int pageIndex = 0; private int currentVolumeNumber = 0; private boolean init = false; private int j = 1; private final SplitterLimit splitterLimit; private final Stack volumeTemplates; private final LazyFormatterContext context; /** * Creates a new volume provider with the specifed parameters. * * @param blocks the block sequences * @param volumeTemplates volume templates * @param context the formatter context */ VolumeProvider(List blocks, Stack volumeTemplates, LazyFormatterContext context) { this.blocks = blocks; this.crh = new CrossReferenceHandler(); this.splitterLimit = volumeNumber -> { final DefaultContext c = new DefaultContext.Builder(crh) .currentVolume(volumeNumber) .build(); Optional ot = volumeTemplates.stream().filter(t -> t.appliesTo(c)).findFirst(); if (ot.isPresent()) { return ot.get().getVolumeMaxSize(); } else { logger.fine("Found no applicable volume template."); return DEFAULT_SPLITTER_MAX; } }; this.volumeTemplates = volumeTemplates; this.context = context; this.volSplitter = new SplitPointHandler<>(); } /** * Resets the volume provider to its initial state (with some information preserved). * * @throws RestartPaginationException */ void prepare() { if (!init) { groups = new SheetGroupManager(); // make a preliminary calculation based on a contents only Iterable allUnits = prepareToPaginateWithVolumeGroups( blocks, new DefaultContext.Builder(crh).space(Space.BODY).build() ); int volCount = 0; // initial estimate of volume count for (SheetDataSource data : allUnits) { groups.add().setUnits(data); volCount += (1 + (data.getRemaining().size() - 1) / splitterLimit.getSplitterLimit(volCount + 1)); } crh.setVolumeCount(volCount); //if there is an error, we won't have a proper initialization and have to retry from the beginning init = true; } Iterable allUnits = prepareToPaginateWithVolumeGroups( blocks, new DefaultContext.Builder(crh).space(Space.BODY).build() ); int i = 0; for (SheetDataSource unit : allUnits) { groups.atIndex(i).setUnits(unit); i++; } pageIndex = 0; currentVolumeNumber = 0; groups.resetAll(); } /** * @return returns the next volume * @throws RestartPaginationException if pagination should be restarted */ VolumeImpl nextVolume() { currentVolumeNumber++; VolumeImpl volume = new VolumeImpl(crh.getOverhead(currentVolumeNumber)); List ad = new ArrayList<>(); volume.setPreVolData(updateVolumeContents(currentVolumeNumber, ad, true)); volume.setBody(nextBodyContents(currentVolumeNumber, volume.getOverhead().total(), ad)); if (logger.isLoggable(Level.FINE)) { logger.fine("Sheets in volume " + currentVolumeNumber + ": " + (volume.getVolumeSize()) + ", content:" + volume.getBodySize() + ", overhead:" + volume.getOverhead()); } volume.setPostVolData(updateVolumeContents(currentVolumeNumber, ad, false)); crh.setSheetsInVolume(currentVolumeNumber, volume.getBodySize() + volume.getOverhead().total()); //crh.setPagesInVolume(i, value); crh.setAnchorData(currentVolumeNumber, ad); crh.setOverhead(currentVolumeNumber, volume.getOverhead()); return volume; } /** * Gets the contents of the next volume. * * @param overhead the number of sheets in this volume that's not part of the main body of text * @param ad the anchor data * @return returns the contents of the next volume */ private SectionBuilder nextBodyContents(int volumeNumber, final int overhead, List ad) { groups.currentGroup().setOverheadCount(groups.currentGroup().getOverheadCount() + overhead); final int targetSheetsInVolume = splitterLimit.getSplitterLimit(volumeNumber); //Not using lambda for now, because it's noticeably slower. SplitPointCost cost = new SplitPointCost() { @Override public double getCost(SplitPointDataSource units, int index, int breakpoint) { int contentSheetTarget = targetSheetsInVolume - overhead; Sheet lastSheet = units.get(index); double priorityPenalty = 0; int sheetCount = index + 1; // Calculates a maximum offset based on the maximum possible number of sheets double range = targetSheetsInVolume * 0.4; if (!units.isEmpty()) { VolumeKeepPriority avoid = lastSheet.getAvoidVolumeBreakAfterPriority(); if (avoid.hasValue()) { // Reverses 1-9 to 9-1 with bounds control and normalizes that to [1/9, 1] double normalized = ((10 - avoid.getValue()) / 9d); // Calculates a number of sheets that a high priority can beat priorityPenalty = range * normalized; } } // sets the preferred value to targetSheetsInVolume, where cost will be 0 // including a small preference for bigger volumes double distancePenalty = Math.abs(contentSheetTarget - sheetCount) + (contentSheetTarget - sheetCount) * 0.001; int unbreakablePenalty = lastSheet.isBreakable() ? 0 : 100; return distancePenalty + priorityPenalty + unbreakablePenalty; } }; SplitPoint sp; // The data is consumed two times. Once to find the optimal break point ("find" function), // and once to do the actual split at that position ("split" function). We only record the // changes made to the CrossReferenceHandler during the second pass. Note that although no // changes are made to the CrossReferenceHandler, it can still become dirty if some info // that is requested is not available. crh.setReadOnly(); SheetDataSource data = groups.currentGroup().getUnits(); data.setCurrentVolumeNumber(volumeNumber); SheetDataSource copySource = new SheetDataSource(data); SplitPointSpecification spec = volSplitter.find(targetSheetsInVolume - overhead, copySource, cost, StandardSplitOption.ALLOW_FORCE); crh.setReadWrite(); sp = volSplitter.split(spec, data); /* sp = volSplitter.split(targetSheetsInVolume-overhead, groups.currentGroup().getUnits(), cost, StandardSplitOption.ALLOW_FORCE); */ groups.currentGroup().setUnits(sp.getTail()); List contents = sp.getHead(); int pageCount = Sheet.countPages(contents); crh.commitPageDetails(); crh.setVolumeScope(volumeNumber, pageIndex, pageIndex + pageCount); pageIndex += pageCount; SectionBuilder sb = new SectionBuilder(); boolean atFirstPageOfContents = true; for (Sheet sheet : contents) { for (PageImpl p : sheet.getPages()) { if (atFirstPageOfContents) { crh.setPageNumberOfFirstContentPageOfVolume(volumeNumber, p.getPageNumber()); List contentIdentifiers = p.getContentIdentifiers(); for (String id : p.getIdentifiers()) { boolean atStartOfPage = !contentIdentifiers.contains(id); crh.setVolumeData(id, new VolumeData(volumeNumber, atStartOfPage)); } } else { for (String id : p.getIdentifiers()) { crh.setVolumeData(id, new VolumeData(volumeNumber, false)); } } if (p.getAnchors().size() > 0) { ad.add(new AnchorData(p.getAnchors(), p.getPageNumber())); } atFirstPageOfContents = false; } sb.addSheet(sheet); } groups.currentGroup().setSheetCount(groups.currentGroup().getSheetCount() + contents.size()); groups.nextVolume(); return sb; } private SectionBuilder updateVolumeContents(int volumeNumber, List ad, boolean pre) { DefaultContext c = new DefaultContext.Builder(crh) .currentVolume(volumeNumber) .space(pre ? Space.PRE_CONTENT : Space.POST_CONTENT) .build(); try { List ib = new ArrayList<>(); for (VolumeTemplate t : volumeTemplates) { if (t.appliesTo(c)) { for (VolumeSequence seq : (pre ? t.getPreVolumeContent() : t.getPostVolumeContent())) { BlockSequence s = seq.getBlockSequence(context.getFormatterContext(), c, crh); if (s != null) { ib.add(s); } } break; } } List ret = prepareToPaginatePrePostVolumeContent(ib, c).getRemaining(); SectionBuilder sb = new SectionBuilder(); for (Sheet ps : ret) { for (PageImpl p : ps.getPages()) { for (String id : p.getIdentifiers()) { crh.setVolumeData(id, new VolumeData(volumeNumber, false)); } if (p.getAnchors().size() > 0) { ad.add(new AnchorData(p.getAnchors(), p.getPageNumber())); } } sb.addSheet(ps); } return sb; } catch (PaginatorException e) { throw new RuntimeException(e); } } /** * Convert a list of {@link BlockSequence}s from a pre- or post-content to a {@link SheetDataSource}. */ private SheetDataSource prepareToPaginatePrePostVolumeContent( List fs, DefaultContext rcontext ) throws PaginatorException { return prepareToPaginate(new PageCounter(), rcontext, null, fs); } /** * Process hard volume breaks. * *

Convert a list of {@link BlockSequence}s to a sequence of {@link SheetDataSource}. At * every BlockSequence with a hard volume break * (break-before="volume") a new SheetDataSource is started. */ private Iterable prepareToPaginateWithVolumeGroups( List fs, DefaultContext rcontext ) { List> volGroups = new ArrayList<>(); List currentGroup = new ArrayList<>(); volGroups.add(currentGroup); for (BlockSequence bs : fs) { if (bs.getSequenceProperties().getBreakBeforeType() == SequenceBreakBefore.VOLUME) { currentGroup = new ArrayList<>(); volGroups.add(currentGroup); } currentGroup.add(bs); } PageCounter pageCounter = new PageCounter(); crh.resetUniqueChecks(); return new Iterable() { @Override public Iterator iterator() { try { return prepareToPaginateWithVolumeGroups(pageCounter, rcontext, volGroups).iterator(); } catch (PaginatorException e) { throw new RuntimeException(e); } } }; } /** * Convert a list of {@link BlockSequence}s, grouped in "volume groups" that start with a hard * volume break, to a list of {@link SheetDataSource}. */ private List prepareToPaginateWithVolumeGroups( PageCounter pageCounter, DefaultContext rcontext, Iterable> volGroups ) throws PaginatorException { List ret = new ArrayList<>(); int i = 0; for (List glist : volGroups) { ret.add(prepareToPaginate(pageCounter, rcontext, i++, glist)); } return ret; } /** * Convert a list of {@link BlockSequence}s to a single {@link SheetDataSource}. */ private SheetDataSource prepareToPaginate( PageCounter pageCounter, DefaultContext rcontext, Integer volumeGroup, List seqs ) throws PaginatorException { return new SheetDataSource(pageCounter, context.getFormatterContext(), rcontext, volumeGroup, seqs); } /** * Informs the volume provider that the caller has finished requesting volumes. * Note: only use after all volumes have been calculated. * * @return returns true if the volumes can be accepted, false otherwise */ boolean done() { crh.commitBreakable(); crh.commitTransitionProperties(); crh.trimPageDetails(); crh.setVolumeCount(groups.getVolumeCount()); crh.setSheetsInDocument(groups.countTotalSheets()); //crh.setPagesInDocument(value); if (!crh.isDirty()) { return true; } else { crh.setDirty(false); logger.info("Things didn't add up, running another iteration (" + j + ")"); } j++; return false; } boolean hasNext() { return groups.hasNext(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy