/*
 * semanticcms-core-servlet - Java API for modeling web page content and relationships in a Servlet environment.
 * Copyright (C) 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022  AO Industries, Inc.
 *     [email protected]
 *     7262 Bull Pen Cir
 *     Mobile, AL 36695
 *
 * This file is part of semanticcms-core-servlet.
 *
 * semanticcms-core-servlet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * semanticcms-core-servlet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with semanticcms-core-servlet.  If not, see <https://www.gnu.org/licenses/>.
 */

package com.semanticcms.core.servlet;

import com.aoapps.collections.AoCollections;
import com.aoapps.concurrent.Executor;
import com.aoapps.concurrent.Executors;
import com.aoapps.encoding.Doctype;
import com.aoapps.encoding.Serialization;
import com.aoapps.encoding.servlet.DoctypeEE;
import com.aoapps.encoding.servlet.SerializationEE;
import com.aoapps.html.any.AnyDocument;
import com.aoapps.lang.NullArgumentException;
import com.aoapps.lang.concurrent.ExecutionExceptions;
import com.aoapps.servlet.ServletUtil;
import com.aoapps.servlet.attribute.ScopeEE;
import com.aoapps.servlet.http.Dispatcher;
import com.aoapps.servlet.http.HttpServletUtil;
import com.aoapps.servlet.http.NullHttpServletResponseWrapper;
import com.aoapps.servlet.subrequest.HttpServletSubRequest;
import com.aoapps.servlet.subrequest.HttpServletSubRequestWrapper;
import com.aoapps.servlet.subrequest.HttpServletSubResponse;
import com.aoapps.servlet.subrequest.HttpServletSubResponseWrapper;
import com.aoapps.servlet.subrequest.IHttpServletSubRequest;
import com.aoapps.servlet.subrequest.IHttpServletSubResponse;
import com.aoapps.servlet.subrequest.UnmodifiableCopyHttpServletRequest;
import com.aoapps.servlet.subrequest.UnmodifiableCopyHttpServletResponse;
import com.aoapps.tempfiles.TempFileContext;
import com.aoapps.tempfiles.servlet.TempFileContextEE;
import com.semanticcms.core.model.Page;
import com.semanticcms.core.model.PageRef;
import com.semanticcms.core.model.PageReferrer;
import com.semanticcms.core.servlet.impl.PageImpl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.jsp.SkipPageException;

@SuppressWarnings("UseOfSystemOutOrSystemErr")
public final class CapturePage {

  private static final ScopeEE.Request.Attribute<CapturePage> REQUEST_ATTRIBUTE =
      ScopeEE.REQUEST.attribute(CapturePage.class.getName());

  private static final boolean CONCURRENT_TRAVERSALS_ENABLED = true;

  private static final boolean DEBUG = false;
  private static final boolean DEBUG_NOW = false;

  /**
   * Gets the capture context, or {@code null} if no capture is currently in progress.
   */
  public static CapturePage getCaptureContext(ServletRequest request) {
    return REQUEST_ATTRIBUTE.context(request).get();
  }
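
  // Usage sketch (illustrative only, not code in this class): a page implementation can detect an
  // in-progress capture and hand over its parsed model instead of rendering, using
  // getCaptureContext(ServletRequest) above and the public setCapturedPage(Page) below.
  //
  //   CapturePage capture = CapturePage.getCaptureContext(request);
  //   if (capture != null) {
  //     capture.setCapturedPage(page); // register the parsed page model with the capture
  //   }
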

  /**
   * Captures a page.
   * The capture is always done with a request method of "GET", even when the enclosing request is a different method.
   * Also validates parent-child and child-parent relationships if the other related pages happened to already be captured and cached.
   *
   * <p>TODO: Within the scope of one request and cache, avoid capturing the same page at the same time
   * (ConcurrencyLimiter applied to sub requests).  Is there a reasonable way to catch deadlock conditions?</p>
   *
   * @param level  The minimum page capture level, note that a higher level might be substituted,
   *               such as a META capture in place of a PAGE request.
   */
  public static Page capturePage(
      ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      PageReferrer pageReferrer, CaptureLevel level
  ) throws ServletException, IOException {
    return capturePage(servletContext, request, response, pageReferrer, level, CacheFilter.getCache(request));
  }

  /**
   * Captures a page.
   * The capture is always done with a request method of "GET", even when the enclosing request is a different method.
   * Also validates parent-child and child-parent relationships if the other related pages happened to already be captured and cached.
   *
   * @param cache  See {@link CacheFilter#getCache(javax.servlet.ServletRequest)}
   */
  public static Page capturePage(
      ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      PageReferrer pageReferrer, CaptureLevel level, Cache cache
  ) throws ServletException, IOException {
    return capturePage(
        servletContext, request, response,
        new HttpServletSubRequestWrapper(request),
        new HttpServletSubResponseWrapper(response, TempFileContextEE.get(request)),
        pageReferrer, level, cache
    );
  }

  private static Page capturePage(
      final ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      final IHttpServletSubRequest subRequest, final IHttpServletSubResponse subResponse,
      PageReferrer pageReferrer, final CaptureLevel level, Cache cache
  ) throws ServletException, IOException {
    NullArgumentException.checkNotNull(level, "level");
    PageRef pageRef = pageReferrer.getPageRef();
    // Don't use cache for full body captures
    boolean useCache = level != CaptureLevel.BODY;
    // cacheKey will be null when this capture is not to be cached
    final Cache.CaptureKey cacheKey;
    Page capturedPage;
    if (useCache) {
      // Check the cache
      cacheKey = new Cache.CaptureKey(pageRef, level);
      capturedPage = cache.get(cacheKey);
      // Set useCache = false to not put back into the cache unnecessarily below
      useCache = capturedPage == null;
    } else {
      cacheKey = null;
      capturedPage = null;
    }
    if (capturedPage == null) {
      // Clear request values that break captures
      CurrentNode.setCurrentNode(subRequest, null);
      CurrentPage.setCurrentPage(subRequest, null);
      // Set the content type
      Serialization currentSerialization = SerializationEE.getDefault(servletContext, subRequest);
      SerializationEE.set(subRequest, currentSerialization);
      ServletUtil.setContentType(subResponse, currentSerialization.getContentType(), AnyDocument.ENCODING);
      // Set the default doctype for all captures
      DoctypeEE.set(subRequest, Doctype.DEFAULT);
      // Set new capture context
      CaptureLevel.setCaptureLevel(subRequest, level);
      CapturePage captureContext = new CapturePage();
      REQUEST_ATTRIBUTE.context(subRequest).set(captureContext);
      // Always capture as "GET" request
      subRequest.setMethod(HttpServletUtil.METHOD_GET);
      // Include the page resource, discarding any direct output
      final String capturePath = pageRef.getServletPath();
      try {
        // Clear PageContext on include
        PageContext.newPageContextSkip(
            null, null, null,
            () -> Dispatcher.include(
                servletContext, capturePath, subRequest,
                // Discard all output
                new NullHttpServletResponseWrapper(subResponse)
            )
        );
      } catch (SkipPageException e) {
        // An individual page may throw SkipPageException which only terminates
        // the capture, not the request overall
      }
      capturedPage = captureContext.getCapturedPage();
      if (capturedPage == null) {
        throw new ServletException("No page captured, page=" + capturePath);
      }
      PageRef capturedPageRef = capturedPage.getPageRef();
      if (!capturedPageRef.equals(pageRef)) {
        throw new ServletException(
            "Captured page has unexpected pageRef.  Expected ("
                + pageRef.getBookName() + ", " + pageRef.getPath()
                + ") but got ("
                + capturedPageRef.getBookName() + ", " + capturedPageRef.getPath()
                + ')'
        );
      }
    }
    assert capturedPage != null;
    if (useCache) {
      // Add to cache
      cache.put(cacheKey, capturedPage);
    } else {
      // Body capture, performance is not the main objective, perform full child and parent verifications,
      // this will mean a "View All" will perform thorough verifications.
      if (level == CaptureLevel.BODY) {
        PageImpl.fullVerifyParentChild(servletContext, request, response, capturedPage);
      }
    }
    return capturedPage;
  }

  /**
   * Captures a page in the current page context.
   * The capture is always done with a request method of "GET", even when the enclosing request is a different method.
   * Also validates parent-child and child-parent relationships if the other related pages happened to already be captured and cached.
   *
   * @see #capturePage(javax.servlet.ServletContext, javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, com.semanticcms.core.model.PageReferrer, com.semanticcms.core.servlet.CaptureLevel, com.semanticcms.core.servlet.Cache)
   * @see PageContext
   */
  public static Page capturePage(PageReferrer pageReferrer, CaptureLevel level) throws ServletException, IOException {
    return capturePage(
        PageContext.getServletContext(),
        PageContext.getRequest(),
        PageContext.getResponse(),
        pageReferrer,
        level
    );
  }
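
  // Usage sketch (assumptions noted): capturing another page's metadata from servlet code.  The
  // PageRef is assumed to be obtained elsewhere (for example via PageRefResolver), and
  // Page#getTitle() is assumed from semanticcms-core-model; neither is defined in this class.
  //
  //   Page captured = CapturePage.capturePage(servletContext, request, response, pageRef, CaptureLevel.META);
  //   String title = captured.getTitle();
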
  /**
   * Captures multiple pages.
   *
   * @param pageReferrers  The pages that should be captured.  This set will be iterated only once during this operation.
   *
   * @return  map from pageRef to page, with iteration order equal to the provided pageReferrers parameter.
   *
   * @see #capturePage(javax.servlet.ServletContext, javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, com.semanticcms.core.model.PageReferrer, com.semanticcms.core.servlet.CaptureLevel)
   */
  public static Map<PageRef, Page> capturePages(
      final ServletContext servletContext, final HttpServletRequest request, final HttpServletResponse response,
      Set<? extends PageReferrer> pageReferrers, final CaptureLevel level
  ) throws ServletException, IOException {
    int size = pageReferrers.size();
    if (size == 0) {
      return Collections.emptyMap();
    } else if (size == 1) {
      PageRef pageRef = pageReferrers.iterator().next().getPageRef();
      return Collections.singletonMap(pageRef, capturePage(servletContext, request, response, pageRef, level));
    } else {
      final Cache cache = CacheFilter.getCache(request);
      Map<PageRef, Page> results = AoCollections.newLinkedHashMap(size);
      List<PageReferrer> notCachedList = new ArrayList<>(size);
      if (level != CaptureLevel.BODY) {
        // Check cache before queuing on different threads, building list of those not in cache
        for (PageReferrer pageReferrer : pageReferrers) {
          PageRef pageRef = pageReferrer.getPageRef();
          Page page = cache.get(pageRef, level);
          if (page != null) {
            // Use cached value
            results.put(pageRef, page);
          } else {
            // Will capture below
            notCachedList.add(pageRef);
          }
        }
      } else {
        notCachedList.addAll(pageReferrers);
      }
      int notCachedSize = notCachedList.size();
      if (notCachedSize > 1 && ConcurrencyCoordinator.useConcurrentSubrequests(request)) {
        // Concurrent implementation
        final TempFileContext tempFileContext = TempFileContextEE.get(request);
        final HttpServletRequest threadSafeReq = new UnmodifiableCopyHttpServletRequest(request);
        final HttpServletResponse threadSafeResp = new UnmodifiableCopyHttpServletResponse(response);
        // Create the tasks
        List<Callable<Page>> tasks = new ArrayList<>(notCachedSize);
        for (int i = 0; i < notCachedSize; i++) {
          final PageRef pageRef = notCachedList.get(i).getPageRef();
          tasks.add(() -> capturePage(
              servletContext, threadSafeReq, threadSafeResp,
              new HttpServletSubRequest(threadSafeReq),
              new HttpServletSubResponse(threadSafeResp, tempFileContext),
              pageRef, level, cache
          ));
        }
        List<Page> notCachedResults;
        try {
          notCachedResults = SemanticCMS.getInstance(servletContext).getExecutors().getPerProcessor().callAll(tasks);
        } catch (InterruptedException e) {
          // Restore the interrupted status
          Thread.currentThread().interrupt();
          throw new ServletException(e);
        } catch (ExecutionException e) {
          // Maintain expected exception types while not losing stack trace
          ExecutionExceptions.wrapAndThrow(e, IOException.class, IOException::new);
          throw new ServletException(e);
        }
        for (int i = 0; i < notCachedSize; i++) {
          results.put(notCachedList.get(i).getPageRef(), notCachedResults.get(i));
        }
      } else {
        // Sequential implementation
        for (PageReferrer pageReferrer : notCachedList) {
          PageRef pageRef = pageReferrer.getPageRef();
          results.put(pageRef, capturePage(servletContext, request, response, pageRef, level, cache));
        }
      }
      return Collections.unmodifiableMap(results);
    }
  }
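
  // Usage sketch (assumptions noted): capturing several pages at once so that cache misses may be
  // captured concurrently.  The Set<PageRef> is assumed to be built elsewhere, and process(...) is
  // a hypothetical callback; iteration order of the returned map matches the order of the set.
  //
  //   Map<PageRef, Page> pages = CapturePage.capturePages(servletContext, request, response, pageRefs, CaptureLevel.META);
  //   pages.forEach((ref, page) -> process(ref, page));
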
  /**
   * Captures multiple pages in the current page context.
   *
   * @see #capturePages(javax.servlet.ServletContext, javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, java.util.Set, com.semanticcms.core.servlet.CaptureLevel)
   * @see PageContext
   */
  public static Map<PageRef, Page> capturePages(Set<? extends PageReferrer> pageReferrers, CaptureLevel level)
      throws ServletException, IOException {
    return capturePages(
        PageContext.getServletContext(),
        PageContext.getRequest(),
        PageContext.getResponse(),
        pageReferrers,
        level
    );
  }

  @FunctionalInterface
  public static interface TraversalEdges {
    /**
     * Gets the child pages to consider for the given page during a traversal.
     * This may be called more than once per page per traversal and must give consistent results each call.
     * The returned collection may be iterated more than once and must give consistent results each iteration.
     * TODO: Make this Iterable?
     */
    Collection<? extends PageReferrer> getEdges(Page page);
  }

  @FunctionalInterface
  public static interface EdgeFilter {
    /**
     * Each edge returned is filtered through this and must return true for the
     * edge to be considered.  This filter is not called when the edge has
     * already been visited, however it might be called more than once during
     * some concurrent implementations.  This filter must give consistent results
     * when called more than once.
     */
    boolean applyEdge(PageRef edge);
  }

  @FunctionalInterface
  public static interface PageHandler<T> {
    /**
     * Called after the page is captured, either before or after its children are captured.
     *
     * @return  non-null value to terminate the traversal and return this value
     */
    T handlePage(Page page) throws ServletException, IOException;
  }

  @FunctionalInterface
  public static interface PageDepthHandler<T> {
    /**
     * Called after the page is captured, either before or after its children are captured.
     * Provided the current depth in the page tree, where 0 is the root node.
     *
     * @return  non-null value to terminate the traversal and return this value
     */
    T handlePage(Page page, int depth) throws ServletException, IOException;
  }

  /**
   * @see #traversePagesAnyOrder(javax.servlet.ServletContext, javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, com.semanticcms.core.model.Page, com.semanticcms.core.servlet.CaptureLevel, com.semanticcms.core.servlet.CapturePage.PageHandler, com.semanticcms.core.servlet.CapturePage.TraversalEdges, com.semanticcms.core.servlet.CapturePage.EdgeFilter)
   */
  public static <T> T traversePagesAnyOrder(
      ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      PageReferrer rootReferrer, CaptureLevel level,
      PageHandler<T> pageHandler, TraversalEdges edges, EdgeFilter edgeFilter
  ) throws ServletException, IOException {
    return traversePagesAnyOrder(
        servletContext, request, response,
        CapturePage.capturePage(servletContext, request, response, rootReferrer, level),
        level, pageHandler, edges, edgeFilter
    );
  }
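
  // Implementation sketch (assumptions noted): a TraversalEdges that follows child links and an
  // EdgeFilter that stays within one book.  Page#getChildRefs() returning PageReferrer values is
  // assumed from semanticcms-core-model, and "/example-book" is a hypothetical book name.
  //
  //   TraversalEdges childEdges = Page::getChildRefs;
  //   EdgeFilter sameBook = edge -> "/example-book".equals(edge.getBookName());
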

  /**
   * <p>Performs potentially concurrent traversal of the pages in any order.
   * Each page is only visited once.</p>
   *
   * <p>This may at times appear to give results in a predictable order, but this must not be relied upon.
   * For example, with all items already in cache it might end up giving results in a breadth-first order,
   * whereas the same situation on a single-CPU system might end up in a depth-first order.  The ordering
   * is not guaranteed in any way and should not be relied upon.</p>
   *
   * <p>pageHandler, edges, and edgeFilter are all called on the main thread (the thread invoking this method).</p>
   *
   * <p>Returns when the first pageHandler returns a non-null object.
   * Once a pageHandler returns non-null, no other pageHandler, edges, or edgeFilter will be called.</p>
   *
   * <p>Due to pageHandler, edges, and edgeFilter all being called on the main thread, slow implementations
   * of these methods may limit effective concurrency.  A future improvement might be to allow for concurrent
   * execution of handlers.</p>
   *
   * <p>If a page is already in the cache, it is fetched directly instead of passed-off to a separate
   * thread for capture.  Thus, if all is cached, this method will not perform with any concurrency.</p>
   *
   * @param level  The captureLevel.  A higher captureLevel may be returned when it is available, such
   *               as a META capture in place of a PAGE request.
   *
   * @param pageHandler  Optional, null when not needed, called before a page visits its edges.
   *                     If it returns a non-null object, the traversal is terminated and the provided
   *                     object is returned.
   *
   * @param edges  Provides the set of pages to be looked at from the given page.  Any edge provided that
   *               has already been visited will not be visited again.
   *
   * @param edgeFilter  Optional, null when not needed, in which case all edges match.
   */
  public static <T> T traversePagesAnyOrder(
      ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      Page root, CaptureLevel level,
      final PageHandler<T> pageHandler, TraversalEdges edges, EdgeFilter edgeFilter
  ) throws ServletException, IOException {
    Cache cache = (level == CaptureLevel.BODY) ? null : CacheFilter.getCache(request);
    if (CONCURRENT_TRAVERSALS_ENABLED && ConcurrencyCoordinator.useConcurrentSubrequests(request)) {
      return traversePagesAnyOrderConcurrent(
          servletContext, request, response, root, level,
          pageHandler, edges, edgeFilter, cache, null
      );
    } else {
      return traversePagesDepthFirstRecurseSequential(
          servletContext, request, response, root, 0, level,
          (Page page, int depth) -> pageHandler.handlePage(page),
          edges, edgeFilter, null,
          TempFileContextEE.get(request), cache, new HashSet<>()
      );
    }
  }
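
  // Usage sketch (assumptions noted): finding the first page with a matching title, stopping the
  // traversal as soon as the pageHandler returns non-null.  Page#getChildRefs() and Page#getTitle()
  // are assumed from semanticcms-core-model; rootRef is assumed to be obtained elsewhere.
  //
  //   Page match = CapturePage.traversePagesAnyOrder(
  //       servletContext, request, response, rootRef, CaptureLevel.META,
  //       page -> "Example Title".equals(page.getTitle()) ? page : null,
  //       Page::getChildRefs,
  //       null // no edge filter: follow all child links
  //   );
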
  private static PageRef getNext(PageRef[] nextHint) {
    return nextHint == null ? null : nextHint[0];
  }

  /**
   * @param nextHint  an optional one-element array containing what is needed next.
   *                  If non-null and containing a non-null element, any future task for that page
   *                  that is not yet scheduled will be moved to the front of the list.
   *                  TODO: Do max concurrency - 1, except nextHint?  Then can always schedule at least nextHint immediately.
   *                  TODO: Once we get a result matching nextHint, move its children to the top of the stack so we get them
   *                        first, let first child of nextHint occupy last slot.
   */
  private static <T> T traversePagesAnyOrderConcurrent(
      final ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      Page page, final CaptureLevel level, PageHandler<T> pageHandler, TraversalEdges edges,
      EdgeFilter edgeFilter, final Cache cache, PageRef[] nextHint
  ) throws ServletException, IOException {
    // Created when first needed to avoid the overhead when fully operating from cache
    HttpServletRequest threadSafeReq = null;
    HttpServletResponse threadSafeResp = null;
    // Find the executor
    final Executor concurrentSubrequestExecutor;
    final int preferredConcurrency;
    {
      // Scoping block
      final Executors executors = SemanticCMS.getInstance(servletContext).getExecutors();
      concurrentSubrequestExecutor = executors.getPerProcessor();
      preferredConcurrency = executors.getPreferredConcurrency();
      assert preferredConcurrency > 1 : "Single-CPU systems should never make it to this concurrent implementation";
    }
    final TempFileContext tempFileContext = TempFileContextEE.get(request);
    int maxSize = 0;
    // Which pages have been visited
    final Set<PageRef> visited = new HashSet<>();
    // The pages that are currently ready for processing
    final List<Page> readyPages = new ArrayList<>();
    // New ready pages, used to add in the correct order to readyPages based on traversal direction hints
    final List<Page> newReadyPages = new ArrayList<>();
    // Track which futures have been completed (the callable puts itself here once done)
    final BlockingQueue<PageRef> finishedFutures = new ArrayBlockingQueue<>(preferredConcurrency);
    // Does not immediately submit to the executor, waits until the readyPages are exhausted
    final List<PageRef> edgesToAdd = new ArrayList<>();
    // New edges to add, used to add in the correct order to edgesToAdd based on traversal direction hints
    final List<PageRef> newEdgesToAdd = new ArrayList<>();
    // The futures are queued, active, or finished but not yet processed by the main thread
    final Map<PageRef, Future<Page>> futures = AoCollections.newHashMap(preferredConcurrency);
    try {
      // Kick it off
      visited.add(page.getPageRef());
      readyPages.add(page);
      // The most recently seen nextHint
      PageRef next = getNext(nextHint);
      do {
        // Handle all the ready pages (using stack-ordering to achieve depth-first ordering from cache)
        while (!readyPages.isEmpty()) {
          Page readyPage = null;
          if (next != null) {
            // Search readyPages for "next", searching backwards assuming depth-first
            // TODO: This is sequential search
            for (int i = readyPages.size() - 1; i >= 0; i--) {
              Page rp = readyPages.get(i);
              if (rp.getPageRef().equals(next)) {
                if (DEBUG_NOW && i != (readyPages.size() - 1)) {
                  System.err.println("Found next in readyPages at index " + i + ", size = " + readyPages.size());
                }
                readyPage = rp;
                readyPages.remove(i);
                break;
              }
            }
          }
          if (readyPage == null) {
            // Pop off stack
            readyPage = readyPages.remove(readyPages.size() - 1);
          }
          if (pageHandler != null) {
            T result = pageHandler.handlePage(readyPage);
            if (result != null) {
              return result;
            }
          }
          // Update next from any hint
          next = getNext(nextHint);
          // Add any children not yet visited
          for (PageReferrer edgeRef : edges.getEdges(readyPage)) {
            PageRef edge = edgeRef.getPageRef();
            if (!visited.contains(edge) && (edgeFilter == null || edgeFilter.applyEdge(edge))) {
              visited.add(edge);
              // Check cache before going to concurrency
              Page cached = (level == CaptureLevel.BODY) ? null : cache.get(edge, level);
              if (cached != null) {
                newReadyPages.add(cached);
              } else {
                newEdgesToAdd.add(edge);
              }
            }
          }
          // Add to readyPages in backwards order, so they pop off the top in correct traversal order
          while (!newReadyPages.isEmpty()) {
            readyPages.add(newReadyPages.remove(newReadyPages.size() - 1));
          }
        }
        // Add to edgesToAdd in backwards order, so they pop off the top in correct traversal order
        while (!newEdgesToAdd.isEmpty()) {
          edgesToAdd.add(newEdgesToAdd.remove(newEdgesToAdd.size() - 1));
        }
        // Run on this thread if there is only one
        if (futures.isEmpty() && edgesToAdd.size() == 1) {
          if (DEBUG) {
            System.err.println("There is only one, running on current thread");
          }
          readyPages.add(capturePage(servletContext, request, response, edgesToAdd.remove(0), level, cache));
        } else {
          if (!edgesToAdd.isEmpty()) {
            if (threadSafeReq == null) {
              threadSafeReq = new UnmodifiableCopyHttpServletRequest(request);
              threadSafeResp = new UnmodifiableCopyHttpServletResponse(response);
            }
            final HttpServletRequest finalThreadSafeReq = threadSafeReq;
            final HttpServletResponse finalThreadSafeResp = threadSafeResp;
            // Use hint, make sure it is at the end of edgesToAdd if in the list
            if (next != null) {
              // TODO: This is sequential search
              int i = edgesToAdd.lastIndexOf(next);
              if (i != -1) {
                if (DEBUG_NOW && i != (edgesToAdd.size() - 1)) {
                  System.err.println("Found next in edgesToAdd at index " + i + ", size = " + edgesToAdd.size());
                }
                edgesToAdd.add(edgesToAdd.remove(i));
              }
            }
            // Submit to the futures, but only up to preferredConcurrency
            while (futures.size() < preferredConcurrency && !edgesToAdd.isEmpty()) {
              final PageRef edge = edgesToAdd.remove(edgesToAdd.size() - 1);
              futures.put(
                  edge,
                  concurrentSubrequestExecutor.submit(() -> {
                    try {
                      return capturePage(
                          servletContext, finalThreadSafeReq, finalThreadSafeResp,
                          new HttpServletSubRequest(finalThreadSafeReq),
                          new HttpServletSubResponse(finalThreadSafeResp, tempFileContext),
                          edge, level, cache
                      );
                    } finally {
                      // This one is ready now
                      // There should always be enough room in the queue since the futures are limited going in
                      finishedFutures.add(edge);
                    }
                  })
              );
            }
            if (DEBUG) {
              int futuresSize = futures.size();
              int edgesToAddSize = edgesToAdd.size();
              int size = futuresSize + edgesToAddSize;
              if (size > maxSize) {
                System.err.println("futures.size()=" + futuresSize + ", edgesToAdd.size()=" + edgesToAddSize);
                maxSize = size;
              }
            }
          }
          // Continue until no more futures
          if (!futures.isEmpty()) {
            Future<Page> future = null;
            // Favor nextHint on which future to consume first
            if (next != null) {
              Future<Page> nextsFuture = futures.get(next);
              if (nextsFuture != null && nextsFuture.isDone()) {
                if (DEBUG_NOW) {
                  PageRef nextFinished = finishedFutures.peek();
                  if (!next.equals(nextFinished)) {
                    System.err.println("Found nextHint done early in futures: " + next + ", nextFinished = " + nextFinished);
                  }
                }
                if (!finishedFutures.remove(next)) {
                  throw new AssertionError("done future not removed from finishedFutures");
                }
                futures.remove(next);
                future = nextsFuture;
              }
            }
            if (future == null) {
              // Wait until a result is available
              future = futures.remove(finishedFutures.take());
            }
            readyPages.add(future.get());
          }
        }
      } while (!readyPages.isEmpty());
      // Traversal over, not found
      return null;
    } catch (InterruptedException e) {
      // Restore the interrupted status
      Thread.currentThread().interrupt();
      throw new ServletException(e);
    } catch (ExecutionException e) {
      // Maintain expected exception types while not losing stack trace
      ExecutionExceptions.wrapAndThrow(e, IOException.class, IOException::new);
      throw new ServletException(e);
    } finally {
      // Always cancel unfinished futures on the way out, but do not delay for any in progress
      if (!futures.isEmpty()) {
        if (DEBUG) {
          System.err.println("Canceling " + futures.size() + " futures");
        }
        for (Future<Page> future : futures.values()) {
          future.cancel(false);
        }
      }
    }
  }

  /**
   * @see #traversePagesDepthFirst(javax.servlet.ServletContext, javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, com.semanticcms.core.model.Page, com.semanticcms.core.servlet.CaptureLevel, com.semanticcms.core.servlet.CapturePage.PageDepthHandler, com.semanticcms.core.servlet.CapturePage.TraversalEdges, com.semanticcms.core.servlet.CapturePage.EdgeFilter, com.semanticcms.core.servlet.CapturePage.PageDepthHandler)
   */
  public static <T> T traversePagesDepthFirst(
      ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      PageReferrer rootReferrer, CaptureLevel level,
      PageDepthHandler<T> preHandler, TraversalEdges edges, EdgeFilter edgeFilter, PageDepthHandler<T> postHandler
  ) throws ServletException, IOException {
    return traversePagesDepthFirst(
        servletContext, request, response,
        CapturePage.capturePage(servletContext, request, response, rootReferrer, level),
        level, preHandler, edges, edgeFilter, postHandler
    );
  }

  /**
   * <p>Performs a consistent-ordered, potentially concurrent, depth-first traversal of the pages.
   * Each page is only visited once.</p>
   *
   * <p>preHandler, edges, edgeFilter, and postHandler are all called on the main thread (the thread invoking this method).</p>
   *
   * <p>Returns when the first preHandler or postHandler returns a non-null object.
   * Once a preHandler or postHandler returns non-null, no other preHandler, edges, edgeFilter, or postHandler will be called.</p>
   *
   * <p>Due to preHandler, edges, edgeFilter, and postHandler all being called on the main thread, slow implementations
   * of these methods may limit effective concurrency.  A future improvement might be to allow for concurrent
   * execution of handlers.</p>
   *
   * <p>If a page is already in the cache, it is fetched directly instead of passed-off to a separate
   * thread for capture.  Thus, if all is cached, this method will not perform with any concurrency.</p>
   *
   * @param level  The captureLevel.  A higher captureLevel may be returned when it is available, such
   *               as a META capture in place of a PAGE request.
   *
   * @param preHandler  Optional, null when not needed, called before a page visits its edges.
   *                    If it returns a non-null object, the traversal is terminated and the provided
   *                    object is returned.
   *
   * @param edges  Provides the set of pages to be looked at from the given page.  Any edge provided that
   *               has already been visited will not be visited again.
   *
   * @param edgeFilter  Optional, null when not needed, in which case all edges match.
   *
   * @param postHandler  Optional, null when not needed, called after a page's edges have been visited.
   *                     If it returns a non-null object, the traversal is terminated and the provided
   *                     object is returned.
   */
  public static <T> T traversePagesDepthFirst(
      ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      Page root, CaptureLevel level,
      PageDepthHandler<T> preHandler, TraversalEdges edges, EdgeFilter edgeFilter, PageDepthHandler<T> postHandler
  ) throws ServletException, IOException {
    Cache cache = (level == CaptureLevel.BODY) ? null : CacheFilter.getCache(request);
    if (CONCURRENT_TRAVERSALS_ENABLED && ConcurrencyCoordinator.useConcurrentSubrequests(request)) {
      return traversePagesDepthFirstConcurrent(
          servletContext, request, response, root, level,
          preHandler, edges, edgeFilter, postHandler, cache
      );
    } else {
      return traversePagesDepthFirstRecurseSequential(
          servletContext, request, response, root, 0, level,
          preHandler, edges, edgeFilter, postHandler,
          TempFileContextEE.get(request), cache, new HashSet<>()
      );
    }
  }
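
  // Usage sketch (assumptions noted): printing an indented outline of the site with a preHandler
  // and no postHandler.  Returning null from the handler keeps the traversal going to completion.
  // Page#getChildRefs() and Page#getTitle() are assumed from semanticcms-core-model.
  //
  //   CapturePage.traversePagesDepthFirst(
  //       servletContext, request, response, rootRef, CaptureLevel.META,
  //       (page, depth) -> {
  //         System.out.println("  ".repeat(depth) + page.getTitle());
  //         return null;
  //       },
  //       Page::getChildRefs,
  //       null, // no edge filter
  //       null  // no postHandler
  //   );
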
  /**
   * Simple sequential implementation.
   */
  private static <T> T traversePagesDepthFirstRecurseSequential(
      ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      Page page, int depth, CaptureLevel level,
      PageDepthHandler<T> preHandler, TraversalEdges edges, EdgeFilter edgeFilter, PageDepthHandler<T> postHandler,
      TempFileContext tempFileContext, Cache cache, Set<PageRef> visited
  ) throws ServletException, IOException {
    if (!visited.add(page.getPageRef())) {
      throw new AssertionError();
    }
    if (preHandler != null) {
      T result = preHandler.handlePage(page, depth);
      if (result != null) {
        return result;
      }
    }
    for (PageReferrer edgeRef : edges.getEdges(page)) {
      PageRef edge = edgeRef.getPageRef();
      if (!visited.contains(edge) && (edgeFilter == null || edgeFilter.applyEdge(edge))) {
        T result = traversePagesDepthFirstRecurseSequential(
            servletContext, request, response,
            CapturePage.capturePage(servletContext, request, response, edge, level, cache),
            depth + 1, level,
            preHandler, edges, edgeFilter, postHandler,
            tempFileContext, cache, visited
        );
        if (result != null) {
          return result;
        }
      }
    }
    if (postHandler != null) {
      T result = postHandler.handlePage(page, depth);
      if (result != null) {
        return result;
      }
    }
    return null;
  }

  private static <T> T traversePagesDepthFirstConcurrent(
      ServletContext servletContext, HttpServletRequest request, HttpServletResponse response,
      final Page page, CaptureLevel level,
      final PageDepthHandler<T> preHandler, final TraversalEdges edges, final EdgeFilter edgeFilter,
      final PageDepthHandler<T> postHandler, Cache cache
  ) throws ServletException, IOException {
    // Caches the results of the edges call, to fit within the specification that it will only be called once per page.
    // This also prevents the chance that the caller can give different results or change the collection during traversal.
    // The next item desired is shared with the underlying traversal
    final PageRef[] nextHint = new PageRef[]{page.getPageRef()};
    T result = traversePagesAnyOrderConcurrent(
        servletContext,
        request,
        response,
        page,
        level,
        new PageHandler<T>() {
          // All of the edges visited or already set as a next
          final Set<PageRef> visited = new HashSet<>();
          // The already resolved parents, used for postHandler
          final List<Page> parents = new ArrayList<>();
          // The next node that is to be processed, highest on list is active
          final List<PageRef> nexts = new ArrayList<>();
          // Those that are to be done after what is next
          final List<Iterator<? extends PageReferrer>> afters = new ArrayList<>();
          // The set of nodes we've received but are not yet ready to process
          Map<PageRef, Page> received = null;

          // Kick it off
          {
            PageRef pageRef = page.getPageRef();
            visited.add(pageRef);
            nexts.add(pageRef);
            Iterator<? extends PageReferrer> empty = Collections.emptyIterator();
            afters.add(empty);
          }

          private PageRef findNext(Iterator<? extends PageReferrer> after) {
            while (after.hasNext()) {
              PageRef possNext = after.next().getPageRef();
              if (!visited.contains(possNext) && (edgeFilter == null || edgeFilter.applyEdge(possNext))) {
                return possNext;
              }
            }
            return null;
          }

          @Override
          public T handlePage(Page page) throws ServletException, IOException {
            PageRef pageRef = page.getPageRef();
            // page and pageRef match, but sometimes we have a pageRef with a null page (indicating unknown)
            int index = nexts.size() - 1;
            if (DEBUG_NOW && pageRef.equals(nextHint[0])) {
              System.err.println("Got nextHint from underlying traversal: " + pageRef);
            }
            if (pageRef.equals(nexts.get(index))) {
              do {
                if (DEBUG) {
                  System.err.println("pre.: " + pageRef);
                }
                if (preHandler != null) {
                  T preResult = preHandler.handlePage(page, parents.size());
                  if (preResult != null) {
                    return preResult;
                  }
                }
                // Find the first edge that we still need, if any
                Iterator<? extends PageReferrer> after = edges.getEdges(page).iterator();
                PageRef next = findNext(after);
                if (next != null) {
                  if (DEBUG) {
                    System.err.println("next: " + next);
                  }
                  // Have at least one child, not ready for our postHandler yet
                  // Make sure we only look for a given edge once
                  visited.add(next);
                  // Push child
                  parents.add(page);
                  nexts.add(next);
                  afters.add(after);
                  nextHint[0] = next;
                  index++;
                  page = null;
                  pageRef = next;
                } else {
                  // No children to wait for, run postHandlers and move to next
                  while (true) {
                    if (DEBUG) {
                      System.err.println("post: " + pageRef);
                    }
                    if (postHandler != null) {
                      T postResult = postHandler.handlePage(page, parents.size());
                      if (postResult != null) {
                        return postResult;
                      }
                    }
                    next = findNext(afters.get(index));
                    if (next != null) {
                      if (DEBUG) {
                        System.err.println("next: " + next);
                      }
                      // Make sure we only look for a given edge once
                      visited.add(next);
                      nexts.set(index, next);
                      nextHint[0] = next;
                      page = null;
                      pageRef = next;
                      break;
                    } else {
                      // Pop parent
                      afters.remove(index);
                      nexts.remove(index);
                      index--;
                      if (index < 0) {
                        // Nothing left to check, all postHandlers done
                        nextHint[0] = null;
                        return null;
                      } else {
                        page = parents.remove(index);
                        pageRef = page.getPageRef();
                      }
                    }
                  }
                }
              } while (
                  page != null
                      || (received != null && (page = received.remove(pageRef)) != null)
              );
              if (DEBUG_NOW) {
                System.err.println("nextHint now: " + nextHint[0]);
              }
            } else {
              if (received == null) {
                received = new HashMap<>();
              }
              received.put(pageRef, page);
              if (DEBUG_NOW) {
                System.err.println("Received " + pageRef + ", size = " + received.size() + ", next = " + nextHint[0]);
              }
            }
            return null;
          }
        },
        edges,
        edgeFilter,
        cache,
        nextHint
    );
    /* TODO:
    assert result != null || parents.isEmpty();
    assert result != null || nexts.isEmpty();
    assert result != null || afters.isEmpty();
    assert result != null || received.isEmpty();
     */
    return result;
  }

  private CapturePage() {
    // Do nothing
  }

  private Page capturedPage;

  public void setCapturedPage(Page capturedPage) {
    NullArgumentException.checkNotNull(capturedPage, "page");
    if (this.capturedPage != null) {
      throw new IllegalStateException(
          "Cannot capture more than one page: first page=" + this.capturedPage.getPageRef()
              + ", second page=" + capturedPage.getPageRef()
      );
    }
    this.capturedPage = capturedPage;
  }

  private Page getCapturedPage() {
    return capturedPage;
  }
}



