All downloads are free. Search and download functionality uses the official Maven repository.

org.archive.crawler.restlet.PagedRepresentation Maven / Gradle / Ivy

The newest version!
/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
 
package org.archive.crawler.restlet;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.*;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.LongRange;
import org.archive.modules.fetcher.FetchStatusCodes;
import org.archive.util.FileUtils;
import org.eclipse.jetty.http.HttpStatus;
import org.restlet.data.CharacterSet;
import org.restlet.data.Form;
import org.restlet.data.MediaType;
import org.restlet.data.Reference;
import org.restlet.representation.CharacterRepresentation;
import org.restlet.representation.FileRepresentation;

/**
 * Representation wrapping a FileRepresentation, displaying its contents
 * in batches of lines at a time, with forward and backward navigation. 
 * 
 * @author gojomo
 */
public class PagedRepresentation extends CharacterRepresentation {
    // passed-in at construction
    /** wrapped FileRepresentation **/
    protected FileRepresentation fileRepresentation;
    /** wrapped EnhDirectoryResource; used to formulate self-links **/
    protected EnhDirectoryResource dirResource;
    
    /** position in file around which to fetch lines **/
    protected long position;
    /** desired line count; negative to go back from position; default 128 **/
    protected int lineCount;
    /** whether to display lines in reversed order (latest first) **/
    protected boolean reversedOrder; 
    
    // created when file is scanned
    /** text lines **/
    protected List lines;
    /** position range [start-of-first-line, past-end-of-last-line] in file **/
    protected LongRange range;
    /** File **/ 
    protected File file; 
    // TODO: maybe, freeze length for more consistent display of growing files
    // (now, as length/%/bumper are written after lines retrieved, they 
    // sometimes are indicative the file has grown before the page is 
    // even rendered)
    
    public PagedRepresentation(FileRepresentation representation,
            EnhDirectoryResource resource, String pos, String lines,
            String reverse) {
        super(MediaType.TEXT_HTML);
        fileRepresentation = representation;
        dirResource = resource; 
        
        position = StringUtils.isBlank(pos) ? 0 : Long.parseLong(pos);
        lineCount = StringUtils.isBlank(lines) ? 128 : Integer.parseInt(lines);
        reversedOrder = "y".equals(reverse);
        
        // TODO: remove if not necessary in future?
        setCharacterSet(CharacterSet.UTF_8);
    }

    @Override
    public Reader getReader() throws IOException {
        int estimatedSize = (Math.abs(lineCount) * 128) + 500; 
        StringWriter writer = new StringWriter(estimatedSize);
        write(writer); 
        return new StringReader(writer.toString());
    }

    /**
     * Actually read the requested lines, and reverses if appropriate. 
     * 
     * If at file start, refuses to show fewer lines than are possible
     * ('bounces' against start). 
     * 
     * @throws IOException
     */
    protected void loadLines() throws IOException {
        this.file = fileRepresentation.getFile();
        this.lines = new LinkedList();
        this.range = FileUtils.pagedLines(file, position, lineCount, lines, 128);
        // bounce against the front of the file: don't show runt (fewer
        // lines than requested) unless absolutely necessary)
        if(lines.size()();
            this.range = FileUtils.pagedLines(file, 0, Math.abs(lineCount), lines, 128);
        }
        if(reversedOrder) {
            Collections.reverse(lines);
        }
    }
    
    /** 
     * Write the paged HTML. 
     * 
     * @see org.restlet.representation.Representation#write(java.io.Writer)
     */
    @Override
    public void write(Writer writer) throws IOException {
        loadLines();
        
        PrintWriter pw = new PrintWriter(writer); 
        pw.println("Paged view: "+file);
        emitControls(pw);

        Function syntaxHighlighter = Function.identity();
        if (file.getName().equals("crawl.log")) {
            pw.println("");
            syntaxHighlighter = this::highlightCrawlLogLine;
        }

        pw.println("
");
        emitBumper(pw, true);
        for(String line : lines) {
            pw.println(syntaxHighlighter.apply(StringEscapeUtils.escapeHtml(line)));
        }
        emitBumper(pw, false);
        pw.println("
"); emitControls(pw); } /** * Map of fetch status codes to names. */ private static final Map FETCH_STATUS_NAMES = new HashMap<>(); static { for (Field field : FetchStatusCodes.class.getDeclaredFields()) { if (!Modifier.isStatic(field.getModifiers())) continue; if (!field.getType().equals(int.class)) continue; try { FETCH_STATUS_NAMES.put((Integer)field.get(null), field.getName()); } catch (IllegalAccessException e) { throw new RuntimeException(e); } } } private static final Pattern CRAWL_LOG_PATTERN = Pattern.compile("([^ ]+ +)(-?[0-9]+)( +.*)"); /** * Performs basic syntax highlighting of a crawl log line. Assumes the line is already HTML escaped. */ public String highlightCrawlLogLine(String line) { Matcher m = CRAWL_LOG_PATTERN.matcher(line); if (m.matches()) { String date = m.group(1); String status = m.group(2); String rest = m.group(3); int code = Integer.parseInt(status); String clazz = ""; if (code < 0) { clazz = "status-neg"; } else if (code >= 200 && code <= 299) { clazz = "status-2xx"; } else if (code >= 300 && code <= 399) { clazz = "status-3xx"; } else if (code >= 400 && code <= 499) { clazz = "status-4xx"; } else if (code >= 500 && code <= 599) { clazz = "status-5xx"; } String reason = FETCH_STATUS_NAMES.get(code); if (reason == null) reason = HttpStatus.getMessage(code); return date + "" + status + "" + rest; } else { return line; } } /** * Emit a "start" or "EOF" bumper as appropriate to prominently * indicate if page borders start- or end- of-file. * * @param pw PrintWriter * @param atTop boolean, true if at top of page */ protected void emitBumper(PrintWriter pw, boolean atTop) { if((!reversedOrder ^ atTop)&&(range.getMaximumLong()==file.length())) { pw.println("«EOF»"); return; } if((reversedOrder ^ atTop)&&(range.getMinimumLong()==0)) { pw.println("«START»"); } } /** * Emit the navigational controls. * * TODO: ugh! 
templatize, reduce duplication as possible * @param pw PrintWriter */ protected void emitControls(PrintWriter pw) { pw.println(""); if(reversedOrder) { pw.print(""); pw.println(""); pw.println(""); pw.println("
"); pw.print("« end"); pw.print("‹ later"); pw.println("bytes " +range.getMaximumLong() +"-"+range.getMinimumLong() +"/"+file.length() +" " +(int)(100*(range.getMaximumLong()/(float)file.length())) +"%"); pw.print("earlier ›"); pw.print("start »"); pw.println(""); pw.println("forward"); pw.println("| reversed"); } else { pw.print(""); pw.print("« start"); pw.print("‹ earlier"); pw.println("bytes " +range.getMinimumLong() +"-"+range.getMaximumLong() +"/"+file.length() +" " +(int)(100*(range.getMaximumLong()/(float)file.length())) +"%"); pw.print("later ›"); pw.print("end »"); pw.println("forward"); pw.println("| reversed"); } pw.print(" + "); pw.println(lines.size()); pw.print(" -  lines
"); } /** * Construct navigational URI for given parameters. * * @param pos desired position in file * @param lines desired signed line count * @param reverse if line ordering should be displayed in reverse * @return String URI appropriate to navigate to desired view */ protected String getControlUri(long pos, int lines, boolean reverse) { Form query = new Form(); query.add("format","paged"); if(pos!=0) { query.add("pos", Long.toString(pos)); } if(lines!=128) { if(Math.abs(lines)<1) { lines = 1; } query.add("lines",Integer.toString(lines)); } if(reverse) { query.add("reverse","y"); } Reference viewRef = dirResource.getRequest().getOriginalRef().clone(); viewRef.setQuery(query.getQueryString()); return viewRef.toString(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy