All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dspace.app.sitemap.SitemapsOrgGenerator Maven / Gradle / Ivy

There is a newer version: 8.0
Show newest version
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.app.sitemap;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

/**
 * Class for generating Sitemaps to improve
 * search engine coverage of the DSpace site and limit the server load caused by
 * crawlers.
 *
 * @author Robert Tansley
 * @author Stuart Lewis
 */
public class SitemapsOrgGenerator extends AbstractGenerator {
    /**
     * XML declaration written at the top of every generated document.
     */
    private static final String XML_DECLARATION =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";

    /**
     * Namespace of the sitemaps.org 0.9 schema, shared by sitemap files and
     * the sitemap index.
     */
    private static final String SITEMAP_NAMESPACE =
        "http://www.sitemaps.org/schemas/sitemap/0.9";

    /**
     * Stem of URLs sitemaps will eventually appear at
     */
    protected String indexURLStem;

    /**
     * Tail of URLs sitemaps will eventually appear at
     */
    protected String indexURLTail;

    /**
     * W3C datetime formatter used for {@code lastmod} values. The pattern
     * ends in a literal {@code Z} (the UTC designator), so the formatter is
     * pinned to UTC; otherwise local wall-clock time would be mislabelled
     * as UTC.
     */
    protected DateFormat w3dtfFormat = newW3dtfFormat();

    /**
     * Construct a sitemaps.org protocol sitemap generator that writes files
     * to the given directory. The sitemaps will eventually be exposed at
     * URLs built from the given stem, the sitemap number, and the given tail.
     *
     * @param outputDirIn Directory to write sitemap files to
     * @param urlStem     start of URL that sitemap files will appear at, e.g.
     *                    {@code http://dspace.myu.edu/sitemap?sitemap=}
     * @param urlTail     end of URL that sitemap files will appear at, e.g.
     *                    {@code .html} or {@code null}
     */
    public SitemapsOrgGenerator(File outputDirIn, String urlStem, String urlTail) {
        super(outputDirIn);

        indexURLStem = urlStem;
        // A null tail means "no suffix"; normalise so concatenation never prints "null".
        indexURLTail = (urlTail == null ? "" : urlTail);
    }

    /**
     * Build the W3C datetime formatter, fixed to UTC so that the literal
     * {@code Z} suffix in the pattern is truthful.
     *
     * @return a new formatter producing {@code yyyy-MM-dd'T'HH:mm:ss'Z'} in UTC
     */
    private static DateFormat newW3dtfFormat() {
        DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        return format;
    }

    /**
     * @param number sitemap number
     * @return {@code sitemap<number>.xml}
     */
    @Override
    public String getFilename(int number) {
        return "sitemap" + number + ".xml";
    }

    /**
     * @return the XML declaration followed by the opening {@code <urlset>} tag
     */
    @Override
    public String getLeadingBoilerPlate() {
        return XML_DECLARATION
            + "<urlset xmlns=\"" + SITEMAP_NAMESPACE + "\">";
    }

    /**
     * @return the size limit applied to a single sitemap file
     */
    @Override
    public int getMaxSize() {
        // 10 MiB (10 * 1024 * 1024)
        return 10485760;
    }

    /**
     * @return the URL-count limit applied to a single sitemap file
     */
    @Override
    public int getMaxURLs() {
        return 50000;
    }

    /**
     * @return the closing {@code </urlset>} tag
     */
    @Override
    public String getTrailingBoilerPlate() {
        return "</urlset>";
    }

    /**
     * Render one {@code <url>} entry, terminated by a newline.
     *
     * @param url     location to list
     * @param lastMod last-modified date, or {@code null} to omit the
     *                {@code <lastmod>} element
     * @return the XML fragment for this URL
     */
    @Override
    public String getURLText(String url, Date lastMod) {
        StringBuilder urlText = new StringBuilder();

        // NOTE(review): url is emitted verbatim; the sitemaps.org protocol
        // requires entity-escaped URLs -- confirm callers pass escaped values.
        urlText.append("<url><loc>").append(url).append("</loc>");
        if (lastMod != null) {
            urlText.append("<lastmod>").append(w3dtfFormat.format(lastMod))
                   .append("</lastmod>");
        }
        urlText.append("</url>\n");

        return urlText.toString();
    }

    /**
     * @return {@code false}
     */
    @Override
    public boolean useCompression() {
        return false;
    }

    /**
     * @return {@code sitemap_index.xml}
     */
    @Override
    public String getIndexFilename() {
        return "sitemap_index.xml";
    }

    /**
     * Write a sitemap index with one {@code <sitemap>} entry per sitemap
     * numbered {@code 0} to {@code sitemapCount - 1}. Each entry's location
     * is {@code indexURLStem + number + indexURLTail}, and every entry
     * carries the same {@code <lastmod>}: the time this method was called.
     *
     * @param output       stream to write the index to
     * @param sitemapCount number of sitemap entries to list
     * @throws IOException declared by the overridden signature
     */
    @Override
    public void writeIndex(PrintStream output, int sitemapCount)
        throws IOException {
        // Format once so all entries share an identical timestamp.
        String now = w3dtfFormat.format(new Date());

        output.println(XML_DECLARATION);
        output
            .println("<sitemapindex xmlns=\"" + SITEMAP_NAMESPACE + "\">");

        for (int i = 0; i < sitemapCount; i++) {
            // NOTE(review): stem and tail are emitted verbatim -- confirm they
            // contain no characters needing XML escaping (e.g. '&').
            output.print("<sitemap><loc>" + indexURLStem + i + indexURLTail
                             + "</loc>");
            output.print("<lastmod>" + now + "</lastmod></sitemap>\n");
        }

        output.println("</sitemapindex>");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy