All downloads are free. Search and download functionality uses the official Maven repository.

com.redfin.sitemapgenerator.SitemapGenerator Maven / Gradle / Ivy

There is a newer version: 1.1.2
Show newest version
package com.redfin.sitemapgenerator;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPOutputStream;

import org.xml.sax.SAXException;

abstract class SitemapGenerator> {
	/** 50000 URLs per sitemap maximum */
	public static final int MAX_URLS_PER_SITEMAP = 50000;
	
	private final URL baseUrl;
	private final File baseDir;
	private final String fileNamePrefix;
	private final String fileNameSuffix;
	private final boolean allowMultipleSitemaps;
	private final ArrayList urls = new ArrayList();
	private final W3CDateFormat dateFormat;
	private final int maxUrls;
	private final boolean autoValidate;
	private final boolean gzip;
	private final ISitemapUrlRenderer renderer;
	private int mapCount = 0;
	private boolean finished = false;
	
	private final ArrayList outFiles = new ArrayList();
	
	public SitemapGenerator(AbstractSitemapGeneratorOptions options, ISitemapUrlRenderer renderer) {
		baseDir = options.baseDir;
		baseUrl = options.baseUrl;
		fileNamePrefix = options.fileNamePrefix;
		W3CDateFormat dateFormat = options.dateFormat;
		if (dateFormat == null) dateFormat = new W3CDateFormat();
		this.dateFormat = dateFormat;
		allowMultipleSitemaps = options.allowMultipleSitemaps;
		maxUrls = options.maxUrls;
		autoValidate = options.autoValidate;
		gzip = options.gzip;
		this.renderer = renderer;

		if(options.suffixStringPattern != null && !options.suffixStringPattern.isEmpty()) {
			fileNameSuffix = gzip ? options.suffixStringPattern + ".xml.gz" : options.suffixStringPattern + ".xml";
		}
		else {
			fileNameSuffix = gzip ? ".xml.gz" : ".xml";
		}
	}

	/** Add one URL of the appropriate type to this sitemap.
	 * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
	 * or else write out one sitemap immediately.
	 * @param url the URL to add to this sitemap
	 * @return this
	 */
	public THIS addUrl(U url) {
		if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps"); 
		UrlUtils.checkUrl(url.getUrl(), baseUrl);
		if (urls.size() == maxUrls) {
			if (!allowMultipleSitemaps) throw new RuntimeException("More than " + maxUrls + " urls, but allowMultipleSitemaps is false.  Enable allowMultipleSitemaps to split the sitemap into multiple files with a sitemap index.");
			if (baseDir != null) {
				if (mapCount == 0) mapCount++;
				writeSiteMap();
				mapCount++;
				urls.clear();
			}
		}
		urls.add(url);
		return getThis();
	}
	
	/** Add multiple URLs of the appropriate type to this sitemap, one at a time.
	 * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
	 * or write out one sitemap immediately.
	 * @param urls the URLs to add to this sitemap
	 * @return this
	 */
	public THIS addUrls(Iterable urls) {
		for (U url : urls) addUrl(url);
		return getThis();
	}
	
	/** Add multiple URLs of the appropriate type to this sitemap, one at a time.
	 * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
	 * or write out one sitemap immediately.
	 * @param urls the URLs to add to this sitemap
	 * @return this
	 */
	public THIS addUrls(U... urls) {
		for (U url : urls) addUrl(url);
		return getThis();
	}
	
	/** Add multiple URLs of the appropriate type to this sitemap, one at a time.
	 * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
	 * or write out one sitemap immediately.
	 * @param urls the URLs to add to this sitemap
	 * @return this
	 * @throws MalformedURLException
	 */
	public THIS addUrls(String... urls) throws MalformedURLException {
		for (String url : urls) addUrl(url);
		return getThis();
	}
	
	/** Add one URL of the appropriate type to this sitemap.
	 * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
	 * or else write out one sitemap immediately.
	 * @param url the URL to add to this sitemap
	 * @return this
	 * @throws MalformedURLException
	 */
	public THIS addUrl(String url) throws MalformedURLException {
		U sitemapUrl;
		try {
			sitemapUrl = renderer.getUrlClass().getConstructor(String.class).newInstance(url);
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
		return addUrl(sitemapUrl);
	}
	
	/** Add multiple URLs of the appropriate type to this sitemap, one at a time.
	 * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
	 * or write out one sitemap immediately.
	 * @param urls the URLs to add to this sitemap
	 * @return this
	 */
	public THIS addUrls(URL... urls) {
		for (URL url : urls) addUrl(url);
		return getThis();
	}
	
	/** Add one URL of the appropriate type to this sitemap.
	 * If we have reached the maximum number of URLs, we'll throw an exception if {@link #allowMultipleSitemaps} is false,
	 * or write out one sitemap immediately.
	 * @param url the URL to add to this sitemap
	 * @return this
	 */
	public THIS addUrl(URL url) {
		U sitemapUrl;
		try {
			sitemapUrl = renderer.getUrlClass().getConstructor(URL.class).newInstance(url);
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
		return addUrl(sitemapUrl);
	}
	
	@SuppressWarnings("unchecked")
	THIS getThis() {
		return (THIS)this;
	}
	
	/** Write out remaining URLs; this method can only be called once.  This is necessary so we can keep an accurate count for {@link #writeSitemapsWithIndex()}.
	 * 
	 * @return a list of files we wrote out to disk
	 */
	public List write() {
		if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps");
		if (urls.size() == 0 && mapCount == 0) throw new RuntimeException("No URLs added, sitemap would be empty; you must add some URLs with addUrls");
		writeSiteMap();
		finished = true;
		return outFiles;
	}
	
	/**
	 * Writes out the sitemaps as a list of strings.
	 * Each string in the list is a formatted list of URLs.
	 * We return a list because the URLs may not all fit --
	 * google specifies a maximum of 50,000 URLs in one sitemap.
	 * @return a list of XML-formatted strings
	 */
	public List writeAsStrings() {
		List listOfSiteMapStrings = new ArrayList();
		for (int start = 0; start < urls.size(); start += maxUrls) {
			int end = start + maxUrls;
			if (end > urls.size()) {
				end = urls.size();
			}
			StringBuilder sb = new StringBuilder();
			writeSiteMapAsString(sb, urls.subList(start, end));
			listOfSiteMapStrings.add(sb.toString());
		}
		return listOfSiteMapStrings;
	}
	
	private void writeSiteMapAsString(StringBuilder sb, List urls) {
		sb.append("\n");
		sb.append("\n");
		for (U url : urls) {
			renderer.render(url, sb, dateFormat);
		}
		sb.append("");
	}
	
	/** After you've called {@link #write()}, call this to generate a sitemap index of all sitemaps you generated.  
	 * 
	 */
	public void writeSitemapsWithIndex() {
		if (!finished) throw new RuntimeException("Sitemaps not generated yet; call write() first");
		File outFile = new File(baseDir, "sitemap_index.xml");
		SitemapIndexGenerator sig;		
		sig = new SitemapIndexGenerator.Options(baseUrl, outFile).dateFormat(dateFormat).autoValidate(autoValidate).build();		
		sig.addUrls(fileNamePrefix, fileNameSuffix, mapCount).write();
	}
	
	private void writeSiteMap() {
		if (baseDir == null) {
			throw new NullPointerException("To write to files, baseDir must not be null");
		}
		if (urls.size() == 0) return;
		String fileNamePrefix;
		if (mapCount > 0) {
			fileNamePrefix = this.fileNamePrefix + mapCount;
		} else {
			fileNamePrefix = this.fileNamePrefix;
		}
		File outFile = new File(baseDir, fileNamePrefix+fileNameSuffix);
		outFiles.add(outFile);
		try {
			OutputStreamWriter out;
			if (gzip) {
				FileOutputStream fileStream = new FileOutputStream(outFile);
				GZIPOutputStream gzipStream = new GZIPOutputStream(fileStream);
				out = new OutputStreamWriter(gzipStream, Charset.forName("UTF-8").newEncoder());
			} else {
				out = new OutputStreamWriter(new FileOutputStream(outFile), Charset.forName("UTF-8").newEncoder());
			}
			
			writeSiteMap(out);
			if (autoValidate) SitemapValidator.validateWebSitemap(outFile);
		} catch (IOException e) {
			throw new RuntimeException("Problem writing sitemap file " + outFile, e);
		} catch (SAXException e) {
			throw new RuntimeException("Sitemap file failed to validate (bug?)", e);
		}
	}
	
	private void writeSiteMap(OutputStreamWriter out) throws IOException {
		StringBuilder sb = new StringBuilder();
		writeSiteMapAsString(sb, urls);
		out.write(sb.toString());
		out.close();
	}
	
}