// All Downloads are FREE. Search and download functionality uses the official Maven repository.
//
// com.phloc.web.sitemap.XMLSitemapIndex Maven / Gradle / Ivy
//
// There is a newer version: 6.0.0
// Show newest version
/**
 * Copyright (C) 2006-2015 phloc systems
 * http://www.phloc.com
 * office[at]phloc[dot]com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.phloc.web.sitemap;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPOutputStream;

import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.NotThreadSafe;

import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.phloc.commons.ValueEnforcer;
import com.phloc.commons.annotations.Nonempty;
import com.phloc.commons.annotations.ReturnsMutableCopy;
import com.phloc.commons.collections.ContainerHelper;
import com.phloc.commons.hash.HashCodeGenerator;
import com.phloc.commons.io.file.FileUtils;
import com.phloc.commons.io.file.SimpleFileIO;
import com.phloc.commons.io.streams.StreamUtils;
import com.phloc.commons.microdom.IMicroDocument;
import com.phloc.commons.microdom.IMicroElement;
import com.phloc.commons.microdom.impl.MicroDocument;
import com.phloc.commons.microdom.serialize.MicroWriter;
import com.phloc.commons.state.ESuccess;
import com.phloc.commons.string.ToStringGenerator;
import com.phloc.commons.xml.serialize.IXMLWriterSettings;
import com.phloc.commons.xml.serialize.XMLWriterSettings;
import com.phloc.web.CWebCharset;
import com.phloc.web.datetime.PDTWebDateUtils;
import com.phloc.web.servlet.server.StaticServerInfo;

/**
 * Contains a list of {@link XMLSitemapURLSet} objects. Necessary to group
 * multiple sitemaps when the number of URLs or the total size of a single URL
 * set is exceeded. The index can be rendered as an XML document referencing
 * one sitemap file per contained URL set, and the index together with all URL
 * sets can be written to a directory.
 * 
 * @author Philip Helger
 */
@NotThreadSafe
public final class XMLSitemapIndex implements Serializable
{
  /** By default the single sitemap files are written GZip compressed. */
  public static final boolean DEFAULT_USE_GZIP = true;
  private static final String ELEMENT_SITEMAPINDEX = "sitemapindex";
  private static final String ELEMENT_SITEMAP = "sitemap";
  private static final String ELEMENT_LOC = "loc";
  private static final String ELEMENT_LASTMOD = "lastmod";
  private static final Logger s_aLogger = LoggerFactory.getLogger (XMLSitemapIndex.class);

  // The position of a URL set in this list is the index used in its filename
  private final List <XMLSitemapURLSet> m_aURLSets = new ArrayList <XMLSitemapURLSet> ();
  private final boolean m_bUseGZip;

  /**
   * Constructor using GZip output by default
   */
  public XMLSitemapIndex ()
  {
    this (DEFAULT_USE_GZIP);
  }

  /**
   * Constructor
   * 
   * @param bUseGZip
   *        If <code>true</code> all contained URL sets are written to disk
   *        using the GZip algorithm
   */
  public XMLSitemapIndex (final boolean bUseGZip)
  {
    m_bUseGZip = bUseGZip;
  }

  /**
   * @return <code>true</code> if the URL sets are written to disk using GZip,
   *         <code>false</code> if they are written as plain XML.
   */
  public boolean isUseGZip ()
  {
    return m_bUseGZip;
  }

  /**
   * @return The number of URL sets currently contained. Always &ge; 0.
   */
  @Nonnegative
  public int getURLSetCount ()
  {
    return m_aURLSets.size ();
  }

  /**
   * @return A copy of the list of all contained URL sets. Never
   *         <code>null</code>.
   */
  @Nonnull
  @ReturnsMutableCopy
  public List <XMLSitemapURLSet> getAllURLSets ()
  {
    return ContainerHelper.newList (m_aURLSets);
  }

  /**
   * Add a URL set to this index. If the passed URL set is a multi-file sitemap
   * (as reported by {@link XMLSitemapURLSet#isMultiFileSitemap()}) its URLs are
   * redistributed, in their original order, onto several new URL sets so that
   * none of the added URL sets is a multi-file sitemap itself. Otherwise the
   * passed object is added as is.
   * 
   * @param aURLSet
   *        The URL set to be added. May not be <code>null</code>.
   */
  public void addURLSet (@Nonnull final XMLSitemapURLSet aURLSet)
  {
    ValueEnforcer.notNull (aURLSet, "URLset");

    if (aURLSet.isMultiFileSitemap ())
    {
      // Split into several smaller URL sets
      final int nEntries = aURLSet.getURLCount ();
      XMLSitemapURLSet aNewURLSet = new XMLSitemapURLSet ();
      for (int i = 0; i < nEntries; ++i)
      {
        final XMLSitemapURL aCurrentURL = aURLSet.getURL (i);
        aNewURLSet.addURL (aCurrentURL);
        if (aNewURLSet.isMultiFileSitemap ())
        {
          // We do have an overflow -> remove the last item and add the URL set
          aNewURLSet.removeLastURL ();
          m_aURLSets.add (aNewURLSet);

          // start with a new URL set containing the last overflow-creating
          // entry
          aNewURLSet = new XMLSitemapURLSet ();
          aNewURLSet.addURL (aCurrentURL);
        }
      }

      // Append last URL set - always contains something!
      m_aURLSets.add (aNewURLSet);
    }
    else
      m_aURLSets.add (aURLSet);
  }

  /**
   * Get the name of the sitemap file at the specified index
   * 
   * @param nIndex
   *        The index to be used. Should be &ge; 0.
   * @param bUseGZip
   *        If <code>true</code> the suffix <code>.gz</code> is appended to the
   *        filename.
   * @return The name of the sitemap file. Neither <code>null</code> nor empty.
   */
  @Nonnull
  @Nonempty
  public static String getSitemapFilename (@Nonnegative final int nIndex, final boolean bUseGZip)
  {
    return "sitemap" + nIndex + ".xml" + (bUseGZip ? ".gz" : "");
  }

  /**
   * Get the name of the sitemap file at the specified index, using the GZip
   * setting of this index.
   * 
   * @param nIndex
   *        The index to be used. Should be &ge; 0.
   * @return The name of the sitemap file. Neither <code>null</code> nor empty.
   * @see #getSitemapFilename(int, boolean)
   */
  @Nonnull
  @Nonempty
  public String getSitemapFilename (@Nonnegative final int nIndex)
  {
    return getSitemapFilename (nIndex, m_bUseGZip);
  }

  /**
   * Build the sitemap index document. It contains one <code>sitemap</code>
   * element per contained URL set, each with a <code>loc</code> child and - if
   * the URL set has a last modification date time - a <code>lastmod</code>
   * child.
   * 
   * @return The created document. Never <code>null</code>.
   */
  @Nonnull
  public IMicroDocument getAsDocument ()
  {
    final String sNamespaceURL = CXMLSitemap.XML_NAMESPACE_0_9;
    final IMicroDocument ret = new MicroDocument ();
    final IMicroElement eSitemapindex = ret.appendElement (sNamespaceURL, ELEMENT_SITEMAPINDEX);
    int nIndex = 0;
    for (final XMLSitemapURLSet aURLSet : m_aURLSets)
    {
      final IMicroElement eSitemap = eSitemapindex.appendElement (sNamespaceURL, ELEMENT_SITEMAP);

      // The location of the sub-sitemaps must be prefixed with the full server
      // and context path
      final String sLocation = StaticServerInfo.getInstance ().getFullContextPath () +
                               "/" +
                               getSitemapFilename (nIndex);
      eSitemap.appendElement (sNamespaceURL, ELEMENT_LOC).appendText (sLocation);

      // The last modification is optional
      final DateTime aLastModification = aURLSet.getLastModificationDateTime ();
      if (aLastModification != null)
        eSitemap.appendElement (sNamespaceURL, ELEMENT_LASTMOD)
                .appendText (PDTWebDateUtils.getAsStringXSD (aLastModification));
      ++nIndex;
    }
    return ret;
  }

  /**
   * @return The XML writer settings used to serialize the index document.
   *         Never <code>null</code>.
   */
  @Nonnull
  protected IXMLWriterSettings getXMLWriterSettings ()
  {
    // Important: No indent and align, because otherwise the calculated output
    // length would not be suitable
    return CXMLSitemap.XML_WRITER_SETTINGS;
  }

  /**
   * @return The sitemap index document serialized as an XML string. Never
   *         <code>null</code>.
   * @see #getAsDocument()
   */
  @Nonnull
  public String getAsXMLString ()
  {
    return MicroWriter.getNodeAsString (getAsDocument (), getXMLWriterSettings ());
  }

  /**
   * Open the output stream for a single sitemap file, wrapped into a
   * {@link GZIPOutputStream} if GZip usage is enabled.
   * 
   * @param aFile
   *        The file to write to. May not be <code>null</code>.
   * @return The stream to write the sitemap content to.
   * @throws IllegalStateException
   *         If the GZip stream could not be created. In this case the
   *         underlying file stream is closed before the exception is thrown.
   */
  @Nonnull
  private OutputStream _createOutputStream (@Nonnull final File aFile)
  {
    // NOTE(review): the result of FileUtils.getOutputStream is used unchecked,
    // as before - confirm how it signals a file that cannot be opened
    final OutputStream aFileOS = FileUtils.getOutputStream (aFile);
    if (!m_bUseGZip)
      return aFileOS;

    try
    {
      return new GZIPOutputStream (aFileOS);
    }
    catch (final IOException ex)
    {
      // Nobody else holds a reference to the file stream -> close it here so
      // that the file handle is not leaked
      try
      {
        aFileOS.close ();
      }
      catch (final IOException exClose)
      {
        s_aLogger.warn ("Failed to close OutputStream of " + aFile, exClose);
      }
      throw new IllegalStateException ("Failed to create GZip OutputStream for " + aFile, ex);
    }
  }

  /**
   * Write the sitemap index file and one sitemap file per contained URL set
   * into the passed directory. Writing stops at the first file that cannot be
   * written; files written up to then are left in place.
   * 
   * @param aBaseDir
   *        The directory to write to. May not be <code>null</code> and must be
   *        an existing directory.
   * @return {@link ESuccess#SUCCESS} if all files were written,
   *         {@link ESuccess#FAILURE} if no URL set is contained or if writing
   *         any of the files failed.
   * @throws IllegalArgumentException
   *         If the passed directory does not exist.
   */
  @Nonnull
  public ESuccess writeToDisk (@Nonnull final File aBaseDir)
  {
    ValueEnforcer.notNull (aBaseDir, "Basedir");
    if (!FileUtils.existsDir (aBaseDir))
      throw new IllegalArgumentException ("The passed directory does not exist: " + aBaseDir);

    if (m_aURLSets.isEmpty ())
    {
      s_aLogger.warn ("No URL sets contained - not doing anything!");
      return ESuccess.FAILURE;
    }

    // Write base file
    if (SimpleFileIO.writeFile (new File (aBaseDir, CXMLSitemap.SITEMAP_ENTRY_FILENAME),
                                getAsXMLString (),
                                CWebCharset.CHARSET_XML_OBJ).isFailure ())
    {
      s_aLogger.error ("Failed to write sitemap.xml file!");
      return ESuccess.FAILURE;
    }

    // Write all URL sets - the filenames must match the ones referenced from
    // the index document, therefore the same running index is used
    int nIndex = 0;
    for (final XMLSitemapURLSet aURLSet : m_aURLSets)
    {
      final String sFilename = getSitemapFilename (nIndex);
      final File aFile = new File (aBaseDir, sFilename);
      // NOTE(review): the stream is not closed here - presumably
      // StreamUtils.writeStream closes the passed stream; confirm
      final OutputStream aOS = _createOutputStream (aFile);
      if (StreamUtils.writeStream (aOS, aURLSet.getAsXMLString (), XMLWriterSettings.DEFAULT_XML_CHARSET_OBJ)
                     .isFailure ())
      {
        s_aLogger.error ("Failed to write single sitemap file " + aFile);
        return ESuccess.FAILURE;
      }
      nIndex++;
    }

    return ESuccess.SUCCESS;
  }

  @Override
  public boolean equals (final Object o)
  {
    if (o == this)
      return true;
    if (!(o instanceof XMLSitemapIndex))
      return false;
    final XMLSitemapIndex rhs = (XMLSitemapIndex) o;
    return m_aURLSets.equals (rhs.m_aURLSets) && m_bUseGZip == rhs.m_bUseGZip;
  }

  @Override
  public int hashCode ()
  {
    // Uses exactly the fields that are compared in equals
    return new HashCodeGenerator (this).append (m_aURLSets).append (m_bUseGZip).getHashCode ();
  }

  @Override
  public String toString ()
  {
    return new ToStringGenerator (this).append ("URLSets", m_aURLSets).append ("useGZip", m_bUseGZip).toString ();
  }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy