// org.opencms.site.xmlsitemap.CmsXmlSitemapGenerator
// Source listing taken from the opencms-test artifact.
/*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (C) Alkacon Software (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.site.xmlsitemap;
import org.opencms.ade.detailpage.CmsDetailPageInfo;
import org.opencms.db.CmsAlias;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProperty;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsRequestContext;
import org.opencms.file.CmsResource;
import org.opencms.file.CmsResourceFilter;
import org.opencms.file.CmsVfsResourceNotFoundException;
import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.gwt.shared.alias.CmsAliasMode;
import org.opencms.jsp.CmsJspNavBuilder;
import org.opencms.jsp.CmsJspNavElement;
import org.opencms.loader.CmsLoaderException;
import org.opencms.loader.CmsResourceManager;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.relations.CmsRelation;
import org.opencms.relations.CmsRelationFilter;
import org.opencms.relations.CmsRelationType;
import org.opencms.site.CmsSite;
import org.opencms.util.CmsFileUtil;
import org.opencms.util.CmsStringUtil;
import org.opencms.util.CmsUUID;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
/**
* Class for generating XML sitemaps for SEO purposes, as described in
* http://www.sitemaps.org/protocol.html.
*/
public class CmsXmlSitemapGenerator {
/**
 * Pairs a sitemap URL bean with an internal priority score. The score decides which of several
 * entries sharing the same URL is kept in the result map.
 */
protected class ResultEntry {

    /**
     * Internal priority to determine which of multiple entries with the same URL is used.
     * Note that this has nothing to do with the priority in the URL bean itself!
     */
    private final int m_priority;

    /** The URL bean. */
    private final CmsXmlSitemapUrlBean m_urlBean;

    /**
     * Creates a new result entry.
     *
     * @param urlBean the url bean
     * @param priority the internal priority
     */
    public ResultEntry(CmsXmlSitemapUrlBean urlBean, int priority) {

        m_priority = priority;
        m_urlBean = urlBean;
    }

    /**
     * Gets the internal priority used to determine which of multiple entries with the same URL to use.
     * This has nothing to do with the priority defined in the URL beans themselves!
     *
     * @return the internal priority
     */
    public int getPriority() {

        return m_priority;
    }

    /**
     * Gets the URL bean.
     *
     * @return the URL bean
     */
    public CmsXmlSitemapUrlBean getUrlBean() {

        return m_urlBean;
    }
}
/** The default change frequency. */
public static final String DEFAULT_CHANGE_FREQUENCY = "daily";
/** The default priority. */
public static final double DEFAULT_PRIORITY = 0.5;
/** The logger instance for this class. */
private static final Log LOG = CmsLog.getLog(CmsXmlSitemapGenerator.class);
/** The root path for the sitemap root folder. */
protected String m_baseFolderRootPath;
/** The site path of the base folder. */
protected String m_baseFolderSitePath;
/** Flag to control whether container page dates should be computed. */
protected boolean m_computeContainerPageDates;
/** The list of detail page info beans. */
protected List m_detailPageInfos = new ArrayList();
/** A map from type names to lists of potential detail resources of that type. */
protected Map> m_detailResources = new HashMap>();
/** A multimap from detail page root paths to corresponding types. */
protected Multimap m_detailTypesByPage = ArrayListMultimap.create();
/** A CMS context with guest privileges. */
protected CmsObject m_guestCms;
/** The include/exclude configuration used for choosing pages for the XML sitemap. */
protected CmsPathIncludeExcludeSet m_includeExcludeSet = new CmsPathIncludeExcludeSet();
/** A map from structure ids to page aliases below the base folder which point to the given structure id. */
protected Multimap m_pageAliasesBelowBaseFolderByStructureId = ArrayListMultimap.create();
/** The map used for storing the results, with URLs as keys. */
protected Map m_resultMap = new LinkedHashMap();
/** A guest user CMS object with the site root of the base folder. */
protected CmsObject m_siteGuestCms;
/** The site root of the base folder. */
protected String m_siteRoot;
/** A link to the site root. */
protected String m_siteRootLink;
/** Configured replacement server URL. */
private String m_serverUrl;
/**
 * Builds a sitemap generator for the given base folder.
 *
 * Two guest-user CMS contexts are set up: a plain one, and a copy whose site root is switched to
 * the site that contains the folder. The folder's site path is derived from the latter.
 *
 * @param folderRootPath the root path of the folder for which the XML sitemap is generated
 *
 * @throws CmsException if something goes wrong
 */
public CmsXmlSitemapGenerator(String folderRootPath)
throws CmsException {

    m_baseFolderRootPath = CmsFileUtil.removeTrailingSeparator(folderRootPath);

    m_guestCms = OpenCms.initCmsObject(OpenCms.getDefaultUsers().getUserGuest());
    m_siteGuestCms = OpenCms.initCmsObject(m_guestCms);

    // the site lookup is done with a trailing slash appended to the folder path
    String folderPathWithSlash = CmsStringUtil.joinPaths(folderRootPath, "/");
    m_siteRoot = OpenCms.getSiteManager().getSiteForRootPath(folderPathWithSlash).getSiteRoot();

    CmsRequestContext siteContext = m_siteGuestCms.getRequestContext();
    siteContext.setSiteRoot(m_siteRoot);
    m_baseFolderSitePath = CmsStringUtil.joinPaths("/", siteContext.removeSiteRoot(m_baseFolderRootPath));
}
/**
 * Swaps the scheme and authority of a link for those of the given server URI.
 *
 * The link is returned unchanged when the server string is empty or whitespace-only, or when
 * either string cannot be parsed as a URI (the parse error is logged).
 *
 * @param link the link to change
 * @param server the server URI string
 * @return the changed link
 */
public static String replaceServerUri(String link, String server) {

    if (CmsStringUtil.isEmptyOrWhitespaceOnly(server)) {
        return link;
    }
    String rewritten;
    try {
        URI target = new URI(server);
        URI source = new URI(link);
        // keep path, query and fragment of the link; take scheme and authority from the server
        rewritten = new URI(
            target.getScheme(),
            target.getAuthority(),
            source.getPath(),
            source.getQuery(),
            source.getFragment()).toString();
    } catch (URISyntaxException e) {
        LOG.error(e.getLocalizedMessage(), e);
        rewritten = link;
    }
    return rewritten;
}
/**
 * Gets the change frequency for a sitemap entry from a list of properties.
 *
 * If the change frequency is not defined in the properties, this method will return null.
 *
 * @param properties the properties from which the change frequency should be obtained
 *
 * @return the trimmed change frequency string, or null if the property is not set
 */
protected static String getChangeFrequency(List<CmsProperty> properties) {

    CmsProperty prop = CmsProperty.get(CmsPropertyDefinition.PROPERTY_XMLSITEMAP_CHANGEFREQ, properties);
    if (prop.isNullProperty()) {
        return null;
    }
    return prop.getValue().trim();
}
/**
 * Gets the page priority from a list of properties.
 *
 * If the page priority can't be found among the properties, or its value is not a parseable
 * number, -1 will be returned.
 *
 * @param properties the properties of a resource
 *
 * @return the page priority read from the properties, or -1
 */
protected static double getPriority(List<CmsProperty> properties) {

    CmsProperty prop = CmsProperty.get(CmsPropertyDefinition.PROPERTY_XMLSITEMAP_PRIORITY, properties);
    if (prop.isNullProperty()) {
        return -1.0;
    }
    try {
        return Double.parseDouble(prop.getValue().trim());
    } catch (NumberFormatException e) {
        // a malformed property value is treated like a missing one
        return -1.0;
    }
}
/**
 * Removes files marked as internal from a resource list.
 *
 * The list is modified in place through its iterator.
 *
 * @param resources the list from which internal resources should be removed
 */
protected static void removeInternalFiles(List<CmsResource> resources) {

    Iterator<CmsResource> iter = resources.iterator();
    while (iter.hasNext()) {
        CmsResource resource = iter.next();
        if (resource.isInternal()) {
            iter.remove();
        }
    }
}
/**
 * Generates a list of XML sitemap entry beans for the root folder which has been set in the constructor.
 *
 * Entries are collected from three sources: the directly listed pages (internal priority 3),
 * the detail pages reachable from container pages, and the page aliases below the base folder.
 * The result contains one bean per distinct URL, in the order of the result map.
 *
 * @return the list of XML sitemap entries
 *
 * @throws CmsException if something goes wrong
 */
public List<CmsXmlSitemapUrlBean> generateSitemapBeans() throws CmsException {

    String baseSitePath = m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath);
    initializeFileData(baseSitePath);

    List<CmsResource> directPages = getDirectPages();
    for (CmsResource resource : directPages) {
        String sitePath = m_siteGuestCms.getSitePath(resource);
        List<CmsProperty> propertyList = m_siteGuestCms.readPropertyObjects(resource, true);
        String onlineLink = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, sitePath);
        boolean isContainerPage = CmsResourceTypeXmlContainerPage.isContainerPage(resource);
        long dateModified = resource.getDateLastModified();
        if (isContainerPage) {
            // for container pages the date is either derived from the referenced contents or set to -1
            // (presumably meaning "no modification date" to the URL bean - confirm in CmsXmlSitemapUrlBean)
            if (m_computeContainerPageDates) {
                dateModified = computeContainerPageModificationDate(resource);
            } else {
                dateModified = -1;
            }
        }
        CmsXmlSitemapUrlBean urlBean = new CmsXmlSitemapUrlBean(
            replaceServerUri(onlineLink),
            dateModified,
            getChangeFrequency(propertyList),
            getPriority(propertyList));
        urlBean.setOriginalResource(resource);
        addResult(urlBean, 3);
        if (isContainerPage) {
            Locale locale = getLocale(resource, propertyList);
            addDetailLinks(resource, locale);
        }
    }

    for (CmsUUID aliasStructureId : m_pageAliasesBelowBaseFolderByStructureId.keySet()) {
        addAliasLinks(aliasStructureId);
    }

    List<CmsXmlSitemapUrlBean> result = new ArrayList<CmsXmlSitemapUrlBean>();
    for (ResultEntry resultEntry : m_resultMap.values()) {
        result.add(resultEntry.getUrlBean());
    }
    return result;
}
/**
 * Returns the include/exclude configuration which decides which paths end up in the XML sitemap.
 *
 * The returned object is the generator's own instance, not a copy.
 *
 * @return the include/exclude configuration
 */
public CmsPathIncludeExcludeSet getIncludeExcludeSet() {

    return this.m_includeExcludeSet;
}
/**
 * Generates a sitemap and formats it as a string.
 *
 * The output consists of the XML declaration, the opening tag from {@link #getUrlSetOpenTag()},
 * one line per entry as produced by {@link #getXmlForEntry(CmsXmlSitemapUrlBean)}, and the
 * closing <code>urlset</code> tag.
 *
 * @return the sitemap XML data
 *
 * @throws CmsException if something goes wrong
 */
public String renderSitemap() throws CmsException {

    List<CmsXmlSitemapUrlBean> urlBeans = generateSitemapBeans();
    StringBuilder buffer = new StringBuilder();
    buffer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    buffer.append(getUrlSetOpenTag()).append("\n");
    for (CmsXmlSitemapUrlBean bean : urlBeans) {
        buffer.append(getXmlForEntry(bean)).append("\n");
    }
    // closes the element opened by getUrlSetOpenTag()
    buffer.append("</urlset>");
    return buffer.toString();
}
/**
 * Switches the computation of container page modification dates on or off.
 *
 * @param computeContainerPageDates true if the dates of container pages should be computed
 */
public void setComputeContainerPageDates(boolean computeContainerPageDates) {

    this.m_computeContainerPageDates = computeContainerPageDates;
}
/**
 * Stores the replacement server URL.
 *
 * When set, its scheme/host/port replace those of the URLs returned by getOnlineLink.
 *
 * @param serverUrl the server URL
 */
public void setServerUrl(String serverUrl) {

    this.m_serverUrl = serverUrl;
}
/**
 * Adds the detail page links for a given page to the results.
 *
 * For every type the page is configured as a detail page for, each candidate detail resource of
 * that type yields one entry with internal priority 2.
 *
 * @param containerPage the container page resource
 * @param locale the locale of the container page
 *
 * @throws CmsException if something goes wrong
 */
protected void addDetailLinks(CmsResource containerPage, Locale locale) throws CmsException {

    List<I_CmsResourceType> types = getDetailTypesForPage(containerPage);
    for (I_CmsResourceType type : types) {
        List<CmsResource> resourcesForType = getDetailResources(type);
        for (CmsResource detailRes : resourcesForType) {
            if (!isValidDetailPageCombination(containerPage, locale, detailRes)) {
                continue;
            }
            List<CmsProperty> detailProps = m_guestCms.readPropertyObjects(detailRes, true);
            String detailLink = getDetailLink(containerPage, detailRes, locale);
            detailLink = CmsFileUtil.removeTrailingSeparator(detailLink);
            CmsXmlSitemapUrlBean detailUrlBean = new CmsXmlSitemapUrlBean(
                replaceServerUri(detailLink),
                detailRes.getDateLastModified(),
                getChangeFrequency(detailProps),
                getPriority(detailProps));
            detailUrlBean.setOriginalResource(detailRes);
            detailUrlBean.setDetailPageResource(containerPage);
            addResult(detailUrlBean, 2);
        }
    }
}
/**
 * Adds an URL bean to the internal map of results, but only if there is no existing entry with higher internal priority
 * than the priority given as an argument.
 *
 * The map key is the bean's URL without trailing separator. An existing entry with equal or lower
 * internal priority is overwritten; every duplicate is logged as a warning.
 *
 * @param result the result URL bean to add
 *
 * @param resultPriority the internal priority to use for updating the map of results
 */
protected void addResult(CmsXmlSitemapUrlBean result, int resultPriority) {

    String url = CmsFileUtil.removeTrailingSeparator(result.getUrl());
    ResultEntry existing = m_resultMap.get(url);
    if (existing != null) {
        LOG.warn("Encountered duplicate URL while generating sitemap: " + result.getUrl());
        if (existing.getPriority() > resultPriority) {
            // the entry already stored wins
            return;
        }
    }
    m_resultMap.put(url, new ResultEntry(result, resultPriority));
}
/**
 * Computes the container page modification date from its referenced contents.
 *
 * The result is the latest last-modification date among the page itself and the targets of its
 * XML_STRONG relations. Relation targets which can't be read are logged and skipped.
 *
 * @param containerPage the container page
 *
 * @return the computed modification date
 *
 * @throws CmsException if something goes wrong
 */
protected long computeContainerPageModificationDate(CmsResource containerPage) throws CmsException {

    CmsRelationFilter filter = CmsRelationFilter.relationsFromStructureId(
        containerPage.getStructureId()).filterType(CmsRelationType.XML_STRONG);
    List<CmsRelation> relations = m_guestCms.readRelations(filter);
    long result = containerPage.getDateLastModified();
    for (CmsRelation relation : relations) {
        try {
            CmsResource target = relation.getTarget(
                m_guestCms,
                CmsResourceFilter.DEFAULT_FILES.addRequireVisible());
            long targetDate = target.getDateLastModified();
            if (targetDate > result) {
                result = targetDate;
            }
        } catch (CmsException e) {
            LOG.warn(
                "Could not get relation target for relation "
                    + relation.toString()
                    + " | "
                    + e.getLocalizedMessage(),
                e);
        }
    }
    return result;
}
/**
 * Builds the online link to a detail content as shown on the given container page.
 *
 * The request context of the site guest CMS object is temporarily pointed at the page and the
 * locale while the link is generated; its previous URI and locale are restored afterwards.
 *
 * @param pageRes the container page
 * @param detailRes the detail content
 * @param locale the locale for which we want the link
 *
 * @return the detail page link
 */
protected String getDetailLink(CmsResource pageRes, CmsResource detailRes, Locale locale) {

    String pagePath = m_siteGuestCms.getSitePath(pageRes);
    String detailPath = m_siteGuestCms.getSitePath(detailRes);
    CmsRequestContext context = m_siteGuestCms.getRequestContext();
    String savedUri = context.getUri();
    Locale savedLocale = context.getLocale();
    String link;
    try {
        context.setUri(pagePath);
        context.setLocale(locale);
        link = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, detailPath, true);
    } finally {
        // always put the shared context back into its previous state
        context.setUri(savedUri);
        context.setLocale(savedLocale);
    }
    return link;
}
/**
 * Gets the types for which a given resource is configured as a detail page.
 *
 * Types registered for the resource's own root path and for its parent folder are merged;
 * type names which the resource manager doesn't know are logged and left out.
 *
 * @param resource a resource for which we want to find the detail page types
 *
 * @return the list of resource types for which the given page is configured as a detail page
 */
protected List<I_CmsResourceType> getDetailTypesForPage(CmsResource resource) {

    String pagePath = resource.getRootPath();
    String parentPath = CmsFileUtil.removeTrailingSeparator(CmsResource.getParentFolder(pagePath));

    // a set, so that a type configured for both the page and its folder is only used once
    Set<String> allTypes = new HashSet<String>();
    allTypes.addAll(m_detailTypesByPage.get(pagePath));
    allTypes.addAll(m_detailTypesByPage.get(parentPath));

    List<I_CmsResourceType> resTypes = new ArrayList<I_CmsResourceType>();
    CmsResourceManager resMan = OpenCms.getResourceManager();
    for (String typeName : allTypes) {
        try {
            resTypes.add(resMan.getResourceType(typeName));
        } catch (CmsLoaderException e) {
            LOG.warn("Invalid resource type name " + typeName + "! " + e.getLocalizedMessage(), e);
        }
    }
    return resTypes;
}
/**
 * Gets the list of pages which should be directly added to the XML sitemap.
 *
 * These are the navigation pages plus everything named by the include roots (a single file, or
 * all files below a folder), minus internal resources and resources on excluded paths.
 *
 * @return the list of resources which should be directly added to the XML sitemap
 *
 * @throws CmsException if something goes wrong
 */
protected List<CmsResource> getDirectPages() throws CmsException {

    List<CmsResource> result = new ArrayList<CmsResource>();
    result.addAll(getNavigationPages());

    Set<String> includeRoots = m_includeExcludeSet.getIncludeRoots();
    for (String includeRoot : includeRoots) {
        try {
            CmsResource resource = m_guestCms.readResource(includeRoot);
            if (resource.isFile()) {
                result.add(resource);
            } else {
                List<CmsResource> subtreeFiles = m_guestCms.readResources(
                    includeRoot,
                    CmsResourceFilter.DEFAULT_FILES,
                    true);
                result.addAll(subtreeFiles);
            }
        } catch (CmsVfsResourceNotFoundException e) {
            // a missing include root is not fatal, the remaining roots are still processed
            LOG.warn("Could not read include resource: " + includeRoot, e);
        }
    }

    Iterator<CmsResource> filterIter = result.iterator();
    while (filterIter.hasNext()) {
        CmsResource currentResource = filterIter.next();
        if (currentResource.isInternal() || m_includeExcludeSet.isExcluded(currentResource.getRootPath())) {
            filterIter.remove();
        }
    }
    return result;
}
/**
 * Produces the child elements of an url element for the given entry.
 *
 * The entry writes, in this order, its location, last modification date, change frequency and
 * priority into a fresh buffer.
 *
 * @param entry the entry for which the content should be written
 * @return the inner XML
 */
protected String getInnerXmlForEntry(CmsXmlSitemapUrlBean entry) {

    StringBuffer xml = new StringBuffer();
    String location = entry.getUrl();
    entry.writeElement(xml, "loc", location);
    entry.writeLastmod(xml);
    entry.writeChangefreq(xml);
    entry.writePriority(xml);
    return xml.toString();
}
/**
 * Gets the list of pages from the navigation which should be directly added to the XML sitemap.
 *
 * The default file of the base folder comes first (if it can be read), followed by the entries
 * of the site navigation below the base folder. Navigation folders are represented by their
 * default file; folders without a readable default file are logged and skipped.
 *
 * @return the list of pages to add to the XML sitemap
 */
protected List<CmsResource> getNavigationPages() {

    List<CmsResource> result = new ArrayList<CmsResource>();
    CmsJspNavBuilder navBuilder = new CmsJspNavBuilder(m_siteGuestCms);
    try {
        CmsResource rootDefaultFile = m_siteGuestCms.readDefaultFile(
            m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath),
            CmsResourceFilter.DEFAULT);
        if (rootDefaultFile != null) {
            result.add(rootDefaultFile);
        }
    } catch (Exception e) {
        // best effort: a base folder without default file must not abort the generation
        LOG.info(e.getLocalizedMessage(), e);
    }

    List<CmsJspNavElement> navElements = navBuilder.getSiteNavigation(m_baseFolderSitePath, -1);
    for (CmsJspNavElement navElement : navElements) {
        CmsResource navResource = navElement.getResource();
        if (navResource.isFolder()) {
            try {
                CmsResource defaultFile = m_guestCms.readDefaultFile(navResource, CmsResourceFilter.DEFAULT_FILES);
                if (defaultFile != null) {
                    result.add(defaultFile);
                } else {
                    LOG.warn("Could not get default file for " + navResource.getRootPath());
                }
            } catch (CmsException e) {
                LOG.warn("Could not get default file for " + navResource.getRootPath(), e);
            }
        } else {
            result.add(navResource);
        }
    }
    return result;
}
/**
 * Gets the opening tag for the urlset element (can be overridden to add e.g. more namespaces).
 *
 * The default implementation declares only the sitemap protocol namespace.
 *
 * @return the opening tag
 */
protected String getUrlSetOpenTag() {

    return "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">";
}
/**
 * Builds the XML for a single URL entry.
 *
 * The content from {@link #getInnerXmlForEntry(CmsXmlSitemapUrlBean)} is wrapped in an
 * <code>url</code> element.
 *
 * @param entry the XML sitemap entry bean
 *
 * @return an XML representation of this bean
 */
protected String getXmlForEntry(CmsXmlSitemapUrlBean entry) {

    StringBuilder buffer = new StringBuilder();
    buffer.append("<url>");
    buffer.append(getInnerXmlForEntry(entry));
    buffer.append("</url>");
    return buffer.toString();
}
/**
 * Tells whether an alias path lies below the base folder of this generator.
 *
 * The base folder's site path is tested as a path prefix of the alias path.
 *
 * @param alias the alias to check
 *
 * @return true if the alias is below the base folder
 */
protected boolean isAliasBelowBaseFolder(CmsAlias alias) {

    return CmsStringUtil.isPrefixPath(m_baseFolderSitePath, alias.getAliasPath());
}
/**
 * Applies the configured replacement server URL to a link.
 *
 * Delegates to the static two-argument variant, passing the server URL set on this generator.
 *
 * @param link the link to change
 *
 * @return the changed link
 */
protected String replaceServerUri(String link) {

    String configuredServer = m_serverUrl;
    return replaceServerUri(link, configuredServer);
}
/**
* Adds the alias links for a given structure id to the results.
*
* @param aliasStructureId the alias target structure id
*/
private void addAliasLinks(CmsUUID aliasStructureId) {
try {
CmsResource aliasTarget = m_guestCms.readResource(aliasStructureId);
List properties = m_guestCms.readPropertyObjects(aliasTarget, true);
double priority = getPriority(properties);
String changeFrequency = getChangeFrequency(properties);
Collection aliases = m_pageAliasesBelowBaseFolderByStructureId.get(aliasStructureId);
for (CmsAlias alias : aliases) {
String aliasLink = (m_siteRootLink + "/" + alias.getAliasPath()).replaceAll("(?
*
* @param type the type to filter by
*
* @return the list of resources with the given type
*
* @throws CmsException if something goes wrong
*/
private List<CmsResource> getDetailResources(I_CmsResourceType type) throws CmsException {

    // Results are cached per type name in m_detailResources, so the VFS is read once per type.
    String typeName = type.getTypeName();
    List<CmsResource> cached = m_detailResources.get(typeName);
    if (cached == null) {
        cached = new ArrayList<CmsResource>();
        CmsResourceFilter filter = CmsResourceFilter.DEFAULT_FILES.addRequireType(type);
        cached.addAll(m_guestCms.readResources(m_siteRoot, filter, true));
        // Test the shared folder for null before normalizing it; previously the check only
        // happened after the value had already been passed to removeTrailingSeparator.
        String sharedFolder = OpenCms.getSiteManager().getSharedFolder();
        if (sharedFolder != null) {
            String shared = CmsFileUtil.removeTrailingSeparator(sharedFolder);
            cached.addAll(m_guestCms.readResources(shared, filter, true));
        }
        m_detailResources.put(typeName, cached);
    }
    return cached;
}
/**
 * Gets the locale to use for the given resource.
 *
 * This is the default locale which the locale manager reports for the resource's path in the
 * guest CMS context.
 *
 * @param resource the resource
 * @param propertyList the properties of the resource (currently not evaluated)
 *
 * @return the locale to use for the given resource
 */
private Locale getLocale(CmsResource resource, List<CmsProperty> propertyList) {

    return OpenCms.getLocaleManager().getDefaultLocale(m_guestCms, m_guestCms.getSitePath(resource));
}
/**
 * Reads the data necessary for building the sitemap from the VFS and initializes the internal data structures.
 *
 * The result map, the detail-type multimap and the alias multimap are emptied first. Both
 * multimaps keep duplicate key/value pairs, so without the reset every further call of
 * {@link #generateSitemapBeans()} on the same instance would add all pairs again. The cache of
 * detail resources is not touched here.
 *
 * @param baseSitePath the base site path (currently not evaluated)
 *
 * @throws CmsException if something goes wrong
 */
private void initializeFileData(String baseSitePath) throws CmsException {

    m_resultMap.clear();
    m_detailTypesByPage.clear();
    m_pageAliasesBelowBaseFolderByStructureId.clear();

    m_siteRootLink = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, "/");
    m_siteRootLink = CmsFileUtil.removeTrailingSeparator(m_siteRootLink);

    m_detailPageInfos = OpenCms.getADEManager().getAllDetailPages(m_guestCms);
    for (CmsDetailPageInfo detailPageInfo : m_detailPageInfos) {
        String type = detailPageInfo.getType();
        String path = detailPageInfo.getUri();
        path = CmsFileUtil.removeTrailingSeparator(path);
        m_detailTypesByPage.put(path, type);
    }

    List<CmsAlias> siteAliases = OpenCms.getAliasManager().getAliasesForSite(
        m_siteGuestCms,
        m_siteGuestCms.getRequestContext().getSiteRoot());
    for (CmsAlias alias : siteAliases) {
        // only aliases in page mode which lie below the base folder produce sitemap entries
        if (isAliasBelowBaseFolder(alias) && (alias.getMode() == CmsAliasMode.page)) {
            CmsUUID aliasId = alias.getStructureId();
            m_pageAliasesBelowBaseFolderByStructureId.put(aliasId, alias);
        }
    }
}
/**
 * Decides whether a detail content may be shown on the given container page in the given locale.
 *
 * The current implementation accepts every combination and does not look at its arguments.
 *
 * @param page the container page
 * @param locale the locale
 * @param detailRes the detail content resource
 *
 * @return true if this is a valid detail page combination
 */
private boolean isValidDetailPageCombination(CmsResource page, Locale locale, CmsResource detailRes) {

    return true;
}
}