All downloads are free. The search and download functionality uses the official Maven repository.

org.opencms.site.xmlsitemap.CmsXmlSitemapGenerator Maven / Gradle / Ivy

Go to download

OpenCms is an enterprise-ready, easy to use website content management system based on Java and XML technology. Offering a complete set of features, OpenCms helps content managers worldwide to create and maintain beautiful websites fast and efficiently.

There is a newer version: 18.0
Show newest version
/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (C) Alkacon Software (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.site.xmlsitemap;

import org.opencms.ade.detailpage.CmsDetailPageInfo;
import org.opencms.db.CmsAlias;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProperty;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsRequestContext;
import org.opencms.file.CmsResource;
import org.opencms.file.CmsResourceFilter;
import org.opencms.file.CmsVfsResourceNotFoundException;
import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.gwt.shared.alias.CmsAliasMode;
import org.opencms.jsp.CmsJspNavBuilder;
import org.opencms.jsp.CmsJspNavElement;
import org.opencms.loader.CmsLoaderException;
import org.opencms.loader.CmsResourceManager;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.relations.CmsRelation;
import org.opencms.relations.CmsRelationFilter;
import org.opencms.relations.CmsRelationType;
import org.opencms.site.CmsSite;
import org.opencms.util.CmsFileUtil;
import org.opencms.util.CmsStringUtil;
import org.opencms.util.CmsUUID;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;

/**
 * Class for generating XML sitemaps for SEO purposes, as described in
 * http://www.sitemaps.org/protocol.html.

*/ public class CmsXmlSitemapGenerator { /** * A bean that consists of a sitemap URL bean and a priority score, to determine which of multiple entries with the same * URL are to be preferred.

*/ protected class ResultEntry { /** Internal priority to determine which of multiple entries with the same URL is used. * Note that this has nothing to do with the priority in the URL bean itself! */ private int m_priority; /** The URL bean. */ private CmsXmlSitemapUrlBean m_urlBean; /** * Creates a new result entry.

* * @param urlBean the url bean * * @param priority the internal priority */ public ResultEntry(CmsXmlSitemapUrlBean urlBean, int priority) { m_priority = priority; m_urlBean = urlBean; } /** * Gets the internal priority used to determine which of multiple entries with the same URL to use.

* This has nothing to do with the priority defined in the URL beans themselves! * * @return the internal priority */ public int getPriority() { return m_priority; } /** * Gets the URL bean.

* * @return the URL bean */ public CmsXmlSitemapUrlBean getUrlBean() { return m_urlBean; } } /** The default change frequency. */ public static final String DEFAULT_CHANGE_FREQUENCY = "daily"; /** The default priority. */ public static final double DEFAULT_PRIORITY = 0.5; /** The logger instance for this class. */ private static final Log LOG = CmsLog.getLog(CmsXmlSitemapGenerator.class); /** The root path for the sitemap root folder. */ protected String m_baseFolderRootPath; /** The site path of the base folder. */ protected String m_baseFolderSitePath; /** Flag to control whether container page dates should be computed. */ protected boolean m_computeContainerPageDates; /** The list of detail page info beans. */ protected List m_detailPageInfos = new ArrayList(); /** A map from type names to lists of potential detail resources of that type. */ protected Map> m_detailResources = new HashMap>(); /** A multimap from detail page root paths to corresponding types. */ protected Multimap m_detailTypesByPage = ArrayListMultimap.create(); /** A CMS context with guest privileges. */ protected CmsObject m_guestCms; /** The include/exclude configuration used for choosing pages for the XML sitemap. */ protected CmsPathIncludeExcludeSet m_includeExcludeSet = new CmsPathIncludeExcludeSet(); /** A map from structure ids to page aliases below the base folder which point to the given structure id. */ protected Multimap m_pageAliasesBelowBaseFolderByStructureId = ArrayListMultimap.create(); /** The map used for storing the results, with URLs as keys. */ protected Map m_resultMap = new LinkedHashMap(); /** A guest user CMS object with the site root of the base folder. */ protected CmsObject m_siteGuestCms; /** The site root of the base folder. */ protected String m_siteRoot; /** A link to the site root. */ protected String m_siteRootLink; /** Configured replacement server URL. */ private String m_serverUrl; /** * Creates a new sitemap generator instance.

* * @param folderRootPath the root folder for the XML sitemap to generate * * @throws CmsException if something goes wrong */ public CmsXmlSitemapGenerator(String folderRootPath) throws CmsException { m_baseFolderRootPath = CmsFileUtil.removeTrailingSeparator(folderRootPath); m_guestCms = OpenCms.initCmsObject(OpenCms.getDefaultUsers().getUserGuest()); m_siteGuestCms = OpenCms.initCmsObject(m_guestCms); CmsSite site = OpenCms.getSiteManager().getSiteForRootPath(CmsStringUtil.joinPaths(folderRootPath, "/")); m_siteRoot = site.getSiteRoot(); m_siteGuestCms.getRequestContext().setSiteRoot(m_siteRoot); m_baseFolderSitePath = CmsStringUtil.joinPaths( "/", m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath)); } /** * Replaces the protocol/host/port of a link with the ones from the given server URI, if it's not empty.

* * @param link the link to change * @param server the server URI string * @return the changed link */ public static String replaceServerUri(String link, String server) { String serverUriStr = server; if (CmsStringUtil.isEmptyOrWhitespaceOnly(serverUriStr)) { return link; } try { URI serverUri = new URI(serverUriStr); URI linkUri = new URI(link); URI result = new URI( serverUri.getScheme(), serverUri.getAuthority(), linkUri.getPath(), linkUri.getQuery(), linkUri.getFragment()); return result.toString(); } catch (URISyntaxException e) { LOG.error(e.getLocalizedMessage(), e); return link; } } /** * Gets the change frequency for a sitemap entry from a list of properties.

* * If the change frequency is not defined in the properties, this method will return null.

* * @param properties the properties from which the change frequency should be obtained * * @return the change frequency string */ protected static String getChangeFrequency(List properties) { CmsProperty prop = CmsProperty.get(CmsPropertyDefinition.PROPERTY_XMLSITEMAP_CHANGEFREQ, properties); if (prop.isNullProperty()) { return null; } String result = prop.getValue().trim(); return result; } /** * Gets the page priority from a list of properties.

* * If the page priority can't be found among the properties, -1 will be returned.

* * @param properties the properties of a resource * * @return the page priority read from the properties, or -1 */ protected static double getPriority(List properties) { CmsProperty prop = CmsProperty.get(CmsPropertyDefinition.PROPERTY_XMLSITEMAP_PRIORITY, properties); if (prop.isNullProperty()) { return -1.0; } try { double result = Double.parseDouble(prop.getValue().trim()); return result; } catch (NumberFormatException e) { return -1.0; } } /** * Removes files marked as internal from a resource list.

* * @param resources the list which should be replaced */ protected static void removeInternalFiles(List resources) { Iterator iter = resources.iterator(); while (iter.hasNext()) { CmsResource resource = iter.next(); if (resource.isInternal()) { iter.remove(); } } } /** * Generates a list of XML sitemap entry beans for the root folder which has been set in the constructor.

* * @return the list of XML sitemap entries * * @throws CmsException if something goes wrong */ public List generateSitemapBeans() throws CmsException { String baseSitePath = m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath); initializeFileData(baseSitePath); for (CmsResource resource : getDirectPages()) { String sitePath = m_siteGuestCms.getSitePath(resource); List propertyList = m_siteGuestCms.readPropertyObjects(resource, true); String onlineLink = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, sitePath); boolean isContainerPage = CmsResourceTypeXmlContainerPage.isContainerPage(resource); long dateModified = resource.getDateLastModified(); if (isContainerPage) { if (m_computeContainerPageDates) { dateModified = computeContainerPageModificationDate(resource); } else { dateModified = -1; } } CmsXmlSitemapUrlBean urlBean = new CmsXmlSitemapUrlBean( replaceServerUri(onlineLink), dateModified, getChangeFrequency(propertyList), getPriority(propertyList)); urlBean.setOriginalResource(resource); addResult(urlBean, 3); if (isContainerPage) { Locale locale = getLocale(resource, propertyList); addDetailLinks(resource, locale); } } for (CmsUUID aliasStructureId : m_pageAliasesBelowBaseFolderByStructureId.keySet()) { addAliasLinks(aliasStructureId); } List result = new ArrayList(); for (ResultEntry resultEntry : m_resultMap.values()) { result.add(resultEntry.getUrlBean()); } return result; } /** * Gets the include/exclude configuration of this XML sitemap generator.

* * @return the include/exclude configuration */ public CmsPathIncludeExcludeSet getIncludeExcludeSet() { return m_includeExcludeSet; } /** * Generates a sitemap and formats it as a string.

* * @return the sitemap XML data * * @throws CmsException if something goes wrong */ public String renderSitemap() throws CmsException { StringBuffer buffer = new StringBuffer(); List urlBeans = generateSitemapBeans(); buffer.append("\n"); buffer.append(getUrlSetOpenTag() + "\n"); for (CmsXmlSitemapUrlBean bean : urlBeans) { buffer.append(getXmlForEntry(bean)); buffer.append("\n"); } buffer.append(""); return buffer.toString(); } /** * Enables or disables computation of container page dates.

* * @param computeContainerPageDates the new value */ public void setComputeContainerPageDates(boolean computeContainerPageDates) { m_computeContainerPageDates = computeContainerPageDates; } /** * Sets the replacement server URL.

* * The replacement server URL will replace the scheme/host/port from the URLs returned by getOnlineLink. * * @param serverUrl the server URL */ public void setServerUrl(String serverUrl) { m_serverUrl = serverUrl; } /** * Adds the detail page links for a given page to the results.

* * @param containerPage the container page resource * @param locale the locale of the container page * * @throws CmsException if something goes wrong */ protected void addDetailLinks(CmsResource containerPage, Locale locale) throws CmsException { List types = getDetailTypesForPage(containerPage); for (I_CmsResourceType type : types) { List resourcesForType = getDetailResources(type); for (CmsResource detailRes : resourcesForType) { if (!isValidDetailPageCombination(containerPage, locale, detailRes)) { continue; } List detailProps = m_guestCms.readPropertyObjects(detailRes, true); String detailLink = getDetailLink(containerPage, detailRes, locale); detailLink = CmsFileUtil.removeTrailingSeparator(detailLink); CmsXmlSitemapUrlBean detailUrlBean = new CmsXmlSitemapUrlBean( replaceServerUri(detailLink), detailRes.getDateLastModified(), getChangeFrequency(detailProps), getPriority(detailProps)); detailUrlBean.setOriginalResource(detailRes); detailUrlBean.setDetailPageResource(containerPage); addResult(detailUrlBean, 2); } } } /** * Adds an URL bean to the internal map of results, but only if there is no existing entry with higher internal priority * than the priority given as an argument.

* * @param result the result URL bean to add * * @param resultPriority the internal priority to use for updating the map of results */ protected void addResult(CmsXmlSitemapUrlBean result, int resultPriority) { String url = CmsFileUtil.removeTrailingSeparator(result.getUrl()); boolean writeEntry = true; if (m_resultMap.containsKey(url)) { LOG.warn("Encountered duplicate URL with while generating sitemap: " + result.getUrl()); ResultEntry entry = m_resultMap.get(url); writeEntry = entry.getPriority() <= resultPriority; } if (writeEntry) { m_resultMap.put(url, new ResultEntry(result, resultPriority)); } } /** * Computes the container the container page modification date from its referenced contents.

* * @param containerPage the container page * * @return the computed modification date * * @throws CmsException if something goes wrong */ protected long computeContainerPageModificationDate(CmsResource containerPage) throws CmsException { CmsRelationFilter filter = CmsRelationFilter.relationsFromStructureId( containerPage.getStructureId()).filterType(CmsRelationType.XML_STRONG); List relations = m_guestCms.readRelations(filter); long result = containerPage.getDateLastModified(); for (CmsRelation relation : relations) { try { CmsResource target = relation.getTarget( m_guestCms, CmsResourceFilter.DEFAULT_FILES.addRequireVisible()); long targetDate = target.getDateLastModified(); if (targetDate > result) { result = targetDate; } } catch (CmsException e) { LOG.warn( "Could not get relation target for relation " + relation.toString() + " | " + e.getLocalizedMessage(), e); } } return result; } /** * Gets the detail link for a given container page and detail content.

* * @param pageRes the container page * @param detailRes the detail content * @param locale the locale for which we want the link * * @return the detail page link */ protected String getDetailLink(CmsResource pageRes, CmsResource detailRes, Locale locale) { String pageSitePath = m_siteGuestCms.getSitePath(pageRes); String detailSitePath = m_siteGuestCms.getSitePath(detailRes); CmsRequestContext requestContext = m_siteGuestCms.getRequestContext(); String originalUri = requestContext.getUri(); Locale originalLocale = requestContext.getLocale(); try { requestContext.setUri(pageSitePath); requestContext.setLocale(locale); return OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, detailSitePath, true); } finally { requestContext.setUri(originalUri); requestContext.setLocale(originalLocale); } } /** * Gets the types for which a given resource is configured as a detail page.

* * @param resource a resource for which we want to find the detail page types * * @return the list of resource types for which the given page is configured as a detail page */ protected List getDetailTypesForPage(CmsResource resource) { Collection typesForPage = m_detailTypesByPage.get(resource.getRootPath()); String parentPath = CmsFileUtil.removeTrailingSeparator(CmsResource.getParentFolder(resource.getRootPath())); Collection typesForFolder = m_detailTypesByPage.get(parentPath); Set allTypes = new HashSet(); allTypes.addAll(typesForPage); allTypes.addAll(typesForFolder); List resTypes = new ArrayList(); CmsResourceManager resMan = OpenCms.getResourceManager(); for (String typeName : allTypes) { try { I_CmsResourceType resType = resMan.getResourceType(typeName); resTypes.add(resType); } catch (CmsLoaderException e) { LOG.warn("Invalid resource type name" + typeName + "! " + e.getLocalizedMessage(), e); } } return resTypes; } /** * Gets the list of pages which should be directly added to the XML sitemap.

* * @return the list of resources which should be directly added to the XML sitemap * * @throws CmsException if something goes wrong */ protected List getDirectPages() throws CmsException { List result = new ArrayList(); result.addAll(getNavigationPages()); Set includeRoots = m_includeExcludeSet.getIncludeRoots(); for (String includeRoot : includeRoots) { try { CmsResource resource = m_guestCms.readResource(includeRoot); if (resource.isFile()) { result.add(resource); } else { List subtreeFiles = m_guestCms.readResources( includeRoot, CmsResourceFilter.DEFAULT_FILES, true); result.addAll(subtreeFiles); } } catch (CmsVfsResourceNotFoundException e) { LOG.warn("Could not read include resource: " + includeRoot); } } Iterator filterIter = result.iterator(); while (filterIter.hasNext()) { CmsResource currentResource = filterIter.next(); if (currentResource.isInternal() || m_includeExcludeSet.isExcluded(currentResource.getRootPath())) { filterIter.remove(); } } return result; } /** * Writes the inner node content for an url element to a buffer.

* * @param entry the entry for which the content should be written * @return the inner XML */ protected String getInnerXmlForEntry(CmsXmlSitemapUrlBean entry) { StringBuffer buffer = new StringBuffer(); entry.writeElement(buffer, "loc", entry.getUrl()); entry.writeLastmod(buffer); entry.writeChangefreq(buffer); entry.writePriority(buffer); return buffer.toString(); } /** * Gets the list of pages from the navigation which should be directly added to the XML sitemap.

* * @return the list of pages to add to the XML sitemap */ protected List getNavigationPages() { List result = new ArrayList(); CmsJspNavBuilder navBuilder = new CmsJspNavBuilder(m_siteGuestCms); try { CmsResource rootDefaultFile = m_siteGuestCms.readDefaultFile( m_siteGuestCms.getRequestContext().removeSiteRoot(m_baseFolderRootPath), CmsResourceFilter.DEFAULT); if (rootDefaultFile != null) { result.add(rootDefaultFile); } } catch (Exception e) { LOG.info(e.getLocalizedMessage(), e); } List navElements = navBuilder.getSiteNavigation(m_baseFolderSitePath, -1); for (CmsJspNavElement navElement : navElements) { CmsResource navResource = navElement.getResource(); if (navResource.isFolder()) { try { CmsResource defaultFile = m_guestCms.readDefaultFile(navResource, CmsResourceFilter.DEFAULT_FILES); if (defaultFile != null) { result.add(defaultFile); } else { LOG.warn("Could not get default file for " + navResource.getRootPath()); } } catch (CmsException e) { LOG.warn("Could not get default file for " + navResource.getRootPath()); } } else { result.add(navResource); } } return result; } /** * Gets the opening tag for the urlset element (can be overridden to add e.g. more namespaces.

* * @return the opening tag */ protected String getUrlSetOpenTag() { return ""; } /** * Writes the XML for an URL entry to a buffer.

* * @param entry the XML sitemap entry bean * * @return an XML representation of this bean */ protected String getXmlForEntry(CmsXmlSitemapUrlBean entry) { StringBuffer buffer = new StringBuffer(); buffer.append(""); buffer.append(getInnerXmlForEntry(entry)); buffer.append(""); return buffer.toString(); } /** * Checks whether the given alias is below the base folder.

* * @param alias the alias to check * * @return true if the alias is below the base folder */ protected boolean isAliasBelowBaseFolder(CmsAlias alias) { boolean isBelowBaseFolder = CmsStringUtil.isPrefixPath(m_baseFolderSitePath, alias.getAliasPath()); return isBelowBaseFolder; } /** * Replaces the protocol/host/port of a link with the ones from the configured server URI, if it's not empty.

* * @param link the link to change * * @return the changed link */ protected String replaceServerUri(String link) { return replaceServerUri(link, m_serverUrl); } /** * Adds the alias links for a given structure id to the results.

* * @param aliasStructureId the alias target structure id */ private void addAliasLinks(CmsUUID aliasStructureId) { try { CmsResource aliasTarget = m_guestCms.readResource(aliasStructureId); List properties = m_guestCms.readPropertyObjects(aliasTarget, true); double priority = getPriority(properties); String changeFrequency = getChangeFrequency(properties); Collection aliases = m_pageAliasesBelowBaseFolderByStructureId.get(aliasStructureId); for (CmsAlias alias : aliases) { String aliasLink = (m_siteRootLink + "/" + alias.getAliasPath()).replaceAll("(? * * @param type the type to filter by * * @return the list of resources with the given type * * @throws CmsException if something goes wrong */ private List getDetailResources(I_CmsResourceType type) throws CmsException { String typeName = type.getTypeName(); if (!m_detailResources.containsKey(typeName)) { List result = new ArrayList(); CmsResourceFilter filter = CmsResourceFilter.DEFAULT_FILES.addRequireType(type); List siteFiles = m_guestCms.readResources(m_siteRoot, filter, true); result.addAll(siteFiles); String shared = CmsFileUtil.removeTrailingSeparator(OpenCms.getSiteManager().getSharedFolder()); if (shared != null) { List sharedFiles = m_guestCms.readResources(shared, filter, true); result.addAll(sharedFiles); } m_detailResources.put(typeName, result); } return m_detailResources.get(typeName); } /** * Gets the locale to use for the given resource.

* * @param resource the resource * @param propertyList the properties of the resource * * @return the locale to use for the given resource */ private Locale getLocale(CmsResource resource, List propertyList) { return OpenCms.getLocaleManager().getDefaultLocale(m_guestCms, m_guestCms.getSitePath(resource)); } /** * Reads the data necessary for building the sitemap from the VFS and initializes the internal data structures.

* * @param baseSitePath the base site path * * @throws CmsException if something goes wrong */ private void initializeFileData(String baseSitePath) throws CmsException { m_resultMap.clear(); m_siteRootLink = OpenCms.getLinkManager().getOnlineLink(m_siteGuestCms, "/"); m_siteRootLink = CmsFileUtil.removeTrailingSeparator(m_siteRootLink); m_detailPageInfos = OpenCms.getADEManager().getAllDetailPages(m_guestCms); for (CmsDetailPageInfo detailPageInfo : m_detailPageInfos) { String type = detailPageInfo.getType(); String path = detailPageInfo.getUri(); path = CmsFileUtil.removeTrailingSeparator(path); m_detailTypesByPage.put(path, type); } List siteAliases = OpenCms.getAliasManager().getAliasesForSite( m_siteGuestCms, m_siteGuestCms.getRequestContext().getSiteRoot()); for (CmsAlias alias : siteAliases) { if (isAliasBelowBaseFolder(alias) && (alias.getMode() == CmsAliasMode.page)) { CmsUUID aliasId = alias.getStructureId(); m_pageAliasesBelowBaseFolderByStructureId.put(aliasId, alias); } } } /** * Checks whether the page/detail content combination is a valid detail page.

* * @param page the container page * @param locale the locale * @param detailRes the detail content resource * * @return true if this is a valid detail page combination */ private boolean isValidDetailPageCombination(CmsResource page, Locale locale, CmsResource detailRes) { return true; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy