All downloads are free. The search and download functionality uses the official Maven repository.

com.phloc.web.useragent.spider.WebSpiderManager Maven / Gradle / Ivy

There is a newer version: 6.0.0
Show newest version
/**
 * Copyright (C) 2006-2015 phloc systems
 * http://www.phloc.com
 * office[at]phloc[dot]com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.phloc.web.useragent.spider;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.phloc.commons.annotations.ReturnsImmutableObject;
import com.phloc.commons.collections.ContainerHelper;
import com.phloc.commons.io.resource.ClassPathResource;
import com.phloc.commons.microdom.IMicroDocument;
import com.phloc.commons.microdom.IMicroElement;
import com.phloc.commons.microdom.reader.XMLListHandler;
import com.phloc.commons.microdom.serialize.MicroReader;
import com.phloc.commons.microdom.utils.MicroUtils;

/**
 * Provides a list of known web spiders.
 * <p>
 * The registry is filled exactly once, in the private constructor, from three
 * XML code lists on the class path. Every key of the internal map is the
 * spider identifier converted to lower case, so that user agent strings can be
 * matched case insensitively.
 * 
 * @author Philip Helger
 */
@NotThreadSafe
public final class WebSpiderManager
{
  /**
   * Holder class for the singleton. The instance is created when this nested
   * class is initialized, i.e. on the first call to
   * {@link WebSpiderManager#getInstance()}.
   */
  private static final class SingletonHolder
  {
    static final WebSpiderManager s_aInstance = new WebSpiderManager ();
  }

  private static final Logger s_aLogger = LoggerFactory.getLogger (WebSpiderManager.class);

  /** Maps the unified (lower case) spider ID to the spider description. */
  private final Map <String, WebSpiderInfo> m_aMap = new HashMap <String, WebSpiderInfo> ();

  /**
   * Load all code lists. The order matters: the first list always stores its
   * entries, whereas the second and third list only add spiders whose unified
   * ID is not yet contained.
   */
  private WebSpiderManager ()
  {
    _readSpiderList ("codelists/spiderlist.xml");
    _readSearchSpiders ("codelists/spiders_vbulletin.xml");
    _readListPhloc ("codelists/spiderlist-phloc.xml");
  }

  /**
   * Create the map key for a spider ID.
   * 
   * @param sID
   *        The spider ID or user agent string. May not be <code>null</code>.
   * @return The passed string in lower case, using the fixed locale
   *         {@link Locale#US} so the result does not depend on the system
   *         locale.
   */
  @Nonnull
  private static String _getUnifiedID (@Nonnull final String sID)
  {
    return sID.toLowerCase (Locale.US);
  }

  /**
   * Read a micro XML document from the class path and return its root element.
   * 
   * @param sPath
   *        The class path relative path of the XML resource.
   * @return The document element. Never <code>null</code>.
   * @throws IllegalStateException
   *         If no document or no document element is available.
   */
  @Nonnull
  private static IMicroElement _readRootElement (@Nonnull final String sPath)
  {
    final IMicroDocument aDoc = MicroReader.readMicroXML (new ClassPathResource (sPath));
    // NOTE(review): the reader is assumed to hand back null for a missing or
    // malformed resource - fail with a descriptive message (as _readListPhloc
    // does) instead of a bare NullPointerException
    final IMicroElement eRoot = aDoc == null ? null : aDoc.getDocumentElement ();
    if (eRoot == null)
      throw new IllegalStateException ("Failed to read spider list from " + sPath);
    return eRoot;
  }

  /**
   * Read the main spider list. Each <code>spider</code> child element provides
   * the ID (attribute <code>ident</code>) and the child elements
   * <code>name</code>, <code>type</code> and <code>info</code>. Entries are
   * stored unconditionally, so a later element with the same unified ID
   * replaces an earlier one.
   * 
   * @param sPath
   *        The class path relative path of the XML resource.
   */
  private void _readSpiderList (final String sPath)
  {
    for (final IMicroElement eSpider : _readRootElement (sPath).getAllChildElements ("spider"))
    {
      final WebSpiderInfo aSpider = new WebSpiderInfo (eSpider.getAttribute ("ident"));
      aSpider.setName (MicroUtils.getChildTextContent (eSpider, "name"));
      final String sType = MicroUtils.getChildTextContent (eSpider, "type");
      final EWebSpiderType eType = EWebSpiderType.getFromIDOrNull (sType);
      // A type text that cannot be resolved is only reported; the spider is
      // still registered, with the unresolved (null) type
      if (sType != null && eType == null)
        s_aLogger.warn ("Unknown web spider type '{}'", sType);
      aSpider.setType (eType);
      aSpider.setInfo (MicroUtils.getChildTextContent (eSpider, "info"));
      m_aMap.put (_getUnifiedID (aSpider.getID ()), aSpider);
    }
  }

  /**
   * Read the search spider list. Only spiders whose unified ID is not yet
   * registered are added; for those only the ID and the name are set.
   * 
   * @param sPath
   *        The class path relative path of the XML resource.
   */
  private void _readSearchSpiders (final String sPath)
  {
    for (final IMicroElement eSpider : _readRootElement (sPath).getAllChildElements ("spider"))
    {
      final String sID = eSpider.getAttribute ("ident");
      // Existing entries win - they are never overwritten from this list
      if (m_aMap.get (_getUnifiedID (sID)) == null)
      {
        final WebSpiderInfo aSpider = new WebSpiderInfo (sID);
        aSpider.setName (MicroUtils.getChildTextContent (eSpider, "name"));
        m_aMap.put (_getUnifiedID (aSpider.getID ()), aSpider);
      }
    }
  }

  /**
   * Read the plain list of additional spider names. Each entry that is not yet
   * registered is added with the lower case text as ID and the original text
   * as name.
   * 
   * @param sPath
   *        The class path relative path of the XML list resource.
   * @throws IllegalStateException
   *         If reading the list failed.
   */
  private void _readListPhloc (final String sPath)
  {
    final List <String> aList = new ArrayList <String> ();
    if (XMLListHandler.readList (new ClassPathResource (sPath), aList).isFailure ())
      throw new IllegalStateException ("Failed to read spiderlist-phloc from " + sPath);
    for (final String sSpider : aList)
    {
      final String sID = _getUnifiedID (sSpider);
      if (!m_aMap.containsKey (sID))
      {
        final WebSpiderInfo aSpider = new WebSpiderInfo (sID);
        aSpider.setName (sSpider);
        m_aMap.put (sID, aSpider);
      }
    }
  }

  /**
   * @return The singleton instance of this class.
   */
  public static WebSpiderManager getInstance ()
  {
    return SingletonHolder.s_aInstance;
  }

  /**
   * @return An unmodifiable collection with all registered spiders. Never
   *         <code>null</code>.
   */
  @Nonnull
  @ReturnsImmutableObject
  public Collection <WebSpiderInfo> getAllKnownSpiders ()
  {
    return ContainerHelper.makeUnmodifiable (m_aMap.values ());
  }

  /**
   * Find the spider matching the passed user agent string. A spider matches if
   * its lower case ID is contained anywhere in the lower case user agent. If
   * more than one registered ID is contained, the result is whichever entry
   * the underlying {@link HashMap} iterates first, which is not a specified
   * order.
   * 
   * @param sUserAgent
   *        The user agent string to check. May not be <code>null</code>.
   * @return <code>null</code> if no registered spider ID is contained in the
   *         user agent.
   */
  @Nullable
  public WebSpiderInfo getWebSpiderFromUserAgent (@Nonnull final String sUserAgent)
  {
    // Search case insensitive (key set is lowercase!)
    final String sUserAgentLC = _getUnifiedID (sUserAgent);
    for (final Map.Entry <String, WebSpiderInfo> aEntry : m_aMap.entrySet ())
      if (sUserAgentLC.contains (aEntry.getKey ()))
        return aEntry.getValue ();

    return null;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy