All downloads are free. Search and download functionality uses the official Maven repository.

com.panforge.robotstxt.Group Maven / Gradle / Ivy

There is a newer version: 1.4.6
Show newest version
/*
 * Copyright 2016 Piotr Andzel.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.panforge.robotstxt;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Robots.txt group of directives: a set of user agents together with the
 * access list and optional crawl delay recorded for them.
 */
class Group {
  /** Explicitly named user agents; the wildcard {@code "*"} is tracked by {@code anyAgent} instead. */
  private final List<String> userAgents = new ArrayList<>();
  private final AccessList accessList = new AccessList();
  /** Set once {@code "*"} has been added as a user agent. */
  private boolean anyAgent;
  /** Crawl delay, or {@code null} when none has been set. */
  private Integer crawlDelay;

  /**
   * Checks if this group applies to any agent.
   * @return {@code true} if the wildcard user agent {@code "*"} has been added
   */
  public boolean isAnyAgent() {
    return anyAgent;
  }

  /**
   * Checks if group is exact in terms of user agents.
   * <p>
   * Two wildcard groups are exact; a wildcard group and a non-wildcard group are not;
   * otherwise the groups are exact when they share at least one user agent name,
   * compared case-insensitively.
   * @param group group to compare
   * @return {@code true} if sections are exact.
   */
  public boolean isExact(Group group) {
    if (isAnyAgent() != group.isAnyAgent()) {
      // exactly one of the two groups is a wildcard group
      return false;
    }
    if (isAnyAgent()) {
      return true;
    }

    return group.userAgents.stream()
        .anyMatch(other -> userAgents.stream().anyMatch(own -> own.equalsIgnoreCase(other)));
  }

  /**
   * Adds user agent.
   * <p>
   * The wildcard {@code "*"} is not stored in the list of user agents; it marks the
   * group as applicable to any agent instead.
   * @param userAgent user agent name, or {@code "*"} for any agent; must not be {@code null}
   */
  public void addUserAgent(String userAgent) {
    if (userAgent.equals("*")) {
      anyAgent = true;
    } else {
      this.userAgents.add(userAgent);
    }
  }

  /**
   * Gets access list.
   * @return access list
   */
  public AccessList getAccessList() {
    return accessList;
  }

  /**
   * Adds access.
   * @param access access
   */
  public void addAccess(Access access) {
    this.accessList.addAccess(access);
  }

  /**
   * Gets user agents.
   * @return the live list of explicitly named user agents (the wildcard is not included)
   */
  public List<String> getUserAgents() {
    return userAgents;
  }

  /**
   * Select any access matching input path.
   * @param userAgent user agent
   * @param relativePath path to test
   * @param matchingStrategy matcher
   * @return list of matching elements; empty if {@code relativePath} is {@code null}
   *         or this group does not apply to {@code userAgent}
   */
  public List<Access> select(String userAgent, String relativePath, MatchingStrategy matchingStrategy) {
    // matchUserAgent already rejects a null user agent unless this is a wildcard group.
    if (relativePath == null || !matchUserAgent(userAgent)) {
      return Collections.emptyList();
    }
    // NOTE(review): earlier documentation listed a SelectionException here; nothing in this
    // class throws it - confirm whether AccessList.select can.
    return accessList.select(relativePath, matchingStrategy).stream().collect(Collectors.toList());
  }

  /**
   * Checks if the section is applicable for a given user agent.
   * @param userAgent requested user agent
   * @return true if the section is applicable for the requested user agent
   */
  public boolean matchUserAgent(String userAgent) {
    if (anyAgent) {
      return true;
    }
    if (userAgent == null) {
      return false;
    }
    return userAgents.stream().anyMatch(agent -> agent.equalsIgnoreCase(userAgent));
  }

  /**
   * Sets crawl delay.
   *
   * @param crawlDelay crawl delay, or {@code null} to clear it
   */
  public void setCrawlDelay(Integer crawlDelay) {
    this.crawlDelay = crawlDelay;
  }

  /**
   * Gets crawl delay.
   * @return crawl delay, or {@code null} if none has been set
   */
  public Integer getCrawlDelay() {
    return crawlDelay;
  }

  /**
   * Renders the group as text: the {@code User-agent} lines (wildcard first), then the
   * access list, then the {@code Crawl-delay} line if a delay is set.
   * @return textual form of the group
   */
  @Override
  public String toString() {
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);

    if (anyAgent) {
      pw.println("User-agent: *");
    }

    userAgents.forEach(userAgent -> pw.println("User-agent: " + userAgent));

    pw.println(accessList);

    if (crawlDelay != null) {
      // Plain concatenation keeps the digits ASCII; "%d" would localize them
      // under default locales that use a different zero digit.
      pw.println("Crawl-delay: " + crawlDelay);
    }

    pw.flush();

    return sw.toString();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy