All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.panforge.robotstxt.RobotsTxt Maven / Gradle / Ivy

There is a newer version: 1.4.6
Show newest version
/*
 * Copyright 2016 Piotr Andzel.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.panforge.robotstxt;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.List;

/**
 * Represents access policy from a single "robots.txt" file.
 * <p>
 * Use {@link RobotsTxt#read(java.io.InputStream)} to read and parse robots.txt.
 */
public interface RobotsTxt {

  /**
   * Checks access to the given HTTP path.
   *
   * @param userAgent user agent to be used to evaluate authorization
   * @param path path to access
   * @return <code>true</code> if there is an access to the requested path
   */
  boolean query(String userAgent, String path);

  /**
   * Asks for grant.
   * <p>
   * The default implementation does not consult {@link #query}: it always
   * returns a permissive grant (access allowed, empty clause, no user agents,
   * no crawl delay). Implementations are expected to override it.
   *
   * @param userAgent user agent to be used to evaluate authorization
   * @param path path to access
   * @return grant (never <code>null</code>)
   */
  default Grant ask(String userAgent, String path) {
    // NOTE(review): this fallback grants access unconditionally, so it may
    // disagree with query(userAgent, path) - confirm that is intended.
    return new Grant() {
      @Override
      public boolean hasAccess() {
        return true;
      }

      @Override
      public String getClause() {
        return "";
      }

      // NOTE(review): raw List mirrors the declaration visible here; the
      // element type is presumably String - confirm against Grant.
      @Override
      public List getUserAgents() {
        // Generic factory instead of the raw Collections.EMPTY_LIST constant;
        // it yields the same immutable empty list.
        return Collections.emptyList();
      }

      @Override
      public Integer getCrawlDelay() {
        // No delay is declared by the fallback grant.
        return null;
      }
    };
  }

  /**
   * Gets crawl delay.
   *
   * @return crawl delay in seconds or 0 if no delay declared
   * @deprecated use {@link #ask} to get {@link Grant} from which
   *             {@link Grant#getCrawlDelay} might be invoked.
   */
  @Deprecated
  Integer getCrawlDelay();

  /**
   * Gets host.
   *
   * @return host or <code>null</code> if no host declared
   */
  String getHost();

  /**
   * Gets site maps.
   *
   * @return list of site map URLs
   */
  // NOTE(review): raw List - element type is not declared here (presumably String); confirm.
  List getSitemaps();

  /**
   * Gets a list of disallowed resources.
   *
   * @param userAgent user agent
   * @return list of disallowed resources
   */
  // NOTE(review): raw List - element type is not declared here (presumably String); confirm.
  List getDisallowList(String userAgent);

  /**
   * Reads and parses robots.txt content from the given input stream.
   * <p>
   * Delegates to a fresh {@code RobotsTxtReader}.
   *
   * @param input stream of content
   * @return parsed robots.txt object
   * @throws IOException if unable to read content.
   */
  static RobotsTxt read(InputStream input) throws IOException {
    RobotsTxtReader reader = new RobotsTxtReader();
    return reader.readRobotsTxt(input);
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy