// com.panforge.robotstxt.RobotsTxt Maven / Gradle / Ivy
/*
* Copyright 2016 Piotr Andzel.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.panforge.robotstxt;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.List;
/**
 * Represents access policy from a single "robots.txt" file.
 *
 * <p>Use {@link RobotsTxt#read(java.io.InputStream)} to read and parse robots.txt.
 */
public interface RobotsTxt {

  /**
   * Checks access to the given HTTP path.
   *
   * @param userAgent user agent to be used to evaluate authorization
   * @param path path to access
   * @return {@code true} if there is an access to the requested path
   */
  boolean query(String userAgent, String path);

  /**
   * Asks for grant.
   *
   * <p>The default implementation ignores both arguments and always returns a
   * permissive grant: access is allowed, the clause is an empty string, the list
   * of user agents is empty and the crawl delay is {@code null}.
   *
   * @param userAgent user agent to be used to evaluate authorization
   * @param path path to access
   * @return grant (never {@code null})
   */
  default Grant ask(String userAgent, String path) {
    return new Grant() {
      @Override
      public boolean hasAccess() {
        return true;
      }

      @Override
      public String getClause() {
        return "";
      }

      // NOTE(review): the raw List return type is kept as declared; the element
      // type expected by Grant is not visible from this file.
      @Override
      public List getUserAgents() {
        // Factory method instead of the raw-typed Collections.EMPTY_LIST constant.
        return Collections.emptyList();
      }

      @Override
      public Integer getCrawlDelay() {
        // No crawl delay is associated with the default grant.
        return null;
      }
    };
  }

  /**
   * Gets crawl delay.
   *
   * @return crawl delay in seconds or {@code 0} if no delay declared
   * @deprecated use {@link #ask} to get {@link Grant} from which {@link Grant#getCrawlDelay} might be invoked.
   */
  @Deprecated
  Integer getCrawlDelay();

  /**
   * Gets host.
   *
   * @return host or {@code null} if no host declared
   */
  String getHost();

  /**
   * Gets site maps.
   *
   * @return list of site map URLs.
   */
  List getSitemaps();

  /**
   * Gets a list of disallowed resources.
   *
   * @param userAgent user agent
   * @return list of disallowed resources
   */
  List getDisallowList(String userAgent);

  /**
   * Reads robots.txt content from the given input stream.
   *
   * <p>The work is delegated to a freshly created {@code RobotsTxtReader}.
   *
   * @param input stream of content
   * @return parsed robots.txt object
   * @throws IOException if unable to read content.
   */
  static RobotsTxt read(InputStream input) throws IOException {
    return new RobotsTxtReader().readRobotsTxt(input);
  }
}