// com.qwazr.crawler.web.WebCrawlItemImpl - Maven / Gradle / Ivy
// Show all versions of qwazr-crawlers - Show documentation
/*
* Copyright 2014-2020 Emmanuel Keller / QWAZR
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.qwazr.crawler.web;
import com.qwazr.crawler.common.CrawlItemBase;
import com.qwazr.crawler.web.driver.DriverInterface;
import java.net.URI;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Value holder describing the outcome of crawling one web URI: the redirect target, the
 * HTTP status code, the content type, the links that were found, the links that were
 * filtered, and the body.
 *
 * <p>All fields are final and are copied from a {@link Builder} at construction time. The
 * link collections are exposed through unmodifiable wrappers around the builder's own
 * collections (they are views, not copies), or as empty collections when the builder never
 * recorded any entry.
 *
 * <p>NOTE(review): the type arguments of the {@code extends} clauses (if
 * {@code CrawlItemBase} / {@code BaseBuilder} are generic) are not visible from this file
 * and were left untouched - confirm against the base classes.
 */
final class WebCrawlItemImpl extends CrawlItemBase implements WebCrawlItem {

    private final URI redirect;
    /** Number of times each link was recorded, in first-seen order. */
    private final Map<URI, AtomicInteger> links;
    /** Links recorded as filtered, in first-seen order. */
    private final Set<URI> filteredLinks;
    private final Integer statusCode;
    private final String contentType;
    private final DriverInterface.Body body;

    /**
     * Creates the item from the state accumulated in the given builder.
     *
     * @param builder the builder holding the values to copy; its link collections are
     *                wrapped, not copied
     */
    WebCrawlItemImpl(final Builder builder) {
        super(builder);
        this.redirect = builder.redirect;
        this.statusCode = builder.statusCode;
        this.contentType = builder.contentType;
        // A builder that never recorded a link keeps its collection null: expose an empty one instead.
        this.links = builder.links == null ?
                Collections.emptyMap() :
                Collections.unmodifiableMap(builder.links);
        this.filteredLinks = builder.filteredLinks == null ?
                Collections.emptySet() :
                Collections.unmodifiableSet(builder.filteredLinks);
        this.body = builder.body;
    }

    /** @return the redirect URI set on the builder, or {@code null} if none was set */
    @Override
    public URI getRedirect() {
        return redirect;
    }

    /** @return the status code set on the builder, or {@code null} if none was set */
    @Override
    public Integer getStatusCode() {
        return statusCode;
    }

    /** @return the content type set on the builder, or {@code null} if none was set */
    @Override
    public String getContentType() {
        return contentType;
    }

    /** @return an unmodifiable map of recorded links to their occurrence counters; never {@code null} */
    @Override
    public Map<URI, AtomicInteger> getLinks() {
        return links;
    }

    /** @return an unmodifiable set of the links recorded as filtered; never {@code null} */
    @Override
    public Set<URI> getFilteredLinks() {
        return filteredLinks;
    }

    /** @return the body set on the builder, or {@code null} if none was set */
    @Override
    public DriverInterface.Body getBody() {
        return body;
    }

    /**
     * Fluent builder accumulating the values of a {@link WebCrawlItemImpl}.
     *
     * <p>The link collections are created lazily, on the first recorded entry.
     */
    static final class Builder extends BaseBuilder {

        /** ASCII form of the URI passed to the constructor. */
        final String uriString;
        private URI redirect;
        private Integer statusCode;
        private String contentType;
        private LinkedHashMap<URI, AtomicInteger> links;
        private LinkedHashSet<URI> filteredLinks;
        private DriverInterface.Body body;

        /**
         * @param uri   the crawled URI, forwarded to the super constructor
         * @param depth the crawl depth, forwarded to the super constructor
         */
        protected Builder(URI uri, int depth) {
            super(uri, depth);
            this.uriString = uri.toASCIIString();
        }

        @Override
        protected Builder me() {
            return this;
        }

        /**
         * Sets the redirect target, resolved against {@code item}.
         *
         * <p>NOTE(review): {@code item} is not declared in this class; presumably it is the
         * crawled URI inherited from {@code BaseBuilder} - confirm.
         *
         * @param redirect the redirect URI, or {@code null} to clear it
         * @return this builder
         */
        public Builder redirect(URI redirect) {
            this.redirect = redirect == null ? null : item.resolve(redirect);
            return this;
        }

        /**
         * @param statusCode the status code to store (may be {@code null})
         * @return this builder
         */
        public Builder statusCode(Integer statusCode) {
            this.statusCode = statusCode;
            return this;
        }

        /**
         * @param contentType the content type to store (may be {@code null})
         * @return this builder
         */
        public Builder contentType(String contentType) {
            this.contentType = contentType;
            return this;
        }

        /**
         * Records one occurrence of a link; repeated calls with an equal URI increment
         * its counter.
         *
         * @param uri the link to record; {@code null} is ignored
         * @return this builder
         */
        public Builder link(URI uri) {
            if (uri == null) {
                return this;
            }
            if (links == null) {
                links = new LinkedHashMap<>();
            }
            links.computeIfAbsent(uri, key -> new AtomicInteger()).incrementAndGet();
            return this;
        }

        /**
         * Records a link as filtered.
         *
         * @param uri the link to record; {@code null} is ignored
         * @return this builder
         */
        public Builder filteredLink(URI uri) {
            if (uri == null) {
                return this;
            }
            if (filteredLinks == null) {
                filteredLinks = new LinkedHashSet<>();
            }
            filteredLinks.add(uri);
            return this;
        }

        /**
         * @param body the body to store (may be {@code null})
         * @return this builder
         */
        public Builder body(DriverInterface.Body body) {
            this.body = body;
            return this;
        }

        /** @return a new item built from the current state of this builder */
        WebCrawlItem build() {
            return new WebCrawlItemImpl(this);
        }
    }
}