All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.qwazr.crawler.web.WebCrawlItemImpl Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2014-2020 Emmanuel Keller / QWAZR
 * 

* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*

*     http://www.apache.org/licenses/LICENSE-2.0
*

* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.qwazr.crawler.web;

import com.qwazr.crawler.common.CrawlItemBase;
import com.qwazr.crawler.web.driver.DriverInterface;

import java.net.URI;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * {@link WebCrawlItem} implementation populated from a {@link Builder}.
 * <p>
 * The link collections are exposed as unmodifiable views. They wrap the
 * builder's own collections rather than copying them, so a builder that keeps
 * being mutated after {@link Builder#build()} remains visible through the item.
 * <p>
 * NOTE(review): the type arguments of {@code CrawlItemBase} and
 * {@code BaseBuilder} are not visible in this copy of the file and are left as
 * written; {@code item.resolve(...)} below relies on the inherited {@code item}
 * being a {@link URI} - confirm against the base class declarations.
 */
final class WebCrawlItemImpl extends CrawlItemBase implements WebCrawlItem {

    private final URI redirect;
    private final Map<URI, AtomicInteger> links;
    private final Set<URI> filteredLinks;
    private final Integer statusCode;
    private final String contentType;
    private final DriverInterface.Body body;

    /**
     * Copies the builder's state into this item.
     *
     * @param builder the builder holding the collected values; link collections
     *                that were never populated become empty collections
     */
    WebCrawlItemImpl(final Builder builder) {
        super(builder);
        this.redirect = builder.redirect;
        this.statusCode = builder.statusCode;
        this.contentType = builder.contentType;
        this.links = builder.links == null
                ? Collections.emptyMap()
                : Collections.unmodifiableMap(builder.links);
        this.filteredLinks = builder.filteredLinks == null
                ? Collections.emptySet()
                : Collections.unmodifiableSet(builder.filteredLinks);
        this.body = builder.body;
    }

    /** @return the redirect target, or {@code null} if none was set */
    @Override
    public URI getRedirect() {
        return redirect;
    }

    /** @return the status code, or {@code null} if none was set */
    @Override
    public Integer getStatusCode() {
        return statusCode;
    }

    /** @return the content type, or {@code null} if none was set */
    @Override
    public String getContentType() {
        return contentType;
    }

    /**
     * @return an unmodifiable map from each recorded link to the number of
     * times it was recorded, in first-seen order; never {@code null}
     */
    @Override
    public Map<URI, AtomicInteger> getLinks() {
        return links;
    }

    /** @return an unmodifiable set of the filtered links, in insertion order; never {@code null} */
    @Override
    public Set<URI> getFilteredLinks() {
        return filteredLinks;
    }

    /** @return the body, or {@code null} if none was set */
    @Override
    public DriverInterface.Body getBody() {
        return body;
    }

    /**
     * Fluent builder for {@link WebCrawlItemImpl}. The link collections are
     * allocated lazily, on the first non-null link recorded.
     */
    static final class Builder extends BaseBuilder {

        /** ASCII form of the crawled URI, computed once at construction. */
        final String uriString;

        private URI redirect;
        private Integer statusCode;
        private String contentType;
        private LinkedHashMap<URI, AtomicInteger> links;
        private LinkedHashSet<URI> filteredLinks;
        private DriverInterface.Body body;

        /**
         * @param uri   the crawled URI; must not be {@code null}
         * @param depth the crawl depth, passed through to the base builder
         */
        protected Builder(URI uri, int depth) {
            super(uri, depth);
            this.uriString = uri.toASCIIString();
        }

        @Override
        protected Builder me() {
            return this;
        }

        /**
         * Sets the redirect target, resolved against the inherited {@code item}.
         *
         * @param redirect the redirect location; {@code null} clears the redirect
         * @return this builder
         */
        public Builder redirect(URI redirect) {
            this.redirect = redirect == null ? null : item.resolve(redirect);
            return this;
        }

        /**
         * @param statusCode the status code to report, may be {@code null}
         * @return this builder
         */
        public Builder statusCode(Integer statusCode) {
            this.statusCode = statusCode;
            return this;
        }

        /**
         * @param contentType the content type to report, may be {@code null}
         * @return this builder
         */
        public Builder contentType(String contentType) {
            this.contentType = contentType;
            return this;
        }

        /**
         * Records one occurrence of a link, incrementing its counter.
         *
         * @param uri the link; {@code null} is ignored
         * @return this builder
         */
        public Builder link(URI uri) {
            if (uri == null) {
                return this;
            }
            if (links == null) {
                links = new LinkedHashMap<>();
            }
            links.computeIfAbsent(uri, u -> new AtomicInteger()).incrementAndGet();
            return this;
        }

        /**
         * Records a filtered link; duplicates are kept once.
         *
         * @param uri the link; {@code null} is ignored
         * @return this builder
         */
        public Builder filteredLink(URI uri) {
            if (uri == null) {
                return this;
            }
            if (filteredLinks == null) {
                filteredLinks = new LinkedHashSet<>();
            }
            filteredLinks.add(uri);
            return this;
        }

        /**
         * @param body the body to attach, may be {@code null}
         * @return this builder
         */
        public Builder body(DriverInterface.Body body) {
            this.body = body;
            return this;
        }

        /** @return a new item reflecting this builder's current state */
        WebCrawlItem build() {
            return new WebCrawlItemImpl(this);
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy