All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.saucesubfresh.starter.crawler.parser.provider.LinksSelector Maven / Gradle / Ivy

/*
 * Copyright © the webmagic project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.saucesubfresh.starter.crawler.parser.provider;

import com.saucesubfresh.starter.crawler.parser.BaseElementSelector;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

/**
 * Links selector based on jsoup. Use absolute url. 
* * @see ... */ public class LinksSelector extends BaseElementSelector { @Override public String select(Element element) { throw new UnsupportedOperationException(); } @Override public List selectList(Element element) { Elements elements = element.select("a"); List links = new ArrayList<>(elements.size()); for (Element element0 : elements) { if (StringUtils.isNotBlank(element0.baseUri())) { links.add(element0.attr("abs:href")); } else { links.add(element0.attr("href")); } } return links; } @Override public Element selectElement(Element element) { throw new UnsupportedOperationException(); } @Override public List selectElements(Element element) { throw new UnsupportedOperationException(); } @Override public boolean hasAttribute() { return true; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy