All downloads are free. Search and download functionality uses the official Maven repository.

de.tblsoft.solr.crawl.attr.DlDdDtAttributeExtractor Maven / Gradle / Ivy

package de.tblsoft.solr.crawl.attr;

import de.tblsoft.solr.crawl.JSoupAnalyzer;
import org.jsoup.nodes.Element;

import java.util.ArrayList;
import java.util.List;

/**
 * {@link AttributeExtractor} that reads attributes from HTML definition lists.
 *
 * <p>For every CSS selector supplied by the {@link Attributes} argument, the
 * analyzer's document is queried for matching elements, and every {@code dl}
 * element found by a further {@code select("dl")} on that result produces one
 * {@link Attribute}: its name is the text of the {@code dt} selection and its
 * value is the text of the {@code dd} selection.
 *
 * <p>NOTE(review): jsoup's {@code Elements.text()} is documented to combine the
 * text of all matched elements, so a {@code dl} holding several
 * {@code dt}/{@code dd} pairs presumably yields a single attribute with the
 * terms and the descriptions each merged into one string - confirm whether
 * that is intended before relying on one attribute per pair.
 */
public class DlDdDtAttributeExtractor implements AttributeExtractor {

    /** Source of the parsed document that the selectors are evaluated against. */
    private final JSoupAnalyzer jSoupAnalyzer;

    /**
     * Creates an extractor bound to the given analyzer.
     *
     * @param jSoupAnalyzer analyzer whose {@code getJsoupDocument()} result is queried
     *                      on every call to {@link #extractAttributes(Attributes)}
     */
    public DlDdDtAttributeExtractor(JSoupAnalyzer jSoupAnalyzer) {
        this.jSoupAnalyzer = jSoupAnalyzer;
    }

    /**
     * Builds one {@link Attribute} per {@code dl} element found under each configured selector.
     *
     * @param attributes configuration object; only {@code getSelector()} is read here
     * @return a new mutable list with the extracted attributes, in selector order and then
     *         in the order jsoup returned the {@code dl} elements; empty if nothing matched
     */
    @Override
    public List<Attribute> extractAttributes(Attributes attributes) {
        // Typed list instead of a raw List so the compiler checks what is added.
        List<Attribute> extracted = new ArrayList<>();
        for (String selector : attributes.getSelector()) {
            // The document is fetched per selector, exactly as before, so a changing
            // analyzer state between selectors is observed the same way.
            for (Element definitionList : jSoupAnalyzer.getJsoupDocument().select(selector).select("dl")) {
                String term = definitionList.select("dt").text();
                String description = definitionList.select("dd").text();

                Attribute attribute = new Attribute();
                attribute.setName(term);
                attribute.setValue(description);
                extracted.add(attribute);
            }
        }
        return extracted;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy