crawlercommons.sitemaps.sax.extension.LinksHandler Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of crawler-commons Show documentation
Show all versions of crawler-commons Show documentation
crawler-commons is a set of reusable Java components that implement
functionality common to any web crawler.
The newest version!
/**
* Copyright 2018 Crawler-Commons
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package crawlercommons.sitemaps.sax.extension;
import java.net.URL;
import java.util.Map;
import java.util.TreeMap;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import crawlercommons.sitemaps.extension.LinkAttributes;
/**
 * Handle SAX events for {@code link} elements of a sitemap: every
 * {@code link} element with a usable {@code href} attribute is recorded as a
 * {@link LinkAttributes} entry, carrying all of its other attributes as
 * parameters.
 *
 * <p>
 * NOTE(review): presumably these are the {@code xhtml:link} annotations used
 * for alternate-language URLs - confirm against the namespace this handler is
 * registered for.
 */
public class LinksHandler extends ExtensionHandler {

    /** Name of the attribute that holds the link target. */
    private static final String HREF_ATTRIBUTE = "href";

    /**
     * Create a handler and bring it into its initial state by calling the
     * inherited {@code reset()}.
     */
    public LinksHandler() {
        reset();
    }

    /**
     * Record a {@code link} element; elements with any other local name are
     * ignored.
     *
     * <p>
     * A link is only recorded if its {@code href} attribute is present, not
     * blank after trimming, and accepted by the inherited
     * {@code getURLValue} (which apparently returns {@code null} for values
     * it cannot turn into a URL). All attributes other than {@code href} are
     * stored, with trimmed values, in a map sorted by attribute local name.
     *
     * @param uri
     *            namespace URI of the element (not used here)
     * @param localName
     *            local name of the element, compared against {@code "link"}
     * @param qName
     *            qualified name of the element (not used here)
     * @param attributes
     *            attributes attached to the element
     * @throws SAXException
     *             declared to match the overridden handler method
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (!"link".equals(localName)) {
            return;
        }

        String href = attributes.getValue(HREF_ATTRIBUTE);
        if (href == null) {
            return;
        }
        href = href.trim();
        if (href.isEmpty()) {
            return;
        }

        URL url = getURLValue(href);
        if (url == null) {
            return;
        }

        // TreeMap keeps the parameters in a deterministic (sorted) order
        Map<String, String> params = new TreeMap<>();
        for (int i = 0; i < attributes.getLength(); i++) {
            String name = attributes.getLocalName(i);
            // href is already represented by the link URL itself
            if (!HREF_ATTRIBUTE.equals(name)) {
                params.put(name, attributes.getValue(i).trim());
            }
        }

        LinkAttributes link = new LinkAttributes(url);
        link.setParams(params);
        // "this." is required: the parameter shadows the inherited collection
        this.attributes.add(link);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy