All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.flamenk.article.manipulators.OnClickNodeManipulator Maven / Gradle / Ivy

Go to download

Flamenk is an article extractor: it extracts the article present in a web page.

The newest version!
/*
 * Copyright 2013 Torindo Nesci.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.flamenk.article.manipulators;

import com.flamenk.TagConstants;
import com.flamenk.article.Article;
import com.flamenk.dom.HtmlDocument;
import com.flamenk.dom.HtmlNode;
import static com.flamenk.dom.HtmlNodeDisplayMode.NOT_DISPLAY;
import com.flamenk.dom.HtmlNodeRange;
import com.flamenk.util.Consumer;
import static com.flamenk.util.NodeUtil.hasName;
import com.google.common.base.Optional;

/**
 * A manipulator of a and span nodes
 * having the onclick properties.
 * The a and span with the onclick
 * are removed from the output if:
 * 
    *
  • Are before the max range.
  • *
  • Are after the max range.
  • *
* * If they are after the max range, the subsequent nodes are removed as well. * If they are in a list, the list is removed as well including the list header. * *

This implementation is Thread Safe. * @author Torindo Nesci */ public final class OnClickNodeManipulator implements HtmlNodeManipulator { private static final OnClickNodeManipulator INSTANCE = new OnClickNodeManipulator(); private static final Consumer HIDE_NODE_CONSUMER = new Consumer() { public void consume(HtmlNode node) { node.setDisplayMode(NOT_DISPLAY); } }; private static final String ONCLICK = "onclick"; private OnClickNodeManipulator() { } /** * Return the singleton instance of {@link OnClickNodeManipulator}. * * @return The instance of {@link OnClickNodeManipulator}. */ public static OnClickNodeManipulator getInstance() { return INSTANCE; } @Override public void manipulate(HtmlNode node, Article article, HtmlNodeRange maxRankRange) { if (!hasName(node, TagConstants.A) && !hasName(node, TagConstants.SPAN)) { return; } if (!node.hasAttribute(ONCLICK) || maxRankRange.isNodeInRange(node)) { return; } Optional listNode = ListUtil.listNodeFor(node); if (!listNode.isPresent() && maxRankRange.isNodeBeforeRange(node)) { node.setDisplayMode(NOT_DISPLAY); } else if (!listNode.isPresent() && maxRankRange.isNodeAfterRange(node)) { HtmlDocument htmlDoc = node.getHtmlDocument(); HtmlNodeRange range = new HtmlNodeRange().withIncludedStartNode(node); htmlDoc.consumeNodesInRange(range, HIDE_NODE_CONSUMER); } else if (listNode.isPresent() && maxRankRange.isNodeBeforeRange(node)) { listNode.get().setDisplayMode(NOT_DISPLAY); ListUtil.hideListHeader(listNode.get()); } else if (listNode.isPresent() && maxRankRange.isNodeAfterRange(node)) { HtmlDocument htmlDoc = node.getHtmlDocument(); HtmlNodeRange range = new HtmlNodeRange() .withIncludedStartNode(listNode.get()); htmlDoc.consumeNodesInRange(range, HIDE_NODE_CONSUMER); ListUtil.hideListHeader(listNode.get()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy