
mServer.crawler.sender.orf.HtmlDocumentUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
package mServer.crawler.sender.orf;
import java.time.Duration;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
* A util class to collect useful JSoup HTML {@link Document} related methods.
*
* @author Nicklas Wiegandt (Nicklas2751)
* Mail: [email protected]
* Jabber: [email protected]
* Riot.im: nicklas2751:matrix.elaon.de
*
*/
public final class HtmlDocumentUtils {
private static final Logger LOG = LogManager.getLogger(HtmlDocumentUtils.class);
private static final String DAUER_REGEX_PATTERN = "\\d+:\\d+";
private HtmlDocumentUtils() {
super();
}
/**
* Searches for the given selector if found it returns the text of the first result.
*
* @param aElementSelector The selector for the searched element.
* @param aDocument The document in which will be searched.
* @return A {@link Optional} containing the found element or else an empty {@link Optional}.
*/
public static Optional getElementAttributeString(final String aElementSelector,
final String aAttributeKey, final Document aDocument) {
final Elements selected = aDocument.select(aElementSelector);
if (!selected.isEmpty() && selected.first().hasAttr(aAttributeKey)) {
return Optional.of(selected.first().attr(aAttributeKey));
}
return Optional.empty();
}
/**
* Searches for the given selector if found it returns the text of the first result.
*
* @param aElementSelector The selector for the searched element.
* @param aDocument The document in which will be searched.
* @return A {@link Optional} containing the found element or else an empty {@link Optional}.
*/
public static Optional getElementString(final String aElementSelector,
final Document aDocument) {
final Elements selected = aDocument.select(aElementSelector);
if (!selected.isEmpty()) {
return Optional.of(selected.first().text());
}
return Optional.empty();
}
public static Optional parseDuration(final Optional aDauerText) {
return aDauerText.isPresent() ? parseDuration(aDauerText.get()) : Optional.empty();
}
public static Optional parseDuration(final String aDauerText) {
final Matcher dauerMatcher = Pattern.compile(DAUER_REGEX_PATTERN).matcher(aDauerText);
if (dauerMatcher.find()) {
final String[] dauerSplits = dauerMatcher.group().split(":");
if (2 == dauerSplits.length) {
try {
return Optional.of(Duration.ofMinutes(Long.parseLong(dauerSplits[0]))
.withSeconds(Long.parseLong(dauerSplits[1])));
} catch (final NumberFormatException numberFormatException) {
LOG.error("A duration for can't be parsed.", numberFormatException);
}
}
}
return Optional.empty();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy