Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
*
*
* @param property an entry from a ValueMap
* @param htmlFields list of properties containing html
* @return stream containing extracted references
*/
static Stream<String> collectPaths(Map.Entry<String, Object> property, Set<String> htmlFields) {
    // Only String and String[] values can carry references; every other
    // value type (including null) contributes an empty stream.
    Object value = property.getValue();
    Stream<String> stream;
    if (value instanceof String[]) {
        stream = Arrays.stream((String[]) value);
    } else if (value instanceof String) {
        stream = Stream.of((String) value);
    } else {
        stream = Stream.empty();
    }

    // Plain (non-html) properties: the raw values are the candidate paths.
    if (!htmlFields.contains(property.getKey())) {
        return stream;
    }

    // Html properties: replace each value by the URIs of the links found in it.
    return stream.flatMap(html -> {
        try {
            // parse html and extract links via underlying tagsoup library
            LinkContentHandler linkHandler = new LinkContentHandler();
            HtmlParser parser = new HtmlParser();
            parser.parse(
                    new ByteArrayInputStream(html.getBytes(java.nio.charset.StandardCharsets.UTF_8)),
                    linkHandler, new Metadata(), new ParseContext());
            return linkHandler.getLinks().stream().map(Link::getUri);
        } catch (Exception ignored) {
            // Best effort: a value that cannot be parsed yields no links
            // instead of failing the whole stream.
            return Stream.empty();
        }
    });
}
/**
* Collect broken references from properties of the given resource
*
* @param resource the resource to check
* @param regex regex to to detect properties containing references. Set from @FormField
* @param skipList properties to ignore. Set from @FormField
* @param htmlFields field containing html .
* @return broken references keyed by property. The value is a List because a property can contain multiple links,
* e.g. if it is multivalued or it is html containing multiple links.
*/
static Map> collectBrokenReferences(Resource resource, Pattern regex, Set skipList, Set htmlFields) {
return resource.getValueMap().entrySet().stream()
.filter(entry -> !skipList.contains(entry.getKey()))
.collect(Collectors.toMap(
entry -> resource.getPath() + "/" + entry.getKey(),
entry -> {
List brokenPaths = collectPaths(entry, htmlFields)
.filter(href -> regex.matcher(href).matches())
.filter(path -> ResourceUtil.isNonExistingResource(resource.getResourceResolver().resolve(path)))
.collect(Collectors.toList());
return brokenPaths;
})).entrySet().stream().filter(e -> !e.getValue().isEmpty())
.collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()));
}
// access from unit tests
// Returns the reportData field itself; no copy is made here.
// NOTE(review): the return type reads "Map>", which looks like generic type
// arguments were lost (possibly Map<String, List<String>>) - confirm against
// the declaration of reportData before relying on it.
Map> getReportData() {
return reportData;
}
}