org.craftercms.search.batch.impl.BinaryFileWithMetadataBatchIndexer Maven / Gradle / Ivy
package org.craftercms.search.batch.impl;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.craftercms.commons.lang.RegexUtils;
import org.craftercms.core.processors.ItemProcessor;
import org.craftercms.core.processors.impl.ItemProcessorPipeline;
import org.craftercms.core.service.Content;
import org.craftercms.core.service.ContentStoreService;
import org.craftercms.core.service.Context;
import org.craftercms.search.batch.BatchIndexer;
import org.craftercms.search.batch.UpdateSet;
import org.craftercms.search.batch.UpdateStatus;
import org.craftercms.search.batch.exception.BatchIndexingException;
import org.craftercms.search.service.Query;
import org.craftercms.search.service.SearchService;
import org.craftercms.search.utils.SearchResultUtils;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import static org.craftercms.search.batch.utils.IndexingUtils.*;
/**
* {@link org.craftercms.search.batch.BatchIndexer} that tries to match binary files with metadata files. Right now, a metadata file
* can reference several binary files. Also, this indexer supports the concept of "child" binaries, where the parent is the metadata
* file and the binary file only exists in the index as long as the metadata file exists and it references the binary.
*
* @author avasquez
*/
public class BinaryFileWithMetadataBatchIndexer implements BatchIndexer {
/** Class-wide logger. */
private static final Log logger = LogFactory.getLog(BinaryFileWithMetadataBatchIndexer.class);
/** Value the constructor assigns to {@link #metadataPathFieldName}. */
public static final String DEFAULT_METADATA_PATH_FIELD_NAME = "metadataPath";
/** Value the constructor assigns to {@link #localIdFieldName}. */
public static final String DEFAULT_LOCAL_ID_FIELD_NAME = "localId";
/**
 * Item processor, set directly through {@code setItemProcessor} or as a pipeline through {@code setItemProcessors}.
 * NOTE(review): presumably applied to metadata items when they are loaded -- confirm against loadMetadata.
 */
protected ItemProcessor itemProcessor;
/** Patterns checked by {@code isMetadata(String)} to recognize metadata file paths. */
protected List metadataPathPatterns;
/** Patterns checked by {@code isBinary(String)} to recognize binary file paths. */
protected List binaryPathPatterns;
/**
 * Patterns checked by {@code isChildBinary(String)} to recognize "child" binaries, i.e. binaries that are deleted from
 * the index (instead of re-indexed without metadata) once their metadata file stops referencing them or is deleted.
 */
protected List childBinaryPathPatterns;
/** NOTE(review): presumably the XPath expressions that locate binary references inside a metadata document -- confirm. */
protected List referenceXPaths;
/** NOTE(review): presumably patterns of metadata properties to include when extracting metadata -- confirm. */
protected List includePropertyPatterns;
/** NOTE(review): presumably patterns of metadata properties to exclude when extracting metadata -- confirm. */
protected List excludePropertyPatterns;
/** Deprecated. NOTE(review): looks superseded by {@link #excludePropertyPatterns} -- confirm before removing. */
@Deprecated protected List excludeMetadataProperties;
/**
 * Name of the index field holding the metadata path; defaults to {@link #DEFAULT_METADATA_PATH_FIELD_NAME}.
 * NOTE(review): presumably the field through which a binary document points to its metadata file -- confirm.
 */
protected String metadataPathFieldName;
/**
 * Name of the index field requested as the returned field when searching the binaries of a metadata file; defaults to
 * {@link #DEFAULT_LOCAL_ID_FIELD_NAME}.
 */
protected String localIdFieldName;
/**
 * Creates an indexer whose metadata path and local ID field names are initialized to
 * {@link #DEFAULT_METADATA_PATH_FIELD_NAME} and {@link #DEFAULT_LOCAL_ID_FIELD_NAME}. All other properties are left
 * unset and are expected to be supplied through the setters.
 */
public BinaryFileWithMetadataBatchIndexer() {
metadataPathFieldName = DEFAULT_METADATA_PATH_FIELD_NAME;
localIdFieldName = DEFAULT_LOCAL_ID_FIELD_NAME;
}
/**
 * Sets the item processor, replacing any processor (or pipeline) set before.
 *
 * @param itemProcessor the processor to use
 */
public void setItemProcessor(ItemProcessor itemProcessor) {
this.itemProcessor = itemProcessor;
}
/**
 * Wraps the given processors in an {@link ItemProcessorPipeline} and uses that pipeline as the item processor,
 * replacing any processor set before.
 *
 * @param itemProcessors the processors to combine into a pipeline
 */
public void setItemProcessors(List itemProcessors) {
this.itemProcessor = new ItemProcessorPipeline(itemProcessors);
}
/**
 * Sets the patterns that {@code isMetadata(String)} matches paths against to recognize metadata files.
 *
 * @param metadataPathPatterns the metadata path patterns
 */
public void setMetadataPathPatterns(List metadataPathPatterns) {
this.metadataPathPatterns = metadataPathPatterns;
}
/**
 * Sets the patterns that {@code isBinary(String)} matches paths against to recognize binary files.
 *
 * @param binaryPathPatterns the binary path patterns
 */
public void setBinaryPathPatterns(List binaryPathPatterns) {
this.binaryPathPatterns = binaryPathPatterns;
}
/**
 * Sets the patterns that {@code isChildBinary(String)} matches paths against to recognize child binaries, which are
 * deleted from the index when their metadata file drops the reference or is itself deleted.
 *
 * @param childBinaryPathPatterns the child binary path patterns
 */
public void setChildBinaryPathPatterns(List childBinaryPathPatterns) {
this.childBinaryPathPatterns = childBinaryPathPatterns;
}
/**
 * Sets the reference XPaths.
 * NOTE(review): presumably the XPath expressions evaluated on a metadata document to find the binaries it
 * references -- confirm against getBinaryFilePaths.
 *
 * @param referenceXPaths the reference XPaths
 */
public void setReferenceXPaths(List referenceXPaths) {
this.referenceXPaths = referenceXPaths;
}
/**
 * Sets the include property patterns.
 * NOTE(review): presumably selects which metadata properties are extracted -- confirm against extractMetadata.
 *
 * @param includePropertyPatterns the include patterns
 */
public void setIncludePropertyPatterns(List includePropertyPatterns) {
this.includePropertyPatterns = includePropertyPatterns;
}
/**
 * Sets the exclude property patterns.
 * NOTE(review): presumably selects which metadata properties are skipped during extraction -- confirm against
 * extractMetadata.
 *
 * @param excludePropertyPatterns the exclude patterns
 */
public void setExcludePropertyPatterns(List excludePropertyPatterns) {
this.excludePropertyPatterns = excludePropertyPatterns;
}
/**
 * Sets the excluded metadata properties.
 *
 * @param excludeMetadataProperties the metadata properties to exclude
 * @deprecated NOTE(review): looks superseded by {@code setExcludePropertyPatterns(List)} -- confirm.
 */
@Deprecated
public void setExcludeMetadataProperties(List excludeMetadataProperties) {
this.excludeMetadataProperties = excludeMetadataProperties;
}
/**
 * Overrides the name of the metadata path index field (default {@link #DEFAULT_METADATA_PATH_FIELD_NAME}).
 *
 * @param metadataPathFieldName the field name to use
 */
public void setMetadataPathFieldName(String metadataPathFieldName) {
this.metadataPathFieldName = metadataPathFieldName;
}
/**
 * Overrides the name of the local ID index field (default {@link #DEFAULT_LOCAL_ID_FIELD_NAME}), which is the field
 * requested back when searching for the binaries of a metadata file.
 *
 * @param localIdFieldName the field name to use
 */
public void setLocalIdFieldName(String localIdFieldName) {
this.localIdFieldName = localIdFieldName;
}
/**
 * Applies an update set to the index: the update paths are processed first through {@code doUpdates}, then the delete
 * paths through {@code doDeletes}.
 *
 * @param searchService       the search service used to query and modify the index
 * @param indexId             the ID of the index to update
 * @param siteName            the name of the site the paths belong to
 * @param contentStoreService the content store service handed to the update/delete steps
 * @param context             the content store context handed to the update/delete steps
 * @param updateSet           the paths to update and to delete
 * @param updateStatus        the status object handed to the update/delete steps
 * @throws BatchIndexingException declared by the method; not thrown directly here, so any occurrence comes from the
 *                                update/delete steps
 */
@Override
public void updateIndex(SearchService searchService, String indexId, String siteName, ContentStoreService contentStoreService,
Context context, UpdateSet updateSet, UpdateStatus updateStatus) throws BatchIndexingException {
// Updates run before deletes.
doUpdates(indexId, siteName, searchService, contentStoreService, context, updateSet.getUpdatePaths(), updateStatus);
doDeletes(indexId, siteName, searchService, contentStoreService, context, updateSet.getDeletePaths(), updateStatus);
}
/**
 * Indexes the metadata and binary files found in the given update paths.
 *
 * <p>The paths are first split into metadata paths and binary paths; paths matching neither kind are ignored. Then:</p>
 * <ol>
 * <li>For every updated metadata file, the binaries the index currently associates with it are compared with the
 * binaries it references now. Binaries that are no longer referenced are deleted from the index if they are child
 * binaries, or re-indexed without metadata otherwise. Binaries that are referenced now are indexed together with
 * the metadata.</li>
 * <li>Every updated binary that was not already handled in step 1 is indexed with the metadata of the metadata file
 * the index associates it with, or by itself when there is no such metadata file.</li>
 * </ol>
 *
 * @param indexId             the ID of the index to update
 * @param siteName            the name of the site the paths belong to
 * @param searchService       the search service used to query and modify the index
 * @param contentStoreService the content store service used to load metadata and binaries
 * @param context             the content store context
 * @param updatePaths         the paths that were created or updated; {@code null} or empty means nothing to do
 * @param updateStatus        the status object handed to the individual update/delete operations
 */
protected void doUpdates(String indexId, String siteName, SearchService searchService, ContentStoreService contentStoreService,
                         Context context, List<String> updatePaths, UpdateStatus updateStatus) {
    if (CollectionUtils.isEmpty(updatePaths)) {
        return;
    }

    // Linked sets keep the incoming order and drop duplicate paths.
    Set<String> metadataUpdatePaths = new LinkedHashSet<>();
    Set<String> binaryUpdatePaths = new LinkedHashSet<>();

    for (String path : updatePaths) {
        if (isMetadata(path)) {
            metadataUpdatePaths.add(path);
        } else if (isBinary(path)) {
            binaryUpdatePaths.add(path);
        }
    }

    for (String metadataPath : metadataUpdatePaths) {
        List<String> newBinaryPaths = Collections.emptyList();
        List<String> previousBinaryPaths = searchBinaryPathsFromMetadataPath(searchService, indexId, siteName, metadataPath);
        Document metadataDoc = loadMetadata(contentStoreService, context, siteName, metadataPath);

        if (metadataDoc != null) {
            newBinaryPaths = getBinaryFilePaths(metadataDoc);
        }

        // If there are previous binaries that are not associated to the metadata anymore, reindex them without metadata or delete
        // them if they're child binaries.
        if (CollectionUtils.isNotEmpty(previousBinaryPaths)) {
            for (String previousBinaryPath : previousBinaryPaths) {
                // A previous binary is still referenced only if the metadata currently lists it. When the metadata lists
                // no binaries at all, every previous binary has lost its reference and must be de-associated too.
                boolean stillReferenced = CollectionUtils.isNotEmpty(newBinaryPaths)
                                          && newBinaryPaths.contains(previousBinaryPath);
                if (stillReferenced) {
                    continue;
                }

                // Handled here, so it must not be processed again by the stand-alone binary loop below.
                binaryUpdatePaths.remove(previousBinaryPath);

                if (isChildBinary(previousBinaryPath)) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Reference of child binary " + previousBinaryPath + " removed from parent " + metadataPath +
                                     ". Deleting binary from index...");
                    }

                    doDelete(searchService, indexId, siteName, previousBinaryPath, updateStatus);
                } else {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Reference of binary " + previousBinaryPath + " removed from " + metadataPath +
                                     ". Reindexing without metadata...");
                    }

                    updateBinary(searchService, indexId, siteName, contentStoreService, context, previousBinaryPath, updateStatus);
                }
            }
        }

        // Index the new associated binaries
        if (CollectionUtils.isNotEmpty(newBinaryPaths)) {
            MultiValueMap metadata = extractMetadata(metadataPath, metadataDoc);

            for (String newBinaryPath : newBinaryPaths) {
                // Indexed here together with its metadata, so skip it in the stand-alone binary loop below.
                binaryUpdatePaths.remove(newBinaryPath);

                updateBinaryWithMetadata(searchService, indexId, siteName, contentStoreService, context, newBinaryPath,
                                         metadata, updateStatus);
            }
        }
    }

    // Binaries that were updated on their own, i.e. without their metadata file being part of this update.
    for (String binaryPath : binaryUpdatePaths) {
        String metadataPath = searchMetadataPathFromBinaryPath(searchService, indexId, siteName, binaryPath);
        if (StringUtils.isNotEmpty(metadataPath)) {
            // If the binary file has an associated metadata, index the file with the metadata
            Document metadataDoc = loadMetadata(contentStoreService, context, siteName, metadataPath);
            // NOTE(review): when the associated metadata can't be loaded the binary is not re-indexed at all --
            // confirm this is intended rather than falling back to indexing it without metadata.
            if (metadataDoc != null) {
                MultiValueMap metadata = extractMetadata(metadataPath, metadataDoc);

                updateBinaryWithMetadata(searchService, indexId, siteName, contentStoreService, context, binaryPath,
                                         metadata, updateStatus);
            }
        } else {
            // If not, index by itself
            updateBinary(searchService, indexId, siteName, contentStoreService, context, binaryPath, updateStatus);
        }
    }
}
/**
 * Processes the deleted paths of an update set.
 *
 * <p>For a deleted metadata file, every binary the index associates with it is either deleted from the index (child
 * binaries) or re-indexed without metadata (regular binaries). The metadata path itself is not removed from the index
 * by this method. A deleted binary file is simply deleted from the index. Paths that are neither metadata nor binary
 * are ignored.</p>
 *
 * @param indexId             the ID of the index to update
 * @param siteName            the name of the site the paths belong to
 * @param searchService       the search service used to query and modify the index
 * @param contentStoreService the content store service used when re-indexing binaries
 * @param context             the content store context
 * @param deletePaths         the paths that were deleted; {@code null} or empty means nothing to do
 * @param updateStatus        the status object handed to the individual update/delete operations
 */
protected void doDeletes(String indexId, String siteName, SearchService searchService, ContentStoreService contentStoreService,
                         Context context, List<String> deletePaths, UpdateStatus updateStatus) {
    if (CollectionUtils.isEmpty(deletePaths)) {
        return;
    }

    for (String path : deletePaths) {
        if (isMetadata(path)) {
            List<String> binaryPaths = searchBinaryPathsFromMetadataPath(searchService, indexId, siteName, path);
            // Same emptiness guard doUpdates applies to this search result, so a missing result is a no-op here too.
            if (CollectionUtils.isEmpty(binaryPaths)) {
                continue;
            }

            for (String binaryPath : binaryPaths) {
                if (isChildBinary(binaryPath)) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Parent of binary " + binaryPath + " deleted. Deleting child binary too");
                    }

                    // If the binary is a child binary, when the metadata file is deleted, then delete it
                    doDelete(searchService, indexId, siteName, binaryPath, updateStatus);
                } else {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Metadata with reference of binary " + binaryPath + " deleted. Reindexing without metadata...");
                    }

                    // Else, update binary without metadata
                    updateBinary(searchService, indexId, siteName, contentStoreService, context, binaryPath, updateStatus);
                }
            }
        } else if (isBinary(path)) {
            doDelete(searchService, indexId, siteName, path, updateStatus);
        }
    }
}
/**
 * Tells whether the given path is a metadata file path.
 *
 * @param path the path to check
 * @return the result of {@code RegexUtils.matchesAny} for the path against {@link #metadataPathPatterns}
 */
protected boolean isMetadata(String path) {
return RegexUtils.matchesAny(path, metadataPathPatterns);
}
/**
 * Tells whether the given path is a binary file path.
 *
 * @param path the path to check
 * @return the result of {@code RegexUtils.matchesAny} for the path against {@link #binaryPathPatterns}
 */
protected boolean isBinary(String path) {
return RegexUtils.matchesAny(path, binaryPathPatterns);
}
/**
 * Tells whether the given path is a child binary path, i.e. a binary that is deleted from the index instead of being
 * re-indexed without metadata when its metadata file drops the reference or is deleted.
 *
 * @param path the path to check
 * @return the result of {@code RegexUtils.matchesAny} for the path against {@link #childBinaryPathPatterns}
 */
protected boolean isChildBinary(String path) {
return RegexUtils.matchesAny(path, childBinaryPathPatterns);
}
@SuppressWarnings("unchecked")
protected List searchBinaryPathsFromMetadataPath(SearchService searchService, String indexId, String siteName,
String metadataPath) {
Query query = searchService.createQuery();
query.setQuery("crafterSite:\"" + siteName + "\" AND metadataPath:\"" + metadataPath + "\"");
query.setFieldsToReturn(localIdFieldName);
Map result = searchService.search(indexId, query);
List © 2015 - 2025 Weber Informatics LLC | Privacy Policy