
com.rbmhtechnology.vind.solr.backend.SolrUtils Maven / Gradle / Ivy
The newest version!
package com.rbmhtechnology.vind.solr.backend;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.rbmhtechnology.vind.api.Document;
import com.rbmhtechnology.vind.api.query.FulltextSearch;
import com.rbmhtechnology.vind.api.query.datemath.DateMathExpression;
import com.rbmhtechnology.vind.api.query.facet.Facet;
import com.rbmhtechnology.vind.api.query.facet.TermFacetOption;
import com.rbmhtechnology.vind.api.query.filter.Filter;
import com.rbmhtechnology.vind.api.query.get.RealTimeGet;
import com.rbmhtechnology.vind.api.query.sort.Sort;
import com.rbmhtechnology.vind.api.result.FacetResults;
import com.rbmhtechnology.vind.api.result.GetResult;
import com.rbmhtechnology.vind.api.result.SuggestionResult;
import com.rbmhtechnology.vind.api.result.facet.*;
import com.rbmhtechnology.vind.model.*;
import com.rbmhtechnology.vind.model.value.LatLng;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.client.solrj.response.*;
import org.apache.solr.client.solrj.response.IntervalFacet;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static com.rbmhtechnology.vind.api.query.facet.Facet.*;
import static com.rbmhtechnology.vind.api.query.filter.Filter.*;
import static com.rbmhtechnology.vind.model.FieldDescriptor.*;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
* @author Thomas Kurz ([email protected])
* @since 22.06.16.
*/
public class SolrUtils {
// NOTE(review): the logger is registered under SolrSearchServer.class, not SolrUtils.class,
// so messages from this class appear under the SolrSearchServer category — confirm this is intended.
private static final Logger log = LoggerFactory.getLogger(SolrSearchServer.class);
// Pattern fragment for internal field names: the dynamic marker, then the multi- or single-valued
// marker, then one of eight type names (boolean, date, integer, long, number, string, binary, location).
// Presumably interpreted as a regular expression by code outside this view — TODO confirm.
private static final String INTERNAL_FIELD_PREFIX = String.format("%s(%s|%s)(%s|%s|%s|%s|%s|%s|%s|%s)",
Fieldname._DYNAMIC,
Fieldname._MULTI,Fieldname._SINGLE,
Fieldname.Type.BOOLEAN.getName(), Fieldname.Type.DATE.getName(),
Fieldname.Type.INTEGER.getName(), Fieldname.Type.LONG.getName(),Fieldname.Type.NUMBER.getName(),
Fieldname.Type.STRING.getName(),Fieldname.Type.BINARY.getName(),Fieldname.Type.LOCATION.getName());
// Same layout for facet fields: an optional stored marker and the mandatory facet marker sit between
// the multi/single marker and the type name; the binary type is not part of the alternatives.
private static final String INTERNAL_FACET_FIELD_PREFIX = String.format("%s(%s|%s)(%s)?%s(%s|%s|%s|%s|%s|%s|%s)",
Fieldname._DYNAMIC,
Fieldname._MULTI,Fieldname._SINGLE,
Fieldname._STORED,
Fieldname._FACET,
Fieldname.Type.BOOLEAN.getName(), Fieldname.Type.DATE.getName(),
Fieldname.Type.INTEGER.getName(), Fieldname.Type.LONG.getName(),Fieldname.Type.NUMBER.getName(),
Fieldname.Type.STRING.getName(),Fieldname.Type.LOCATION.getName());
// Variant of the facet pattern in which the scope marker may be any of facet, suggest or filter.
private static final String INTERNAL_SCOPE_FACET_FIELD_PREFIX = String.format("%s(%s|%s)(%s)?(%s|%s|%s)(%s|%s|%s|%s|%s|%s|%s)",
Fieldname._DYNAMIC,
Fieldname._MULTI,Fieldname._SINGLE,
Fieldname._STORED,
Fieldname._FACET,Fieldname._SUGGEST,Fieldname._FILTER,
Fieldname.Type.BOOLEAN.getName(), Fieldname.Type.DATE.getName(),
Fieldname.Type.INTEGER.getName(), Fieldname.Type.LONG.getName(),Fieldname.Type.NUMBER.getName(),
Fieldname.Type.STRING.getName(),Fieldname.Type.LOCATION.getName());
// Suggest-field pattern: uses the suggest marker and additionally accepts the analyzed type.
private static final String INTERNAL_SUGGEST_FIELD_PREFIX = String.format("%s(%s|%s)(%s)?%s(%s|%s|%s|%s|%s|%s|%s|%s)",
Fieldname._DYNAMIC,
Fieldname._MULTI,Fieldname._SINGLE,
Fieldname._STORED,
Fieldname._SUGGEST,
Fieldname.Type.BOOLEAN.getName(), Fieldname.Type.DATE.getName(),
Fieldname.Type.INTEGER.getName(), Fieldname.Type.LONG.getName(),Fieldname.Type.NUMBER.getName(),
Fieldname.Type.STRING.getName(),Fieldname.Type.LOCATION.getName(), Fieldname.Type.ANALYZED.getName());
// Format template (one %s placeholder) for an optional "<context>_" prefix group.
private static final String INTERNAL_CONTEXT_PREFIX = "(%s_)?";
/**
 * Reads the JSON facet section ("facets") of a Solr response and maps the "val" of every
 * "parent_facet" bucket to the "count" of that bucket's "children_facet".
 *
 * @param response Solr response to inspect
 * @return the bucket-value to children-count map; an empty map when "facets" is present but has
 *         no "parent_facet" or a top-level count that is not positive; {@code null} when the
 *         response body or its "facets" entry is missing
 */
// NOTE(review): the raw Map/List types here look like generic declarations whose type arguments
// were lost — restore them from the original source.
public static Map getChildCounts(SolrResponse response) {
//check if there are subdocs
if (Objects.nonNull(response.getResponse())) {
final Object subDocumentFacetResult = response.getResponse().get("facets");
if (Objects.nonNull(subDocumentFacetResult)) {
Map childCounts = new HashMap<>();
log.debug("Parsing subdocument facet result from JSON ");
final Object count = ((SimpleOrderedMap) subDocumentFacetResult).get("count");
// A missing or unparsable top-level "count" is treated as 0.
final Number facetCount = Objects.nonNull(count)? NumberUtils.toLong(count.toString(), 0L) : new Integer(0);
if (Objects.nonNull(((SimpleOrderedMap) subDocumentFacetResult).get("parent_facet")) && facetCount.longValue() > 0) {
final List parentDocs = (ArrayList) ((SimpleOrderedMap) ((SimpleOrderedMap) subDocumentFacetResult).get("parent_facet")).get("buckets");
// NOTE(review): assumes every bucket carries a "children_facet" entry and that bucket
// values are unique (Collectors.toMap rejects duplicate keys) — confirm.
childCounts = parentDocs.stream()
.collect(Collectors.toMap(
p -> (String) p.get("val"),
p -> {
final Object childrenCount = ((SimpleOrderedMap) p.get("children_facet")).get("count");
// A missing or unparsable children count becomes 0.
return Objects.nonNull(childrenCount)? NumberUtils.toInt(childrenCount.toString(), 0) : new Integer(0);
})
);
}
return childCounts;
}
}
return null;
}
/**
 * Reads the "childrenCount" entry of the JSON facet section ("facets") of a Solr response.
 *
 * @param response Solr response to inspect
 * @return a map holding at most one entry whose key is the parent count and whose value is the
 *         child count; an empty map when the top-level facet count is not positive or
 *         "childrenCount" is absent; {@code null} when the response body or its "facets" entry
 *         is missing
 */
// NOTE(review): the raw Map type looks like a generic declaration whose type arguments were lost.
public static Map getSubdocumentCounts(SolrResponse response) {
//check if there are subdocs
if (Objects.nonNull(response.getResponse())) {
final Object subDocumentFacetResult = response.getResponse().get("facets");
if (Objects.nonNull(subDocumentFacetResult)) {
Map childCounts = new HashMap<>();
log.debug("Parsing subdocument facet result from JSON ");
// NOTE(review): unlike getChildCounts, the "count" entry is dereferenced without a null check.
final int facetCount = NumberUtils.toInt(((SimpleOrderedMap) subDocumentFacetResult).get("count").toString(),0) ;
if (facetCount > 0 && Objects.nonNull(((SimpleOrderedMap) subDocumentFacetResult).get("childrenCount"))) {
final SimpleOrderedMap parentDocs = ((SimpleOrderedMap) ((SimpleOrderedMap) subDocumentFacetResult).get("childrenCount"));
final Integer childCount = NumberUtils.toInt(parentDocs.get("count").toString(),0);
final Integer parentCount;
if(childCount > 0) {
// The parent count is taken from the first bucket of the nested "parentFilteredCount" facet.
final Object objectCount= ((SimpleOrderedMap) ((List) ((SimpleOrderedMap) parentDocs.get("parentFilteredCount")).get("buckets")).get(0)).get("count");
parentCount =NumberUtils.toInt(objectCount.toString(),0);
} else {
// Without matching children the parent count is reported as 0.
parentCount = 0;
}
childCounts.put(parentCount, childCount);
}
return childCounts;
}
}
return null;
}
public static final class Query {
/**
 * Serializes a filter with a {@link SolrFilterSerializer} and combines it with a clause on the
 * document type of the given factory.
 *
 * @param filter        filter to serialize
 * @param factory       factory passed to the serializer; its type is used for the type clause
 * @param searchContext context passed to the serializer
 * @param strict        flag passed to the serializer constructor
 * @return the bare type clause when the filter serializes to an empty string, otherwise
 *         {@code (<type clause> AND (<serialized filter>))}
 */
public static String serializeFacetFilter(Filter filter, DocumentFactory factory, String searchContext, boolean strict) {
    final String filterClause = new SolrFilterSerializer(factory, strict).serialize(filter, searchContext);
    final String typeClause = Fieldname.TYPE + ":" + factory.getType();
    if (filterClause.isEmpty()) {
        return typeClause;
    }
    return "(" + typeClause + " AND (" + filterClause + "))";
}
/**
 * Builds the filter string for a search without a child document factory.
 *
 * @param filter        filter to serialize
 * @param factory       factory describing the documents being filtered
 * @param searchContext search context passed on to the serializer
 * @param strict        flag passed on to the serializer
 * @return the result of the five-argument overload called with a {@code null} child factory
 */
public static String buildFilterString(Filter filter, DocumentFactory factory,String searchContext, boolean strict) {
    // A typed null selects the DocumentFactory overload rather than the SolrQuery one.
    final DocumentFactory noChildFactory = null;
    return buildFilterString(filter, factory, noChildFactory, searchContext, strict);
}
/**
 * Builds a filter string made of a mandatory {@code _type_} clause for the factory's type and,
 * when the filter serializes to something non-blank, the serialized filter as a second
 * mandatory clause.
 *
 * @param filter        filter to serialize
 * @param factory       factory whose type is used for the type clause
 * @param childFactory  child document factory passed to the serializer, may be {@code null}
 * @param searchContext search context passed to the serializer
 * @param strict        flag passed to the serializer
 * @return {@code +_type_:<type>} alone, or followed by {@code " +"} and the serialized filter
 */
public static String buildFilterString(Filter filter, DocumentFactory factory,DocumentFactory childFactory,String searchContext, boolean strict) {
    final ChildrenFilterSerializer serializer =
            new ChildrenFilterSerializer(factory, childFactory, searchContext, strict, false);
    final String filterClause = serializer.serialize(filter);
    final String typeClause = "+_type_:" + factory.getType();
    return StringUtils.isBlank(filterClause)
            ? typeClause
            : typeClause + " +" + filterClause;
}
/**
 * Adds the serialized filter to the given query for a search without a child document factory.
 *
 * @param filter        filter to serialize
 * @param factory       factory describing the documents being filtered
 * @param query         Solr query that receives the filter query
 * @param searchContext search context passed on to the serializer
 * @param strict        flag passed on to the serializer
 */
public static void buildFilterString(Filter filter, DocumentFactory factory,SolrQuery query,String searchContext, boolean strict) {
    final DocumentFactory noChildFactory = null;
    buildFilterString(filter, factory, noChildFactory, query, searchContext, strict);
}
/**
 * Serializes the filter and, when the result is not blank, adds it to the query as an
 * {@code fq} parameter. This overload adds no {@code _type_} filter query of its own.
 *
 * @param filter        filter to serialize
 * @param factory       factory passed to the serializer
 * @param childFactory  child document factory passed to the serializer, may be {@code null}
 * @param query         Solr query that receives the filter query
 * @param searchContext search context passed to the serializer
 * @param strict        flag passed to the serializer
 */
public static void buildFilterString(Filter filter, DocumentFactory factory,DocumentFactory childFactory,SolrQuery query,String searchContext, boolean strict) {
    final ChildrenFilterSerializer serializer =
            new ChildrenFilterSerializer(factory, childFactory, searchContext, strict, false);
    final String filterQuery = serializer.serialize(filter);
    if (StringUtils.isBlank(filterQuery)) {
        // Nothing to filter on: leave the query untouched.
        return;
    }
    query.add(CommonParams.FQ, filterQuery);
}
/**
 * Converts a list of sorts into a sort string: one {@code "<field or function> <direction>"}
 * entry per sort, joined with {@code ", "}.
 *
 * @param search   search providing the search context and the geo distance
 * @param sortList sorts to convert
 * @param factory  factory used to resolve the field of a simple sort by name
 * @return the joined sort string
 * @throws RuntimeException when a simple sort names a known field that is not sortable, when a
 *         distance sort is used without a geo distance on the search, or when no sort field
 *         name can be resolved for a descriptor sort
 */
// NOTE(review): the raw List parameter looks like a generic declaration whose type argument was lost.
public static String buildSortString(FulltextSearch search, List sortList, DocumentFactory factory) {
return sortList.stream().map(sort -> {
if (sort instanceof Sort.SimpleSort) {
Sort.SimpleSort ssort = (Sort.SimpleSort) sort;
FieldDescriptor descriptor = factory.getField(ssort.getField());
if (descriptor != null) {
if (!descriptor.isSort()) {
log.error("Cannot sort on field '{}'. The field is not defined as sortable.", ssort.getField());
throw new RuntimeException("Cannot sort on field " + ssort.getField());
}
return Fieldname.getFieldname(descriptor, UseCase.Sort, search.getSearchContext()) + " " + ssort.getDirection();
} else {
// Field unknown to the factory: the given name is used as is.
return ssort.getField() + " " + ssort.getDirection();
}
} else if (sort instanceof Sort.SpecialSort.ScoredDate) {
Sort.SpecialSort.ScoredDate ssort = (Sort.SpecialSort.ScoredDate) sort;
return "score " + ssort.getDirection();//TODO this is wrong isn't it?
} else if (sort instanceof Sort.SpecialSort.DistanceSort) {
Sort.SpecialSort.DistanceSort ssort = (Sort.SpecialSort.DistanceSort) sort;
if (search.getGeoDistance() == null) {
throw new RuntimeException("Sorting by distance requires a geodistance set");
}
return "geodist() " + ssort.getDirection();
} else if (sort instanceof Sort.SpecialSort.Score) {
Sort.SpecialSort.Score scoreSort = (Sort.SpecialSort.Score) sort;
return "score " + scoreSort.getDirection();
} else {
// Any remaining sort is cast to a descriptor sort.
final Sort.DescriptorSort s = (Sort.DescriptorSort) sort;
final String fieldname = Fieldname.getFieldname(s.getDescriptor(), UseCase.Sort, search.getSearchContext());
if (fieldname == null) {
throw new RuntimeException("The field '"+ s.getDescriptor().getName()+"' is not set as sortable");
}
return fieldname + " " + s.getDirection();
}
}).collect(Collectors.joining(", "));
}
//TODO sorting stuff is a mess
/**
 * Builds a boost-function string from the scored-date sorts in the list: each one contributes a
 * {@code recip(abs(ms(NOW/HOUR,<field>)),3.16e-11,1,.1)} expression on the field name resolved
 * for {@code UseCase.Stored}. All other sorts are ignored.
 *
 * @param sortList      sorts to inspect
 * @param searchContext search context used to resolve the field name
 * @return the expressions joined with a single space; empty when there is no scored-date sort
 */
// NOTE(review): the raw List parameter looks like a generic declaration whose type argument was lost.
public static String buildBoostFunction(List sortList, String searchContext) {
//String bf =
return sortList.stream().map(sort -> {
if (sort instanceof Sort.SpecialSort.ScoredDate) {
Sort.SpecialSort.ScoredDate ssort = (Sort.SpecialSort.ScoredDate) sort;
return String.format("recip(abs(ms(NOW/HOUR,%s)),3.16e-11,1,.1)", Fieldname.getFieldname(ssort.getDescriptor(), UseCase.Stored, searchContext));
} else return null;
}).filter(Objects::nonNull).collect(Collectors.joining(" "));
}
public static String buildQueryFieldString(Collection> fulltext, String searchContext) {
return fulltext.stream()
.map(descriptor ->
SolrUtils.Fieldname.getFieldname(descriptor, UseCase.Fulltext, searchContext) +
"^" +
descriptor.getBoost()
)
.collect(Collectors.joining(" "));
}
/**
 * Collects the field names to facet on: one resolved field name per term facet, followed by
 * {@code Fieldname.TYPE} once for every type facet.
 *
 * @param facets        facets of the search; only term facets and type facets are considered
 * @param factory       factory used to look up a term facet's field by name
 * @param childFactory  fallback factory for fields unknown to {@code factory}, may be {@code null}
 * @param searchContext search context used to resolve the field names
 * @return the field names as an array; term facets whose field name resolves to {@code null}
 *         are left out
 */
// NOTE(review): the raw Map/List types and the malformed "FieldDescriptor>" below look like generic
// declarations whose type arguments were lost — restore them from the original source.
public static String[] buildFacetFieldList(Map facets, DocumentFactory factory, DocumentFactory childFactory, String searchContext) {
final List termFacetQuery = facets.values().stream()
.filter(facet -> facet instanceof TermFacet)
.map(facet -> (TermFacet) facet)
.map(facet -> {
if(Objects.nonNull(facet.getFieldDescriptor())) {
return facet;
} else {
// No descriptor on the facet: look the field up by name, first in the parent factory,
// then in the child factory.
FieldDescriptor> field = factory.getField(facet.getFieldName());
if(Objects.isNull(field) && Objects.nonNull(childFactory)) {
field = childFactory.getField(facet.getFieldName());
}
// NOTE(review): the replacement facet is built from the descriptor only; whether it keeps
// the original facet's scope depends on the TermFacet constructor — confirm.
return new TermFacet(field);
}
})
.map(facet -> Fieldname.getFieldname(facet.getFieldDescriptor(), UseCase.valueOf(facet.getScope().name()), searchContext))
.filter(Objects::nonNull)
.collect(Collectors.toList());
final List typeFacet = facets.values().stream()
.filter(facet -> facet instanceof TypeFacet)
.map(facet -> Fieldname.TYPE)
.filter(Objects::nonNull)
.collect(Collectors.toList());
termFacetQuery.addAll(typeFacet);
return termFacetQuery.stream().toArray(String[]::new);
}
/**
 * Builds the JSON description of all term facets and type facets of a search. Every facet
 * becomes an object of type "terms" and is stored in the returned node under the value of its
 * own "field" entry.
 *
 * @param facets        facets of the search; only term facets and type facets are considered
 * @param facetLimit    default "limit" of every facet; a term facet option's limit overrides it
 * @param factory       factory used to look up a term facet's field by name
 * @param childFactory  fallback factory for fields unknown to {@code factory}, may be {@code null};
 *                      facets resolved through it get a "blockChildren" domain on the parent type
 * @param searchContext search context used to resolve the field names
 * @return an object node holding one entry per facet field
 */
// NOTE(review): the raw Map/List types and the malformed "FieldDescriptor>" below look like generic
// declarations whose type arguments were lost — restore them from the original source.
public static ObjectNode buildJsonTermFacet(Map facets, int facetLimit, DocumentFactory factory, DocumentFactory childFactory, String searchContext) {
// NOTE(review): jsonFacets is never read in this method.
final ObjectNode jsonFacets = JsonNodeFactory.instance.objectNode();
final List termFacetQuery = facets.entrySet().stream()
.filter(facet -> facet.getValue() instanceof TermFacet)
//.map(facet -> facet.setValue((Facet.TermFacet) facet.getValue()))
.map(facet -> {
final ObjectNode termFacet = JsonNodeFactory.instance.objectNode();
termFacet.put("type","terms");
final TermFacet value = (TermFacet) facet.getValue();
FieldDescriptor> field = factory.getField(value.getFieldName());
if(Objects.isNull(field) && Objects.nonNull(childFactory)) {
// Field only known to the child factory: add a "blockChildren" domain on the parent type.
field = childFactory.getField(value.getFieldName());
final ObjectNode domainObject = JsonNodeFactory.instance.objectNode();
domainObject.set("blockChildren",
JsonNodeFactory.instance.objectNode().put(Fieldname.TYPE,factory.getType()));
termFacet.set("domain", domainObject);
}
final UseCase useCase = UseCase.valueOf(facet.getValue().getScope().name());
final String fieldName = Fieldname.getFieldname(field, useCase, searchContext);
if(StringUtils.isEmpty(fieldName)) {
// NOTE(review): fieldName is null or empty at this point, so the warning cannot name the
// field; value.getFieldName() looks like the intended argument — confirm.
log.warn("Field {} is not set for faceting", fieldName);
return null;
}
termFacet.put("field", fieldName);
termFacet.put("limit", facetLimit);
// Copy every option that is set; an option limit replaces the default limit put above.
if (Objects.nonNull(value.getOption())) {
final TermFacetOption option = value.getOption();
if(Objects.nonNull(option.getPrefix())) {
termFacet.put("prefix", option.getPrefix());
}
if(Objects.nonNull(option.getLimit())) {
termFacet.put("limit", option.getLimit());
}
if(Objects.nonNull(option.getMethod())) {
termFacet.put("method", String.valueOf(option.getMethod()).toLowerCase());
}
if(Objects.nonNull(option.getMincount())) {
termFacet.put("mincount", option.getMincount());
}
if(Objects.nonNull(option.getOffset())) {
termFacet.put("offset", option.getOffset());
}
if(Objects.nonNull(option.getOverrefine())) {
termFacet.put("overrefine", option.getOverrefine());
}
if(Objects.nonNull(option.getOverrequest())) {
termFacet.put("overrequest", option.getOverrequest());
}
if(Objects.nonNull(option.getSort())) {
termFacet.put("sort", option.getSort());
}
if(Objects.nonNull(option.isAllBuckets())) {
termFacet.put("allBuckets", option.isAllBuckets());
}
if(Objects.nonNull(option.isMissing())) {
termFacet.put("missing", option.isMissing());
}
if(Objects.nonNull(option.isNumBuckets())) {
termFacet.put("numBuckets", option.isNumBuckets());
}
if(Objects.nonNull(option.isRefine())) {
termFacet.put("refine", option.isRefine());
}
}
return termFacet;
})
.filter(Objects::nonNull)
.collect(Collectors.toList());
// Every type facet becomes a terms facet on the type field with the default limit.
final List typeFacet = facets.values().stream()
.filter(facet -> facet instanceof TypeFacet)
.map(facet ->{
final ObjectNode termFacet = JsonNodeFactory.instance.objectNode();
termFacet.put("type","terms");
termFacet.put("field", Fieldname.TYPE);
termFacet.put("limit", facetLimit);
return termFacet;
})
.filter(Objects::nonNull)
.collect(Collectors.toList());
termFacetQuery.addAll(typeFacet);
// The result is keyed by each facet's "field" value.
final ObjectNode jsonFieldFacet = JsonNodeFactory.instance.objectNode();
termFacetQuery.stream().forEach( facet -> jsonFieldFacet.set(facet.get("field").asText(),facet));
return jsonFieldFacet;
}
/**
 * Converts a value into the string used for it in a Solr query.
 * <ul>
 *   <li>{@code null} becomes the empty string;</li>
 *   <li>{@link ZonedDateTime} and {@link Date} values are formatted with
 *       {@link DateTimeFormatter#ISO_INSTANT};</li>
 *   <li>{@link DateMathExpression} values use their {@code toString()} form;</li>
 *   <li>{@link ByteBuffer} values are decoded as UTF-8 text;</li>
 *   <li>any other value uses its {@code toString()} form.</li>
 * </ul>
 *
 * @param o value to convert, may be {@code null}
 * @return the string form of the value, never {@code null} for the cases handled explicitly
 */
public static String buildSolrQueryValue(Object o){
    if (o == null) {
        return "";
    }
    if (o instanceof ZonedDateTime) {
        return ((ZonedDateTime) o).format(DateTimeFormatter.ISO_INSTANT);
    }
    if (o instanceof Date) {
        return DateTimeFormatter.ISO_INSTANT.format(((Date) o).toInstant());
    }
    if (o instanceof DateMathExpression) {
        //TODO: Do not delegate on the toString DateMath, a solr specific parse would be better
        return o.toString();
    }
    if (o instanceof ByteBuffer) {
        final ByteBuffer buffer = (ByteBuffer) o;
        if (buffer.hasArray()) {
            // Decode with an explicit charset instead of the platform default so the result
            // does not depend on the JVM's locale settings.
            return new String(buffer.array(), UTF_8);
        }
        // Direct and read-only buffers expose no backing array: copy their whole content
        // through an independent view so the caller's position and limit stay untouched.
        final ByteBuffer view = buffer.duplicate();
        view.clear();
        final byte[] bytes = new byte[view.remaining()];
        view.get(bytes);
        return new String(bytes, UTF_8);
    }
    return o.toString(); //TODO check if this is this correct
}
/**
 * Formats a duration in milliseconds as a gap expression of the form
 * {@code +<duration>MILLISECOND}.
 *
 * @param duration gap length in milliseconds
 * @return the gap expression
 */
public static String buildSolrTimeGap(Long duration){
    return "+" + duration + "MILLISECOND";
}
/**
 * Builds an aliased field expression of the form {@code <alias>:<field>}.
 *
 * @param field field name
 * @param alias alias under which the field is returned
 * @return the alias, a colon and the field name
 */
public static String buildSolrFieldAlias(String field, String alias){
    return alias + ":" + field;
}
/**
 * Builds a {@code tag='k1,k2,...'} local parameter from the given keys.
 *
 * @param keys tag keys, joined with commas
 * @return the tag parameter
 */
public static String buildSolrFacetTags(String ... keys){
    final String joinedKeys = StringUtils.join(keys, ',');
    return StringUtils.join("tag='", joinedKeys, "'");
}
/**
 * Builds a {@code {!key=<s>}} local parameter.
 *
 * @param s key to use; must be non-empty and must not contain blanks
 * @return the key parameter
 * @throws IllegalArgumentException (a {@link RuntimeException}) when the key is {@code null},
 *         empty or contains a blank
 */
public static String buildSolrFacetKey(String s){
    // The empty string used to slip through although the message already promised its rejection.
    if (s == null || s.isEmpty() || s.contains(" ")) {
        throw new IllegalArgumentException("key string may not be empty or contain blanks");
    }
    return "{!key=" + s + "}";
}
/**
 * Prefixes a field with local parameters carrying the facet's tags, the {@code ex=dt}
 * exclusion and the facet name as quoted key.
 *
 * @param field field the local parameters are prepended to
 * @param facet facet providing the tagged pivots and the facet name
 * @return {@code {!tag='...' ex=dt key='<facet name>'}<field>}
 */
public static String buildSolrFacetCustomName(String field, Facet facet){
    final String tags = buildSolrFacetTags(facet.getTagedPivots());
    return StringUtils.join(
            "{!", tags,
            " ex=dt key='", facet.getFacetName(), "'}",
            field);
}
/**
 * Builds a pivot expression: local parameters with the {@code ex=dt} exclusion and the given
 * name as key, followed by the comma-separated fields.
 *
 * @param name   key of the pivot
 * @param fields pivot fields, joined with commas
 * @return {@code {!ex=dt key=<name>}<f1,f2,...>}
 */
public static String buildSolarPivotCustomName(String name,String... fields){
    final String pivotFields = StringUtils.join(fields, ',');
    return StringUtils.join("{!ex=dt key=", name, "}", pivotFields);
}
/**
 * Builds a pivot expression whose local parameters reference the given name as query, stats,
 * range and key, followed by the comma-separated fields.
 *
 * @param name   name used for all four local parameters
 * @param fields pivot fields, joined with commas
 * @return the local parameters followed by the joined fields
 */
public static String buildSolrPivotSubFacetName(String name, String... fields){
    final String pivotFields = StringUtils.join(fields, ',');
    return StringUtils.join(
            "{!query='", name,
            "' stats='", name,
            "' range='", name,
            "' ", "ex=dt key='", name, "'}",
            pivotFields);
}
/**
 * Builds a terms query of the form {@code {!terms f=<field name>}v1,v2,...}.
 *
 * @param values  values to match; each one is converted with {@code FieldValue.getStringFieldValue}
 * @param field   descriptor of the field to query
 * @param scope   scope whose name selects the use case for resolving the field name
 * @param context search context used to resolve the field name
 * @return the terms query
 */
// NOTE(review): the raw List parameter looks like a generic declaration whose type argument was lost.
public static String buildSolrTermsQuery(List values, FieldDescriptor field, Scope scope, String context) {
final String prefixQuery =
"{!terms f=" + Fieldname.getFieldname(field,UseCase.valueOf(scope.name()), context) + "}";
// The values are joined with commas and without any escaping applied in this method.
final String query = values.stream()
.map( v -> FieldValue.getStringFieldValue(v, field))
.collect(Collectors.joining(","));
return prefixQuery + query;
}
/**
 * Builds the stats expression for a field: the custom facet name of the field with one
 * {@code <statistic>=true} local parameter inserted for every statistic enabled on the facet,
 * plus a {@code percentiles='...'} parameter when percentiles are requested.
 *
 * @param solrfieldName field the statistics are computed on
 * @param stats         facet describing the requested statistics
 * @return the custom facet name with the statistic parameters placed directly after each
 *         {@code "{!"} it contains
 */
public static String buildSolrStatsQuery(String solrfieldName, StatsFacet stats){
    final String namedField = buildSolrFacetCustomName(solrfieldName, stats);
    final StringBuilder localParams = new StringBuilder("{!");
    if (stats.getMin()) {
        localParams.append("min=true ");
    }
    if (stats.getMax()) {
        localParams.append("max=true ");
    }
    if (stats.getSum()) {
        localParams.append("sum=true ");
    }
    if (stats.getCount()) {
        localParams.append("count=true ");
    }
    if (stats.getMissing()) {
        localParams.append("missing=true ");
    }
    if (stats.getSumOfSquares()) {
        localParams.append("sumOfSquares=true ");
    }
    if (stats.getMean()) {
        localParams.append("mean=true ");
    }
    if (stats.getStddev()) {
        localParams.append("stddev=true ");
    }
    if (stats.getPercentiles().length > 0) {
        localParams.append("percentiles='")
                .append(StringUtils.join(stats.getPercentiles(), ','))
                .append("' ");
    }
    if (stats.getDistinctValues()) {
        localParams.append("distinctValues=true ");
    }
    if (stats.getCountDistinct()) {
        localParams.append("countDistinct=true ");
    }
    if (stats.getCardinality()) {
        localParams.append("cardinality=true ");
    }
    // The opening "{!" of the custom name is swapped for "{!" plus the collected parameters.
    return namedField.replace("{!", localParams.toString());
}
/**
 * Converts a value for use in an update by delegating to
 * {@code SolrUtils.Result.castForDescriptor}.
 *
 * @param field descriptor of the field being updated
 * @param value value to convert
 * @return the value returned by {@code castForDescriptor}
 */
public static Object buildUpdateQuery(FieldDescriptor field, Object value){
    final Object solrValue = SolrUtils.Result.castForDescriptor(value, field);
    return solrValue;
}
/**
 * Builds the JSON facet definition used to count subdocuments, based on one subdocument facet
 * of the search. The JSON has two entries: "parent_facet", a terms facet on the id field with a
 * nested "children_facet" query facet, and "childrenCount", a query facet with a nested
 * "parentFilteredCount" terms facet on the type field.
 *
 * @param search        search providing the facets, the children searches and the search string
 * @param factory       not used by this method
 * @param searchContext search context used when serializing the children filters
 * @return the JSON as a string, or {@code null} when the search has no subdocument facet
 */
// NOTE(review): the raw Optional type looks like a generic declaration whose type argument was lost.
public static String buildSubdocumentFacet(FulltextSearch search, DocumentFactory factory,String searchContext) {
final Optional facetOptional = search.getFacets().values().stream()
.filter(facet -> SubdocumentFacet.class.isAssignableFrom(facet.getClass()))
.map(genericFacet -> (SubdocumentFacet) genericFacet)
.map(facet -> {
// The facet name is used as the document type of the domain queries below.
final String type = facet.getFacetName();
String filter;
//final String childrenFilterSerialized;
// One "(<children filter> AND <search string>)" clause per filtered children search, OR-ed together.
filter = search.getChildrenSearches().stream()
.filter(FulltextSearch::hasFilter)
.map( childrenSearch -> {
final String childrenFilterSerialized = serializeFacetFilter(childrenSearch.getFilter(), search.getChildrenFactory(), searchContext, search.getStrict());
return "(" +childrenFilterSerialized + " AND " + search.getSearchString() +")";
})
.collect(Collectors.joining(" OR "));
// Without any children filter the plain search string is used.
if(StringUtils.isBlank(filter)) {
filter = search.getSearchString();
}
filter = "{!edismax}" + filter;
final String domainQuery= Fieldname.TYPE + ":" + type;
//Parent Facet
final ObjectNode childrenFacet = JsonNodeFactory.instance.objectNode()
.put("type", "query")
.put("q", filter);
childrenFacet.set("domain", JsonNodeFactory.instance.objectNode()
.put("blockChildren", domainQuery));
// NOTE(review): the limit of 999999999 presumably stands for "all parents" — confirm.
final ObjectNode parentFacet = JsonNodeFactory.instance.objectNode()
.put("type","terms")
.put("field", Fieldname.ID)
.put("limit",999999999)
.put("mincount",1);
parentFacet.set("sort", JsonNodeFactory.instance.objectNode().put("index","asc"));
parentFacet.set("domain", JsonNodeFactory.instance.objectNode().put("blockParent", domainQuery));
parentFacet.set("facet", JsonNodeFactory.instance.objectNode().set("children_facet", childrenFacet));
final ObjectNode parentFilteredObject = JsonNodeFactory.instance.objectNode();
parentFilteredObject.set("parentFilteredCount", JsonNodeFactory.instance.objectNode()
.put("type", "terms")
.put("field", Fieldname.TYPE)
.set("domain", JsonNodeFactory.instance.objectNode().put("blockParent", domainQuery)));
final ObjectNode childrenCount = JsonNodeFactory.instance.objectNode()
.put("type","query")
.put("mincount",1)
.put("q", filter);
childrenCount.set("domain", JsonNodeFactory.instance.objectNode()
.put("blockChildren", domainQuery));
childrenCount.set("facet", parentFilteredObject);
//Subdocument Count facet
final ObjectNode subDocumentFacet = JsonNodeFactory.instance.objectNode();
subDocumentFacet.set("parent_facet", parentFacet);
subDocumentFacet.set("childrenCount", childrenCount);
return subDocumentFacet.toString();
})
// Only one subdocument facet is used, whichever the stream yields.
.findAny();
return facetOptional.orElse(null);
}
}
public static final class FieldValue {
public static Object getFieldCaseValue(Object value, FieldDescriptor descriptor, UseCase useCase) {
if (ComplexFieldDescriptor.class.isAssignableFrom(descriptor.getClass())) {
ComplexFieldDescriptor complexDescriptor = (ComplexFieldDescriptor) descriptor;
if(value!=null) {
if(Object[].class.isAssignableFrom(value.getClass())){
return getFieldCaseValue(Arrays.asList((Object[]) value), descriptor, useCase);
}
if(Collection.class.isAssignableFrom(value.getClass()) && !useCase.equals(UseCase.Sort)){
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy