
// org.molgenis.elasticsearch.index.IndexRequestGenerator Maven / Gradle / Ivy
package org.molgenis.elasticsearch.index;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.base.Joiner;
import org.molgenis.MolgenisFieldTypes.FieldTypeEnum;
import org.molgenis.data.AttributeMetaData;
import org.molgenis.data.Entity;
import org.molgenis.data.EntityMetaData;
import org.molgenis.data.Repository;
import org.molgenis.elasticsearch.util.MapperTypeSanitizer;
import org.molgenis.util.Cell;
import org.molgenis.util.RepositoryUtils;
import com.google.common.collect.Lists;
/**
* Creates an IndexRequest for indexing entities with ElasticSearch
*
* @author erwin
*
*/
public class IndexRequestGenerator
{
/** Log4j logger for this class. */
private static final Logger LOG = Logger.getLogger(IndexRequestGenerator.class);
/** Client used to prepare the bulk requests this generator produces; guaranteed non-null by the constructor. */
private final Client client;
/** Index name passed to the constructor; guaranteed non-null by the constructor. */
private final String indexName;
/**
 * Creates a generator bound to the given client and index name.
 *
 * @param client
 *            client stored for later use by this generator; must not be null
 * @param indexName
 *            index name stored for later use by this generator; must not be null
 * @throws IllegalArgumentException
 *             if {@code client} or {@code indexName} is null
 */
public IndexRequestGenerator(Client client, String indexName)
{
	// Validate in declaration order so that a null client is reported before a null index name.
	if (null == client) throw new IllegalArgumentException("Client is null");
	if (null == indexName) throw new IllegalArgumentException("IndexName is null");

	this.client = client;
	this.indexName = indexName;
}
/**
 * Returns an {@link Iterable} view over the index requests for the given repository.
 * <p>
 * No work is done when this method is called: every call to {@code iterator()} on the returned object
 * delegates to {@code indexRequestIterator(repository)} and hands back whatever that method returns.
 *
 * @param repository
 *            repository passed through unchanged to {@code indexRequestIterator}
 * @return an iterable whose iterators are created by {@code indexRequestIterator(repository)}
 */
public Iterable buildIndexRequest(final Repository repository)
{
	final Iterable requests = new Iterable()
	{
		@Override
		public Iterator iterator()
		{
			// A new iterator is requested from the helper each time the iterable is traversed.
			return indexRequestIterator(repository);
		}
	};
	return requests;
}
private Iterator indexRequestIterator(final Repository repository)
{
final Set xrefAndMrefColumns = new HashSet();
for (AttributeMetaData attr : repository.getEntityMetaData().getAtomicAttributes())
{
FieldTypeEnum fieldType = attr.getDataType().getEnumType();
boolean isXrefOrMref = fieldType.equals(FieldTypeEnum.XREF) || fieldType.equals(FieldTypeEnum.MREF);
if (isXrefOrMref) xrefAndMrefColumns.add(attr.getName());
}
return new Iterator()
{
private final long rows = RepositoryUtils.count(repository);
private static final int docsPerBulk = 1000;
private final Iterator extends Entity> it = repository.iterator();
private int row = 0;
@Override
public boolean hasNext()
{
return it.hasNext();
}
@SuppressWarnings("unchecked")
@Override
public BulkRequestBuilder next()
{
BulkRequestBuilder bulkRequest = client.prepareBulk();
final long maxRow = Math.min(row + docsPerBulk, rows);
for (; row < maxRow; ++row)
{
Entity entity = it.next();
Map doc = new HashMap();
for (String attrName : entity.getAttributeNames())
{
// Serialize collections to be able to sort on them, elasticsearch does not support sorting on
// list fields
Object id = null;
Object key = null;
Object value = entity.get(attrName);
if (value instanceof Entity)
{
Entity refEntity = (Entity) value;
EntityMetaData refEntityMetaData = refEntity.getEntityMetaData();
key = refEntity.get(refEntityMetaData.getIdAttribute().getName());
value = refEntity.get(refEntityMetaData.getLabelAttribute().getName());
}
if (value instanceof Cell)
{
Cell> cell = (Cell>) value;
id = cell.getId();
key = cell.getKey();
value = cell.getValue();
}
if (value instanceof Collection)
{
Collection> values = (Collection>) value;
if (!values.isEmpty())
{
Object exampleValue = values.iterator().next();
if (exampleValue instanceof Cell)
{
List mrefIds = null;
List mrefKeys = null;
for (Iterator> it = ((Collection>) values).iterator(); it.hasNext();)
{
Cell> cell = it.next();
Integer cellId = cell.getId();
if (cellId != null)
{
if (mrefIds == null) mrefIds = new ArrayList();
mrefIds.add(cellId);
}
String cellKey = cell.getKey();
if (cellKey != null)
{
if (mrefKeys == null) mrefKeys = new ArrayList();
mrefKeys.add(cellKey);
}
}
if (mrefIds != null) id = mrefIds;
if (mrefKeys != null) key = mrefKeys;
}
else if (exampleValue instanceof Entity)
{
List | |
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy