All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.molgenis.data.elasticsearch.index.IndexRequestGenerator Maven / Gradle / Ivy

The newest version!
package org.molgenis.data.elasticsearch.index;

import static java.util.Objects.requireNonNull;
import static org.molgenis.MolgenisFieldTypes.FieldTypeEnum.CATEGORICAL;
import static org.molgenis.MolgenisFieldTypes.FieldTypeEnum.CATEGORICAL_MREF;
import static org.molgenis.MolgenisFieldTypes.FieldTypeEnum.MREF;
import static org.molgenis.MolgenisFieldTypes.FieldTypeEnum.XREF;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.molgenis.MolgenisFieldTypes.FieldTypeEnum;
import org.molgenis.data.AttributeMetaData;
import org.molgenis.data.Entity;
import org.molgenis.data.EntityMetaData;
import org.molgenis.data.Repository;
import org.molgenis.data.elasticsearch.ElasticsearchEntityFactory;
import org.molgenis.data.elasticsearch.util.MapperTypeSanitizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Creates an IndexRequest for indexing entities with ElasticSearch
 * 
 * @author erwin
 * 
 */
public class IndexRequestGenerator
{
	private static final Logger LOG = LoggerFactory.getLogger(IndexRequestGenerator.class);

	private final Client client;
	private final String indexName;
	private final ElasticsearchEntityFactory elasticsearchEntityFactory;

	public IndexRequestGenerator(Client client, String indexName, ElasticsearchEntityFactory elasticsearchEntityFactory)
	{
		this.client = requireNonNull(client);
		this.indexName = requireNonNull(indexName);
		this.elasticsearchEntityFactory = requireNonNull(elasticsearchEntityFactory);
	}

	public Iterable buildIndexRequest(final Repository repository)
	{
		return new Iterable()
		{
			@Override
			public Iterator iterator()
			{
				return indexRequestIterator(repository);
			}
		};
	}

	private Iterator indexRequestIterator(final Repository repository)
	{
		final Set xrefAndMrefColumns = new HashSet();
		for (AttributeMetaData attr : repository.getEntityMetaData().getAtomicAttributes())
		{
			FieldTypeEnum fieldType = attr.getDataType().getEnumType();
			boolean isXrefOrMref = fieldType == XREF || fieldType == MREF || fieldType == CATEGORICAL
					|| fieldType == CATEGORICAL_MREF;
			if (isXrefOrMref) xrefAndMrefColumns.add(attr.getName());
		}

		return new Iterator()
		{
			private final long rows = repository.count();
			private static final int docsPerBulk = 1000;
			private final Iterator it = repository.iterator();
			private final EntityMetaData entityMetaData = repository.getEntityMetaData();
			private int row = 0;

			@Override
			public boolean hasNext()
			{
				return it.hasNext();
			}

			@Override
			public BulkRequestBuilder next()
			{
				BulkRequestBuilder bulkRequest = client.prepareBulk();

				final long maxRow = Math.min(row + docsPerBulk, rows);

				for (; row < maxRow; ++row)
				{
					Entity entity = it.next();
					Map doc = elasticsearchEntityFactory.create(entityMetaData, entity);
					IndexRequestBuilder request = client.prepareIndex(indexName,
							MapperTypeSanitizer.sanitizeMapperType(repository.getName()));

					request.setSource(doc);
					bulkRequest.add(request);
					if ((row + 1) % 100 == 0) LOG.info("Added [" + (row + 1) + "] documents");
				}
				LOG.info("Added [" + row + "] documents");

				return bulkRequest;
			}

			@Override
			public void remove()
			{
				throw new UnsupportedOperationException();
			}
		};
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy