All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.molgenis.data.omx2emx.Omx2EmxConverter Maven / Gradle / Ivy

The newest version!
package org.molgenis.data.omx2emx;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.apache.commons.io.IOUtils;
import org.molgenis.data.AttributeMetaData;
import org.molgenis.data.Entity;
import org.molgenis.data.MolgenisDataException;
import org.molgenis.data.Repository;
import org.molgenis.data.RepositoryCollection;
import org.molgenis.data.Writable;
import org.molgenis.data.WritableFactory;
import org.molgenis.data.support.MapEntity;
import org.molgenis.util.EntityUtils;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

public class Omx2EmxConverter
{
	private static enum OMX_TABS
	{
		OBSERVABLEFEATURE, PROTOCOL, DATASET, CATEGORY, ONTOLOGY, ONTOLOGYTERM, ACCESSION, OBSERVATIONTARGET, INDIVIDUAL, PANEL, SPECIES
	}

	private static enum DATASET_COLUMNS
	{
		IDENTIFIER, NAME, PROTOCOLUSED_IDENTIFIER, DESCRIPTION
	}

	public static enum OBSERVABLE_FEATURE_COLUMNS
	{
		IDENTIFIER, NAME, DESCRIPTION, DATATYPE, UNIT_IDENTIFIER, DEFINITIONS_IDENTIFIER
	}

	public static enum PROTOCOL_COLUMNS
	{
		IDENTIFIER, NAME, DESCRIPTION, FEATURES_IDENTIFIER, SUBPROTOCOLS_IDENTIFIER, ROOT, ACTIVE
	}

	public static enum CATEGORY_COLUMNS
	{
		IDENTIFIER, NAME, VALUECODE, OBSERVABLEFEATURE_IDENTIFIER
	}

	private final RepositoryCollection omxRepositoryCollection;
	private final String namespace;
	private Map protocols;

	public Omx2EmxConverter(RepositoryCollection omxRepositoryCollection, String namespace)
	{
		this.omxRepositoryCollection = omxRepositoryCollection;
		this.namespace = namespace;
	}

	public void convert(WritableFactory writableFactory)
	{
		System.out.println("Write entities...");
		writeEntities(writableFactory);

		System.out.println("Write attributes...");
		writeAttributes(writableFactory);

		System.out.println("Write datasets...");
		writeDatasets(writableFactory);

		System.out.println("Conversion done");
	}

	private void writeDatasets(WritableFactory writableFactory)
	{
		Map> categories = Maps.newHashMap();

		// Categories
		if (omxContainsEntity(OMX_TABS.CATEGORY.toString()))
		{
			Set observableFeatureIdentifiers = Sets.newHashSet();
			Repository categoryRepo = omxRepositoryCollection.getRepository(OMX_TABS.CATEGORY.toString());
			for (Entity category : categoryRepo)
			{
				String observableFeatureIdentifier = category
						.getString(CATEGORY_COLUMNS.OBSERVABLEFEATURE_IDENTIFIER.toString());
				if (!observableFeatureIdentifiers.contains(observableFeatureIdentifier))
				{
					observableFeatureIdentifiers.add(observableFeatureIdentifier);
					Writable lut = writableFactory.createWritable(getFullEntityName(observableFeatureIdentifier),
							Arrays.asList("Name"));

					for (Entity cat : categoryRepo)
					{
						String catObservableFeatureIdentifier = cat
								.getString(CATEGORY_COLUMNS.OBSERVABLEFEATURE_IDENTIFIER.toString());

						if (catObservableFeatureIdentifier.equalsIgnoreCase(observableFeatureIdentifier))
						{
							String valueCode = cat.getString(CATEGORY_COLUMNS.VALUECODE.toString());
							String name = cat.getString(CATEGORY_COLUMNS.NAME.toString());

							Entity entity = new MapEntity();
							entity.set("Name", name);
							lut.add(entity);

							Map categoryMap = categories.get(catObservableFeatureIdentifier);
							if (categoryMap == null)
							{
								categoryMap = Maps. newHashMap();
								categories.put(catObservableFeatureIdentifier, categoryMap);
							}
							categoryMap.put(valueCode, name);
						}
					}
				}
			}
		}

		// Individuals
		if (omxContainsEntity(OMX_TABS.INDIVIDUAL.toString()))
		{

			Writable writable = writableFactory.createWritable(getFullEntityName("Individual"),
					Arrays.asList("Identifier", "Name", "Description"));
			Repository individualRepo = omxRepositoryCollection.getRepository(OMX_TABS.INDIVIDUAL.toString());
			writable.add(individualRepo.stream());
		}

		// Panels
		if (omxContainsEntity(OMX_TABS.PANEL.toString()))
		{
			Writable writable = writableFactory.createWritable(getFullEntityName("Panel"),
					Arrays.asList("Identifier", "Name", "NumberOfIndividuals"));
			Repository panelRepo = omxRepositoryCollection.getRepository(OMX_TABS.PANEL.toString());
			writable.add(panelRepo.stream());
		}

		for (String entityName : omxRepositoryCollection.getEntityNames())
		{
			if (entityName.toLowerCase().startsWith("dataset_"))
			{
				Repository repo = omxRepositoryCollection.getRepository(entityName);
				List attributeNames = Lists.newArrayList("Identifier");
				for (AttributeMetaData attr : repo.getEntityMetaData().getAtomicAttributes())
				{
					attributeNames.add(attr.getName());
				}

				String dataset = entityName.substring("dataset_".length());
				Writable writable = writableFactory.createWritable(getFullEntityName(dataset), attributeNames);
				for (Entity excelEntity : repo)
				{
					Entity entity = new MapEntity();
					entity.set(excelEntity);
					entity.set("Identifier", UUID.randomUUID().toString());

					for (String attributeName : attributeNames)
					{
						if (categories.containsKey(attributeName))
						{
							String valueCode = entity.getString(attributeName);
							entity.set(attributeName, categories.get(attributeName).get(valueCode));
						}
					}

					writable.add(entity);
				}
			}
		}

	}

	private void writeAttributes(WritableFactory writableFactory)
	{
		Writable attributes = writableFactory.createWritable("attributes", Arrays.asList("name", "entity", "label",
				"dataType", "description", "refEntity", "nillable", "idAttribute", "visible"));
		try
		{
			// Rename Indivual and Panel to Individuals and Panels because it would collide with omx Panel and
			// Individual tables

			// Individual
			if (omxContainsEntity(OMX_TABS.INDIVIDUAL.toString()))
			{
				Entity identifier = new MapEntity();
				identifier.set("name", "Identifier");
				identifier.set("entity", getFullEntityName("Individual"));
				identifier.set("dataType", "string");
				identifier.set("nillable", false);
				identifier.set("idAttribute", true);
				attributes.add(identifier);

				Entity name = new MapEntity();
				name.set("name", "Name");
				name.set("entity", getFullEntityName("Individual"));
				name.set("dataType", "string");
				name.set("nillable", true);
				name.set("idAttribute", false);
				attributes.add(name);

				Entity description = new MapEntity();
				description.set("name", "Description");
				description.set("entity", getFullEntityName("Individual"));
				description.set("dataType", "string");
				description.set("nillable", false);
				description.set("idAttribute", false);
				attributes.add(description);
			}

			// Panel
			if (omxContainsEntity(OMX_TABS.PANEL.toString()))
			{
				Entity identifier = new MapEntity();
				identifier.set("name", "Identifier");
				identifier.set("entity", getFullEntityName("Panel"));
				identifier.set("dataType", "string");
				identifier.set("nillable", false);
				identifier.set("idAttribute", true);
				attributes.add(identifier);

				Entity name = new MapEntity();
				name.set("name", "Name");
				name.set("entity", getFullEntityName("Panel"));
				name.set("dataType", "string");
				name.set("nillable", true);
				name.set("idAttribute", false);
				attributes.add(name);

				Entity nrOfIndividuals = new MapEntity();
				nrOfIndividuals.set("name", "NumberOfIndividuals");
				nrOfIndividuals.set("entity", getFullEntityName("Panel"));
				nrOfIndividuals.set("dataType", "int");
				nrOfIndividuals.set("nillable", true);
				nrOfIndividuals.set("idAttribute", false);
				attributes.add(nrOfIndividuals);
			}

			// Categorical
			if (omxContainsEntity(OMX_TABS.CATEGORY.toString()))
			{
				Set observableFeatureIdentifiers = Sets.newHashSet();
				Repository categoryRepo = omxRepositoryCollection.getRepository(OMX_TABS.CATEGORY.toString());

				for (Entity category : categoryRepo)
				{
					observableFeatureIdentifiers
							.add(category.getString(CATEGORY_COLUMNS.OBSERVABLEFEATURE_IDENTIFIER.toString()));
				}

				for (String observableFeatureIdentifier : observableFeatureIdentifiers)
				{
					Entity categoryName = new MapEntity();
					categoryName.set("name", "Name");
					categoryName.set("entity", getFullEntityName(observableFeatureIdentifier));
					categoryName.set("dataType", "string");// ?
					categoryName.set("nillable", false);
					categoryName.set("idAttribute", true);
					attributes.add(categoryName);
				}
			}

			// DataSet identifiers
			for (String entity : omxRepositoryCollection.getEntityNames())
			{
				if (entity.toLowerCase().startsWith("dataset_"))
				{
					String dataset = entity.substring("dataset_".length());
					Entity idAttribute = new MapEntity();
					idAttribute.set("name", "Identifier");
					idAttribute.set("entity", getFullEntityName(dataset));
					idAttribute.set("label", "Identifier");
					idAttribute.set("dataType", "string");
					idAttribute.set("nillable", false);
					idAttribute.set("idAttribute", true);
					idAttribute.set("visible", false);
					attributes.add(idAttribute);
				}
			}

			for (Entity feature : getObservableFeatures())
			{
				String identifier = feature.getString(OBSERVABLE_FEATURE_COLUMNS.IDENTIFIER.toString());
				List protocols = getObservableFeatureProtocol(identifier);
				if (protocols.isEmpty())
				{
					System.out.println("WARN: dangling ObservableFeature with identifier [" + identifier + "]");
				}
				else
				{
					for (Entity protocol : protocols)
					{
						String dataType = feature.getString(OBSERVABLE_FEATURE_COLUMNS.DATATYPE.toString());
						String entity = protocol.getString(PROTOCOL_COLUMNS.IDENTIFIER.toString());

						Entity attribute = new MapEntity();
						attribute.set("name", identifier);
						attribute.set("entity", getFullEntityName(entity));
						attribute.set("label", feature.getString(OBSERVABLE_FEATURE_COLUMNS.NAME.toString()));
						attribute.set("dataType", dataType);
						attribute.set("description",
								feature.getString(OBSERVABLE_FEATURE_COLUMNS.DESCRIPTION.toString()));
						attribute.set("nillable", true);
						attribute.set("idAttribute", false);

						// Categorical
						if ((dataType != null) && dataType.equalsIgnoreCase("categorical"))
						{
							attribute.set("refEntity", getFullEntityName(identifier));
						}
						// xref/mref
						else if ((dataType != null)
								&& (dataType.equalsIgnoreCase("xref") || dataType.equalsIgnoreCase("mref")))
						{
							// We assume that all ObservedValues of an ObservableFeature in an xref column point to the
							// same
							// entity, find it

							for (String entityName : omxRepositoryCollection.getEntityNames())
							{
								// See where the first not null row points to
								Repository repo = omxRepositoryCollection.getRepository(entityName);
								if (entityName.toLowerCase().startsWith("dataset_"))

								{
									List refs = null;
									Iterator it = repo.iterator();
									while ((refs == null) && it.hasNext())
									{
										Entity row = it.next();
										refs = row.getList(identifier);
									}

									if (refs != null)
									{
										for (String ref : refs)
										{
											String refEntity = getRefEntity(ref);
											if (refEntity != null)
											{
												attribute.set("refEntity", refEntity);
												break;
											}
										}
									}
								}

							}
						}

						attributes.add(attribute);
					}
				}
			}

			// Compound attributes
			for (Entity protocol : getProtocols().values())
			{
				List subprotocolIdentifiers = protocol
						.getList(PROTOCOL_COLUMNS.SUBPROTOCOLS_IDENTIFIER.toString());
				if (subprotocolIdentifiers != null)
				{
					for (String subprotocolIdentifier : subprotocolIdentifiers)
					{
						String entity = protocol.getString(PROTOCOL_COLUMNS.IDENTIFIER.toString());
						String refEntity = subprotocolIdentifier;

						Entity subprotocol = getProtocol(subprotocolIdentifier);

						Entity attribute = new MapEntity();
						attribute.set("name", subprotocolIdentifier);
						attribute.set("entity", getFullEntityName(entity));
						attribute.set("dataType", "compound");
						attribute.set("label", subprotocol.get(PROTOCOL_COLUMNS.NAME.toString()));
						attribute.set("refEntity", getFullEntityName(refEntity));
						attributes.add(attribute);
					}
				}
			}

		}
		finally
		{
			IOUtils.closeQuietly(attributes);
		}
	}

	private String getRefEntity(String identifier)
	{
		if (containsIdentifier(OMX_TABS.INDIVIDUAL.toString(), identifier))
		{
			return getFullEntityName("Individual");
		}

		if (containsIdentifier(OMX_TABS.PANEL.toString(), identifier))
		{
			return getFullEntityName("Panel");
		}

		return null;
	}

	private List getObservableFeatureProtocol(String observableFeatureIdentifier)
	{
		List protocols = new ArrayList();
		for (Entity protocol : getProtocols().values())
		{
			List protocolFeatures = protocol.getList(PROTOCOL_COLUMNS.FEATURES_IDENTIFIER.toString());
			if (protocolFeatures != null)
			{
				for (String protocolFeature : protocolFeatures)
				{
					if (observableFeatureIdentifier.equalsIgnoreCase(protocolFeature))
					{
						protocols.add(protocol);
					}
				}
			}
		}

		return protocols;
	}

	private String getFullEntityName(String name)
	{
		name = name.replace('-', '_'); // sanitize name
		return namespace == null ? name : namespace + "_" + name;
	}

	private void writeEntities(WritableFactory writableFactory)
	{
		Writable entities = writableFactory.createWritable("entities",
				Arrays.asList("name", "description", "abstract", "label", "extends"));
		try
		{
			// Protocols
			for (Entity protocol : getProtocols().values())
			{
				Entity protocolMeta = new MapEntity();
				protocolMeta.set("name", getFullEntityName(protocol.getString(PROTOCOL_COLUMNS.IDENTIFIER.toString())));
				protocolMeta.set("label", protocol.getString(PROTOCOL_COLUMNS.NAME.toString()));
				protocolMeta.set("description", protocol.getString(PROTOCOL_COLUMNS.DESCRIPTION.toString()));
				protocolMeta.set("abstract", true);
				entities.add(protocolMeta);
			}

			// Datasets
			for (Entity dataset : getDatasets())
			{
				String protocolUsedIdentifier = dataset.getString(DATASET_COLUMNS.PROTOCOLUSED_IDENTIFIER.toString());
				if (protocolUsedIdentifier == null)
				{
					throw new MolgenisDataException("Missing protocolUsed");
				}

				Entity protocolUsed = getProtocol(protocolUsedIdentifier);
				if (protocolUsed == null)
				{
					throw new MolgenisDataException("Missing protocol [" + protocolUsedIdentifier + "]");
				}

				Entity datasetMeta = new MapEntity();
				datasetMeta.set("name", getFullEntityName(dataset.getString(DATASET_COLUMNS.IDENTIFIER.toString())));
				datasetMeta.set("label", dataset.getString(DATASET_COLUMNS.NAME.toString()));
				datasetMeta.set("description", dataset.getString(DATASET_COLUMNS.DESCRIPTION.toString()));
				datasetMeta.set("extends",
						getFullEntityName(protocolUsed.getString(PROTOCOL_COLUMNS.IDENTIFIER.toString())));
				entities.add(datasetMeta);
			}

			// Categories
			if (omxContainsEntity(OMX_TABS.CATEGORY.toString()))
			{
				Set observableFeatureIdentifiers = Sets.newHashSet();
				Repository categoryRepo = omxRepositoryCollection.getRepository(OMX_TABS.CATEGORY.toString());

				for (Entity category : categoryRepo)
				{
					observableFeatureIdentifiers
							.add(category.getString(CATEGORY_COLUMNS.OBSERVABLEFEATURE_IDENTIFIER.toString()));
				}

				for (String observableFeatureIdentifier : observableFeatureIdentifiers)
				{
					Entity catMeta = new MapEntity();
					catMeta.set("name", getFullEntityName(observableFeatureIdentifier));
					catMeta.set("label", observableFeatureIdentifier);
					entities.add(catMeta);
				}
			}

			// Rename Indivual and Panel to Individuals and Panels because it would collide with omx Panel and
			// Individual
			// tables

			// Individuals
			if (omxContainsEntity(OMX_TABS.INDIVIDUAL.toString()))
			{
				Entity individualMeta = new MapEntity();
				individualMeta.set("name", getFullEntityName("Individual"));
				individualMeta.set("label", "Individual");
				entities.add(individualMeta);
			}

			// Panels
			if (omxContainsEntity(OMX_TABS.PANEL.toString()))
			{
				Entity panelMeta = new MapEntity();
				panelMeta.set("name", getFullEntityName("Panel"));
				panelMeta.set("label", "Panel");
				entities.add(panelMeta);
			}

		}
		finally
		{
			IOUtils.closeQuietly(entities);
		}
	}

	private Iterable getDatasets()
	{
		return omxRepositoryCollection.getRepository(OMX_TABS.DATASET.toString());
	}

	private Iterable getObservableFeatures()
	{
		return omxRepositoryCollection.getRepository(OMX_TABS.OBSERVABLEFEATURE.toString());
	}

	private Map getProtocols()
	{
		if (protocols == null)
		{
			protocols = Maps.newLinkedHashMap();
			for (Entity protocol : omxRepositoryCollection.getRepository(OMX_TABS.PROTOCOL.toString()))
			{
				if (!EntityUtils.isEmpty(protocol))
				{
					protocols.put(protocol.getString(PROTOCOL_COLUMNS.IDENTIFIER.toString()), protocol);
				}
			}
		}

		return protocols;
	}

	private Entity getProtocol(String identifier)
	{
		Entity protocol = getProtocols().get(identifier);
		if (protocol == null)
		{
			throw new IllegalArgumentException("Unknown protocol [" + identifier + "]");
		}

		return protocol;
	}

	private boolean omxContainsEntity(String name)
	{
		for (String entity : omxRepositoryCollection.getEntityNames())
		{
			if (entity.equalsIgnoreCase(name))
			{
				return true;
			}
		}

		return false;
	}

	private boolean containsIdentifier(String entityName, String identifier)
	{
		if (!omxContainsEntity(entityName))
		{
			return false;
		}

		for (Entity entity : omxRepositoryCollection.getRepository(entityName))
		{
			String entityIdentifier = entity.getString("identifier");
			if ((entityIdentifier != null) && entityIdentifier.equalsIgnoreCase(identifier))
			{
				return true;
			}
		}

		return false;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy