All downloads are FREE. The search and download features use the official Maven repository.

it.bancaditalia.oss.vtl.impl.environment.SDMXEnvironment Maven / Gradle / Ivy

/*
 * Copyright © 2020 Banca D'Italia
 *
 * Licensed under the EUPL, Version 1.2 (the "License");
 * You may not use this work except in compliance with the
 * License.
 * You may obtain a copy of the License at:
 *
 * https://joinup.ec.europa.eu/sites/default/files/custom-page/attachment/2020-03/EUPL-1.2%20EN.txt
 *
 * Unless required by applicable law or agreed to in
 * writing, software distributed under the License is
 * distributed on an "AS IS" basis,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 *
 * See the License for the specific language governing
 * permissions and limitations under the License.
 */
package it.bancaditalia.oss.vtl.impl.environment;

import static it.bancaditalia.oss.sdmx.api.PortableDataSet.OBS_LABEL;
import static it.bancaditalia.oss.sdmx.api.PortableDataSet.TIME_LABEL;
import static it.bancaditalia.oss.vtl.impl.types.data.date.PeriodHolder.Formatter.MONTH_PERIOD_FORMATTER;
import static it.bancaditalia.oss.vtl.impl.types.data.date.PeriodHolder.Formatter.QUARTER_PERIOD_FORMATTER;
import static it.bancaditalia.oss.vtl.impl.types.data.date.PeriodHolder.Formatter.SEMESTER_PERIOD_FORMATTER;
import static it.bancaditalia.oss.vtl.impl.types.data.date.PeriodHolder.Formatter.YEAR_PERIOD_FORMATTER;
import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.NUMBERDS;
import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.STRINGDS;
import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.TIMEDS;
import static it.bancaditalia.oss.vtl.util.ConcatSpliterator.concatenating;
import static it.bancaditalia.oss.vtl.util.Utils.entriesToMap;
import static it.bancaditalia.oss.vtl.util.Utils.entryByKey;
import static it.bancaditalia.oss.vtl.util.Utils.keepingKey;
import static it.bancaditalia.oss.vtl.util.Utils.keepingValue;
import static it.bancaditalia.oss.vtl.util.Utils.toEntry;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toSet;

import java.io.Serializable;
import java.security.InvalidParameterException;
import java.time.DateTimeException;
import java.time.format.DateTimeFormatter;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalQuery;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import it.bancaditalia.oss.sdmx.api.BaseObservation;
import it.bancaditalia.oss.sdmx.api.Codelist;
import it.bancaditalia.oss.sdmx.api.DataFlowStructure;
import it.bancaditalia.oss.sdmx.api.Dimension;
import it.bancaditalia.oss.sdmx.api.PortableTimeSeries;
import it.bancaditalia.oss.sdmx.api.SdmxMetaElement;
import it.bancaditalia.oss.sdmx.client.SdmxClientHandler;
import it.bancaditalia.oss.sdmx.exceptions.DataStructureException;
import it.bancaditalia.oss.sdmx.exceptions.SdmxException;
import it.bancaditalia.oss.vtl.config.ConfigurationManager;
import it.bancaditalia.oss.vtl.config.ConfigurationManagerFactory;
import it.bancaditalia.oss.vtl.config.VTLProperty;
import it.bancaditalia.oss.vtl.environment.Environment;
import it.bancaditalia.oss.vtl.exceptions.VTLException;
import it.bancaditalia.oss.vtl.exceptions.VTLMissingComponentsException;
import it.bancaditalia.oss.vtl.exceptions.VTLNestedException;
import it.bancaditalia.oss.vtl.impl.types.config.VTLPropertyImpl;
import it.bancaditalia.oss.vtl.impl.types.data.DateValue;
import it.bancaditalia.oss.vtl.impl.types.data.DoubleValue;
import it.bancaditalia.oss.vtl.impl.types.data.NullValue;
import it.bancaditalia.oss.vtl.impl.types.data.StringValue;
import it.bancaditalia.oss.vtl.impl.types.data.TimePeriodValue;
import it.bancaditalia.oss.vtl.impl.types.data.date.DateHolder;
import it.bancaditalia.oss.vtl.impl.types.data.date.MonthPeriodHolder;
import it.bancaditalia.oss.vtl.impl.types.data.date.PeriodHolder;
import it.bancaditalia.oss.vtl.impl.types.data.date.QuarterPeriodHolder;
import it.bancaditalia.oss.vtl.impl.types.data.date.SemesterPeriodHolder;
import it.bancaditalia.oss.vtl.impl.types.data.date.YearPeriodHolder;
import it.bancaditalia.oss.vtl.impl.types.dataset.DataPointBuilder;
import it.bancaditalia.oss.vtl.impl.types.dataset.DataStructureBuilder;
import it.bancaditalia.oss.vtl.impl.types.dataset.DataStructureComponentImpl;
import it.bancaditalia.oss.vtl.impl.types.dataset.LightFDataSet;
import it.bancaditalia.oss.vtl.impl.types.domain.EntireNumberDomainSubset;
import it.bancaditalia.oss.vtl.impl.types.lineage.LineageExternal;
import it.bancaditalia.oss.vtl.model.data.ComponentRole;
import it.bancaditalia.oss.vtl.model.data.ComponentRole.Attribute;
import it.bancaditalia.oss.vtl.model.data.ComponentRole.Identifier;
import it.bancaditalia.oss.vtl.model.data.ComponentRole.Measure;
import it.bancaditalia.oss.vtl.model.data.DataSet;
import it.bancaditalia.oss.vtl.model.data.DataSetMetadata;
import it.bancaditalia.oss.vtl.model.data.DataStructureComponent;
import it.bancaditalia.oss.vtl.model.data.ScalarValue;
import it.bancaditalia.oss.vtl.model.data.VTLValue;
import it.bancaditalia.oss.vtl.model.data.VTLValueMetadata;
import it.bancaditalia.oss.vtl.model.domain.NumberDomain;
import it.bancaditalia.oss.vtl.model.domain.StringDomain;
import it.bancaditalia.oss.vtl.model.domain.StringDomainSubset;
import it.bancaditalia.oss.vtl.model.domain.StringEnumeratedDomainSubset;
import it.bancaditalia.oss.vtl.model.domain.TimeDomain;
import it.bancaditalia.oss.vtl.model.domain.TimeDomainSubset;
import it.bancaditalia.oss.vtl.session.MetadataRepository;
import it.bancaditalia.oss.vtl.util.SerFunction;
import it.bancaditalia.oss.vtl.util.Utils;

/**
 * {@link Environment} that resolves names against SDMX providers through
 * {@link SdmxClientHandler}.
 * <p>
 * A name is handled by this environment when it matches {@link #SDMX_PATTERN},
 * i.e. it looks like {@code provider:dataflow/query} (optionally
 * {@code provider:dataflow(part)/query}), and {@code provider} is one of the
 * keys of {@link #PROVIDERS}. The parenthesised part is captured by the
 * pattern but is never read by this class.
 * <p>
 * NOTE(review): several declarations below have incomplete generic type
 * arguments (for example {@code Map> FORMATTERS}); they look like they were
 * lost when this file was extracted and should be restored from the upstream
 * source before compiling.
 */
public class SDMXEnvironment implements Environment, Serializable
{
	private static final long serialVersionUID = 1L;
	private static final Logger LOGGER = LoggerFactory.getLogger(SDMXEnvironment.class);
	/** Measure component named after the lower-cased SDMX observation label, over the number domain. */
	private static final DataStructureComponent OBS_VALUE_MEASURE = new DataStructureComponentImpl<>(OBS_LABEL.toLowerCase(), Measure.class, NUMBERDS);
	/** Identifier component named after the lower-cased SDMX time label, over the time domain. */
	private static final DataStructureComponent TIME_PERIOD_IDENTIFIER = DataStructureComponentImpl.of(TIME_LABEL.toLowerCase(), Identifier.class, TIMEDS);
	/** Attribute names that are filtered out of both series-level and observation-level attributes. */
	private static final Set UNSUPPORTED = Stream.of("CONNECTORS_AUTONAME", "action", "validFromDate", "ID").collect(toSet());
	private static final SortedMap PROVIDERS = SdmxClientHandler.getProviders(); // it will contain only built-in providers for now.
	/** Formatters tried by {@link #asDate}, each paired with the query that builds the parsed temporal value. */
	private static final Map> FORMATTERS = new HashMap<>();
	/** Groups: 1 = provider, 2 = dataflow, 3 = optional parenthesised part (unused here), 4 = query. */
	private static final Pattern SDMX_PATTERN = Pattern.compile("^(.+):(?:(.+?)(?:\\((.+)\\))?)/(.+)$");

	/**
	 * Configuration switch read by {@link #getMetadataSDMX}.
	 * <p>
	 * Despite the constant's name, the property key is
	 * {@code vtl.sdmx.keep.identifiers}: when its value is exactly
	 * {@code "true"} every dimension of the dataflow is kept as an identifier;
	 * for any other value the dimensions that are fixed in the query are
	 * exposed as attributes instead.
	 */
	public static final VTLProperty SDMX_ENVIRONMENT_AUTODROP_IDENTIFIERS = 
			new VTLPropertyImpl("vtl.sdmx.keep.identifiers", "True to keep subspaced identifiers", "false", false, false, "false");

	static
	{
		ConfigurationManagerFactory.registerSupportedProperties(SDMXEnvironment.class, SDMX_ENVIRONMENT_AUTODROP_IDENTIFIERS);
		
		// "HH" is hour-of-day (0-23). The previous "hh" is clock-hour-of-am-pm (1-12):
		// without an am/pm marker in the pattern it rejects hours 13-23 and never
		// resolves to an actual hour of the day.
		FORMATTERS.put(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"), DateHolder::of);
		FORMATTERS.put(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm"), DateHolder::of);
		FORMATTERS.put(DateTimeFormatter.ofPattern("yyyy-MM-dd HH"), DateHolder::of);
		FORMATTERS.put(DateTimeFormatter.ofPattern("yyyy-MM-dd"), DateHolder::of);
		FORMATTERS.put(YEAR_PERIOD_FORMATTER.get(), YearPeriodHolder::new);
		FORMATTERS.put(SEMESTER_PERIOD_FORMATTER.get(), SemesterPeriodHolder::new);
		FORMATTERS.put(QUARTER_PERIOD_FORMATTER.get(), QuarterPeriodHolder::new);
		FORMATTERS.put(MONTH_PERIOD_FORMATTER.get(), MonthPeriodHolder::new);
	}

	/**
	 * Tells whether this environment can resolve the given name.
	 *
	 * @param name the name to test
	 * @return {@code true} when the name matches {@link #SDMX_PATTERN} and its
	 *         provider part is a key of {@link #PROVIDERS}
	 */
	@Override
	public boolean contains(String name)
	{
		return getMatcher(name).isPresent();
	}

	/**
	 * Matches a name against {@link #SDMX_PATTERN}.
	 *
	 * @param name the name to match
	 * @return the matcher, already matched, or an empty optional when the name
	 *         does not match or its provider is not in {@link #PROVIDERS}
	 */
	private Optional getMatcher(String name)
	{
		return Optional.of(SDMX_PATTERN.matcher(name))
				.filter(matcher -> matcher.matches() && PROVIDERS.containsKey(matcher.group(1)));
	}

	/**
	 * Downloads the time series addressed by the name and converts them into a dataset.
	 *
	 * @param name a name in the form accepted by {@link #contains(String)}
	 * @return the dataset built by {@link #parseSDMXTable}, or an empty optional
	 *         when the name is not handled by this environment
	 * @throws VTLNestedException wrapping any {@link SdmxException} or
	 *         {@link DataStructureException} raised while retrieving or parsing the data
	 */
	@Override
	public Optional getValue(String name)
	{
		return getMatcher(name)
			.map(matcher ->	{
				String provider = matcher.group(1);
				String dataflow = matcher.group(2);
				// The provider is passed separately; the query sent to the client is "dataflow/key".
				String query = dataflow + "/" + matcher.group(4);
				try
				{
					List> table = SdmxClientHandler.getTimeSeries(provider, query, null, null);
					return parseSDMXTable(name, table);
				}
				catch (SdmxException | DataStructureException e)
				{
					throw new VTLNestedException("Fatal error contacting SDMX provider '" + provider + "'", e);
				}
			});
	}

	/**
	 * Builds the metadata of the dataset addressed by the name.
	 *
	 * @param name a name in the form accepted by {@link #contains(String)}
	 * @return the structure computed by {@link #getMetadataSDMX}, or an empty
	 *         optional when the name is not handled by this environment
	 */
	@Override
	public Optional getValueMetadata(String name)
	{
		return getMatcher(name)
			.map(matcher ->	{
				String provider = matcher.group(1);
				String dataflow = matcher.group(2);
				String query = matcher.group(4);

				// One token per dot-separated position of the query key.
				return getMetadataSDMX(provider, dataflow, query.split("\\."));
			});
	}

	/**
	 * Wraps the downloaded series into a {@link LightFDataSet} whose data points
	 * are produced from each observation of each series.
	 *
	 * @param name the original name, used to fetch the metadata and to label the lineage
	 * @param table the series returned by the SDMX client
	 * @return the dataset
	 * @throws NullPointerException if no metadata can be obtained for the name
	 * @throws VTLMissingComponentsException (while streaming) if a value refers
	 *         to a component that the metadata does not contain
	 */
	protected DataSet parseSDMXTable(String name, List> table) throws DataStructureException
	{
		DataSetMetadata metadata = (DataSetMetadata) getValueMetadata(name)
				.orElseThrow(() -> new NullPointerException("Could not retrieve SDMX metadata for " + name));

		// For each series, precompute the values shared by all of its observations:
		// its attributes plus its dimensions, minus the unsupported ones.
		// NOTE(review): the second map() only reads e.getKey(), so the value paired
		// by toEntry(...) on the line before it appears to be discarded.
		Map, Map>> seriesMeta = Utils.getStream(table)
				.map(toEntry(SerFunction.identity(), PortableTimeSeries::getAttributesMap))
				.map(e -> {
					ConcurrentMap attrs = new ConcurrentHashMap<>(e.getKey().getAttributesMap());
					attrs.putAll(e.getKey().getDimensionsMap());
					return new SimpleEntry<>(e.getKey(), attrs);
				})
				.map(keepingKey(SDMXEnvironment::extractAttrs))
				.collect(entriesToMap());

		return new LightFDataSet<>(metadata, t -> Utils.getStream(t) // for each series
				.map(s -> s.stream() // build a dp
						.map(o -> obsToCompValues(seriesMeta.get(s), o)
							.map(keepingValue(k -> metadata.getComponent(k).orElseThrow(() -> new VTLMissingComponentsException(k, metadata))))
							.map(keepingKey((k, v) -> (ScalarValue) k.getDomain().cast(v)))
							.collect(DataPointBuilder.toDataPoint(LineageExternal.of("SDMXEnv(" + name + ")"), metadata))))
				.collect(concatenating(Utils.ORDERED)), table);
	}

	/**
	 * Lists every name/value pair that makes up the data point of one observation:
	 * the series-level values, the observation-level attributes, the time slot
	 * (under {@code TIME_LABEL}) and the observed value (under {@code OBS_LABEL}).
	 *
	 * @param seriesLevelAttrs the values shared by all observations of the series
	 * @param o the observation
	 * @return a stream of entries, in the order described above
	 */
	private static Stream>> obsToCompValues(Map> seriesLevelAttrs, 
			BaseObservation o)
	{
		return Stream.concat(Utils.getStream(seriesLevelAttrs), Stream.concat(obsLevelAttrs(o),
						Stream.of(new SimpleEntry<>(TIME_LABEL, asDate(o)),
								new SimpleEntry<>(OBS_LABEL, DoubleValue.of(o.getValueAsDouble())))));
	}

	/**
	 * Parses the time slot of an observation by trying every formatter in
	 * {@link #FORMATTERS} until one succeeds.
	 *
	 * @param o the observation whose time slot is parsed
	 * @return a {@link TimePeriodValue} when the parsed value is a
	 *         {@link PeriodHolder}, a {@link DateValue} otherwise
	 * @throws DateTimeException the failure of the last formatter tried, when none
	 *         succeeds; since {@link #FORMATTERS} is a {@link HashMap} the order in
	 *         which formatters are tried is unspecified
	 */
	private static ScalarValue, ? extends TimeDomain> asDate(BaseObservation o)
	{
		DateTimeException last = null;
		for (DateTimeFormatter formatter : FORMATTERS.keySet())
			try
			{
				TemporalAccessor parsed = formatter.parse(o.getTimeslot(), FORMATTERS.get(formatter));
				if (parsed instanceof PeriodHolder)
					return TimePeriodValue.of((PeriodHolder) parsed);
				else
					return DateValue.of(parsed);
			}
			catch (DateTimeException e)
			{
				// Not this format: remember the failure and try the next formatter.
				last = e;
			}

		if (last != null)
			throw last;
		else
			throw new IllegalStateException("this point should not be reached");
	}

	/**
	 * Converts series-level string values into scalar values, dropping the
	 * entries whose key is listed in {@link #UNSUPPORTED}.
	 *
	 * @param attrs the raw name/value pairs of a series
	 * @return the remaining pairs with each value wrapped by {@link StringValue#of}
	 */
	private static Map> extractAttrs(Map attrs)
	{
		return Utils.getStream(attrs.entrySet())
				.filter(e -> !UNSUPPORTED.contains(e.getKey()))
				.map(keepingKey(StringValue::of))
				.map(keepingKey(v -> (ScalarValue) v))
				.collect(entriesToMap());
	}

	/**
	 * Converts the attributes of a single observation into scalar values,
	 * dropping the entries whose key is listed in {@link #UNSUPPORTED}.
	 *
	 * @param observation the observation whose attributes are read
	 * @return the remaining pairs; a {@code null} attribute value becomes the
	 *         null value of the string domain
	 */
	private static Stream>> obsLevelAttrs(BaseObservation observation)
	{
		return Utils.getStream(observation.getAttributes())
				.filter(entryByKey(k -> !UNSUPPORTED.contains(k)))
				.map(keepingKey(v -> (ScalarValue) (v != null ? StringValue.of(v) : NullValue.instance(STRINGDS))));
	}

	/**
	 * Creates the structure component describing an SDMX dimension or attribute.
	 * <p>
	 * The component name is the element id lower-cased, unless the id is wrapped
	 * in single quotes, in which case the quotes are removed and the case is kept.
	 * The component domain is the generic string domain when the element has no
	 * codelist; otherwise it is an enumerated string domain defined in the
	 * metadata repository from the codelist id and its keys.
	 *
	 * @param role the role of the resulting component
	 * @param meta the SDMX element
	 * @return the component
	 * @throws NullPointerException if the repository returns no domain for the codelist
	 */
	private static DataStructureComponent, StringDomain> elementToComponent(Class role, SdmxMetaElement meta)
	{
		String normalizedName = meta.getId().matches("'.*'") ? meta.getId().replaceAll("'(.*)'", "$1") : meta.getId().toLowerCase();

		Codelist codelist = meta.getCodeList();
		if (codelist == null)
			return new DataStructureComponentImpl<>(normalizedName, role, STRINGDS);
		
		MetadataRepository repository = ConfigurationManager.getDefault().getMetadataRepository();
		StringEnumeratedDomainSubset domain = repository.defineDomain(codelist.getId(), StringEnumeratedDomainSubset.class, codelist.keySet());
		Objects.requireNonNull(domain, "domain null for " + codelist.getId() + " - " + meta);
		
		return new DataStructureComponentImpl<>(normalizedName, role, domain);
	}

	/**
	 * Builds the dataset structure for a dataflow of a provider.
	 * <p>
	 * The structure always contains the time identifier and the observation
	 * measure. Every attribute of the dataflow becomes an attribute component.
	 * Each dimension becomes an identifier when the matching query token is
	 * empty or contains {@code '+'}; a dimension fixed to a single value becomes
	 * an attribute, unless {@link #SDMX_ENVIRONMENT_AUTODROP_IDENTIFIERS} is
	 * {@code "true"}, in which case all dimensions are identifiers.
	 *
	 * @param provider the SDMX provider name
	 * @param dataflow the dataflow identifier
	 * @param tokens the dot-separated parts of the query key, one per dimension
	 * @return the dataset structure
	 * @throws InvalidParameterException if the number of tokens differs from the
	 *         number of dimensions of the dataflow
	 * @throws VTLException wrapping any {@link SdmxException} raised by the client
	 */
	protected VTLValueMetadata getMetadataSDMX(String provider, String dataflow, String[] tokens)
	{
		try
		{
			LOGGER.trace("Retrieving DSD for {}:{}", provider, dataflow);
			DataFlowStructure dsd = SdmxClientHandler.getDataFlowStructure(provider, dataflow);

			// Load all the codes of each dimension
			List dimensions = dsd.getDimensions();
			if (tokens.length != dimensions.size())
				throw new InvalidParameterException("Query items " + Arrays.toString(tokens) + " do not match the dimensions of " 
						+ dataflow + " " + dimensions.stream().map(Dimension::getId).collect(toList()));
			for (Dimension d: dimensions)
			{
				String dimId = d.getId();
				LOGGER.trace("Retrieving codelist for dimension {} of {}:{}", dimId, provider, dataflow);
				// The result is ignored here; presumably the call is made so that the
				// codelist is loaded before elementToComponent reads it - TODO confirm.
				SdmxClientHandler.getCodes(provider, dataflow, dimId);
			}

			// remove the fixed (not wildcarded) dimensions from the list of identifiers
			List activeAttributes = new ArrayList<>(dsd.getAttributes());
			List activeDims = new ArrayList<>();

			if (!"true".equals(SDMX_ENVIRONMENT_AUTODROP_IDENTIFIERS.getValue()))
			{
				for (int i = 0; i < tokens.length; i++)
				{
					// An empty token or a '+'-joined list leaves the dimension free to vary.
					if (tokens[i].isEmpty() || tokens[i].indexOf('+') != -1)
						activeDims.add(dimensions.get(i));
					else
						activeAttributes.add(dimensions.get(i));
				}
			}
			else
			{
				activeDims = dimensions;
			}

			return Stream
					.concat(activeDims.stream().map(d -> (DataStructureComponent) elementToComponent(Identifier.class, d)), 
							activeAttributes.stream().map(d -> (DataStructureComponent) elementToComponent(Attribute.class, d)))
					.reduce(new DataStructureBuilder(), DataStructureBuilder::addComponent, DataStructureBuilder::merge)
					.addComponent(TIME_PERIOD_IDENTIFIER)
					.addComponent(OBS_VALUE_MEASURE)
					.build();
		}
		catch (SdmxException e)
		{
			throw new VTLException("SDMX", e);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy