// All downloads are FREE. Search and download functionality uses the official Maven repository.

// org.eobjects.analyzer.beans.ReferenceDataMatcherAnalyzer Maven / Gradle / Ivy

// The newest version!
/**
 * AnalyzerBeans
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.eobjects.analyzer.beans;

import java.util.ArrayList;
import java.util.List;

import org.eobjects.analyzer.beans.api.Alias;
import org.eobjects.analyzer.beans.api.Analyzer;
import org.eobjects.analyzer.beans.api.AnalyzerBean;
import org.eobjects.analyzer.beans.api.Categorized;
import org.eobjects.analyzer.beans.api.Configured;
import org.eobjects.analyzer.beans.api.Description;
import org.eobjects.analyzer.beans.api.Initialize;
import org.eobjects.analyzer.beans.api.OutputColumns;
import org.eobjects.analyzer.beans.api.Validate;
import org.eobjects.analyzer.beans.categories.ValidationCategory;
import org.eobjects.analyzer.beans.convert.ConvertToStringTransformer;
import org.eobjects.analyzer.beans.transform.DictionaryMatcherTransformer;
import org.eobjects.analyzer.beans.transform.StringPatternMatcherTransformer;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.data.MockInputColumn;
import org.eobjects.analyzer.data.MockInputRow;
import org.eobjects.analyzer.reference.Dictionary;
import org.eobjects.analyzer.reference.StringPattern;
import org.eobjects.analyzer.reference.SynonymCatalog;

@AnalyzerBean("Reference data matcher")
@Alias("Matching analyzer")
@Description("Check your data values against multiple forms of reference data in one simple analyzer step.\n"
		+ "This analyzer provides a handy shortcut for doing matching with dictionaries, synonym lookups or string patterns matching, retrieving matching matrices for all matches.")
@Categorized(ValidationCategory.class)
public class ReferenceDataMatcherAnalyzer implements Analyzer {

	@Configured(order = 1)
	InputColumn[] columns;

	@Configured(order = 2, required = false)
	Dictionary[] dictionaries;

	@Configured(order = 3, required = false)
	SynonymCatalog[] synonymCatalogs;

	@Configured(order = 4, required = false)
	StringPattern[] stringPatterns;

	private BooleanAnalyzer _booleanAnalyzer;
	private DictionaryMatcherTransformer[] _dictionaryMatchers;
	private StringPatternMatcherTransformer[] _stringPatternMatchers;
	private List> _matchColumns;

	public ReferenceDataMatcherAnalyzer(InputColumn[] columns,
			Dictionary[] dictionaries, SynonymCatalog[] synonymCatalogs,
			StringPattern[] stringPatterns) {
		this();
		this.columns = columns;
		this.dictionaries = dictionaries;
		this.stringPatterns = stringPatterns;
		this.synonymCatalogs = synonymCatalogs;
	}

	public ReferenceDataMatcherAnalyzer() {
	}

	@Validate
	public void validate() {
		if (!isDictionaryMatchingEnabled() && !isSynonymCatalogLookupEnabled()
				&& !isStringPatternMatchingEnabled()) {
			throw new IllegalStateException(
					"No dictionaries, synonym catalogs or string patterns selected");
		}
	}

	@Initialize
	public void init() {
		_dictionaryMatchers = new DictionaryMatcherTransformer[columns.length];
		_stringPatternMatchers = new StringPatternMatcherTransformer[columns.length];

		_matchColumns = new ArrayList>();

		OutputColumns outputColumns;
		for (int i = 0; i < columns.length; i++) {
			if (isDictionaryMatchingEnabled()) {
				// create matcher for dictionaries
				DictionaryMatcherTransformer dictionaryMatcher = new DictionaryMatcherTransformer(
						columns[i], dictionaries);
				outputColumns = dictionaryMatcher.getOutputColumns();
				addMatchColumns(outputColumns);
				_dictionaryMatchers[i] = dictionaryMatcher;
			}

			if (isSynonymCatalogLookupEnabled()) {
				outputColumns = new OutputColumns(synonymCatalogs.length);
				for (int j = 0; j < synonymCatalogs.length; j++) {
					SynonymCatalog synonymCatalog = synonymCatalogs[j];
					outputColumns.setColumnType(j, Boolean.class);
					outputColumns.setColumnName(j, columns[i].getName()
							+ " in " + synonymCatalog.getName());
				}
				addMatchColumns(outputColumns);
			}

			if (isStringPatternMatchingEnabled()) {
				// create matcher for string patterns
				StringPatternMatcherTransformer stringPatternMatcher = new StringPatternMatcherTransformer(
						columns[i], stringPatterns);
				outputColumns = stringPatternMatcher.getOutputColumns();
				addMatchColumns(outputColumns);
				_stringPatternMatchers[i] = stringPatternMatcher;
			}
		}

		@SuppressWarnings("unchecked")
		InputColumn[] columnArray = _matchColumns
				.toArray(new InputColumn[_matchColumns.size()]);
		_booleanAnalyzer = new BooleanAnalyzer(columnArray);
		_booleanAnalyzer.init();
	}

	private boolean isStringPatternMatchingEnabled() {
		return stringPatterns != null && stringPatterns.length > 0;
	}

	private boolean isSynonymCatalogLookupEnabled() {
		return synonymCatalogs != null && synonymCatalogs.length > 0;
	}

	private boolean isDictionaryMatchingEnabled() {
		return dictionaries != null && dictionaries.length > 0;
	}

	private void addMatchColumns(OutputColumns outputColumns) {
		int count = outputColumns.getColumnCount();
		for (int i = 0; i < count; i++) {
			String columnName = outputColumns.getColumnName(i);
			InputColumn col = new MockInputColumn(columnName,
					Boolean.class);
			_matchColumns.add(col);
		}
	}

	@Override
	public void run(InputRow row, int distinctCount) {
		MockInputRow mockInputRow = new MockInputRow();

		int matchColumnIndex = 0;
		for (int i = 0; i < columns.length; i++) {
			final Object value = row.getValue(columns[i]);
			final String stringValue = ConvertToStringTransformer
					.transformValue(value);
			mockInputRow.put(columns[i], value);

			if (isDictionaryMatchingEnabled()) {
				Object[] matches = _dictionaryMatchers[i].transform(row);
				for (Object match : matches) {
					assert match instanceof Boolean;

					InputColumn matchColumn = _matchColumns
							.get(matchColumnIndex);
					matchColumnIndex++;
					mockInputRow.put(matchColumn, match);
				}
			}

			if (isSynonymCatalogLookupEnabled()) {
				for (SynonymCatalog synonymCatalog : synonymCatalogs) {
					final InputColumn matchColumn = _matchColumns
							.get(matchColumnIndex);
					matchColumnIndex++;
					final String masterTerm = synonymCatalog
							.getMasterTerm(stringValue);
					if (masterTerm == null) {
						// no match
						mockInputRow.put(matchColumn, Boolean.FALSE);
					} else {
						mockInputRow.put(matchColumn, Boolean.TRUE);
					}
				}
			}

			if (isStringPatternMatchingEnabled()) {
				Object[] matches = _stringPatternMatchers[i].transform(row);
				for (Object match : matches) {
					assert match instanceof Boolean;
					InputColumn matchColumn = _matchColumns
							.get(matchColumnIndex);
					matchColumnIndex++;
					mockInputRow.put(matchColumn, match);
				}
			}
		}

		_booleanAnalyzer.run(mockInputRow, distinctCount);
	}

	@Override
	public BooleanAnalyzerResult getResult() {
		return _booleanAnalyzer.getResult();
	}
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy