All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datacleaner.spark.functions.JsonParserFunction Maven / Gradle / Ivy

There is a newer version: 6.0.0
Show newest version
/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.spark.functions;

import org.apache.metamodel.schema.Column;
import org.apache.spark.api.java.function.Function;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.connection.JsonDatastore;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class JsonParserFunction implements Function {

    private static final long serialVersionUID = 1L;
    private final JsonDatastore _jsonDatastore;
    private Column[] _columns;

    public JsonParserFunction(JsonDatastore jsonDatastore) {
        _jsonDatastore = jsonDatastore;
    }

    @Override
    public Object[] call(String line) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        final JsonNode readTree = mapper.readTree(line);
        final Object[] values = getValues(readTree);
        return values;
    }

    private Object[] getValues(JsonNode readTree) {
        final Column[] columns = getColumns();
        final Object[] list = new Object[columns.length];
        for (int i = 0; i < columns.length; i++) {
            final JsonNode node = readTree.findValue(columns[i].getName());
            final String value = node.asText();
            list[i] = value;
        }
        return list;
    }

    public Column[] getColumns() {
        if (_columns == null) {
            try (final DatastoreConnection openConnection = _jsonDatastore.openConnection()) {
                _columns = openConnection.getDataContext().getDefaultSchema().getTable(0).getColumns();
            }
        }
        return _columns;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy