
com.google.refine.importers.FixedWidthImporter Maven / Gradle / Ivy
/*******************************************************************************
* Copyright (C) 2018, OpenRefine contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package com.google.refine.importers;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.CharMatcher;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
public class FixedWidthImporter extends TabularImportingParserBase {
public FixedWidthImporter() {
super(false);
}
@Override
public ObjectNode createParserUIInitializationData(
ImportingJob job, List fileRecords, String format) {
ObjectNode options = super.createParserUIInitializationData(job, fileRecords, format);
ArrayNode columnWidths = ParsingUtilities.mapper.createArrayNode();
if (fileRecords.size() > 0) {
ObjectNode firstFileRecord = fileRecords.get(0);
String encoding = ImportingUtilities.getEncoding(firstFileRecord);
String location = JSONUtilities.getString(firstFileRecord, "location", null);
if (location != null) {
File file = new File(job.getRawDataDir(), location);
int[] columnWidthsA = guessColumnWidths(file, encoding);
if (columnWidthsA != null) {
for (int w : columnWidthsA) {
JSONUtilities.append(columnWidths, w);
}
}
}
JSONUtilities.safePut(options, "headerLines", 0);
JSONUtilities.safePut(options, "columnWidths", columnWidths);
JSONUtilities.safePut(options, "guessCellValueTypes", false);
}
return options;
}
@Override
public void parseOneFile(
Project project,
ProjectMetadata metadata,
ImportingJob job,
String fileSource,
Reader reader,
int limit,
ObjectNode options,
List exceptions) {
final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy