All downloads are free. Search and download functionality uses the official Maven repository.

com.univocity.parsers.common.processor.BatchedColumnProcessor Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright 2014 uniVocity Software Pty Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.univocity.parsers.common.processor;

import java.util.*;

import com.univocity.parsers.common.*;

/**
 * A {@link RowProcessor} implementation that stores values of columns in batches. Use this implementation in favor of {@link ColumnProcessor}
 * when processing large inputs to avoid running out of memory.
 *
 * Values parsed in each row will be split into columns of Strings. Each column has its own list of values.
 *
 * 

During the execution of the process, the {@link #batchProcessed(int)} method will be invoked after a given number of rows has been processed.

*

The user can access the lists with values parsed for all columns using the methods {@link #getColumnValuesAsList()}, * {@link #getColumnValuesAsMapOfIndexes()} and {@link #getColumnValuesAsMapOfNames()}.

*

After {@link #batchProcessed(int)} is invoked, all values will be discarded and the next batch of column values will be accumulated. * This process will repeat until there's no more rows in the input. * * @author uniVocity Software Pty Ltd - [email protected] * * @see AbstractParser * @see RowProcessor * @see BatchedColumnReaderProcessor */ public abstract class BatchedColumnProcessor implements RowProcessor, BatchedColumnReaderProcessor { private final ColumnSplitter splitter; private final int rowsPerBatch; private int batchCount; private int batchesProcessed; /** * Constructs a batched column processor configured to invoke the {@link #batchesProcessed} method after a given number of rows has been processed. * @param rowsPerBatch the number of rows to process in each batch. */ public BatchedColumnProcessor(int rowsPerBatch) { splitter = new ColumnSplitter(rowsPerBatch); this.rowsPerBatch = rowsPerBatch; } @Override public void processStarted(ParsingContext context) { splitter.reset(); batchCount = 0; batchesProcessed = 0; } @Override public void rowProcessed(String[] row, ParsingContext context) { splitter.addValuesToColumns(row, context); batchCount++; if (batchCount >= rowsPerBatch) { batchProcessed(batchCount); batchCount = 0; splitter.clearValues(); batchesProcessed++; } } @Override public void processEnded(ParsingContext context) { if (batchCount > 0) { batchProcessed(batchCount); } } @Override public final String[] getHeaders() { return splitter.getHeaders(); } @Override public final List> getColumnValuesAsList() { return splitter.getColumnValues(); } @Override public final void putColumnValuesInMapOfNames(Map> map) { splitter.putColumnValuesInMapOfNames(map); } @Override public final void putColumnValuesInMapOfIndexes(Map> map) { splitter.putColumnValuesInMapOfIndexes(map); } @Override public final Map> getColumnValuesAsMapOfNames() { return splitter.getColumnValuesAsMapOfNames(); } @Override public final Map> getColumnValuesAsMapOfIndexes() { return 
splitter.getColumnValuesAsMapOfIndexes(); } @Override public int getRowsPerBatch() { return rowsPerBatch; } @Override public int getBatchesProcessed() { return batchesProcessed; } @Override public abstract void batchProcessed(int rowsInThisBatch); }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy