parquet.hadoop.MemoryManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hive-apache Show documentation
Show all versions of hive-apache Show documentation
Shaded version of Apache Hive for Presto
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package parquet.hadoop;
import parquet.Log;
import parquet.ParquetRuntimeException;
import java.lang.management.ManagementFactory;
import java.util.HashMap;
import java.util.Map;
/**
 * Implements a memory manager that keeps a global context of how many Parquet
 * writers there are and manages the memory between them. For use cases with
 * dynamic partitions, it is easy to end up with many writers in the same task.
 * By managing the size of each allocation, we try to cut down the size of each
 * allocation and keep the task from running out of memory.
 *
 * <p>This class balances the writers by scaling all of their allocations by the
 * same factor. While the sum of the requested allocations fits in the memory
 * pool, every writer keeps the size it requested. Once the sum exceeds the pool,
 * every writer's allocation is scaled down by the ratio
 * {@code totalMemoryPool / sumOfRequestedAllocations}.
 *
 * <p>{@link #addWriter} and {@link #removeWriter} are {@code synchronized} on
 * this instance; {@link #getWriterList()} is not, and hands out the internal
 * map itself.
 */
public class MemoryManager {
  private static final Log LOG = Log.getLog(MemoryManager.class);
  static final float DEFAULT_MEMORY_POOL_RATIO = 0.95f;
  static final long DEFAULT_MIN_MEMORY_ALLOCATION = 1 * 1024 * 1024; // 1MB

  /** Fraction of the maximum heap that the writers may share, in (0, 1]. */
  private final float memoryPoolRatio;
  /** Size of the shared pool in bytes: maximum heap size times {@link #memoryPoolRatio}. */
  private final long totalMemoryPool;
  /** Smallest scaled-down allocation that is tolerated; values &lt;= 0 disable the check. */
  private final long minMemoryAllocation;
  /** Registered writers mapped to the allocation (in bytes) each one originally requested. */
  private final Map<InternalParquetRecordWriter, Long> writerList =
      new HashMap<InternalParquetRecordWriter, Long>();

  /**
   * Creates a manager whose pool is {@code ratio} times the maximum heap size
   * reported by the JVM's memory MXBean.
   *
   * @param ratio fraction of the maximum heap to hand out, must be in (0, 1]
   * @param minAllocation minimum size in bytes a scaled-down allocation may have;
   *        a value &lt;= 0 disables the minimum check
   * @throws IllegalArgumentException if {@code ratio} is not in (0, 1]
   */
  public MemoryManager(float ratio, long minAllocation) {
    checkRatio(ratio);
    memoryPoolRatio = ratio;
    minMemoryAllocation = minAllocation;

    // NOTE(review): MemoryUsage.getMax() is documented to return -1 when the
    // maximum is undefined; that case is not handled here.
    long maxHeap = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    // Multiply in double precision: long * float yields a float, which would
    // select Math.round(float) -> int and cap the pool at Integer.MAX_VALUE.
    totalMemoryPool = Math.round((double) maxHeap * ratio);
    LOG.debug(String.format("Allocated total memory pool is: %,d", totalMemoryPool));
  }

  /**
   * Validates the configured pool ratio.
   *
   * @param ratio the ratio to validate
   * @throws IllegalArgumentException if {@code ratio} is NaN or outside (0, 1]
   */
  private void checkRatio(float ratio) {
    // Written as a negated range test so that NaN is rejected as well.
    if (!(ratio > 0 && ratio <= 1)) {
      throw new IllegalArgumentException("The configured memory pool ratio " + ratio + " is " +
          "not between 0 and 1.");
    }
  }

  /**
   * Add a new writer and its memory allocation to the memory manager, then
   * rebalance the allocations of all registered writers.
   *
   * @param writer the new created writer
   * @param allocation the requested buffer size
   * @throws IllegalArgumentException if {@code writer} is already registered
   */
  synchronized void addWriter(InternalParquetRecordWriter writer, Long allocation) {
    if (writerList.get(writer) != null) {
      throw new IllegalArgumentException("[BUG] The Parquet Memory Manager should not add an " +
          "instance of InternalParquetRecordWriter more than once. The Manager already contains " +
          "the writer: " + writer);
    }
    writerList.put(writer, allocation);
    updateAllocation();
  }

  /**
   * Remove the given writer from the memory manager and, if any writers
   * remain, rebalance their allocations. Removing an unknown writer is a no-op
   * apart from the rebalance.
   *
   * @param writer the writer that has been closed
   */
  synchronized void removeWriter(InternalParquetRecordWriter writer) {
    writerList.remove(writer);
    if (!writerList.isEmpty()) {
      updateAllocation();
    }
  }

  /**
   * Update the allocated size of each writer based on the current allocations and pool size.
   *
   * @throws ParquetRuntimeException if the allocations had to be scaled down and
   *         a writer's new size falls below the configured minimum allocation
   */
  private void updateAllocation() {
    long totalAllocations = 0;
    for (Long allocation : writerList.values()) {
      totalAllocations += allocation;
    }

    final double scale;
    if (totalAllocations <= totalMemoryPool) {
      // Everything fits: each writer keeps the size it asked for.
      scale = 1.0;
    } else {
      scale = (double) totalMemoryPool / totalAllocations;
      LOG.warn(String.format(
          "Total allocation exceeds %.2f%% (%,d bytes) of heap memory\n" +
          "Scaling row group sizes to %.2f%% for %d writers",
          100*memoryPoolRatio, totalMemoryPool, 100*scale, writerList.size()));
    }

    for (Map.Entry<InternalParquetRecordWriter, Long> entry : writerList.entrySet()) {
      long newSize = (long) Math.floor(entry.getValue() * scale);
      // The minimum is only enforced when the manager actually shrank the
      // request; a writer may ask for less than the minimum on its own.
      if (scale < 1.0 && minMemoryAllocation > 0 && newSize < minMemoryAllocation) {
        // Thrown as an anonymous subclass, as the original code does;
        // presumably ParquetRuntimeException is abstract -- confirm.
        throw new ParquetRuntimeException(String.format("New Memory allocation %d bytes" +
            " is smaller than the minimum allocation size of %d bytes.",
            newSize, minMemoryAllocation)){};
      }
      entry.getKey().setRowGroupSizeThreshold(newSize);
      LOG.debug(String.format("Adjust block size from %,d to %,d for writer: %s",
          entry.getValue(), newSize, entry.getKey()));
    }
  }

  /**
   * Get the total memory pool size that is available for writers.
   * @return the number of bytes in the memory pool
   */
  long getTotalMemoryPool() {
    return totalMemoryPool;
  }

  /**
   * Get the writers list. The returned map is the manager's internal map, not
   * a copy, and this accessor is not synchronized.
   * @return the writers in this memory manager, mapped to their requested allocations
   */
  Map<InternalParquetRecordWriter, Long> getWriterList() {
    return writerList;
  }

  /**
   * Get the ratio of memory allocated for all the writers.
   * @return the memory pool ratio
   */
  float getMemoryPoolRatio() {
    return memoryPoolRatio;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy