All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nd4j.linalg.collection.CompactHeapStringList Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

package org.nd4j.linalg.collection;

import java.util.*;

/**
 * A {@code List<String>} that stores all contents in a single {@code char[]}, to avoid the GC load of a large
 * number of String objects.
 *
 * <p>Characters of all elements are stored back to back in one {@code char[]}; a second {@code int[]} holds, for
 * element {@code i}, the start offset at position {@code 2 * i} and the length at position {@code 2 * i + 1}.
 *
 * <p>Some restrictions to be aware of with the current implementation:
 * <ul>
 * <li>The list is intended to be write-once (append only), except for {@link #clear()} operations. That is: new
 * Strings can be added at the end, but they cannot be replaced or removed.</li>
 * <li>There is a limit of a maximum of {@link Integer#MAX_VALUE}/2 = 1073741823 Strings.</li>
 * <li>There is a limit of the maximum total characters of {@link Integer#MAX_VALUE} (i.e., 2147483647 chars).
 * This corresponds to a maximum of approximately 4GB of Strings.</li>
 * <li>{@code null} elements are not supported: {@link #add(String)} throws {@link NullPointerException}.</li>
 * </ul>
 *
 * @author Alex Black
 */
public class CompactHeapStringList implements List<String> {
    public static final int DEFAULT_REALLOCATION_BLOCK_SIZE_BYTES = 8 * 1024 * 1024; //8MB
    public static final int DEFAULT_INTEGER_REALLOCATION_BLOCK_SIZE_BYTES = 1024 * 1024; //1MB - 262144 ints, 131k entries

    private final int reallocationBlockSizeBytes;
    private final int reallocationIntegerBlockSizeBytes;
    /** Number of Strings currently stored. */
    private int usedCount = 0;
    /** Index in {@link #data} at which the next added String will start. */
    private int nextDataOffset = 0;
    /** Concatenated characters of all stored Strings. */
    private char[] data;
    /** Pairs of (offset, length) into {@link #data}: element i uses positions 2*i and 2*i+1. */
    private int[] offsetAndLength;

    /**
     * Creates an empty list using the default reallocation block sizes.
     */
    public CompactHeapStringList() {
        this(DEFAULT_REALLOCATION_BLOCK_SIZE_BYTES, DEFAULT_INTEGER_REALLOCATION_BLOCK_SIZE_BYTES);
    }

    /**
     * Creates an empty list with the given reallocation block sizes. The initial arrays are one block each.
     *
     * @param reallocationBlockSizeBytes    Number of bytes by which to increase the char[], when allocating a new storage array
     * @param intReallocationBlockSizeBytes Number of bytes by which to increase the int[], when allocating a new storage array
     */
    public CompactHeapStringList(int reallocationBlockSizeBytes, int intReallocationBlockSizeBytes) {
        this.reallocationBlockSizeBytes = reallocationBlockSizeBytes;
        this.reallocationIntegerBlockSizeBytes = intReallocationBlockSizeBytes;

        //2 bytes per char, 4 bytes per int
        this.data = new char[this.reallocationBlockSizeBytes / 2];
        this.offsetAndLength = new int[this.reallocationIntegerBlockSizeBytes / 4];
    }

    @Override
    public int size() {
        return usedCount;
    }

    @Override
    public boolean isEmpty() {
        return usedCount == 0;
    }

    @Override
    public boolean contains(Object o) {
        throw new UnsupportedOperationException("Not supported");
    }

    @Override
    public Iterator<String> iterator() {
        return new CompactHeapStringListIterator();
    }

    /**
     * @return a newly allocated array holding a copy of every element, in list order
     */
    @Override
    public String[] toArray() {
        String[] str = new String[usedCount];
        for (int i = 0; i < usedCount; i++) {
            str[i] = get(i);
        }
        return str;
    }

    @Override
    public <T> T[] toArray(T[] a) {
        throw new UnsupportedOperationException("Not supported");
    }

    /**
     * Appends the String to the end of the list, growing the backing arrays when required.
     *
     * @param s String to append; must not be null
     * @return always {@code true}
     * @throws NullPointerException          if {@code s} is null
     * @throws UnsupportedOperationException if the char[] or int[] would need more than
     *                                       {@link Integer#MAX_VALUE} entries
     */
    @Override
    public boolean add(String s) {
        int length = s.length();

        ensureDataCapacity(length);
        ensureOffsetCapacity();

        s.getChars(0, length, data, nextDataOffset);
        offsetAndLength[2 * usedCount] = nextDataOffset;
        offsetAndLength[2 * usedCount + 1] = length;
        nextDataOffset += length;
        usedCount++;

        return true;
    }

    /** Grows the char[] (if necessary) so that {@code length} more chars fit at {@code nextDataOffset}. */
    private void ensureDataCapacity(int length) {
        //nextDataOffset <= data.length always holds, so this subtraction cannot overflow
        //(unlike nextDataOffset + length, which can wrap to a negative value)
        if (length <= data.length - nextDataOffset) {
            return;
        }

        if (length > Integer.MAX_VALUE - nextDataOffset) {
            throw new UnsupportedOperationException(
                            "Cannot allocate new data char[]: required array size exceeds Integer.MAX_VALUE");
        }

        //Grow by a full block, or by the String length if that is larger; clamp at Integer.MAX_VALUE
        int toAdd = Math.max(reallocationBlockSizeBytes / 2, length);
        int newLength = data.length + Math.min(toAdd, Integer.MAX_VALUE - data.length);
        data = Arrays.copyOf(data, newLength);
    }

    /** Grows the int[] (if necessary) so that one more (offset, length) pair fits. */
    private void ensureOffsetCapacity() {
        //long arithmetic: 2 * (usedCount + 1) can exceed Integer.MAX_VALUE
        long required = 2L * usedCount + 2;
        if (required <= offsetAndLength.length) {
            return;
        }

        if (required > Integer.MAX_VALUE) {
            //Should normally never happen
            throw new UnsupportedOperationException(
                            "Cannot allocate new offset int[]: required array size exceeds Integer.MAX_VALUE");
        }

        //Grow by a full block, but never by less than what is actually needed (the block may be tiny or zero)
        long increment = Math.max(reallocationIntegerBlockSizeBytes / 4, required - offsetAndLength.length);
        int newLength = (int) Math.min(Integer.MAX_VALUE, offsetAndLength.length + increment);
        offsetAndLength = Arrays.copyOf(offsetAndLength, newLength);
    }

    @Override
    public boolean remove(Object o) {
        //In principle we *could* do this with array copies
        throw new UnsupportedOperationException("Remove not supported");
    }

    @Override
    public boolean containsAll(Collection<?> c) {
        throw new UnsupportedOperationException("Not yet implemented");
    }

    /**
     * Appends every String of the collection, in its iteration order.
     *
     * @return {@code true} if the collection was non-empty (i.e. this list changed)
     */
    @Override
    public boolean addAll(Collection<? extends String> c) {
        for (String s : c) {
            add(s);
        }
        return !c.isEmpty();
    }

    @Override
    public boolean addAll(int index, Collection<? extends String> c) {
        //This is conceivably possible with array copies and adjusting the indices
        throw new UnsupportedOperationException("Add all at specified index: Not supported");
    }

    @Override
    public boolean removeAll(Collection<?> c) {
        throw new UnsupportedOperationException("Remove all: Not supported");
    }

    @Override
    public boolean retainAll(Collection<?> c) {
        throw new UnsupportedOperationException("Retain all: Not supported");
    }

    /**
     * Removes all elements and replaces both backing arrays with fresh arrays of one block each.
     */
    @Override
    public void clear() {
        usedCount = 0;
        nextDataOffset = 0;
        data = new char[reallocationBlockSizeBytes / 2];
        offsetAndLength = new int[reallocationIntegerBlockSizeBytes / 4];
    }

    /**
     * Returns a new String built from the stored characters of the element at {@code index}.
     *
     * @param index position of the element
     * @return a newly created String with the element's contents
     * @throws IllegalArgumentException if {@code index >= size()}
     */
    @Override
    public String get(int index) {
        if (index >= usedCount) {
            throw new IllegalArgumentException("Invalid index: " + index + " >= size(). Size = " + usedCount);
        }
        int offset = offsetAndLength[2 * index];
        int length = offsetAndLength[2 * index + 1];
        return new String(data, offset, length);
    }

    @Override
    public String set(int index, String element) {
        //This *could* be done with array copy ops...
        throw new UnsupportedOperationException(
                        "Set specified index: not supported due to serialized storage structure");
    }

    @Override
    public void add(int index, String element) {
        //This *could* be done with array copy ops...
        throw new UnsupportedOperationException(
                        "Add at specified index: not supported due to serialized storage structure");
    }

    @Override
    public String remove(int index) {
        throw new UnsupportedOperationException("Remove: not supported");
    }

    /**
     * @return index of the first element equal to {@code o}, or -1 if {@code o} is not a String or is not present
     */
    @Override
    public int indexOf(Object o) {
        if (!(o instanceof String)) {
            return -1;
        }

        char[] ch = ((String) o).toCharArray();
        for (int i = 0; i < usedCount; i++) {
            if (matchesAt(i, ch)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * @return index of the last element equal to {@code o}, or -1 if {@code o} is not a String or is not present
     */
    @Override
    public int lastIndexOf(Object o) {
        if (!(o instanceof String)) {
            return -1;
        }

        char[] ch = ((String) o).toCharArray();
        for (int i = usedCount - 1; i >= 0; i--) {
            if (matchesAt(i, ch)) {
                return i;
            }
        }
        return -1;
    }

    /** Compares the stored element at {@code index} with {@code ch}, char by char, without creating a String. */
    private boolean matchesAt(int index, char[] ch) {
        if (offsetAndLength[2 * index + 1] != ch.length) {
            //Can't be this one: lengths differ
            return false;
        }

        int offset = offsetAndLength[2 * index];
        for (int j = 0; j < ch.length; j++) {
            if (data[offset + j] != ch[j]) {
                return false;
            }
        }
        return true;
    }

    @Override
    public ListIterator<String> listIterator() {
        return new CompactHeapStringListIterator();
    }

    @Override
    public ListIterator<String> listIterator(int index) {
        throw new UnsupportedOperationException("Not supported");
    }

    @Override
    public List<String> subList(int fromIndex, int toIndex) {
        throw new UnsupportedOperationException("Not supported");
    }

    /**
     * Two lists are equal if they have the same size and equal elements in the same order.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof List)) {
            return false;
        }

        ListIterator<String> e1 = listIterator();
        ListIterator<?> e2 = ((List<?>) o).listIterator();
        while (e1.hasNext() && e2.hasNext()) {
            String o1 = e1.next();
            Object o2 = e2.next();
            if (!(o1 == null ? o2 == null : o1.equals(o2))) {
                return false;
            }
        }
        return !(e1.hasNext() || e2.hasNext());
    }

    /**
     * Hash code as specified by {@link List#hashCode()}, so that it is consistent with {@link #equals(Object)}.
     * Element hashes follow the {@link String#hashCode()} formula and are computed directly from the char[],
     * so no String objects are created.
     */
    @Override
    public int hashCode() {
        int hash = 1;
        for (int i = 0; i < usedCount; i++) {
            int offset = offsetAndLength[2 * i];
            int end = offset + offsetAndLength[2 * i + 1];
            int elementHash = 0;
            for (int j = offset; j < end; j++) {
                elementHash = 31 * elementHash + data[j];
            }
            hash = 31 * hash + elementHash;
        }
        return hash;
    }

    /**
     * Read-only iterator over the list. {@code currIdx} is the cursor: the index of the element that
     * {@link #next()} would return.
     */
    private class CompactHeapStringListIterator implements Iterator<String>, ListIterator<String> {
        private int currIdx = 0;

        @Override
        public boolean hasNext() {
            return currIdx < usedCount;
        }

        @Override
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException("No next element");
            }
            return get(currIdx++);
        }

        @Override
        public boolean hasPrevious() {
            return currIdx > 0;
        }

        @Override
        public String previous() {
            if (!hasPrevious()) {
                throw new NoSuchElementException();
            }
            //Move the cursor back first: the previous element is the one *before* the cursor
            return get(--currIdx);
        }

        @Override
        public int nextIndex() {
            return currIdx;
        }

        @Override
        public int previousIndex() {
            return currIdx - 1;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        @Override
        public void set(String s) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void add(String s) {
            throw new UnsupportedOperationException();
        }
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy