All downloads are free. Search and download functionality uses the official Maven repository.

org.rcsb.cif.binary.encoding.StringArrayEncoding Maven / Gradle / Ivy

package org.rcsb.cif.binary.encoding;

import org.rcsb.cif.EncodingStrategyHint;
import org.rcsb.cif.binary.Classifier;
import org.rcsb.cif.binary.data.ByteArray;
import org.rcsb.cif.binary.data.Int32Array;
import org.rcsb.cif.binary.data.StringArray;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * 

Stores an array of strings as a concatenation of all unique strings, an array of offsets describing substrings, * and indices into the offset array. Indices to corresponding substrings.

* *
 * StringArray {
 *     kind = "StringArray"
 *     dataEncoding: Encoding[]
 *     stringData: string
 *     offsetEncoding: Encoding[]
 *     offsets: Uint8Array
 * }
 * Example
 * ['a','AB','a']
 * ---StringArray--->
 * { stringData = 'aAB', offsets = [0, 1, 3] } [0, 1, 0]
 * 
*/ public class StringArrayEncoding implements Encoding { private Deque> dataEncoding; private String stringData; private Deque> offsetEncoding; private byte[] offsets; public StringArrayEncoding() { } public StringArrayEncoding(String stringData, byte[] offsets, Deque> outputEncoding, Deque> offsetEncoding) { this.dataEncoding = outputEncoding; this.stringData = stringData; this.offsetEncoding = offsetEncoding; this.offsets = offsets; } @Override public Map getMapRepresentation() { Map map = new LinkedHashMap<>(); map.put("kind", "StringArray"); map.put("dataEncoding", toArray(dataEncoding)); map.put("stringData", stringData); map.put("offsetEncoding", toArray(offsetEncoding)); map.put("offsets", offsets); return map; } private Map[] toArray(Deque> deque) { return deque.stream() .map(Encoding::getMapRepresentation) .toArray(Map[]::new); } @Override public StringArray decode(ByteArray data) { int[] offsets = (int[]) new ByteArray(this.offsets, offsetEncoding) .decode() .getData(); data.setEncoding(dataEncoding); int[] indices = (int[]) data.decode().getData(); String[] strings = new String[offsets.length]; strings[0] = ""; for (int i = 1; i < offsets.length; i++) { strings[i] = stringData.substring(offsets[i - 1], offsets[i]); } int offset = 0; String[] result = new String[indices.length]; for (int index : indices) { result[offset++] = strings[index + 1]; } return new StringArray(result, data.getEncoding()); } @Override public ByteArray encode(StringArray data) { String[] input = data.getData(); Map map = new HashMap<>(); List strings = new ArrayList<>(); int[] outputArray = new int[input.length]; List offsetList = new ArrayList<>(); offsetList.add(0); int accLength = 0; int i = 0; for (String s : input) { // handle null strings if (s == null) { outputArray[i++] = -1; continue; } if (map.containsKey(s)) { int index = map.get(s); outputArray[i++] = index; } else { // increment the length accLength += s.length(); // store the string and index int index = 
strings.size(); strings.add(s); map.put(s, index); // store offset offsetList.add(accLength); outputArray[i++] = index; } } int[] offsetArray = new int[offsetList.size()]; for (int j = 0; j < offsetList.size(); j++) { offsetArray[j] = offsetList.get(j); } Int32Array offsetPlain = new Int32Array(offsetArray); EncodingStrategyHint offsetHint = Classifier.classify(offsetPlain); ByteArray offsets = Classifier.encode(offsetPlain, offsetHint.getEncoding()); Int32Array outputPlain = new Int32Array(outputArray); EncodingStrategyHint outputHint = Classifier.classify(outputPlain); ByteArray output = Classifier.encode(outputPlain, outputHint.getEncoding()); Deque> enc = new ArrayDeque<>(data.getEncoding()); this.offsets = offsets.getData(); this.offsetEncoding = offsets.getEncoding(); this.stringData = String.join("", strings); this.dataEncoding = output.getEncoding(); enc.add(this); return new ByteArray(output.getData(), enc); } @Override public String toString() { return "StringArrayEncoding{" + "dataEncoding=" + dataEncoding + ", stringData='" + stringData + '\'' + ", offsetEncoding=" + offsetEncoding + ", offsets=" + Arrays.toString(offsets) + '}'; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy