
// example.DataSources Maven / Gradle / Ivy
/*
* Zorbage: an algebraic data hierarchy for use in numeric processing.
*
* Copyright (c) 2016-2021 Barry DeZonia All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list
* of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or other
* materials provided with the distribution.
*
* Neither the name of the copyright holder nor the names of its contributors may
* be used to endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package example;
import java.math.BigDecimal;
import nom.bdezonia.zorbage.algebra.G;
import nom.bdezonia.zorbage.algorithm.DeepCopy;
import nom.bdezonia.zorbage.algorithm.FFT;
import nom.bdezonia.zorbage.algorithm.Fill;
import nom.bdezonia.zorbage.algorithm.GetV;
import nom.bdezonia.zorbage.algorithm.Mean;
import nom.bdezonia.zorbage.algorithm.Median;
import nom.bdezonia.zorbage.algorithm.SetV;
import nom.bdezonia.zorbage.algorithm.Sort;
import nom.bdezonia.zorbage.algorithm.StdDev;
import nom.bdezonia.zorbage.algorithm.Sum;
import nom.bdezonia.zorbage.datasource.ArrayDataSource;
import nom.bdezonia.zorbage.datasource.BigListDataSource;
import nom.bdezonia.zorbage.datasource.ConcatenatedDataSource;
import nom.bdezonia.zorbage.datasource.ConditionalDataSource;
import nom.bdezonia.zorbage.datasource.DataSourceListener;
import nom.bdezonia.zorbage.datasource.FixedSizeDataSource;
import nom.bdezonia.zorbage.datasource.IndexedDataSource;
import nom.bdezonia.zorbage.datasource.ListDataSource;
import nom.bdezonia.zorbage.datasource.MaskedDataSource;
import nom.bdezonia.zorbage.datasource.ProcedureDataSource;
import nom.bdezonia.zorbage.datasource.ProcedurePaddedDataSource;
import nom.bdezonia.zorbage.datasource.ReadOnlyDataSource;
import nom.bdezonia.zorbage.datasource.ReadOnlyHighPrecisionDataSource;
import nom.bdezonia.zorbage.datasource.ReversedDataSource;
import nom.bdezonia.zorbage.datasource.SequencedDataSource;
import nom.bdezonia.zorbage.datasource.TransformedDataSource;
import nom.bdezonia.zorbage.datasource.TrimmedDataSource;
import nom.bdezonia.zorbage.datasource.WriteNotifyingDataSource;
import nom.bdezonia.zorbage.function.Function1;
import nom.bdezonia.zorbage.procedure.Procedure2;
import nom.bdezonia.zorbage.storage.array.ArrayStorage;
import nom.bdezonia.zorbage.type.bool.BooleanMember;
import nom.bdezonia.zorbage.type.complex.float32.ComplexFloat32Member;
import nom.bdezonia.zorbage.type.integer.int1.UnsignedInt1Member;
import nom.bdezonia.zorbage.type.integer.int10.UnsignedInt10Algebra;
import nom.bdezonia.zorbage.type.integer.int10.UnsignedInt10Member;
import nom.bdezonia.zorbage.type.integer.int12.UnsignedInt12Member;
import nom.bdezonia.zorbage.type.integer.int16.SignedInt16Algebra;
import nom.bdezonia.zorbage.type.integer.int16.SignedInt16Member;
import nom.bdezonia.zorbage.type.integer.int32.SignedInt32Member;
import nom.bdezonia.zorbage.type.integer.int4.UnsignedInt4Member;
import nom.bdezonia.zorbage.type.real.float16.Float16Algebra;
import nom.bdezonia.zorbage.type.real.float16.Float16Member;
import nom.bdezonia.zorbage.type.real.float32.Float32Member;
import nom.bdezonia.zorbage.type.real.float64.Float64Member;
import nom.bdezonia.zorbage.type.real.highprec.HighPrecisionMember;
/**
 * A collection of short, self-contained examples showing how to use the various
 * {@link IndexedDataSource} implementations and wrappers. Each {@code exampleN}
 * method stands on its own and demonstrates one kind of data source.
 *
 * @author Barry DeZonia
 */
class DataSources {

  /**
   * IndexedDataSource
   *
   * <p>IndexedDataSource is the interface around which much of Zorbage is organized.
   * Many algorithms take IndexedDataSources as an input and calculate results
   * from their data. An IndexedDataSource can be thought of as an array indexed
   * by longs. In reality an IndexedDataSource might be an actual array, or a
   * JDBC database table or a virtual file backed array or many other possibilities.
   * Zorbage's algorithms do not have to worry about how the data is stored. Any
   * structure can be walked using the interface. In fact in the future Zorbage
   * may provide clustered data access through an IndexedDataSource interface.
   */
  void example1() {
    IndexedDataSource<Float64Member> data =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.DBL.construct(), 100);
    Float64Member value = G.DBL.construct();
    // read the fifth element of the list into value
    GetV.fifth(data, value);
    value.setV(10101);
    // write value as the eighteenth element of the list
    SetV.eighteenth(data, value);
  }

  /**
   * ArrayDataSource
   *
   * <p>The regular Storage allocator requires that the type of data that will be stored
   * implements certain primitive coder interfaces. Sometimes this is too restrictive.
   * If someone defines a type that can't easily be stored to disk (like for instance
   * when its primitive data does not have a fixed size) you can use this data source
   * to wrap plain object array data so that it can be used with all of Zorbage's
   * algorithms. Note that this code is type safe. It also is completely resident in
   * ram and cannot be indexed beyond the range of a 32 bit integer so one is limited
   * as to how much data can be allocated and processed. It can be useful for
   * interfacing with other libraries that return arrays of objects.
   */
  void example2() {
    IndexedDataSource<HighPrecisionMember> list = ArrayDataSource.construct(G.HP, 1234);
    // fill the list with values
    Fill.compute(G.HP, G.HP.unity(), list);
    // then calculate a result
    HighPrecisionMember result = G.HP.construct();
    Sum.compute(G.HP, list, result); // result should equal 1234
  }

  /**
   * BigListDataSource
   *
   * <p>In java arrays are limited to the number of elements that can be represented by a
   * 32-bit integer. That is the max size and also the most amount of ram a primitive
   * data source can use. However in Zorbage the BigListDataSource allows very large
   * lists to be contained in ram and to be indexed with 64-bit integers. The JVM can
   * be tweaked to allocate lots of ram and the BigListDataSource class can take
   * advantage of it. Internally it stores a list of lists.
   */
  void example3() {
    IndexedDataSource<SignedInt16Member> list =
        new BigListDataSource<SignedInt16Algebra, SignedInt16Member>(G.INT16, 100000);
    // elsewhere fill the list with values
    // then calculate a result
    SignedInt16Member result = G.INT16.construct();
    Median.compute(G.INT16, list, result);
  }

  /**
   * ConcatenatedDataSource
   *
   * <p>A ConcatenatedDataSource glues two other lists together so they can be treated
   * as one. The concatenated lists can be passed to other algorithms.
   */
  void example4() {
    IndexedDataSource<UnsignedInt1Member> list1 =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.UINT1.construct(), 100);
    IndexedDataSource<UnsignedInt1Member> list2 =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.UINT1.construct(), 1000);
    IndexedDataSource<UnsignedInt1Member> joinedList =
        new ConcatenatedDataSource<>(list1, list2);
    // filling the joined view writes through to both underlying lists
    Fill.compute(G.UINT1, G.UINT1.random(), joinedList);
  }

  /**
   * ConditionalDataSource
   *
   * <p>A ConditionalDataSource creates a list that only contains values from an
   * underlying list that satisfy a condition. One of the design constraints on
   * Zorbage is that for the most part all IndexedDataSources have a fixed size.
   * Because of this ConditionalDataSources must also follow this contract. To
   * do so you make sure that all list data writes only succeed with values that
   * satisfy the original condition upon which the data source was built.
   *
   * <p>Note that the last write in this example deliberately violates the condition
   * in order to show that such a write is rejected with an exception.
   */
  @SuppressWarnings("unused")
  void example5() {
    // allocate a list
    IndexedDataSource<UnsignedInt10Member> list =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.UINT10.construct(), 100000);
    // fill it with something
    Fill.compute(G.UINT10, G.UINT10.random(), list);
    // then make the condition "value is less than 44"
    Function1<Boolean, UnsignedInt10Member> lessThan44 =
        new Function1<Boolean, UnsignedInt10Member>() {
          @Override
          public Boolean call(UnsignedInt10Member value) {
            return value.v() < 44;
          }
        };
    // get a view of all values that satisfy this
    IndexedDataSource<UnsignedInt10Member> conditionalList =
        new ConditionalDataSource<UnsignedInt10Algebra, UnsignedInt10Member>(
            G.UINT10, list, lessThan44);
    // count how many values satisfy this constraint
    long count = conditionalList.size();
    // now get some values that satisfy the condition
    UnsignedInt10Member value = G.UINT10.construct();
    GetV.first(conditionalList, value);
    // try to set the values to something else
    value.setV(22);
    // this succeeds since 22 < 44 and satisfies the condition
    SetV.first(conditionalList, value);
    value.setV(99);
    // this fails since 99 >= 44 and does not satisfy the condition
    // an exception will be thrown
    SetV.first(conditionalList, value);
  }

  /**
   * FixedSizeDataSource
   *
   * <p>Sometimes you have a requirement that an algorithm is expecting a certain size
   * list and the one you have does not match. You can cap the size of a list using
   * a FixedSizeDataSource. The FFT algorithm commonly needs to do this.
   */
  void example6() {
    // allocate some data
    IndexedDataSource<ComplexFloat32Member> data =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.CFLT.construct(), 1234);
    // elsewhere fill it with something
    // then define an out of bounds padding that is all zero
    Procedure2<Long, ComplexFloat32Member> proc =
        new Procedure2<Long, ComplexFloat32Member>() {
          @Override
          public void call(Long a, ComplexFloat32Member b) {
            b.setR(0);
            b.setI(0);
          }
        };
    // tie the padding and the zero proc together. reads beyond data's length will return 0
    IndexedDataSource<ComplexFloat32Member> padded =
        new ProcedurePaddedDataSource<>(G.CFLT, data, proc);
    // compute an ideal power of two size that the FFT algorithm will want to use
    long idealSize = FFT.enclosingPowerOf2(data.size());
    // make the FixedSizeDataSource here that satisfies the FFT algorithm's requirements
    IndexedDataSource<ComplexFloat32Member> fixedSize =
        new FixedSizeDataSource<>(idealSize, padded);
    // allocate the same amount of space for the results
    IndexedDataSource<ComplexFloat32Member> outList =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.CFLT.construct(), idealSize);
    // and compute the FFT
    FFT.compute(G.CFLT, G.FLT, fixedSize, outList);
  }

  /**
   * ListDataSource
   *
   * <p>The regular Storage allocator requires that the type of data that will be stored
   * implements certain primitive coder interfaces. Sometimes this is too restrictive.
   * If someone defines a type that can't easily be stored to disk (like for instance
   * when its primitive data does not have a fixed size) you can use this data source
   * to wrap plain object list data so that it can be used with all of Zorbage's
   * algorithms. Note that this code is type safe. It also is completely resident in
   * ram and cannot be indexed beyond the range of a 32 bit integer so one is limited
   * as to how much data can be allocated and processed. It can be useful for
   * interfacing with other libraries that return lists of objects.
   */
  void example7() {
    // allocate the data
    IndexedDataSource<HighPrecisionMember> data = ListDataSource.construct(G.HP, 1234);
    // fill the list with values: element i receives the value i
    HighPrecisionMember value = G.HP.construct();
    for (long i = 0; i < data.size(); i++) {
      value.setV(BigDecimal.valueOf(i));
      data.set(i, value);
    }
    // then calculate a result
    HighPrecisionMember result = G.HP.construct();
    StdDev.compute(G.HP, data, result);
  }

  /**
   * MaskedDataSource
   *
   * <p>Sometimes you want to compute on values from a data source that have a logical
   * classification. If you build a boolean mask that is true where you are interested
   * in the value you can build a MaskedDataSource and do your computations upon only
   * that data. An example mask might be booleans describing which values in the
   * original list are contained within a threshold value.
   */
  void example8() {
    // setup some data
    IndexedDataSource<Float64Member> list = ArrayStorage.allocate(G.DBL.construct(), 9);
    // fill it with random values
    Fill.compute(G.DBL, G.DBL.random(), list);
    // build a mask: true at original indices 0, 3, 6, 7 and 8
    IndexedDataSource<BooleanMember> mask =
        nom.bdezonia.zorbage.storage.Storage.allocate(
            G.BOOL.construct(),
            new boolean[] {true, false, false, true, false, false, true, true, true});
    // make the filter
    IndexedDataSource<Float64Member> maskedData = new MaskedDataSource<>(list, mask);
    // do some computations
    maskedData.size(); // equals 5
    // the value at index 3 of the masked data is the value at index 7 of the
    // original list (indices are zero based)
    Float64Member value = G.DBL.construct();
    maskedData.get(3, value);
    // compute a value on data only where the mask is true in the original dataset
    Mean.compute(G.DBL, maskedData, value);
  }

  /**
   * ProcedureDataSource
   *
   * <p>A ProcedureDataSource can be used to treat a mathematical or tabulated function
   * as a source of data. One could define a Procedure that returns values from a
   * table or from some computation and then wrap it and pass it to all the algorithms
   * Zorbage provides.
   */
  void example9() {
    // an uninteresting procedure we will calculate values from: the value at
    // index a is a itself (narrowed to an int)
    Procedure2<Long, UnsignedInt12Member> proc =
        new Procedure2<Long, UnsignedInt12Member>() {
          @Override
          public void call(Long a, UnsignedInt12Member b) {
            b.setV(a.intValue());
          }
        };
    // the ProcedureDataSource we will query
    IndexedDataSource<UnsignedInt12Member> list = new ProcedureDataSource<>(proc);
    // limit how much data to consider since procedures have a nearly unlimited x axis range
    IndexedDataSource<UnsignedInt12Member> fixedList = new FixedSizeDataSource<>(1000, list);
    // then calculate some values
    UnsignedInt12Member value = G.UINT12.construct();
    Sum.compute(G.UINT12, fixedList, value);
  }

  /**
   * ReadOnlyDataSource
   *
   * <p>Sometimes you want to make sure that users of your list cannot modify its contents.
   * Wrapping a data source in this read only wrapper can be just what you need.
   *
   * <p>Note that the last statement of this example deliberately attempts a write in
   * order to show that it is rejected with an exception.
   */
  void example10() {
    // allocate a regular list
    IndexedDataSource<UnsignedInt4Member> list =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.UINT4.construct(), 1000);
    // fill it with data
    Fill.compute(G.UINT4, G.UINT4.random(), list);
    // protect it from writes
    IndexedDataSource<UnsignedInt4Member> readonlyList = new ReadOnlyDataSource<>(list);
    // now play with list
    UnsignedInt4Member value = G.UINT4.construct();
    // success
    readonlyList.get(44, value);
    // prepare to write
    value.setV(100);
    // failure: throws exception. writing not allowed
    readonlyList.set(22, value);
  }

  /**
   * ReadOnlyHighPrecisionDataSource
   *
   * <p>Use this data source to pull data out of an underlying data source as high
   * precision numbers so you can do highly accurate calculations. This approach
   * only uses minimal amounts of additional ram. The calculations are a bit
   * slower than raw data access. Each single number of the underlying data source
   * is translated to a high precision representation upon reads to the filter.
   * Only one number is translated at a time as they are requested.
   */
  void example11() {
    // allocate a regular list
    IndexedDataSource<UnsignedInt4Member> list =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.UINT4.construct(), 1000);
    // fill it with data
    Fill.compute(G.UINT4, G.UINT4.random(), list);
    // wrap it
    IndexedDataSource<HighPrecisionMember> highPrecData =
        new ReadOnlyHighPrecisionDataSource<>(G.UINT4, list);
    // do a high precision calculation
    HighPrecisionMember result = G.HP.construct();
    Mean.compute(G.HP, highPrecData, result);
    // one thing to notice is that a sum/mean of 1000 UINT4s would certainly
    // overflow the range of an output UINT4. This is a good example of where the
    // high precision approach makes sense even if perfect accuracy is not required.
    // the high prec output cannot overflow.
  }

  /**
   * ReversedDataSource
   *
   * <p>Sometimes you want to be able to pull values out of a list in reverse order.
   * If you want you can write a for loop from max to min. But Zorbage does
   * provide a ReversedDataSource filter in case you want to pass data in
   * reverse order to algorithms or if you want to write straightforward for
   * loops and just concentrate on the writing of your algorithm.
   */
  void example12() {
    // an example of passing data to algorithms in reverse order
    // make some data
    IndexedDataSource<SignedInt32Member> nums =
        nom.bdezonia.zorbage.storage.Storage.allocate(
            G.INT32.construct(),
            new int[] {-1, 345, 1, -3044, 0, 0, 1066, -12});
    // sort it
    Sort.compute(G.INT32, nums);
    // this is how the results look
    //   nums = [-3044, -12, -1, 0, 0, 1, 345, 1066]
    // now wrap a reverse filter around the data
    IndexedDataSource<SignedInt32Member> revNums = new ReversedDataSource<>(nums);
    // sort using the filter
    Sort.compute(G.INT32, revNums);
    // this is how the results look
    //   revNums = [-3044, -12, -1, 0, 0, 1, 345, 1066]
    //   nums    = [1066, 345, 1, 0, 0, -1, -12, -3044]
    // now here is some code showing another way to use the filter
    SignedInt32Member value = G.INT32.construct();
    GetV.second(revNums, value);
    // gets the second value in the reverse list. so it gets the second to last
    // value in the original list.
    //   value = -12
  }

  /**
   * SequencedDataSource
   *
   * <p>Sometimes you want to access a datasource in a strided fashion. Zorbage provides
   * multidimensional iteration code for many use cases. But sometimes you know you
   * just want to fill a column or a plane or a volume etc. You can use a strided
   * accessor (a SequencedDataSource) if you want to accomplish things quickly and
   * simply.
   */
  void example13() {
    // create a list of zeroes
    IndexedDataSource<Float64Member> list =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.DBL.construct(), 1000);
    // now setup a view that will increment by 3 starting at the list[4] and steps
    // up to 100 times.
    IndexedDataSource<Float64Member> seqData =
        new SequencedDataSource<Float64Member>(list, 4, 3, 100);
    seqData.size(); // size == 100
    // now set a bunch of values: seqData[i] refers to list[4 + 3*i] and receives i
    Float64Member value = G.DBL.construct();
    for (long i = 0; i < seqData.size(); i++) {
      value.setV(i);
      seqData.set(i, value);
    }
    // list[4] = 0, list[7] = 1, list[10] = 2, list[13] = 3, ... so
    //   list = [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0, ...]
  }

  /**
   * TransformedDataSource
   *
   * <p>Sometimes you want to calculate a value on a data source that doesn't
   * have an appropriate type. You can wrap the data source with a
   * TransformedDataSource and do back and forth conversions as necessary.
   * The transform needs to use a couple procedures for translating between
   * types.
   */
  void example14() {
    // original data: a bunch of ints
    IndexedDataSource<SignedInt32Member> list =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.INT32.construct(), 100);
    // a procedure to transform ints to doubles
    Procedure2<SignedInt32Member, Float64Member> intToDblProc =
        new Procedure2<SignedInt32Member, Float64Member>() {
          @Override
          public void call(SignedInt32Member a, Float64Member b) {
            b.setV(a.v());
          }
        };
    // a procedure to transform doubles to ints (the fractional part is truncated)
    Procedure2<Float64Member, SignedInt32Member> dblToIntProc =
        new Procedure2<Float64Member, SignedInt32Member>() {
          @Override
          public void call(Float64Member a, SignedInt32Member b) {
            b.setV((int) a.v());
          }
        };
    // the definition of the transformed data source
    IndexedDataSource<Float64Member> xformer =
        new TransformedDataSource<>(G.INT32, list, intToDblProc, dblToIntProc);
    // now calculate some results. Notice that Mean can't normally be used on
    // integer data
    //   not possible: Integers do not have the correct type of division operator
    //     SignedInt32Member resI = G.INT32.construct();
    //     Mean.compute(G.INT32, list, resI);
    // with the transformer we can calc mean
    Float64Member result = G.DBL.construct();
    Mean.compute(G.DBL, xformer, result);
  }

  /**
   * TrimmedDataSource
   *
   * <p>Make a list that is a window on another list. Sometimes you might want some
   * calculation of the next N numbers of a list (for example sum the next 100
   * numbers of a list). You can define a trimmed view of a list and pass the
   * trimmed list to algorithms that do further calculations.
   */
  @SuppressWarnings("unused")
  void example15() {
    // make a list of 10,000 numbers
    IndexedDataSource<Float32Member> original =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.FLT.construct(), 10000);
    // make a list that is a subset of the previous list (numbers from locations 1,000 - 2,999)
    IndexedDataSource<Float32Member> trimmed = new TrimmedDataSource<>(original, 1000, 2000);
    // the trimmed list has length 2,000 and is indexed from 0 to 1,999 returning data from
    // locations 1,000 - 2,999 in the original list.
  }

  /**
   * WriteNotifyingDataSource
   *
   * <p>Make a list that notifies listeners every time it stores a value. This example
   * uses the notifications to print a progress message each time the percentage of
   * the list that has been written changes.
   */
  void example16() {
    // allocate 10,000 float16's
    IndexedDataSource<Float16Member> origData =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.HLF.construct(), 10000);
    // wrap a notifier around it
    WriteNotifyingDataSource<Float16Algebra, Float16Member> observedData =
        new WriteNotifyingDataSource<>(G.HLF, origData);
    // observe the process
    observedData.subscribe(
        new DataSourceListener<Float16Algebra, Float16Member>() {
          // the last percentage reported; -1 means nothing has been reported yet
          private long lastPercent = -1;

          @Override
          public void notify(
              Float16Algebra algebra, IndexedDataSource<Float16Member> source, long index) {
            long size = source.size();
            // the final element always reports 100. otherwise scale the index to a
            // percentage. the guard is checked first so there is never a division
            // by zero, and computing index * 100 / size also works for sources that
            // hold fewer than 100 elements.
            long percent = (index >= size - 1) ? 100 : (index * 100) / size;
            // only report when the percentage actually changes
            if (percent != lastPercent) {
              System.out.println("Operation is " + percent + "% done.");
              lastPercent = percent;
            }
          }
        });
    // do the fill: we'll get a bunch of status updates
    Float16Member value = new Float16Member(1234);
    Fill.compute(G.HLF, value, observedData);
  }

  /**
   * DeepCopy
   *
   * <p>Many of the data sources shown above make what is called a shallow copy of the
   * data source around which they are wrapped. A TrimmedDataSource does not make a
   * copy of the list it wraps but rather passes data input and output calls to it.
   * Sometimes you might want to make a full copy of the data underlying any and all
   * data sources you've chained together. For instance to calculate a Median of a
   * data set you need to make a DeepCopy of the original data so you can sort it
   * without affecting any of the underlying data sets. A DeepCopy returns a set
   * of data with all data values copied to newly allocated memory (or disk storage
   * as appropriate).
   */
  @SuppressWarnings("unused")
  void example17() {
    // make a list of 10,000 numbers
    IndexedDataSource<Float32Member> original =
        nom.bdezonia.zorbage.storage.Storage.allocate(G.FLT.construct(), 10000);
    // make a list that is a subset of the previous list (numbers from locations 1,000 - 2,999)
    IndexedDataSource<Float32Member> trimmed = new TrimmedDataSource<>(original, 1000, 2000);
    // then create a new memory copy of those 2000 numbers
    IndexedDataSource<Float32Member> theCopy = DeepCopy.compute(G.FLT, trimmed);
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy