// fiftyone.mobile.detection.DatasetBuilder (Maven / Gradle / Ivy)
/* *********************************************************************
* This Source Code Form is copyright of 51Degrees Mobile Experts Limited.
* Copyright © 2017 51Degrees Mobile Experts Limited, 5 Charlotte Close,
* Caversham, Reading, Berkshire, United Kingdom RG4 7BY
*
* This Source Code Form is the subject of the following patents and patent
* applications, owned by 51Degrees Mobile Experts Limited of 5 Charlotte
* Close, Caversham, Reading, Berkshire, United Kingdom RG4 7BY:
* European Patent No. 2871816;
* European Patent Application No. 17184134.9;
* United States Patent Nos. 9,332,086 and 9,350,823; and
* United States Patent Application No. 15/686,066.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0.
*
* If a copy of the MPL was not distributed with this file, You can obtain
* one at http://mozilla.org/MPL/2.0/.
*
* This Source Code Form is "Incompatible With Secondary Licenses", as
* defined by the Mozilla Public License, v. 2.0.
* ********************************************************************* */
package fiftyone.mobile.detection;
import fiftyone.mobile.detection.cache.*;
import fiftyone.mobile.detection.entities.*;
import fiftyone.mobile.detection.entities.Map;
import fiftyone.mobile.detection.entities.headers.Header;
import fiftyone.mobile.detection.entities.memory.MemoryFixedList;
import fiftyone.mobile.detection.entities.memory.PropertiesList;
import fiftyone.mobile.detection.entities.stream.IntegerList;
import fiftyone.mobile.detection.factories.*;
import fiftyone.mobile.detection.factories.stream.NodeStreamFactoryV31;
import fiftyone.mobile.detection.factories.stream.NodeStreamFactoryV32;
import fiftyone.mobile.detection.factories.stream.ProfileStreamFactory;
import fiftyone.mobile.detection.readers.BinaryReader;
import java.io.File;
import java.io.IOException;
import java.util.*;
import static fiftyone.mobile.detection.DatasetBuilder.CacheType.*;
/**
 * Allows construction of a {@link Dataset}.
 * <p>
 * Use as follows:
 * <pre>
 * // for a stream dataset read from a byte array buffer
 * Dataset dataset = DatasetBuilder.buffer()
 *     // to use caching
 *     .configureDefaultCaches()
 *     .build(array);
 *
 * // for a stream dataset read from a file
 * Dataset dataset = DatasetBuilder.file()
 *     // to use caching (recommended)
 *     .configureDefaultCaches()
 *     // if a temporary file (deleted on dataset close)
 *     .setTempFile()
 *     // to set the date explicitly
 *     .lastModified(date)
 *     .build(filename);
 * </pre>
 */
/**
 * Used for creating a {@link Dataset}.
 * This uses the fluent builder pattern to create a {@link Dataset} with
 * minimal effort from the user initially, while also allowing a
 * deep level of customisation if required.
 */
public class DatasetBuilder {
/* Default Cache sizes */
public static final int STRINGS_CACHE_SIZE = 5000;
public static final int NODES_CACHE_SIZE = 15000;
public static final int VALUES_CACHE_SIZE = 5000;
public static final int PROFILES_CACHE_SIZE = 600;
public static final int SIGNATURES_CACHE_SIZE = 500;
private static ICacheBuilder lruBuilder = LruCache.builder();
private static EnumMap defaultCacheSizes =
new EnumMap(CacheType.class);
static {
defaultCacheSizes.put(StringsCache, new CacheOptions(STRINGS_CACHE_SIZE, lruBuilder));
defaultCacheSizes.put(NodesCache, new CacheOptions(NODES_CACHE_SIZE, lruBuilder));
defaultCacheSizes.put(ValuesCache, new CacheOptions(VALUES_CACHE_SIZE, lruBuilder));
defaultCacheSizes.put(ProfilesCache, new CacheOptions(PROFILES_CACHE_SIZE, lruBuilder));
defaultCacheSizes.put(SignaturesCache, new CacheOptions(SIGNATURES_CACHE_SIZE, lruBuilder));
}
private static EnumMap MtCacheSizes =
new EnumMap(CacheType.class);
static {
MtCacheSizes.put(StringsCache, new CacheOptions(500, lruBuilder));
MtCacheSizes.put(NodesCache, new CacheOptions(90000, lruBuilder));
MtCacheSizes.put(ValuesCache, new CacheOptions(500, lruBuilder));
MtCacheSizes.put(ProfilesCache, new CacheOptions(7500, lruBuilder));
MtCacheSizes.put(SignaturesCache, new CacheOptions(60000, lruBuilder));
}
private static EnumMap StCacheSizes =
new EnumMap(CacheType.class);
static {
StCacheSizes.put(StringsCache, new CacheOptions(66000, lruBuilder));
StCacheSizes.put(NodesCache, new CacheOptions(100000, lruBuilder));
StCacheSizes.put(ValuesCache, new CacheOptions(14000, lruBuilder));
StCacheSizes.put(ProfilesCache, new CacheOptions(14000, lruBuilder));
StCacheSizes.put(SignaturesCache, new CacheOptions(100000, lruBuilder));
}
private static EnumMap StlmCacheSizes =
new EnumMap(CacheType.class);
static {
StlmCacheSizes.put(StringsCache, new CacheOptions(0, lruBuilder));
StlmCacheSizes.put(NodesCache, new CacheOptions(40000, lruBuilder));
StlmCacheSizes.put(ValuesCache, new CacheOptions(40000, lruBuilder));
StlmCacheSizes.put(ProfilesCache, new CacheOptions(40000, lruBuilder));
StlmCacheSizes.put(SignaturesCache, new CacheOptions(0, lruBuilder));
}
private static EnumMap MtlmCacheSizes =
new EnumMap(CacheType.class);
static {
MtlmCacheSizes.put(StringsCache, new CacheOptions(20500, lruBuilder));
MtlmCacheSizes.put(NodesCache, new CacheOptions(40000, lruBuilder));
MtlmCacheSizes.put(ValuesCache, new CacheOptions(500, lruBuilder));
MtlmCacheSizes.put(ProfilesCache, new CacheOptions(20500, lruBuilder));
MtlmCacheSizes.put(SignaturesCache, new CacheOptions(0, lruBuilder));
}
private static EnumMap HcCacheSizes =
new EnumMap(CacheType.class);
static {
HcCacheSizes.put(StringsCache, new CacheOptions(0, null));
HcCacheSizes.put(NodesCache, new CacheOptions(0, null));
HcCacheSizes.put(ValuesCache, new CacheOptions(0, null));
HcCacheSizes.put(ProfilesCache, new CacheOptions(0, null));
HcCacheSizes.put(SignaturesCache, new CacheOptions(0, null));
}
/**
 * The kinds of cache that can be configured for a stream dataset.
 */
public enum CacheType {
    StringsCache,
    NodesCache,
    ValuesCache,
    ProfilesCache,
    SignaturesCache
}
/**
 * A source of cache configuration that can be handed to a builder.
 * {@link CacheTemplate} is the implementation visible in this file.
 */
@SuppressWarnings("unused")
public interface CacheSet {
/**
 * @return the cache configuration held by this set.
 * NOTE(review): declared as a raw map in this listing; presumably keyed by
 * {@link CacheType} - confirm against the original source.
 */
java.util.Map getCacheConfiguration();
}
/**
 * Predefined cache configurations. Each constant wraps its own copy of one of
 * the static cache-size tables declared on the enclosing class.
 */
public enum CacheTemplate implements CacheSet {
    /**
     * General-purpose settings. Testing has shown that they give good
     * all-round performance; they suit most users but are not tuned for any
     * particular environment.
     */
    Default(defaultCacheSizes),

    /**
     * Sizes tuned for a single threaded environment where the JVM is limited
     * to around 100 Mb. Without a User-Agent cache, memory usage is expected
     * to be around 60-70 Mb.
     *
     * Actual memory load depends on the data file type (i.e. Lite, Premium,
     * etc) and, to some extent, on the way in which the API is being used.
     */
    SingleThreadLowMemory(StlmCacheSizes),

    /**
     * Sizes tuned for a single threaded environment where the JVM is limited
     * to between 150-250 Mb. Without a User-Agent cache, memory usage is
     * expected to be around 80-90 Mb.
     *
     * Actual memory load depends on the data file type (i.e. Lite, Premium,
     * etc) and, to some extent, on the way in which the API is being used.
     */
    SingleThread(StCacheSizes),

    /**
     * Sizes tuned for a multi threaded environment where the JVM is limited
     * to around 100 Mb. Without a User-Agent cache, memory usage is expected
     * to be around 60-70 Mb.
     *
     * Actual memory load depends on the data file type (i.e. Lite, Premium,
     * etc) and, to some extent, on the way in which the API is being used.
     */
    MultiThreadLowMemory(MtlmCacheSizes),

    /**
     * Sizes tuned for a multi threaded environment where the JVM is limited
     * to between 150-250 Mb. Without a User-Agent cache, memory usage is
     * expected to be around 80-90 Mb.
     *
     * Actual memory load depends on the data file type (i.e. Lite, Premium,
     * etc) and, to some extent, on the way in which the API is being used.
     */
    MultiThread(MtCacheSizes),

    /**
     * Sizes tuned for a highly concurrent environment, around 100 threads and
     * up. Without a User-Agent cache, memory usage is expected to be around
     * 40-120 Mb depending on the size limit imposed on the JVM.
     *
     * Actual memory load depends on the data file type (i.e. Lite, Premium,
     * etc) and, to some extent, on the way in which the API is being used.
     */
    HighConcurrency(HcCacheSizes);

    // This template's own map; filled once from the table given to the constructor.
    private final EnumMap configuration;

    /**
     * @param configuration the table whose entries are copied into this template
     */
    CacheTemplate(EnumMap configuration) {
        EnumMap copy = new EnumMap(CacheType.class);
        copy.putAll(configuration);
        this.configuration = copy;
    }

    /**
     * @return the map held by this template (the live instance, not a copy)
     */
    @Override
    public java.util.Map getCacheConfiguration() {
        return configuration;
    }
}
// Cache configuration collected by the fluent configuration methods; it is passed to
// loadForStreaming when one of the build methods is called.
private java.util.Map cacheMap = new EnumMap(CacheType.class);
// prevent direct construction; instances are only created by file() and buffer()
private DatasetBuilder() {
}
/**
 * Starts building a stream dataset that is read from a file.
 *
 * @return a new {@link BuildFromFile} bound to a fresh {@link DatasetBuilder}
 */
public static BuildFromFile file() {
    DatasetBuilder owner = new DatasetBuilder();
    return owner.new BuildFromFile();
}
/**
 * Starts building a stream dataset that is read from a byte array buffer.
 *
 * @return a new {@link BuildFromBuffer} bound to a fresh {@link DatasetBuilder}
 */
public static BuildFromBuffer buffer() {
    DatasetBuilder owner = new DatasetBuilder();
    return owner.new BuildFromBuffer();
}
/**
* Holds cache methods for buffer and file stream mode
*/
@SuppressWarnings("WeakerAccess")
public class Cachable> {
// no direct instantiation
private Cachable () {
}
/**
* Set a cache builder to use for the specified type of cache
* @param cacheType The cache type
* @param builder The cache builder used to create the cache.
* If null is passed then the specified cache type
* will operate without a cache.
* @return The {@link DatasetBuilder}
*/
public T setCacheBuilder(CacheType cacheType, ICacheBuilder builder){
if(cacheMap.containsKey(cacheType)) {
cacheMap.get(cacheType).setBuilder(builder);
} else {
cacheMap.put(cacheType, new CacheOptions(
CacheTemplate.Default.getCacheConfiguration().get(cacheType).getSize(), builder));
}
return (T) this;
}
/**
* Set cache builders for multiple cache types
* @param map A map of cache types and associated cache builders.
* Where a null builder is supplied the associated cache
* type will operate without a cache
* @return The {@link DatasetBuilder}
*/
public T setCacheBuilders(java.util.Map map){
for (CacheType cacheType: map.keySet()) {
setCacheBuilder(cacheType, map.get(cacheType));
}
return (T) this;
}
/**
* Set a size to use for the specified type of cache
* @param cacheType The cache type
* @param cacheSize The size used when creating the cache.
* @return The {@link DatasetBuilder}
*/
public T setCacheSize(CacheType cacheType, int cacheSize){
if(cacheMap.containsKey(cacheType)) {
cacheMap.get(cacheType).setSize(cacheSize);
} else {
cacheMap.put(cacheType, new CacheOptions(
cacheSize, CacheTemplate.Default.getCacheConfiguration().get(cacheType).getBuilder()));
}
return (T) this;
}
/**
* Set cache sizes for multiple cache types
* @param map A map of cache types and associated cache sizes.
* @return The {@link DatasetBuilder}
*/
public T setCacheSizes(java.util.Map map){
for (CacheType cacheType: map.keySet()) {
setCacheSize(cacheType, map.get(cacheType));
}
return (T) this;
}
/**
* Set the builder and size parameter for the specified cache type
* @param cacheType the type
* @param options An {@link ICacheOptions} object that
* specifies a cache builder and size to use when
* constructing the specified cache type
*/
public T configureCache(CacheType cacheType, ICacheOptions options) {
cacheMap.put(cacheType, options);
//noinspection unchecked
return (T) this;
}
/**
* Set builders and size parameters for multiple cache types
* @param map a map of {@link ICacheBuilder} and size parameters
* to use when constructing each cache type
*/
public T configureCaches(java.util.Map map) {
cacheMap.putAll(map);
//noinspection unchecked
return (T) this;
}
/**
* Initialises the {@link DatasetBuilder} with the default cache configuration.
* Individual elements of this configuration can be overridden by using the
* ConfigureCache, ConfigureCaches, SetCacheBuilder and SetCacheBuilders methods
*/
public T configureDefaultCaches() {
configureCaches(CacheTemplate.Default.getCacheConfiguration());
//noinspection unchecked
return (T) this;
}
/**
* Add cache configuration from a predefined cache template.
* Individual elements of this configuration can be overridden by using the
* ConfigureCache, ConfigureCaches, SetCacheBuilder and SetCacheBuilders methods
* @param template A {@link CacheTemplate} that defines the desired
* cache configuration
*/
public T configureCachesFromTemplate(CacheTemplate template) {
return configureCachesFromCacheSet(template);
}
/**
* Add caches from a CacheSet
* To add, say, your own Template with the default LruCache
* do as follows:
*
*
* addCachesFromCacheSet(myCacheSet, LruCache.builder())
*
*
* @param set the template
*/
public T configureCachesFromCacheSet(CacheSet set) {
configureCaches(set.getCacheConfiguration());
//noinspection unchecked
return (T) this;
}
}
/**
 * Buffer dataset builder.
 *
 * The type argument on {@link Cachable} makes the inherited configuration
 * methods return {@code BuildFromBuffer}, so that {@link #build(byte[])} can
 * be called at the end of a fluent chain.
 */
public class BuildFromBuffer extends Cachable<BuildFromBuffer> {

    // cannot be instantiated directly
    private BuildFromBuffer() {
    }

    /**
     * Build the dataset from a buffer, using the cache configuration
     * collected on this builder.
     *
     * @param buffer the buffer
     * @return the dataset, after {@code loadForStreaming} has been applied to it
     * @throws IOException declared by the dataset constructor and/or
     *                     {@code loadForStreaming}
     */
    public IndirectDataset build(byte[] buffer) throws IOException {
        IndirectDataset dataSet = new IndirectDataset(buffer, Modes.MEMORY_MAPPED);
        // NOTE(review): if loadForStreaming throws, the dataset created above is
        // not closed here - confirm whether it holds resources that need releasing.
        loadForStreaming(dataSet, cacheMap);
        return dataSet;
    }
}
/**
 * File dataset builder.
 *
 * The type argument on {@link Cachable} makes the inherited configuration
 * methods return {@code BuildFromFile}, so that the file-specific options and
 * {@link #build(String)} can be called at any point in a fluent chain.
 */
@SuppressWarnings("WeakerAccess")
public class BuildFromFile extends Cachable<BuildFromFile> {
    private boolean isTempFile = false;
    // null means "use the file's own last-modified time" (see build)
    private Date lastModified = null;

    // cannot be instantiated directly
    private BuildFromFile() {
    }

    /**
     * If this dataset is built from a file, delete the file after close
     * @return this builder
     */
    public BuildFromFile setTempFile() {
        return setTempFile(true);
    }

    /**
     * If this dataset is built from a file
     * @param isTemp if true, delete the file after close
     * @return this builder
     */
    public BuildFromFile setTempFile(boolean isTemp) {
        isTempFile = isTemp;
        return this;
    }

    /**
     * Override the date passed to the dataset; when no date (or null) is set,
     * the file's last-modified time is used instead.
     * @param date the date; copied, so later changes to the caller's object
     *             do not affect this builder
     * @return this builder
     */
    public BuildFromFile lastModified(Date date) {
        lastModified = (date == null) ? null : new Date(date.getTime());
        return this;
    }

    /**
     * Build the dataset from a file, using the cache configuration collected
     * on this builder.
     *
     * @param filename the filename to build from
     * @return the dataset, after {@code loadForStreaming} has been applied to it
     * @throws IOException declared by the dataset constructor and/or
     *                     {@code loadForStreaming}
     */
    public IndirectDataset build(String filename) throws IOException {
        // Give the dataset its own Date instance so it never shares one with this builder.
        Date modDate = (lastModified != null)
                ? new Date(lastModified.getTime())
                : new Date(new File(filename).lastModified());
        IndirectDataset dataSet = new IndirectDataset(filename, modDate, Modes.FILE, isTempFile);
        // NOTE(review): if loadForStreaming throws, the dataset created above is
        // not closed here - confirm whether it holds a file handle that needs releasing.
        loadForStreaming(dataSet, cacheMap);
        return dataSet;
    }
}
/*
public Memory memory() {
return new DatasetBuilder().new Memory();
}
public class Memory extends DatasetBuilder {
public Memory init() {
init = true;
return this;
}
public Memory init(boolean isInit) {
init = isInit;
return this;
}
public Dataset build(String filename) throws IOException {
Date modDate = lastModified;
if (modDate.equals(DATE_NONE)) {
modDate = new Date(new File(filename).lastModified());
}
Dataset dataSet = new Dataset(modDate, Modes.MEMORY);
loadForMemory(dataSet, init);
return dataSet;
}
public Dataset build(byte[] buffer) throws IOException {
Date modDate = lastModified;
if (modDate.equals(DATE_NONE)) {
modDate = new Date();
}
Dataset dataSet = new Dataset(modDate, Modes.MEMORY);
loadForMemory(dataSet, init);
return dataSet;
}
}
*/
/**
 * Adapts an entity factory to a value loader: an entity is loaded by taking a
 * reader from the dataset's pool, positioning it relative to the list header's
 * start position and asking the factory to create the entity.
 *
 * NOTE(review): the entity type {@code V} used below is not declared in this
 * listing; the generic parameter lists appear to have been lost - confirm
 * against the original source.
 */
private static class EntityLoader implements IValueLoader {
    final IndirectDataset dataset;
    final BaseEntityFactory entityFactory;
    final Header header;
    // true when the factory answers getLength() without an entity argument
    boolean fixedLength = false;

    EntityLoader(Header header, IndirectDataset dataset, BaseEntityFactory entityFactory) {
        this.header = header;
        this.dataset = dataset;
        this.entityFactory = entityFactory;
        boolean probeSucceeded;
        try {
            // The result is discarded; only whether the call is supported matters.
            getEntityFactory().getLength();
            probeSucceeded = true;
        } catch (UnsupportedOperationException ignored) {
            // expected for variable length entities
            probeSucceeded = false;
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        if (probeSucceeded) {
            fixedLength = true;
        }
    }

    /**
     * Loads the entity for a key. For fixed length entities the key is
     * multiplied by the entity length to find the offset; otherwise the key
     * itself is used as the offset from the header's start position.
     */
    @Override
    public V load(Integer key) throws IOException {
        final BinaryReader pooledReader = dataset.pool.getReader();
        try {
            if (!fixedLength) {
                pooledReader.setPos(header.getStartPosition() + key);
            } else {
                pooledReader.setPos(header.getStartPosition()
                        + (getEntityFactory().getLength() * key));
            }
            return entityFactory.create(dataset, key, pooledReader);
        } finally {
            // always hand the reader back to the pool
            dataset.pool.release(pooledReader);
        }
    }

    /**
     * Computes the key of the entity that follows the one just read.
     *
     * @param position the key of the entity just read
     * @param result the entity just read
     * @return position + 1 for fixed length entities, otherwise position plus
     * the length the factory reports for {@code result}
     */
    @SuppressWarnings("WeakerAccess")
    public int nextPosition(int position, V result) throws IOException {
        if (!fixedLength) {
            // the per-entity length is only asked of variable length factories
            return position + getEntityFactory().getLength(result);
        }
        return position + 1;
    }

    @SuppressWarnings("WeakerAccess")
    public BaseEntityFactory getEntityFactory() {
        return entityFactory;
    }

    public Header getHeader() {
        return header;
    }
}
/**
 * A caching entity loader that uses an {@link LruCache}. The cache is given a
 * plain {@link EntityLoader} as its cache loader, and every load is answered
 * by the cache.
 *
 * NOTE(review): the entity type {@code V} is not declared in this listing; the
 * generic parameter lists appear to have been lost - confirm against the
 * original source.
 */
private static class LruEntityLoader extends EntityLoader {
    private LruCache cache;

    LruEntityLoader(final Header header, final IndirectDataset dataset, final BaseEntityFactory entityFactory, LruCache cache) {
        super(header, dataset, entityFactory);
        this.cache = cache;
        // a separate, uncached loader is what the cache calls to fetch entities
        EntityLoader uncachedLoader = new EntityLoader(header, dataset, entityFactory);
        this.cache.setCacheLoader(uncachedLoader);
    }

    /** Returns whatever the cache returns for the key. */
    @Override
    public V load(Integer key) throws IOException {
        return cache.get(key);
    }
}
/**
 * A caching entity loader that uses an {@link IPutCache}: the cache is asked
 * first, and entities loaded on a miss are put into the cache.
 *
 * NOTE(review): the entity type {@code V} is not declared in this listing; the
 * generic parameter lists appear to have been lost - confirm against the
 * original source.
 */
private static class CachedEntityLoader extends EntityLoader {
    private IPutCache cache;

    CachedEntityLoader(Header header, IndirectDataset dataset, BaseEntityFactory entityFactory, IPutCache cache) {
        super(header, dataset, entityFactory);
        this.cache = cache;
    }

    /**
     * Returns the cached entity for the key when there is one; otherwise
     * loads it through the superclass and caches any non-null result.
     */
    @Override
    public V load(Integer key) throws IOException {
        V cached = cache.get(key);
        if (cached != null) {
            return cached;
        }
        V loaded = super.load(key);
        if (loaded != null) {
            cache.put(key, loaded);
        }
        return loaded;
    }
}
/**
 * Implementation of IReadonlyList for streams: every element is fetched
 * through an {@link EntityLoader}, and the size is the count in the loader's header.
 *
 * NOTE(review): the entity type {@code T} is not declared in this listing; the
 * generic parameter lists appear to have been lost - confirm against the
 * original source.
 */
private static class StreamList implements IReadonlyList {
    private EntityLoader loader;

    StreamList(EntityLoader loader) {
        this.loader = loader;
    }

    /** Loads the element whose key is {@code i}. */
    @Override
    public T get(int i) throws IOException {
        return loader.load(i);
    }

    /** Returns the element count recorded in the loader's header. */
    @Override
    public int size() {
        return this.loader.getHeader().getCount();
    }

    /** Does nothing. */
    @Override
    public void close() throws IOException {
    }

    /**
     * Iterates over the elements in order. The element count is read once,
     * when the iterator is created. An IOException raised while reading is
     * rethrown wrapped in an IllegalStateException.
     */
    @Override
    public Iterator iterator() {
        return new Iterator() {
            // the item number
            int count = 0;
            // the position in the file or the item number (as above)
            // depending on whether the entity is fixed or variable size
            int position = 0;
            // number of elements
            int total = loader.getHeader().getCount();

            @Override
            public boolean hasNext() {
                return count < total;
            }

            @Override
            public T next() {
                if (count >= total) {
                    throw new NoSuchElementException();
                }
                final T element;
                try {
                    element = StreamList.this.get(position);
                    count++;
                    position = loader.nextPosition(position, element);
                } catch (IOException e) {
                    throw new IllegalStateException(e);
                }
                return element;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("remove not supported");
            }
        };
    }
}
/**
 * Helper that picks the loader implementation matching the kind of cache supplied.
 *
 * NOTE(review): the method's type parameter appears to have been lost from
 * this listing - confirm against the original source.
 *
 * @param header the header defining the list this will create the loader for
 * @param cache the cache, or null for no caching
 * @param dataset the dataset
 * @param factory the factory for the entity type
 * @return a plain {@link EntityLoader} when cache is null, an
 * {@link LruEntityLoader} for an {@link LruCache}, or a
 * {@link CachedEntityLoader} for an {@link IPutCache}
 * @throws IllegalStateException if the cache is of any other kind
 */
@SuppressWarnings("unchecked")
private static EntityLoader getLoaderFor(Header header, ICache cache, IndirectDataset dataset, BaseEntityFactory factory) {
    if (cache == null) {
        return new EntityLoader(header, dataset, factory);
    }
    // LruCache is tested before IPutCache, so a cache that is both is treated as an LruCache
    if (cache instanceof LruCache) {
        return new LruEntityLoader(header, dataset, factory, (LruCache) cache);
    }
    if (cache instanceof IPutCache) {
        return new CachedEntityLoader(header, dataset, factory, (IPutCache) cache);
    }
    throw new IllegalStateException("Cache must be null, LruCache or IPutCache");
}
/**
* Load the necessary values from the data
* file in to the Dataset. Stream mode only loads the essential information
* such as file headers.
*
* @param dataSet The dataset object to load in to.
* @throws IOException if there was a problem accessing data file.
*/
@SuppressWarnings("null")
private static void loadForStreaming(IndirectDataset dataSet, java.util.Map cacheConfiguration) throws IOException {
BinaryReader reader = dataSet.pool.getReader();
try {
java.util.Map cacheMap = buildCaches(cacheConfiguration);
dataSet.setCacheMap(cacheMap);
reader.setPos(0);
//Load headers that are common for both V31 and V32.
CommonFactory.loadHeader(dataSet, reader);
EntityLoader loader = getLoaderFor(new Header(reader), cacheMap.get(StringsCache), dataSet, new AsciiStringFactory());
dataSet.strings = new StreamList(loader);
MemoryFixedList components;
switch (dataSet.versionEnum) {
case PatternV31:
components = new MemoryFixedList(
dataSet, reader, new ComponentFactoryV31());
break;
case PatternV32:
components = new MemoryFixedList(
dataSet, reader, new ComponentFactoryV32());
break;
default:
throw new IllegalStateException("Unknown data version number");
}
dataSet.components = components;
MemoryFixedList