![JAR search and dependency download from the Maven repository](/logo.png)
org.github.evenjn.align.alphabet.TupleAlignmentAlphabetDataManager Maven / Gradle / Ivy
/**
*
* Copyright 2016 Marco Trevisan
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.github.evenjn.align.alphabet;
import java.util.function.Consumer;
import java.util.function.Function;
import org.github.evenjn.knit.BasicAutoHook;
import org.github.evenjn.knit.Bi;
import org.github.evenjn.knit.KnittingCursable;
import org.github.evenjn.knit.ProgressManager;
import org.github.evenjn.yarn.AutoHook;
import org.github.evenjn.yarn.Cursable;
import org.github.evenjn.yarn.Di;
import org.github.evenjn.yarn.Hook;
import org.github.evenjn.yarn.Progress;
import org.github.evenjn.yarn.ProgressSpawner;
import org.github.evenjn.yarn.Tuple;
/*
* This object acts as a preprocessor for systems that work on alignment graphs.
*
* Its function is to transform a dataset of tuple pairs into a dataset of
* alignment graphs.
*
* It also provides information about the dataset such as the length of the
* longest tuples and the maximum number of edges occurring in a graph.
*
*
*
*/
public class TupleAlignmentAlphabetDataManager {

  // NOTE(review): several declarations in this class (for example
  // "Function> writer" and "Cursable, Tuple>> data") carry unbalanced angle
  // brackets; presumably their generic type arguments were lost before this
  // file reached review. They are reproduced exactly as found -- confirm
  // against the canonical source before compiling.

  /** Builder used to compute the alphabet; never null after construction. */
  private final TupleAlignmentAlphabetBuilder builder;

  /** First argument forwarded to the builder's setMinMax. */
  private final int min_below;

  /** Second argument forwarded to the builder's setMinMax. */
  private final int max_below;

  /** Sink for the serialized alphabet; null means nothing is written. */
  private final Function> writer;

  /** Logger handed to the builder through setPrinters. */
  private final Function> logger;

  /** Source of a serialized alphabet; null means nothing is read back. */
  private final Cursable reader;

  /** Printers handed to the builder through setPrinters. */
  private final Function a_printer;
  private final Function b_printer;

  /** Element serializers handed to the alphabet serializer. */
  private final Function a_serializer;
  private final Function b_serializer;

  /** Element deserializers handed to the alphabet deserializer. */
  private final Function a_deserializer;
  private final Function b_deserializer;

  /** Alphabet produced by the most recent call to load; null before that. */
  private TupleAlignmentAlphabet alphabet;

  /**
   * Creates a data manager. The combination of reader and writer decides the
   * caching behaviour. There are four possible configurations.
   *
   * reader is null, writer is null
   *
   * The alphabet is computed from the data and passed over. No caching
   * occurs.
   *
   * reader is null, writer is not null
   *
   * The alphabet is computed and written to the cache. The cache created this
   * way is not read back: the computed alphabet is passed over.
   *
   * reader is not null, writer is null
   *
   * The incoming data is discarded and the alphabet is loaded from the cache
   * instead. The cache is only read, not written.
   *
   * reader is not null, writer is not null
   *
   * The alphabet is computed and written to the cache. Then it is loaded from
   * the cache and passed over.
   *
   * @param min_below first argument forwarded to the builder's setMinMax
   * @param max_below second argument forwarded to the builder's setMinMax
   * @param builder alphabet builder; when null, a
   *        TupleAlignmentAlphabetGreedyBuilder( false ) is used instead
   * @param writer sink for the serialized alphabet, or null
   * @param reader source of a serialized alphabet, or null
   * @param a_serializer serializer passed to the alphabet serializer
   * @param b_serializer serializer passed to the alphabet serializer
   * @param a_deserializer deserializer passed to the alphabet deserializer
   * @param b_deserializer deserializer passed to the alphabet deserializer
   * @param a_printer printer passed to the builder
   * @param b_printer printer passed to the builder
   * @param logger logger passed to the builder
   */
  public TupleAlignmentAlphabetDataManager(
      int min_below,
      int max_below,
      TupleAlignmentAlphabetBuilder builder,
      Function> writer,
      Cursable reader,
      Function a_serializer,
      Function b_serializer,
      Function a_deserializer,
      Function b_deserializer,
      Function a_printer,
      Function b_printer,
      Function> logger) {
    this.min_below = min_below;
    this.max_below = max_below;
    this.builder = builder != null ? builder
        : new TupleAlignmentAlphabetGreedyBuilder( false );
    this.writer = writer;
    this.reader = reader;
    this.a_serializer = a_serializer;
    this.b_serializer = b_serializer;
    this.a_deserializer = a_deserializer;
    this.b_deserializer = b_deserializer;
    this.a_printer = a_printer;
    this.b_printer = b_printer;
    this.logger = logger;
  }

  /**
   * Returns the alphabet stored by the last call to load.
   *
   * @return the stored alphabet, never null
   * @throws IllegalStateException when no alphabet has been stored yet
   */
  public TupleAlignmentAlphabet getAlphabet( ) {
    if ( alphabet == null ) {
      throw new IllegalStateException(
          "No alphabet available: load(...) has not produced one yet." );
    }
    return alphabet;
  }

  /**
   * Prepares the alphabet for the given data (computing it and/or reading it
   * from the cache, depending on reader and writer) and stores it so that
   * getAlphabet can return it.
   *
   * @param data the dataset the alphabet is computed from
   * @param progress_spawner used to spawn the progress reporter when the
   *        alphabet is computed
   * @return this object, to allow call chaining
   */
  public TupleAlignmentAlphabetDataManager load(
      Cursable, Tuple>> data,
      ProgressSpawner progress_spawner ) {
    KnittingCursable, Tuple>> kc = KnittingCursable.wrap( data );
    alphabet = prepareAlphabet( kc, progress_spawner );
    return this;
  }

  /**
   * Computes the alphabet when a writer is set or no reader is set, writes it
   * to the writer when there is one, and finally, when a reader is set,
   * replaces the result with the alphabet de-serialized from the reader.
   */
  private TupleAlignmentAlphabet prepareAlphabet(
      KnittingCursable, Tuple>> data,
      ProgressSpawner progress_spawner ) {
    TupleAlignmentAlphabet coalignment_alphabet = null;
    if ( null != writer || null == reader ) {
      /*
       * re-compute the coalignment alphabet.
       */
      KnittingCursable, Tuple>> map =
          data
              .map( x -> ( new Bi, Tuple>( )
                  .set( x.front( ), x.back( ) ) ) );
      try ( AutoHook hook = new BasicAutoHook( ) ) {
        Progress spawn =
            ProgressManager.safeSpawn( hook, progress_spawner, "prepareAlphabet" );
        spawn.info( "Computing dataset size." );
        int size = data.size( );
        /*
         * when the alphabet is also serialized, the progress target is
         * doubled.
         */
        spawn.target( null != writer ? 2 * size : size );
        spawn.info( "Working out alphabet" );
        coalignment_alphabet =
            createAlphabet( map, min_below, max_below, spawn );
        /*
         * serialize the coalignment alphabet, and pour it into the putter.
         */
        if ( null != writer ) {
          spawn.info( "Serializing alignment graphs" );
          TupleAlignmentAlphabetSerializer serializer =
              new TupleAlignmentAlphabetSerializer<>(
                  coalignment_alphabet,
                  a_serializer,
                  b_serializer );
          KnittingCursable.wrap( serializer )
              .tap( x -> spawn.step( 1 ) )
              .consume(
                  writer );
        }
      }
    }
    if ( null != reader ) {
      /*
       * de-serialize the alphabet from the reader; this replaces any alphabet
       * computed above.
       */
      try ( AutoHook hook = new BasicAutoHook( ) ) {
        /*
         * This is interesting, because the output of the serializer is not
         * volatile, but how can we communicate that?
         */
        coalignment_alphabet = KnittingCursable
            .wrap( reader ).pull( hook )
            .skipfold( new TupleAlignmentAlphabetDeserializer<>(
                a_deserializer,
                b_deserializer ) )
            .one( );
      }
    }
    return coalignment_alphabet;
  }

  /**
   * Configures the builder with the logger, the printers and the given bounds,
   * then builds the alphabet from the data.
   *
   * The bounds are forwarded to the builder. Earlier code ignored both
   * parameters and always called setMinMax(0, 2), so the values given to the
   * constructor had no effect.
   * NOTE(review): confirm that no caller relied on the fixed (0, 2) bounds.
   *
   * @param data the dataset the alphabet is built from
   * @param min_below first argument for the builder's setMinMax
   * @param max_below second argument for the builder's setMinMax
   * @param progress progress reporter passed to the builder
   * @return whatever the builder's build method returns
   */
  private TupleAlignmentAlphabet
      createAlphabet(
          KnittingCursable, Tuple>> data,
          int min_below,
          int max_below,
          Progress progress ) {
    builder.setPrinters( logger, a_printer, b_printer );
    builder.setMinMax( min_below, max_below );
    return builder.build( data, progress );
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy