// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
/**
*
* Copyright 2016 Marco Trevisan
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.github.evenjn.align.graph;
import java.util.Iterator;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.regex.Pattern;
import org.github.evenjn.knit.BasicAutoHook;
import org.github.evenjn.knit.KnittingCursable;
import org.github.evenjn.knit.KnittingCursor;
import org.github.evenjn.knit.ProgressManager;
import org.github.evenjn.yarn.AutoHook;
import org.github.evenjn.yarn.Cursable;
import org.github.evenjn.yarn.Di;
import org.github.evenjn.yarn.Hook;
import org.github.evenjn.yarn.Progress;
import org.github.evenjn.yarn.ProgressSpawner;
import org.github.evenjn.yarn.SkipException;
import org.github.evenjn.yarn.SkipMap;
import org.github.evenjn.yarn.Tuple;
/**
* This object acts as a preprocessor for systems that work on alignment graphs.
*
* Its function is to transform a dataset of tuple pairs into a dataset of
* alignment graphs.
*
* It also provides information about the dataset such as the length of the
* longest tuples and the maximum number of edges occurring in a graph.
*
*
* There are four possible configurations that affect caching behaviour.
*
* reader is null, writer is null
*
* In this case, data is transformed and passed over. No caching occurs.
*
* reader is null, writer is not null
*
* In this case, data is transformed and cached. However, the cache created this
* way is not used. Data is transformed and passed over.
*
* reader is not null, writer is null
*
* In this case, the incoming data is discarded, and loaded from cache instead.
* Cache is only read, not written.
*
* reader is not null, writer is not null
*
* In this case, the data is transformed and cached. Then data is loaded from
* cache and passed over.
*
*/
public class TupleAlignmentGraphDataManager {
/*
* NOTE(review): several declarations in this class read like generic types
* whose type arguments are missing (for example "Function>" and
* "Cursable, Tuple>>"). Element types mentioned in the comments below are
* therefore derived from how the values are used, not from the declarations.
* TODO: restore the type arguments from the upstream source.
*/

/**
* Largest {@code la( )} value seen over the graphs by {@code computeLimits},
* or the first field of the cache header when a reader is configured.
* Remains 0 until {@code load} has run.
*/
private int record_max_length_above = 0;
/**
* Largest {@code lb( )} value seen over the graphs by {@code computeLimits},
* or the second field of the cache header when a reader is configured.
* Remains 0 until {@code load} has run.
*/
private int record_max_length_below = 0;
/**
* Largest per-graph sum of {@code number_of_incoming_edges} seen by
* {@code computeLimits}, or the third field of the cache header when a
* reader is configured. Remains 0 until {@code load} has run.
*/
private int record_max_number_of_edges = 0;
/**
* Stores the configuration. Nothing is read, written or computed here; all
* work happens in {@code load}.
*
* @param min_below
*          forwarded unchanged to {@code TupleAlignmentGraphFactory.graph}
*          for every input pair (presumably a lower bound applied on the
*          "below" side; its exact meaning is defined by the factory -
*          TODO confirm).
* @param max_below
*          forwarded unchanged to {@code TupleAlignmentGraphFactory.graph}
*          for every input pair (presumably the matching upper bound -
*          TODO confirm).
* @param putter_coalignment_graphs
*          the cache "writer". When not null, {@code load} pushes a header
*          line followed by the serialized graphs into it. May be null.
* @param reader_coalignment_graphs
*          the cache "reader". When not null, {@code load} takes the limits
*          from its first element and the graphs from the remaining
*          elements. May be null.
*/
public TupleAlignmentGraphDataManager(
int min_below,
int max_below,
Function> putter_coalignment_graphs,
Cursable reader_coalignment_graphs ) {
this.min_below = min_below;
this.max_below = max_below;
this.putter_coalignment_graphs = putter_coalignment_graphs;
this.reader_coalignment_graphs = reader_coalignment_graphs;
}
/** Passed as-is to the graph factory; see the constructor. */
private final int min_below;
/** Passed as-is to the graph factory; see the constructor. */
private final int max_below;
/** Cache sink; null means that nothing is written. */
private final Function> putter_coalignment_graphs;
/** Cache source; null means that graphs are built from the input data. */
private final Cursable reader_coalignment_graphs;
/** Result of the most recent {@code load}; null until then. */
private KnittingCursable exposed_graphs;
/**
* @return the graphs prepared by the most recent call to {@code load}.
* @throws IllegalStateException
*           when no graphs have been prepared yet, that is, {@code load}
*           has not completed.
*/
public KnittingCursable getGraphs( ) {
if ( exposed_graphs == null ) {
throw new IllegalStateException( );
}
return exposed_graphs;
}
/**
* @return the maximum number of input symbols observed, either computed from
*         the transformed data or read from the cache header; 0 before
*         {@code load}.
*/
public int getMaxLenghtAbove( ) {
return record_max_length_above;
}
/**
* @return the maximum number of output symbols observed, either computed
*         from the transformed data or read from the cache header; 0 before
*         {@code load}.
*/
public int getMaxLenghtBelow( ) {
return record_max_length_below;
}
/**
* @return the maximum number of edges in a single tuple alignment graph,
*         either computed from the transformed data or read from the cache
*         header; 0 before {@code load}.
*/
public int getMaxNumberOfEdges( ) {
return record_max_number_of_edges;
}
/**
* Prepares the graphs for the given dataset and makes them available through
* {@code getGraphs}. Depending on which of reader and writer are configured,
* this computes the limits, writes the cache and/or reads the cache; see
* {@code prepareGraphs}.
*
* @param data
*          the input pairs; each element exposes {@code front( )} and
*          {@code back( )}, which are handed to the graph factory.
* @param pair_encoder
*          forwarded unchanged to {@code TupleAlignmentGraphFactory.graph}.
* @param progress_spawner
*          used to spawn the progress object named "prepareGraphs".
* @return this object, to allow call chaining.
*/
public TupleAlignmentGraphDataManager load(
Cursable, Tuple>> data,
BiFunction, Integer> pair_encoder,
ProgressSpawner progress_spawner ) {
KnittingCursable, Tuple>> kc = KnittingCursable.wrap( data );
// The progress object is spawned on a hook that is closed when this try
// block exits, i.e. before the method returns.
try ( AutoHook hook = new BasicAutoHook( ) ) {
Progress spawn =
ProgressManager.safeSpawn( hook, progress_spawner,
"prepareGraphs" );
exposed_graphs = prepareGraphs( kc, pair_encoder, spawn );
}
return this;
}
/**
* True once the three record_max_* fields hold values for the current
* dataset, whether computed or parsed from the cache header.
*/
private boolean limits_are_computed = false;
/**
* Makes one pass over the given graphs and raises the three record_max_*
* fields to the largest values seen, then marks the limits as computed.
* The fields are only ever increased here, never reset.
*
* @param progress
*          not used in this method; callers report progress through a tap
*          on {@code data} instead.
* @param data
*          the graphs to scan.
*/
private void computeLimits( Progress progress, KnittingCursable data ) {
try ( AutoHook hook = new BasicAutoHook( ) ) {
for ( TupleAlignmentGraph g : data.pull( hook ).once( ) ) {
int la = g.la( );
int lb = g.lb( );
if ( record_max_length_above < la ) {
record_max_length_above = la;
}
if ( record_max_length_below < lb ) {
record_max_length_below = lb;
}
// Edge count of this graph: sum of incoming edges over all nodes
// returned by forward( ).
int current_number_of_edges = 0;
Iterator iter = g.forward( );
while ( iter.hasNext( ) ) {
TupleAlignmentNode node = iter.next( );
int no_ie = node.number_of_incoming_edges;
current_number_of_edges = current_number_of_edges + no_ie;
}
if ( record_max_number_of_edges < current_number_of_edges ) {
record_max_number_of_edges = current_number_of_edges;
}
}
}
limits_are_computed = true;
}
/**
* Implements the four reader/writer configurations described in the class
* documentation.
*
* Step 1, only when a writer is configured: count the input, compute the
* limits, then write a header line "above,below,edges" followed by the
* serialized graphs to the writer.
*
* Step 2, when a reader is configured: parse the limits from the first
* element of the reader and return the graphs deserialized from the
* remaining elements.
*
* Step 2, when no reader is configured: compute the limits if step 1 did not
* already do so, and return the graphs built from the input data.
*
* @param data
*          the input pairs.
* @param pair_encoder
*          forwarded unchanged to the graph factory.
* @param progress
*          receives info messages, the step target and one step per input
*          element for each counted pass.
* @return the graphs, either deserialized from the reader or built from
*         {@code data}.
*/
private
KnittingCursable
prepareGraphs(
KnittingCursable, Tuple>> data,
BiFunction, Integer> pair_encoder,
Progress progress ) {
// Builds one graph per input pair. A pair that the factory rejects with
// NotAlignableException is reported as SkipException (presumably dropped by
// skipmap rather than aborting the pass - TODO confirm).
SkipMap, Tuple>, TupleAlignmentGraph> skipMap =
new SkipMap, Tuple>, TupleAlignmentGraph>( ) {
@Override
public TupleAlignmentGraph get(
Di, Tuple> x )
throws SkipException {
try {
return TupleAlignmentGraphFactory.graph(
pair_encoder,
x.front( ),
x.back( ),
min_below,
max_below );
}
catch ( NotAlignableException e ) {
throw SkipException.neo;
}
}
};
// This outer test holds whenever a writer is configured or no reader is.
// The only work inside is guarded by the writer check below, so without a
// writer this block does nothing.
if ( null != putter_coalignment_graphs
|| null == reader_coalignment_graphs ) {
/*
* re-compute the coalignment graphs.
*
* This is a lazy iterator, so the graphs are computed on demand. In this
* branch they are demanded twice: once to compute the limits and once to
* serialize them, so the skip map runs over the input two times.
*/
if ( null != putter_coalignment_graphs ) {
progress.info( "Computing dataset size before computing limits." );
int progress_target = 0;
try ( AutoHook hook2 = new BasicAutoHook( ) ) {
Progress spawn = progress.spawn( hook2, "computing dataset size" );
progress_target = data.tap( x -> spawn.step( 1 ) ).size( );
}
// Two counted passes follow (limits, then caching), each stepping once
// per input element, hence twice the dataset size.
progress.target( 2 * progress_target );
progress.info( "Computing limits." );
computeLimits( progress,
data.tap( x -> progress.step( 1 ) ).skipmap( skipMap ) );
progress.info( "Caching graphs." );
KnittingCursable graphs_to_write = data
.tap( x -> progress.step( 1 ) )
.skipmap( skipMap );
// Header line: the three limits separated by commas. The reader branch
// below parses exactly this format.
StringBuilder header = new StringBuilder( );
header.append( record_max_length_above );
header.append( "," );
header.append( record_max_length_below );
header.append( "," );
header.append( record_max_number_of_edges );
try ( AutoHook hook = new BasicAutoHook( ) ) {
// The header goes first, followed by whatever each graph's serializer
// yields; everything is pushed into the writer.
KnittingCursor.on( header.toString( ) ).chain(
graphs_to_write
.pull( hook )
.unfoldCursable(
x -> new TupleAlignmentGraphSerializer( x ) ) )
.consume( putter_coalignment_graphs );
}
// Already set by computeLimits above; repeated here.
limits_are_computed = true;
}
}
if ( null != reader_coalignment_graphs ) {
try ( AutoHook hook = new BasicAutoHook( ) ) {
// The first element of the reader is the header line. The values parsed
// here replace whatever computeLimits stored above. A header with fewer
// than three comma-separated fields, or with a non-numeric field, makes
// the array access or Integer.parseInt throw.
Pattern splitter = Pattern.compile( "," );
String[] split = splitter.split(
KnittingCursable.wrap( reader_coalignment_graphs ).head( 0, 1 )
.one( hook ) );
record_max_length_above = Integer.parseInt( split[0] );
record_max_length_below = Integer.parseInt( split[1] );
record_max_number_of_edges = Integer.parseInt( split[2] );
limits_are_computed = true;
}
/*
* de-serialize them from the reader: the header element is dropped and
* the rest is folded by a deserializer constructed with the two length
* limits parsed above.
*/
return KnittingCursable
.wrap( reader_coalignment_graphs )
.headless( 1 )
.skipfold( ( ) -> new TupleAlignmentGraphDeserializer(
record_max_length_above,
record_max_length_below ) );
}
else {
// No reader. If the writer branch ran, the limits are already known;
// otherwise one counted pass over the data computes them now.
if ( !limits_are_computed ) {
progress.info(
"Computing dataset size before computing limits." );
int progress_target = 0;
try ( AutoHook hook2 = new BasicAutoHook( ) ) {
Progress spawn = progress.spawn( hook2, "computing dataset size" );
progress_target = data.tap( x -> spawn.step( 1 ) ).size( );
}
progress.target( progress_target );
progress.info( "Computing limits." );
computeLimits( progress,
data.tap( x -> progress.step( 1 ) ).skipmap( skipMap ) );
}
// The graphs handed to the caller are built from the input data through
// the skip map, not taken from any cache.
return data.skipmap( skipMap );
}
}
}