org.apache.sysml.hops.rewrite.RewriteSplitDagUnknownCSVRead Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.hops.rewrite;
import java.util.ArrayList;
import org.apache.sysml.hops.DataOp;
import org.apache.sysml.hops.Hop;
import org.apache.sysml.hops.Hop.DataOpTypes;
import org.apache.sysml.hops.Hop.FileFormatTypes;
import org.apache.sysml.hops.Hop.VisitStatus;
import org.apache.sysml.hops.HopsException;
import org.apache.sysml.parser.DataIdentifier;
import org.apache.sysml.parser.StatementBlock;
import org.apache.sysml.parser.VariableSet;
/**
* Rule: Split the Hop DAG after CSV reads with unknown size. This is
* important for creating recompile hooks if the format is read from mtd
* (in which case we are not able to split on statement block creation)
* and the mtd has unknown size (which can only happen for CSV).
*
*/
public class RewriteSplitDagUnknownCSVRead extends StatementBlockRewriteRule
{
/**
 * Collects the persistent CSV reads of unknown size from the hop DAG of the
 * given statement block and, if any were found, begins moving them into a
 * newly created statement block, replacing each of them by a transient read.
 *
 * NOTE(review): this method is truncated in this copy of the file (see the
 * note at the inner for-loop below); the remainder of the split logic, the
 * handling of the no-candidate case and the return statement are not
 * visible here and therefore not documented.
 *
 * @param sb statement block whose hop DAG is scanned and potentially split
 * @param state program rewrite status; not referenced in the visible part of the body
 * @throws HopsException declared by the signature; the throwing code is not visible here
 */
@Override
public ArrayList rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state)
throws HopsException
{
//result list (the code that fills and returns it is not visible here)
ArrayList ret = new ArrayList();
//collect all unknown csv reads hops
ArrayList cand = new ArrayList();
collectCSVReadHopsUnknownSize( sb.get_hops(), cand );
//split hop dag on demand
if( !cand.isEmpty() )
{
try
{
//duplicate sb incl live variable sets
//(same DML program and source positions as sb, but fresh, empty
//live-in and live-out variable sets)
StatementBlock sb1 = new StatementBlock();
sb1.setDMLProg(sb.getDMLProg());
sb1.setAllPositions(sb.getFilename(), sb.getBeginLine(), sb.getBeginColumn(), sb.getEndLine(), sb.getEndColumn());
sb1.setLiveIn(new VariableSet());
sb1.setLiveOut(new VariableSet());
//move csv reads incl reblock to new statement block
//(and replace original persistent read with transient read)
ArrayList sb1hops = new ArrayList();
for( Hop c : cand )
{
//the candidate read itself is handled under the name 'reblock'
Hop reblock = c;
//capture dimensions, number of non-zeros and block sizes of the
//candidate so that the transient read is created with the same values
long rlen = reblock.getDim1();
long clen = reblock.getDim2();
long nnz = reblock.getNnz();
long brlen = reblock.getRowsInBlock();
long bclen = reblock.getColsInBlock();
//create new transient read
//(same name, data type and value type as the candidate)
DataOp tread = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(),
DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, brlen, bclen);
HopRewriteUtils.copyLineNumbers(reblock, tread);
//replace reblock with transient read
//(iterates over a copy of the parent list; presumably because the
//replacement modifies the original parent list -- TODO confirm)
ArrayList parents = new ArrayList(reblock.getParent());
//NOTE(review): the following line is corrupted in this copy of the file.
//The text between 'i' and 'roots' is missing, which apparently includes
//the remainder of rewriteStatementBlock and the header of the list-based
//overload of collectCSVReadHopsUnknownSize (the overload called above
//with sb.get_hops()). Restore the lost code from the original source
//before compiling.
for( int i=0; i roots, ArrayList cand )
{
//remaining body of the list-based overload: nothing to collect without roots
if( roots == null )
return;
//reset the visit status of all roots before the traversal, because the
//per-hop traversal skips hops already marked as DONE
Hop.resetVisitStatus(roots);
//collect candidates from the DAG below each root into the shared list
for( Hop root : roots )
collectCSVReadHopsUnknownSize(root, cand);
}
/**
 * Recursively traverses the hop DAG below the given hop and appends every
 * qualifying CSV read to the candidate list.
 *
 * A hop qualifies if it is a {@link DataOp} that is a persistent read of
 * CSV format whose dimensions are not known, and for which neither
 * {@code HopRewriteUtils.hasOnlyWriteParents(dop, true, false)} nor
 * {@code HopRewriteUtils.hasTransformParents(hop)} holds.
 *
 * Hops already marked as {@code VisitStatus.DONE} are skipped, and every
 * processed hop is marked as DONE after its inputs have been traversed, so
 * each hop is examined at most once per traversal. Callers are expected to
 * reset the visit status before starting a new traversal.
 *
 * @param hop current hop of the traversal; must not be null
 * @param cand output list to which qualifying reads are appended
 */
private void collectCSVReadHopsUnknownSize( Hop hop, ArrayList<Hop> cand )
{
	//skip sub-DAGs that have already been processed in this traversal
	if( hop.getVisited() == VisitStatus.DONE ) {
		return;
	}

	//collect persistent reads (of type csv, with unknown size)
	if( hop instanceof DataOp )
	{
		DataOp dop = (DataOp) hop;
		if(    dop.getDataOpType() == DataOpTypes.PERSISTENTREAD
			&& dop.getInputFormatType() == FileFormatTypes.CSV
			&& !dop.dimsKnown()
			&& !HopRewriteUtils.hasOnlyWriteParents(dop, true, false)
			&& !HopRewriteUtils.hasTransformParents(hop) )
		{
			cand.add(dop);
		}
	}

	//process children
	if( hop.getInput() != null ) {
		for( Hop c : hop.getInput() ) {
			collectCSVReadHopsUnknownSize(c, cand);
		}
	}

	//mark as processed only after all inputs have been handled
	hop.setVisited(VisitStatus.DONE);
}
}