math.gemm.Dgemm4x4Worker Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of math-base Show documentation
Show all versions of math-base Show documentation
Elementary math utilities with a focus on random number generation, non-linear optimization, interpolation and solvers
The newest version!
/*
* Copyright 2018 Stefan Zobel
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package math.gemm;
import java.util.concurrent.Callable;
final class Dgemm4x4Worker implements Callable {
// controls for loop-execution
private final int m_nblo;
private final int m_nbhi;
// private final int m_kblo;
private final int m_kbhi;
private final int m_mblo;
private final int m_mbhi;
private final boolean m_nbfull;
// private final boolean m_kbfull;
private final boolean m_mbfull;
@SuppressWarnings("unused")
private final String m_loop;
@SuppressWarnings("unused")
private final int m_tasks;
// the whole rest
private final int m_nc;
private final int m_kc;
private final int m_mc;
private final double m_alpha;
private final int m_offA;
private final double[] m_A;
private final int m_incRowA;
private final int m_incColA;
private final int m_offB;
private final double[] m_B;
private final int m_incRowB;
private final int m_incColB;
private final double m_beta;
private final int m_offC;
private final double[] m_C;
private final int m_incRowC;
private final int m_incColC;
Dgemm4x4Worker(DgemmTasks.TaskConfig cfg, int _nc, int _kc, int _mc, double alpha, int offA, double[] A,
int incRowA, int incColA, int offB, double[] B, int incRowB, int incColB, double beta, int offC, double[] C,
int incRowC, int incColC) {
// unpack loop controls
m_nblo = cfg.nb_lo;
m_nbhi = cfg.nb_hi;
m_nbfull = !cfg.nb_hi_is_last;
// m_kblo = cfg.kb_lo;
m_kbhi = cfg.kb_hi;
// m_kbfull = !cfg.kb_hi_is_last;
m_mblo = cfg.mb_lo;
m_mbhi = cfg.mb_hi;
m_mbfull = !cfg.mb_hi_is_last;
m_loop = cfg.loop;
m_tasks = cfg.tasks;
// set the rest
m_nc = _nc;
m_kc = _kc;
m_mc = _mc;
m_alpha = alpha;
m_offA = offA;
m_A = A;
m_incRowA = incRowA;
m_incColA = incColA;
m_offB = offB;
m_B = B;
m_incRowB = incRowB;
m_incColB = incColB;
m_beta = beta;
m_offC = offC;
m_C = C;
m_incRowC = incRowC;
m_incColC = incColC;
}
@Override
public Integer call() throws Exception {
//
// hoist local variables
//
final int _nc = m_nc;
final int _kc = m_kc;
final int _mc = m_mc;
final double alpha = m_alpha;
final int offA = m_offA;
final double[] A = m_A;
final int incRowA = m_incRowA;
final int incColA = m_incColA;
final int offB = m_offB;
final double[] B = m_B;
final int incRowB = m_incRowB;
final int incColB = m_incColB;
final double beta = m_beta;
final int offC = m_offC;
final double[] C = m_C;
final int incRowC = m_incRowC;
final int incColC = m_incColC;
//
// Local buffers for storing panels from A, B and C
//
final double[] _A = new double[Dgemm4x4.MC * Dgemm4x4.KC];
final double[] _B = new double[Dgemm4x4.KC * Dgemm4x4.NC];
final double[] _C = new double[Dgemm4x4.MR_Height * Dgemm4x4.NR_Width];
final double[] AB = new double[Dgemm4x4.MR_Height * Dgemm4x4.NR_Width];
// counter for microkernel calls
int micro_kernel_calls = 0;
// hoist loop bounds
final int nblo = m_nblo;
final int nbhi = m_nbhi;
final boolean nbfull = m_nbfull;
// final int kblo = m_kblo;
final int kbhi = m_kbhi;
// final boolean kbfull = m_kbfull;
final int mblo = m_mblo;
final int mbhi = m_mbhi;
final boolean mbfull = m_mbfull;
// System.out.println(
// "computation on " + Thread.currentThread().getName() + " - loop : " + m_loop + " , tasks: " + m_tasks);
for (int j = nblo; j < nbhi; ++j) {
int nc = (_nc == 0 || nbfull || (!nbfull && j != nbhi - 1)) ? Dgemm4x4.NC : _nc;
// kb loop can't be parallelized!
for (int l = 0; l < kbhi; ++l) {
int kc = (l != kbhi - 1 || _kc == 0) ? Dgemm4x4.KC : _kc;
double _beta = (l == 0) ? beta : 1.0;
Dgemm4x4.pack_B(kc, nc, (offB + l * Dgemm4x4.KC * incRowB + j * Dgemm4x4.NC * incColB), B, incRowB,
incColB, _B);
for (int i = mblo; i < mbhi; ++i) {
int mc = (_mc == 0 || mbfull || (!mbfull && i != mbhi - 1)) ? Dgemm4x4.MC : _mc;
Dgemm4x4.pack_A(mc, kc, (offA + i * Dgemm4x4.MC * incRowA + l * Dgemm4x4.KC * incColA), A, incRowA,
incColA, _A);
micro_kernel_calls += Dgemm4x4.dgemm_macro_kernel(mc, nc, kc, alpha, _beta,
(offC + i * Dgemm4x4.MC * incRowC + j * Dgemm4x4.NC * incColC), C, incRowC, incColC, _A, _B,
AB, _C);
}
}
}
return micro_kernel_calls;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy