// org.ojalgo.machine.Hardware - part of the ojalgo artifact (Maven / Gradle / Ivy)
/*
* Copyright 1997-2024 Optimatika
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package org.ojalgo.machine;
import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;
import org.ojalgo.array.operation.COPY;
import org.ojalgo.netio.ASCII;
/**
 * <ul>
 * <li>The first element in the array should correspond to total system resources; the total amount of RAM and
 * the total number of threads (typically the same as what is returned by
 * {@linkplain Runtime#availableProcessors()}).</li>
 * <li>The last element in the array should describe the L1 cache. Typically Intel processors have 32k L1
 * cache and AMD 64k. 1 or maybe 2 threads use/share this cache.</li>
 * <li>Caches, all levels except L1, are described between the first and last elements in descending order (L3
 * cache comes before L2 cache). Specify the size of the cache and the number of threads using/sharing the
 * cache. (Do not worry about how many cache units there are - describe one unit.)</li>
 * <li>The array must have at least 2 elements. You must describe the total system resources and the L1 cache.
 * It is strongly recommended to also describe the L2 cache. The L3 cache, if it exists, is less important to
 * describe. The derived attributes <code>processors</code>, <code>cores</code> and <code>units</code> may be
 * incorrectly calculated if you fail to specify the caches. Known issue: If you have more than one processor,
 * but no L3 cache, the <code>processors</code> attribute will be incorrectly set to 1. A workaround that
 * currently works is to define an L3 cache anyway and set the memory/size of that cache to 0 bytes. This
 * workaround may stop working in the future.</li>
 * <li><code>new BasicMachine[] { SYSTEM, L3, L2, L1 }</code> or
 * <code>new BasicMachine[] { SYSTEM, L2, L1 }</code> or <code>new BasicMachine[] { SYSTEM, L1 }</code></li>
 * </ul>
 *
 * @author apete
 */
public final class Hardware extends CommonMachine implements Comparable<Hardware> {
/**
 * Assumed CPU cache-line size, in bytes. The cache-line size is (typically) 64 bytes.
 */
public static final long CPU_CACHE_LINE_SIZE = 64L;
/**
 * Assumed operating system memory page size: 4 * K.
 * <p>
 * Page size is usually determined by the processor architecture. Traditionally, pages in a system had
 * uniform size, such as 4,096 bytes. However, processor designs often allow two or more, sometimes
 * simultaneous, page sizes due to its benefits. There are several points that can factor into choosing
 * the best page size.
 * <p>
 * Practically all architectures/OS:s have a page size of 4k (one notable exception is Solaris/SPARC that
 * have 8k).
 * <p>
 * AArch64 supports three different granule sizes: 4KB, 16KB, and 64KB.
 */
public static final long OS_MEMORY_PAGE_SIZE = 4L * K;
/**
 * Should contain all available hardware in ascending "power" order. Backed by a {@link TreeSet} without
 * an explicit comparator, so the ordering (and what counts as a duplicate) is decided by
 * {@link #compareTo(Hardware)}.
 */
public static final Set<Hardware> PREDEFINED = new TreeSet<>();
/**
 * M1 Pro: Mainly modelled after the performance cores since there are more of those. Also did not separate
 * between L2 and L3/SLC cache since there are 2 of each and they are the same size per thread.
 * <p>
 * Notes: M2, M2 Pro, M2 Max, M2 Ultra -&gt; 1, 2, 4, 8 memory controllers resulting in 100GB/s, 200GB/s,
 * 400GB/s and 800GB/s Memory Bandwidth
 * <ul>
 * <li>Apple M1 Pro
 * <ul>
 * <li>L1 Cache: the high-perf cores have a large 192 KB of L1 instruction cache and 128 KB of L1 data
 * cache. The energy-efficient cores have a 128 KB L1 instruction cache, 64 KB L1 data cache.</li>
 * <li>L2 Cache (28MB all together): The 6 high-perf cores are split in two clusters, each cluster has 12MB
 * of shared L2 cache (so 24MB total). The 2 high-efficiency cores have 4MB of shared L2 cache.</li>
 * <li>L3 / SLC (24MB all together): The SLC is 12MB per memory controller, so 24MB total.</li>
 * <li>16 GB unified memory</li>
 * </ul>
 * </li>
 * <li>squid / 15" MacBook Air 2023, Apple M2
 * <ul>
 * <li>8 cores (4 performance and 4 efficiency)</li>
 * <li>L1: Performance cores 192+128 KB per core / Efficiency cores 128+64 KB per core</li>
 * <li>L2: Performance cores 16 MB / Efficiency cores 4 MB</li>
 * <li>L3: 8 MB</li>
 * <li>24 GB unified memory</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware AARCH64__08 = new Hardware("aarch64", new BasicMachine[] {
        new BasicMachine(24L * K * K * K, 8), // system
        new BasicMachine(8L * K * K, 8), // L3
        new BasicMachine(4L * K * K, 4), // L2
        new BasicMachine(64L * K, 1) // L1
});
/**
 * <ul>
 * <li>CLAM / PowerBook6,5
 * <ul>
 * <li>1 processor</li>
 * <li>1 core per processor</li>
 * <li>1 thread per core</li>
 * <li>===</li>
 * <li>1.25GB system RAM</li>
 * <li>512kB L2 cache per processor</li>
 * <li>64kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware PPC__01 = new Hardware("ppc", new BasicMachine[] {
        new BasicMachine(5L * 256L * K * K, 1), // system
        new BasicMachine(512L * K, 1), // L2
        new BasicMachine(64L * K, 1) // L1
});
/**
 * <ul>
 * <li>INTEL1
 * <ul>
 * <li>1 processor</li>
 * <li>1 core per processor</li>
 * <li>1 thread per core</li>
 * <li>===</li>
 * <li>1GB system RAM</li>
 * <li>1MB L2 cache per processor</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86__01 = new Hardware("x86", new BasicMachine[] {
        new BasicMachine(1L * K * K * K, 1), // system
        new BasicMachine(1L * K * K, 1), // L2
        new BasicMachine(32L * K, 1) // L1
});
/**
 * <ul>
 * <li>B5950053
 * <ul>
 * <li>1 processor</li>
 * <li>2 cores per processor</li>
 * <li>1 thread per core</li>
 * <li>===</li>
 * <li>3.5GB system RAM</li>
 * <li>6MB L2 cache per processor (2 cores)</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86__02 = new Hardware("x86", new BasicMachine[] {
        new BasicMachine(7L * 512L * K * K, 2), // system
        new BasicMachine(6L * K * K, 2), // L2
        new BasicMachine(32L * K, 1) // L1
});
/**
 * <ul>
 * <li>MANTA / iMac7,1
 * <ul>
 * <li>1 processor</li>
 * <li>2 cores per processor</li>
 * <li>1 thread per core</li>
 * <li>===</li>
 * <li>3GB system RAM</li>
 * <li>4MB L2 cache per processor (2 cores)</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__02 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(3L * K * K * K, 2), // system
        new BasicMachine(4L * K * K, 2), // L2
        new BasicMachine(32L * K, 1) // L1
});
/**
 * Combination of {@link #X86_64__04_1_L2}, {@link #X86_64__04_1_L3} and {@link #X86_64__04_2}. This is the
 * only 4-thread x86_64 description that gets registered in {@link #PREDEFINED}.
 */
static final Hardware X86_64__04 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(32L * K * K * K, 4), // system
        new BasicMachine(3L * K * K, 4), // L3
        new BasicMachine(256L * K, 2), // L2
        new BasicMachine(32L * K, 2) // L1
});
/**
 * <ul>
 * <li>PA's Q9400
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>1 thread per core (4 threads in total)</li>
 * <li>===</li>
 * <li>3GB system RAM</li>
 * <li>3MB L2 cache per 2 cores</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>PA's Q6600
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>1 thread per core (4 threads in total)</li>
 * <li>===</li>
 * <li>8GB system RAM</li>
 * <li>4MB L2 cache per 2 cores</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__04_1_L2 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(8L * K * K * K, 4), // system
        new BasicMachine(3L * K * K, 2), // L2
        new BasicMachine(32L * K, 1) // L1
});
/**
 * <ul>
 * <li>Intel i5-4670K with 16GB of RAM
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>1 thread per core (4 threads in total)</li>
 * <li>===</li>
 * <li>16GB system RAM</li>
 * <li>6MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>Intel Core i5-3570K with 32GB of RAM (from Java Matrix Benchmark)
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>1 thread per core (4 threads in total)</li>
 * <li>===</li>
 * <li>32GB system RAM</li>
 * <li>6MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__04_1_L3 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(32L * K * K * K, 4), // system
        new BasicMachine(6L * K * K, 4), // L3
        new BasicMachine(256L * K, 1), // L2
        new BasicMachine(32L * K, 1) // L1
});
/**
 * <ul>
 * <li>BUBBLE / MacBookAir4,2
 * <ul>
 * <li>1 processor</li>
 * <li>2 cores per processor</li>
 * <li>2 threads per core (4 threads in total)</li>
 * <li>===</li>
 * <li>4GB system RAM</li>
 * <li>3MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>PA's Intel Core i7-620M laptop
 * <ul>
 * <li>1 processor</li>
 * <li>2 cores per processor</li>
 * <li>2 threads per core (4 threads in total)</li>
 * <li>===</li>
 * <li>8GB system RAM</li>
 * <li>4MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>MacBookPro14,2 (oyster)
 * <ul>
 * <li>1 processor</li>
 * <li>2 cores per processor</li>
 * <li>2 threads per core (4 threads in total)</li>
 * <li>===</li>
 * <li>8GB system RAM</li>
 * <li>4MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__04_2 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(8L * K * K * K, 4), // system
        new BasicMachine(3L * K * K, 4), // L3
        new BasicMachine(256L * K, 2), // L2
        new BasicMachine(32L * K, 2) // L1
});
/**
 * <ul>
 * <li>HA's Intel Core i7-920 server
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>2 threads per core (8 threads in total)</li>
 * <li>===</li>
 * <li>8GB system RAM</li>
 * <li>8MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>Core i7-2600 3.4 GHz - 4 cores - 8 threads from Java Matrix Benchmark
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>2 threads per core (8 threads in total)</li>
 * <li>===</li>
 * <li>11GB system RAM</li>
 * <li>8MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>Core i7-3770 3.4 GHz - 4 cores - 8 threads (whale @ MSC/MSB)
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>2 threads per core (8 threads in total)</li>
 * <li>===</li>
 * <li>8GB system RAM</li>
 * <li>8MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>Core i7-2600 3.4 GHz - 4 cores - 8 threads (Vostro-460 @ Scila)
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>2 threads per core (8 threads in total)</li>
 * <li>===</li>
 * <li>32GB system RAM</li>
 * <li>8MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>Google Cloud Platform Compute Engine n1-standard-8 (8 vCPUs, 30 GB memory, Skylake)
 * <ul>
 * <li>1 processor</li>
 * <li>4 cores per processor</li>
 * <li>2 threads per core (8 threads in total)</li>
 * <li>===</li>
 * <li>30GB system RAM</li>
 * <li>8.25MB L3 cache per processor</li>
 * <li>1MB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__08 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(32L * K * K * K, 8), // system
        new BasicMachine(8L * K * K, 8), // L3
        new BasicMachine(256L * K, 2), // L2
        new BasicMachine(32L * K, 2) // L1
});
/**
 * <pre>
 * "Gulftown" (32 nm) Model: SLBUZ (B1)
 * Intel Core i7-980 3.33GHz
 * 8/25/2010
 * ref: http://ark.intel.com/products/47932
 * https://en.wikipedia.org/wiki/List_of_Intel_Core_i7_microprocessors
 * Device Manager
 * </pre>
 * <ul>
 * <li>Intel Core i7-980
 * <ul>
 * <li>1 processor</li>
 * <li>6 cores per processor</li>
 * <li>2 threads per core (12 threads in total)</li>
 * <li>===</li>
 * <li>12GB system RAM</li>
 * <li>12MB L3 cache per processor</li>
 * <li>256kB L2 cache per core (x6)</li>
 * <li>32kB L1 cache per core (x6)</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__12 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(12L * K * K * K, 12), // system
        new BasicMachine(12L * K * K, 12), // L3
        new BasicMachine(256L * K, 2), // L2
        new BasicMachine(32L * K, 2) // L1
});
/**
 * <ul>
 * <li>SAILFISH / MacPro4,1
 * <ul>
 * <li>2 processors</li>
 * <li>4 cores per processor (8 cores in total)</li>
 * <li>2 threads per core (16 threads in total)</li>
 * <li>===</li>
 * <li>12GB system RAM</li>
 * <li>8MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>OCTOPUS / MacBookPro16,1
 * <ul>
 * <li>1 processor</li>
 * <li>8 cores per processor (8 cores in total)</li>
 * <li>2 threads per core (16 threads in total)</li>
 * <li>===</li>
 * <li>64GB system RAM</li>
 * <li>16MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__16 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(64L * K * K * K, 16), // system
        new BasicMachine(8L * K * K, 16), // L3
        new BasicMachine(256L * K, 2), // L2
        new BasicMachine(32L * K, 2) // L1
});
/**
 * <ul>
 * <li>CBL (prod &amp; test) 2 x Intel(R) Xeon(R) CPU E5-2697A v4 @ 2.60GHz
 * <ul>
 * <li>2 processors</li>
 * <li>16 cores per processor (32 cores in total)</li>
 * <li>2 threads per core (64 threads in total)</li>
 * <li>===</li>
 * <li>512GB system RAM</li>
 * <li>40MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * <li>CBF (simu) 4 x Intel(R) Xeon(R) CPU E7-4809 v3 @ 2.00GHz
 * <ul>
 * <li>4 processors</li>
 * <li>8 cores per processor (32 cores in total)</li>
 * <li>2 threads per core (64 threads in total)</li>
 * <li>===</li>
 * <li>512GB system RAM</li>
 * <li>20MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__64 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(512L * K * K * K, 64), // system
        new BasicMachine(20L * K * K, 32), // L3
        new BasicMachine(256L * K, 2), // L2
        new BasicMachine(32L * K, 2) // L1
});
/**
 * <ul>
 * <li>CBF (prod) 4 x Intel(R) Xeon(R) CPU E7-4830 v3 @ 2.10GHz
 * <ul>
 * <li>4 processors</li>
 * <li>12 cores per processor (48 cores in total)</li>
 * <li>2 threads per core (96 threads in total)</li>
 * <li>===</li>
 * <li>512GB system RAM</li>
 * <li>30MB L3 cache per processor</li>
 * <li>256kB L2 cache per core</li>
 * <li>32kB L1 cache per core</li>
 * </ul>
 * </li>
 * </ul>
 */
static final Hardware X86_64__96 = new Hardware("x86_64", new BasicMachine[] {
        new BasicMachine(512L * K * K * K, 96), // system
        new BasicMachine(30L * K * K, 24), // L3
        new BasicMachine(256L * K, 2), // L2
        new BasicMachine(32L * K, 2) // L1
});
static {
PREDEFINED.add(AARCH64__08);
PREDEFINED.add(PPC__01);
PREDEFINED.add(X86__01);
PREDEFINED.add(X86__02);
PREDEFINED.add(X86_64__02);
PREDEFINED.add(X86_64__04);
// PREDEFINED.add(X86_64.X86_64__04_2);
// PREDEFINED.add(X86_64.X86_64__04_1_L2);
// PREDEFINED.add(X86_64.X86_64__04_1_L3);
PREDEFINED.add(X86_64__08);
PREDEFINED.add(X86_64__12);
PREDEFINED.add(X86_64__16);
PREDEFINED.add(X86_64__64);
PREDEFINED.add(X86_64__96);
}
/**
 * Creates a simple hardware description using the architecture, memory and thread count reported by
 * {@link VirtualMachine}, delegating to {@link #makeSimple(String, long, int)}.
 *
 * @return A new hardware description
 */
public static Hardware makeSimple() {
    final String architecture = VirtualMachine.getArchitecture();
    final long memory = VirtualMachine.getMemory();
    final int threads = VirtualMachine.getThreads();
    return Hardware.makeSimple(architecture, memory, threads);
}
/**
 * Creates a hardware description from a minimum of information; the cache hierarchy is guessed from the
 * number of threads:
 * <ul>
 * <li>More than 8 threads: hyperthreading, an L3 cache and more than 1 CPU are assumed.</li>
 * <li>4 to 8 threads: hyperthreading and an L3 cache, but only 1 CPU, are assumed.</li>
 * <li>Fewer than 4 threads: no hyperthreading, no L3 cache and 1 CPU are assumed.</li>
 * </ul>
 *
 * @param systemArchitecture The architecture name passed on to the constructor
 * @param systemMemory The total system memory
 * @param systemThreads The total number of threads
 * @return A new hardware description
 */
public static Hardware makeSimple(final String systemArchitecture, final long systemMemory, final int systemThreads) {

    final BasicMachine system = new BasicMachine(systemMemory, systemThreads);

    if (systemThreads < 4) {
        // No hyperthreading, no L3 cache and 1 CPU
        final BasicMachine plainL1 = new BasicMachine(32L * K, 1);
        final BasicMachine plainL2 = new BasicMachine(2L * K * K, plainL1.threads);
        return new Hardware(systemArchitecture, new BasicMachine[] { system, plainL2, plainL1 });
    }

    // From here on: assume hyperthreading (2 threads share L1 and L2) and an L3 cache
    final BasicMachine sharedL1 = new BasicMachine(32L * K, 2);
    final BasicMachine sharedL2 = new BasicMachine(256L * K, sharedL1.threads);

    final BasicMachine sharedL3;
    if (systemThreads > 8) {
        // More than 1 CPU: split the threads over as many CPUs as needed to have at most 8 threads each
        final int assumedCPUs = (systemThreads + 7) / 8;
        sharedL3 = new BasicMachine(4L * K * K, systemThreads / assumedCPUs);
    } else {
        // Only 1 CPU: all threads share the L3 cache
        sharedL3 = new BasicMachine(3L * K * K, systemThreads);
    }

    return new Hardware(systemArchitecture, new BasicMachine[] { system, sharedL3, sharedL2, sharedL1 });
}
/**
 * A copy of the levels supplied to the constructor: system first, L1 cache last.
 */
private final BasicMachine[] myLevels;

/**
 * <code>new BasicMachine[] { SYSTEM, L3, L2, L1 }</code> or
 * <code>new BasicMachine[] { SYSTEM, L2, L1 }</code> or in worst case
 * <code>new BasicMachine[] { SYSTEM, L1 }</code>
 *
 * @param arch The architecture name, passed on to the superclass
 * @param levels The system resources followed by the caches in descending order; at least 2 elements
 * @throws IllegalArgumentException if fewer than 2 levels are supplied
 */
public Hardware(final String arch, final BasicMachine[] levels) {

    super(arch, levels);

    if (levels.length < 2) {
        throw new IllegalArgumentException("At least 2 levels (system and L1 cache) are required, but got " + levels.length);
    }

    // Keep a copy of the array rather than the caller's reference
    myLevels = COPY.copyOf(levels);
}
/**
 * Orders hardware by, in turn: cores, threads, cache, units and memory (each ascending). The first
 * attribute that differs decides the outcome.
 * <p>
 * The comparisons use {@link Integer#compare(int, int)} and {@link Long#compare(long, long)} rather than
 * subtraction. Casting a <code>long</code> difference to <code>int</code> truncates it: a difference that
 * is a multiple of 4GB would come out as 0 ("equal"), and a 2GB difference would get the wrong sign.
 * Only the sign of the returned value is meaningful.
 *
 * @param other The hardware to compare with
 * @return A negative integer, zero, or a positive integer as this hardware is less than, equal to, or
 *         greater than the other
 */
@Override
public int compareTo(final Hardware other) {

    int result = Integer.compare(cores, other.cores);
    if (result != 0) {
        return result;
    }

    result = Integer.compare(threads, other.threads);
    if (result != 0) {
        return result;
    }

    result = Long.compare(cache, other.cache);
    if (result != 0) {
        return result;
    }

    result = Integer.compare(units, other.units);
    if (result != 0) {
        return result;
    }

    return Long.compare(memory, other.memory);
}
/**
 * Two instances are equal when the superclass considers them equal, the other object is also a
 * {@link Hardware}, and the level arrays are element-wise equal.
 */
@Override
public boolean equals(final Object obj) {
    if (this == obj) {
        return true;
    }
    if (super.equals(obj) && obj instanceof Hardware) {
        final Hardware that = (Hardware) obj;
        return Arrays.equals(myLevels, that.myLevels);
    }
    return false;
}
/**
 * Combines the superclass hash code with the hash code of the level array contents.
 */
@Override
public int hashCode() {
    final int inherited = super.hashCode();
    final int levels = Arrays.hashCode(myLevels);
    return 31 * inherited + levels;
}
/**
 * @return true if more than 2 levels were specified (something between the system and the L1 cache)
 */
public boolean isL2Specified() {
    final int numberOfLevels = myLevels.length;
    return numberOfLevels >= 3;
}
/**
 * @return true if more than 3 levels were specified
 */
public boolean isL3Specified() {
    final int numberOfLevels = myLevels.length;
    return numberOfLevels >= 4;
}
/**
 * Builds a text of the form <code>HW=[system],[units]xL3:[level],[cores]cores:[L1]</code>. The middle
 * part uses "xL2:" and the second to last level if only L2 is specified, and is left out entirely if
 * neither L3 nor L2 is specified.
 */
@Override
public String toString() {

    final int count = myLevels.length;

    final StringBuilder text = new StringBuilder("HW=");
    text.append(myLevels[0].toString());

    if (count > 3) {
        // L3 specified
        text.append(ASCII.COMMA);
        text.append(units);
        text.append("xL3:");
        text.append(myLevels[count - 3]);
    } else if (count > 2) {
        // Only L2 specified
        text.append(ASCII.COMMA);
        text.append(units);
        text.append("xL2:");
        text.append(myLevels[count - 2]);
    }

    // The last level is always the L1 cache
    text.append(ASCII.COMMA);
    text.append(cores);
    text.append("cores:");
    text.append(myLevels[count - 1]);

    return text.toString();
}
/**
 * Creates a {@link VirtualMachine} from this hardware and the current {@link Runtime}.
 *
 * @return A new virtual machine
 */
public VirtualMachine virtualise() {
    final Runtime runtime = Runtime.getRuntime();
    return new VirtualMachine(this, runtime);
}
}