All downloads are FREE. The search and download functionality uses the official Maven repository.

org.ojalgo.machine.Hardware Maven / Gradle / Ivy

Go to download

oj! Algorithms - ojAlgo - is Open Source Java code that has to do with mathematics, linear algebra and optimisation.

There is a newer version: 55.0.1
Show newest version
/*
 * Copyright 1997-2024 Optimatika
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.ojalgo.machine;

import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

import org.ojalgo.array.operation.COPY;
import org.ojalgo.netio.ASCII;

/**
 * 
    *
  • The first element in the array should correspond to total system resources; the total amount of RAM and * the total number of threads (Typically the same as what is returned by * {@linkplain Runtime#availableProcessors()}). *
  • The last element in the array should describe the L1 cache. Typically Intel processors have 32k L1 * cache and AMD 64k. 1 or maybe 2 threads use/share this cache. *
  • Caches, all levels except L1, are described between the first and last elements in descending order (L3 * cache comes before L2 cache). Specify the size of the cache and the number of threads using/sharing the * cache. (Do not worry about how many cache units there are - describe one unit.) *
  • The array must have at least 2 elements. You must describe the total system resources and the L1 cache. * It is strongly recommended to also describe the L2 cache. The L3 cache, if it exists, is less important to * describe. The derived attributes processors, cores and units may be * incorrectly calculated if you fail to specify the caches. Known issue: If you have more than one processor, * nut no L3 cache; the processors attribute will be incorrectly set 1. A workaround that * currently works is to define an L3 cache anyway and set the memory/size of that cache to 0bytes. This * Workaround may stop working in the future. *
  • new MemoryThreads[] { SYSTEM, L3, L2, L1 } or * new MemoryThreads[] { SYSTEM, L2, L1 } or new MemoryThreads[] { SYSTEM, L1 } *
* * @author apete */ public final class Hardware extends CommonMachine implements Comparable { /** * Cache-line size is (typically) 64 bytes */ public static final long CPU_CACHE_LINE_SIZE = 64L; /** * Page size is usually determined by the processor architecture. Traditionally, pages in a system had * uniform size, such as 4,096 bytes. However, processor designs often allow two or more, sometimes * simultaneous, page sizes due to its benefits. There are several points that can factor into choosing * the best page size. *

* Practically all architectures/OS:s have a page size of 4k (one notable exception is Solaris/SPARC that * have 8k) *

* AArch64 supports three different granule sizes: 4KB, 16KB, and 64KB. */ public static final long OS_MEMORY_PAGE_SIZE = 4L * K; /** * Should contain all available hardware in ascending "power" order. */ public static final Set PREDEFINED = new TreeSet<>(); /** *

* M1 Pro Mainly modelled after the performance cores since there are more of those. Also did not separate * between L2 and L3/SLC cache since there are 2 of each and they are the same size per thread. *

* Notes: M2, M2 Pro, M2 Max, M2 Ultra -> 1, 2, 4, 8 memory controllers resulting in 100GB/s, 200GB/s, * 400GB/s and 800GB/s Memory Bandwidth *

    *
  • Apple M1 Pro *
      *
    • L1 Cache the high-perf cores have a large 192 KB of L1 instruction cache and 128 KB of L1 data * cache The energy-efficient cores have a 128 KB L1 instruction cache, 64 KB L1 data cache. *
    • L2 Cache (28MB all together) The 6 high-perf cores are split in two clusters, each cluster has 12MB * of shared L2 cache (so 24MB total) The 2 high-efficiency cores have 4MB of shared L2 cache *
    • L3 / SLC (24MB all together) The SLC is 12MB per memory controller, so 24MB total. *
    • 16 GB unified memory *
    *
  • squid / 15" MacBook Air 2023, Apple M2 *
      *
    • 8 cores (4 performance and 4 efficiency) *
    • L1: Performance cores 192+128 KB per core / Efficiency cores 128+64 KB per core *
    • L2: Performance cores 16 MB / Efficiency cores 4 MB *
    • L3: 8 MB *
    • 24 GB unified memory *
    *
*/ static final Hardware AARCH64__08 = new Hardware("aarch64", new BasicMachine[] { new BasicMachine(24L * K * K * K, 8), new BasicMachine(8L * K * K, 8), new BasicMachine(4L * K * K, 4), new BasicMachine(64L * K, 1) }); /** *
    *
  • CLAM / PowerBook6,5 *
      *
    • 1 processor *
    • 1 core per processor *
    • 1 thread per core *
    • === *
    • 1.25GB system RAM *
    • 512kB L2 cache per processor *
    • 64kB L1 cache per core *
    *
*/ static final Hardware PPC__01 = new Hardware("ppc", new BasicMachine[] { new BasicMachine(5L * 256L * K * K, 1), new BasicMachine(512L * K, 1), new BasicMachine(64L * K, 1) }); /** *
    *
  • INTEL1 *
      *
    • 1 processor *
    • 1 core per processor *
    • 1 thread per core *
    • === *
    • 1GB system RAM *
    • 1MB L2 cache per processor *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86__01 = new Hardware("x86", new BasicMachine[] { new BasicMachine(1L * K * K * K, 1), new BasicMachine(1L * K * K, 1), new BasicMachine(32L * K, 1) }); /** *
    *
  • B5950053 *
      *
    • 1 processor *
    • 2 cores per processor *
    • 1 thread per core *
    • === *
    • 3.5GB system RAM *
    • 6MB L2 cache per processor (2 cores) *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86__02 = new Hardware("x86", new BasicMachine[] { new BasicMachine(7L * 512L * K * K, 2), new BasicMachine(6L * K * K, 2), new BasicMachine(32L * K, 1) }); /** *
    *
  • MANTA / iMac7,1 *
      *
    • 1 processor *
    • 2 cores per processor *
    • 1 thread per core *
    • === *
    • 3GB system RAM *
    • 4MB L2 cache per processor (2 cores) *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86_64__02 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(3L * K * K * K, 2), new BasicMachine(4L * K * K, 2), new BasicMachine(32L * K, 1) }); /** * Combination of {@link #X86_64__04_1_L2}, {@link #X86_64__04_1_L3} and {@link #X86_64__04_2} */ static final Hardware X86_64__04 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(32L * K * K * K, 4), new BasicMachine(3L * K * K, 4), new BasicMachine(256L * K, 2), new BasicMachine(32L * K, 2) }); /** *
    *
  • PA's Q9400 *
      *
    • 1 processors *
    • 4 cores per processor *
    • 1 thread per core (4 threads in total) *
    • === *
    • 3GB system RAM *
    • 3MB L2 cache per 2 cores *
    • 32kB L1 cache per core *
    *
  • PA's Q6600 *
      *
    • 1 processors *
    • 4 cores per processor *
    • 1 thread per core (4 threads in total) *
    • === *
    • 8GB system RAM *
    • 4MB L2 cache per 2 cores *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86_64__04_1_L2 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(8L * K * K * K, 4), new BasicMachine(3L * K * K, 2), new BasicMachine(32L * K, 1) }); /** *
    *
  • Intel i5-4670K with 16GB of RAM *
      *
    • 1 processors *
    • 4 cores per processor *
    • 1 thread per core (4 threads in total) *
    • === *
    • 16GB system RAM *
    • 6MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • Intel Core i5-3570K with 32GB of RAM (from Java Matrix Benchmark) *
      *
    • 1 processors *
    • 4 cores per processor *
    • 1 thread per core (4 threads in total) *
    • === *
    • 32GB system RAM *
    • 6MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86_64__04_1_L3 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(32L * K * K * K, 4), new BasicMachine(6L * K * K, 4), new BasicMachine(256L * K, 1), new BasicMachine(32L * K, 1) }); /** *
    *
  • BUBBLE / MacBookAir4,2 *
      *
    • 1 processors *
    • 2 cores per processor *
    • 2 threads per core (4 threads in total) *
    • === *
    • 4GB system RAM *
    • 3MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • PA's Intel Core i7-620M laptop *
      *
    • 1 processors *
    • 2 cores per processor *
    • 2 threads per core (4 threads in total) *
    • === *
    • 8GB system RAM *
    • 4MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • MacBookPro14,2 (oyster) *
      *
    • 1 processors *
    • 2 cores per processor *
    • 2 threads per core (4 threads in total) *
    • === *
    • 8GB system RAM *
    • 4MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86_64__04_2 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(8L * K * K * K, 4), new BasicMachine(3L * K * K, 4), new BasicMachine(256L * K, 2), new BasicMachine(32L * K, 2) }); /** *
    *
  • HA's Intel Core i7-920 server *
      *
    • 1 processor *
    • 4 cores per processor *
    • 2 threads per core (8 threads in total) *
    • === *
    • 8GB system RAM *
    • 8MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • Core i7-2600 3.4 GHz - 4 cores - 8 threads from Java Matrix Benchmark *
      *
    • 1 processor *
    • 4 cores per processor *
    • 2 threads per core (8 threads in total) *
    • === *
    • 11GB system RAM *
    • 8MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • Core i7-3770 3.4 GHz - 4 cores - 8 threads (whale @ MSC/MSB) *
      *
    • 1 processor *
    • 4 cores per processor *
    • 2 threads per core (8 threads in total) *
    • === *
    • 8GB system RAM *
    • 8MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • Core i7-2600 3.4 GHz - 4 cores - 8 threads (Vostro-460 @ Scila) *
      *
    • 1 processor *
    • 4 cores per processor *
    • 2 threads per core (8 threads in total) *
    • === *
    • 32GB system RAM *
    • 8MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • Google Cloud Platform Compute Engine n1-standard-8 (8 vCPUs, 30 GB memory, Skylake) *
      *
    • 1 processor *
    • 4 cores per processor *
    • 2 threads per core (8 threads in total) *
    • === *
    • 30GB system RAM *
    • 8.25MB L3 cache per processor *
    • 1MB L2 cache per core *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86_64__08 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(32L * K * K * K, 8), new BasicMachine(8L * K * K, 8), new BasicMachine(256L * K, 2), new BasicMachine(32L * K, 2) }); /** *
     * "Gulftown" (32 nm) Model: SLBUZ (B1)
     * Intel Core i7-980 3.33GHz
     * 8/25/2010
     * ref: http://ark.intel.com/products/47932
     *      https://en.wikipedia.org/wiki/List_of_Intel_Core_i7_microprocessors
     *      Device Manager
     * 
*
    *
  • Intel Core i7-980 *
      *
    • 1 processor *
    • 6 cores per processor *
    • 2 threads per core (12 threads in total) *
    • === *
    • 12GB system RAM *
    • 12MB L3 cache per processor *
    • 256kB L2 cache per core (x6) *
    • 32kB L1 cache per core (x6) *
    *
*/ static final Hardware X86_64__12 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(12L * K * K * K, 12), new BasicMachine(12L * K * K, 12), new BasicMachine(256L * K, 2), new BasicMachine(32L * K, 2) }); /** *
    *
  • SAILFISH / MacPro4,1 *
      *
    • 2 processors *
    • 4 cores per processor (8 cores in total) *
    • 2 threads per core (16 threads in total) *
    • === *
    • 12GB system RAM *
    • 8MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • OCTOPUS / MacBookPro16,1 *
      *
    • 1 processors *
    • 8 cores per processor (8 cores in total) *
    • 2 threads per core (16 threads in total) *
    • === *
    • 64GB system RAM *
    • 16MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86_64__16 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(64L * K * K * K, 16), new BasicMachine(8L * K * K, 16), new BasicMachine(256L * K, 2), new BasicMachine(32L * K, 2) }); /** *
    *
  • CBL (prod & test) 2 x Intel(R) Xeon(R) CPU E5-2697A v4 @ 2.60GHz *
      *
    • 2 processors *
    • 16 cores per processor (32 cores in total) *
    • 2 threads per core (64 threads in total) *
    • === *
    • 512GB system RAM *
    • 40MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
  • CBF (simu) 4 x Intel(R) Xeon(R) CPU E7-4809 v3 @ 2.00GHz *
      *
    • 4 processors *
    • 8 cores per processor (32 cores in total) *
    • 2 threads per core (64 threads in total) *
    • === *
    • 512GB system RAM *
    • 20MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86_64__64 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(512L * K * K * K, 64), new BasicMachine(20L * K * K, 32), new BasicMachine(256L * K, 2), new BasicMachine(32L * K, 2) }); /** *
    *
  • CBF (prod) 4 x Intel(R) Xeon(R) CPU E7-4830 v3 @ 2.10GHz *
      *
    • 4 processors *
    • 12 cores per processor (48 cores in total) *
    • 2 threads per core (96 threads in total) *
    • === *
    • 512GB system RAM *
    • 30MB L3 cache per processor *
    • 256kB L2 cache per core *
    • 32kB L1 cache per core *
    *
*/ static final Hardware X86_64__96 = new Hardware("x86_64", new BasicMachine[] { new BasicMachine(512L * K * K * K, 96), new BasicMachine(30L * K * K, 24), new BasicMachine(256L * K, 2), new BasicMachine(32L * K, 2) }); static { PREDEFINED.add(AARCH64__08); PREDEFINED.add(PPC__01); PREDEFINED.add(X86__01); PREDEFINED.add(X86__02); PREDEFINED.add(X86_64__02); PREDEFINED.add(X86_64__04); // PREDEFINED.add(X86_64.X86_64__04_2); // PREDEFINED.add(X86_64.X86_64__04_1_L2); // PREDEFINED.add(X86_64.X86_64__04_1_L3); PREDEFINED.add(X86_64__08); PREDEFINED.add(X86_64__12); PREDEFINED.add(X86_64__16); PREDEFINED.add(X86_64__64); PREDEFINED.add(X86_64__96); } public static Hardware makeSimple() { return Hardware.makeSimple(VirtualMachine.getArchitecture(), VirtualMachine.getMemory(), VirtualMachine.getThreads()); } public static Hardware makeSimple(final String systemArchitecture, final long systemMemory, final int systemThreads) { if (systemThreads > 8) { // Assume hyperthreading, L3 cache and more than 1 CPU BasicMachine tmpL1Machine = new BasicMachine(32L * K, 2); //Hyperthreading BasicMachine tmpL2Machine = new BasicMachine(256L * K, tmpL1Machine.threads); BasicMachine tmpL3Machine = new BasicMachine(4L * K * K, systemThreads / ((systemThreads + 7) / 8)); //More than 1 CPU BasicMachine tmpSystemMachine = new BasicMachine(systemMemory, systemThreads); return new Hardware(systemArchitecture, new BasicMachine[] { tmpSystemMachine, tmpL3Machine, tmpL2Machine, tmpL1Machine }); } else if (systemThreads >= 4) { // Assume hyperthreading, L3 cache but only 1 CPU BasicMachine tmpL1Machine = new BasicMachine(32L * K, 2); //Hyperthreading BasicMachine tmpL2Machine = new BasicMachine(256L * K, tmpL1Machine.threads); BasicMachine tmpL3Machine = new BasicMachine(3L * K * K, systemThreads); BasicMachine tmpSystemMachine = new BasicMachine(systemMemory, systemThreads); return new Hardware(systemArchitecture, new BasicMachine[] { tmpSystemMachine, tmpL3Machine, tmpL2Machine, tmpL1Machine 
}); } else { // No hyperthreading, no L3 cache and 1 CPU BasicMachine tmpL1Machine = new BasicMachine(32L * K, 1); //No hyperthreading BasicMachine tmpL2Machine = new BasicMachine(2L * K * K, tmpL1Machine.threads); BasicMachine tmpSystemMachine = new BasicMachine(systemMemory, systemThreads); return new Hardware(systemArchitecture, new BasicMachine[] { tmpSystemMachine, tmpL2Machine, tmpL1Machine }); } } private final BasicMachine[] myLevels; /** * new BasicMachine[] { SYSTEM, L3, L2, L1 } or * new BasicMachine[] { SYSTEM, L2, L1 } or in worst case * new BasicMachine[] { SYSTEM, L1 } */ public Hardware(final String arch, final BasicMachine[] levels) { super(arch, levels); if (levels.length < 2) { throw new IllegalArgumentException(); } myLevels = COPY.copyOf(levels); } @Override public int compareTo(final Hardware other) { if (cores != other.cores) { return cores - other.cores; } else if (threads != other.threads) { return threads - other.threads; } else if (cache != other.cache) { return (int) (cache - other.cache); } else if (units != other.units) { return units - other.units; } else if (memory != other.memory) { return (int) (memory - other.memory); } else { return 0; } } @Override public boolean equals(final Object obj) { if (this == obj) { return true; } if (!super.equals(obj) || !(obj instanceof Hardware)) { return false; } Hardware other = (Hardware) obj; if (!Arrays.equals(myLevels, other.myLevels)) { return false; } return true; } @Override public int hashCode() { final int prime = 31; int result = super.hashCode(); return prime * result + Arrays.hashCode(myLevels); } public boolean isL2Specified() { return myLevels.length > 2; } public boolean isL3Specified() { return myLevels.length > 3; } @Override public String toString() { StringBuilder retVal = new StringBuilder("HW="); retVal.append(myLevels[0].toString()); if (this.isL3Specified()) { retVal.append(ASCII.COMMA).append(units).append("xL3:").append(myLevels[myLevels.length - 3]); } else if 
(this.isL2Specified()) { retVal.append(ASCII.COMMA).append(units).append("xL2:").append(myLevels[myLevels.length - 2]); } retVal.append(ASCII.COMMA).append(cores).append("cores:").append(myLevels[myLevels.length - 1]); return retVal.toString(); } public VirtualMachine virtualise() { return new VirtualMachine(this, Runtime.getRuntime()); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy