import jdk.incubator.vector.*;
import java.util.Arrays;
/**
 * Demonstrates element-wise addition of two {@code double} arrays using the
 * incubating Vector API ({@code jdk.incubator.vector}).
 *
 * <p>Because the Vector API lives in an incubator module, this class must be
 * compiled and run with {@code --add-modules jdk.incubator.vector}.
 */
public class VectorAdditionSIMD {
    /** Species (lane type and vector shape) used by the vectorized loop. */
    private static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;

    /** Number of {@code double} lanes handled by one vector operation of {@link #SPECIES}. */
    private static final int VECTOR_SIZE = SPECIES.length();

    /**
     * Adds two equally sized arrays element by element.
     *
     * <p>Full groups of {@code VECTOR_SIZE} elements are added with Vector API
     * operations; any leftover elements at the end are added with a plain
     * scalar loop. Neither input array is modified.
     *
     * @param vector1 first operand; must not be {@code null}
     * @param vector2 second operand; must not be {@code null} and must have
     *                the same length as {@code vector1}
     * @return a newly allocated array where {@code result[i] == vector1[i] + vector2[i]}
     * @throws IllegalArgumentException if either array is {@code null} or the
     *                                  lengths differ
     */
    public static double[] addVectorsSIMD(double[] vector1, double[] vector2) {
        if (vector1 == null || vector2 == null) {
            throw new IllegalArgumentException("Vectors cannot be null.");
        }
        if (vector1.length != vector2.length) {
            throw new IllegalArgumentException("Vectors must have the same dimensions.");
        }

        final int length = vector1.length;
        final double[] result = new double[length];

        // Largest multiple of the lane count that is <= length: every index
        // below this bound can be loaded as a full vector without running
        // past the end of the arrays.
        final int vectorBound = SPECIES.loopBound(length);

        int i = 0;
        // Vectorized loop: processes VECTOR_SIZE elements per iteration.
        for (; i < vectorBound; i += VECTOR_SIZE) {
            DoubleVector v1 = DoubleVector.fromArray(SPECIES, vector1, i);
            DoubleVector v2 = DoubleVector.fromArray(SPECIES, vector2, i);
            v1.add(v2).intoArray(result, i);
        }
        // Scalar loop for the tail that does not fill a whole vector.
        for (; i < length; i++) {
            result[i] = vector1[i] + vector2[i];
        }
        return result;
    }

    /**
     * Example usage: adds two 100-element arrays and prints the first ten
     * elements of each operand and of the sum, then calls
     * {@link #addVectorsSIMD(double[], double[])} with arrays of different
     * lengths to show the error that is reported.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        double[] v1 = new double[100];
        double[] v2 = new double[100];
        double[] v3 = new double[7];
        for (int i = 0; i < 100; i++) {
            v1[i] = i * 1.0;
            v2[i] = i * 2.0;
        }
        for (int i = 0; i < 7; i++) {
            v3[i] = i * 3.0;
        }
        try {
            double[] sum = addVectorsSIMD(v1, v2);
            System.out.println("Vector 1: " + Arrays.toString(Arrays.copyOf(v1,10)));
            System.out.println("Vector 2: " + Arrays.toString(Arrays.copyOf(v2,10)));
            System.out.println("SIMD Sum: " + Arrays.toString(Arrays.copyOf(sum, 10)));
            // Deliberately mismatched lengths (100 vs 7): this call is expected
            // to throw, so its result is not kept.
            addVectorsSIMD(v1, v3);
        } catch (IllegalArgumentException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }
}
To verify, first install hsdis; you can get a prebuilt binary (e.g. from chriswhocodes.com/hsdis). Copy the shared library (.so or .dll) to the appropriate location (e.g. on Linux, copy hsdis-amd64.so to /usr/lib/jvm/java-21-openjdk-amd64/lib/server/). hsdis is the HotSpot disassembler; it lets us see which instructions are emitted and which compiler produced them (we need C2, the server compiler, not C1).
Compile:
javac --add-modules jdk.incubator.vector VectorAdditionSIMD.java
Run/examine output:
java -XX:+UnlockDiagnosticVMOptions -XX:+PrintAssembly -XX:CompileThreshold=1 -XX:-TieredCompilation --add-modules jdk.incubator.vector VectorAdditionSIMD > c2output.txt
Now, within c2output.txt, you should find that the C2 compiler was used and, furthermore, that the generated code uses ymm (256-bit) registers; this is a good sign that you are executing via SIMD. The double-precision addition itself should appear as a packed add instruction such as vaddpd. If all is well, you should see output along the lines of:
0x00007f345ca993ca: vmovdqu (%rdi,%rcx,1),%ymm0
0x00007f345ca993cf: vmovdqu (%rsi,%rcx,1),%ymm1
0x00007f345ca993d4: vpxor %ymm1,%ymm0,%ymm0
0x00007f345ca993d8: vptest %ymm0,%ymm0
0x00007f345ca993f1: vmovdqu -0x20(%rdi,%rax,1),%ymm0
0x00007f345ca993f7: vmovdqu -0x20(%rsi,%rax,1),%ymm1
0x00007f345ca993fd: vpxor %ymm1,%ymm0,%ymm0
No comments:
Post a Comment