Monday, 1 April 2013

Performance gains from bitwise operations





An experiment to assess the performance gains from bitwise operations in Java.


Methodology: use the junit benchmark framework to measure the time elapsed with and without bit-twiddling for four operations (each executed several million times): multiply, divide , modulo and finding the next power of two.
Configuration: JDK 7 running on MacOS 10.7.
Results
For each test two results are shown: time taken without bit-twiddling first, with bit-twiddling second.
1) Division
DivideBenchmark.testJVMDivision: [measured 9 out of 10 rounds, threads: 1 (sequential)]
round: 0.04 [+- 0.00], round.block: 0.00 [+- 0.00], round.gc: 0.00 [+- 0.00], GC.calls: 0, GC.time: 0.00, time.total: 0.39, time.warmup: 0.07, time.bench: 0.32
DivideBenchmark.testBitwiseDivision: [measured 9 out of 10 rounds, threads: 1 (sequential)]
round: 0.03 [+- 0.00], round.block: 0.00 [+- 0.00], round.gc: 0.00 [+- 0.00], GC.calls: 0, GC.time: 0.00, time.total: 0.33, time.warmup: 0.04, time.bench: 0.29
2) Multiplication
MultBenchmark.testJVMMultiply: [measured 9 out of 10 rounds, threads: 1 (sequential)]
round: 0.02 [+- 0.00], round.block: 0.00 [+- 0.00], round.gc: 0.00 [+- 0.00], GC.calls: 0, GC.time: 0.00, time.total: 0.22, time.warmup: 0.04, time.bench: 0.18
MultBenchmark.testBitwiseMultiply: [measured 9 out of 10 rounds, threads: 1 (sequential)]
round: 0.02 [+- 0.00], round.block: 0.00 [+- 0.00], round.gc: 0.00 [+- 0.00], GC.calls: 0, GC.time: 0.00, time.total: 0.18, time.warmup: 0.03, time.bench: 0.16
3) Modulo
ModuloBenchmark.testJVMModulo: [measured 9 out of 10 rounds, threads: 1 (sequential)]
round: 0.09 [+- 0.03], round.block: 0.00 [+- 0.00], round.gc: 0.00 [+- 0.00], GC.calls: 0, GC.time: 0.00, time.total: 1.00, time.warmup: 0.20, time.bench: 0.80
ModuloBenchmark.testBitwiseModulo: [measured 9 out of 10 rounds, threads: 1 (sequential)]
round: 0.09 [+- 0.01], round.block: 0.00 [+- 0.00], round.gc: 0.00 [+- 0.00], GC.calls: 0, GC.time: 0.00, time.total: 0.95, time.warmup: 0.10, time.bench: 0.84
4) Find the next power of 2
NextPowerOfTwoBenchmark.testJVMNextPowerOfTwo: [measured 9 out of 10 rounds, threads: 1 (sequential)]
round: 1.44 [+- 0.02], round.block: 0.00 [+- 0.00], round.gc: 0.00 [+- 0.00], GC.calls: 0, GC.time: 0.00, time.total: 14.49, time.warmup: 1.50, time.bench: 13.00
NextPowerOfTwoBenchmark.testBitwisePowerOfTwo: [measured 9 out of 10 rounds, threads: 1 (sequential)]
round: 0.03 [+- 0.00], round.block: 0.00 [+- 0.00], round.gc: 0.00 [+- 0.00], GC.calls: 0, GC.time: 0.00, time.total: 0.33, time.warmup: 0.05, time.bench: 0.28
Conclusion
There’s no discernable performance gains when using bit-twiddling to execute multiplications, divisions or modulo. This is hardly surprising as these simple utilities should already be heavily optimised by the JVM.
Less heavily JVM-optimised code  (eg. find the next power of two) shows large potential performance wins, which may even justify the less readable code associated with bit shifts techniques.
Benchmark source code
12345678910111213141516171819202122232425262728293031323334353637383940
import com.carrotsearch.junitbenchmarks.BenchmarkOptions;
import com.carrotsearch.junitbenchmarks.BenchmarkRule;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import static junit.framework.Assert.assertTrue;
public class DivideBenchmark {
final static int reps = 1000 * 1000 *10 ;
int divide[] = new int[reps];
@Before
public void setup(){
for (int i =0; i<reps; i++)
divide[i]= i / 64;
}
@Rule
public BenchmarkRule benchmarkRun = new BenchmarkRule();
@Test
@BenchmarkOptions(benchmarkRounds =9, warmupRounds = 1, concurrency = 1)
public void testJVMDivision() throws Exception {
for (int i=0; i<reps;i++)
assertTrue(divide[i]==i /64);
}
@Test
@BenchmarkOptions(benchmarkRounds = 9, warmupRounds = 1, concurrency = 1)
public void testBitwiseDivision() throws Exception {
for (int i=0; i<reps;i++)
assertTrue(divide[i]==(i >> 6));
}
}
123456789101112131415161718192021222324252627282930313233343536373839
import com.carrotsearch.junitbenchmarks.BenchmarkOptions;
import com.carrotsearch.junitbenchmarks.BenchmarkRule;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import static junit.framework.Assert.assertTrue;
public class ModuloBenchmark {
final static int reps = 1000 * 1000 *50;
final static int modulos[] = new int[reps];
@Before
public void setup(){
for (int i =0; i<reps; i++)
modulos[i]= i % 1024;
}
@Rule
public BenchmarkRule benchmarkRun = new BenchmarkRule();
@Test
@BenchmarkOptions(benchmarkRounds =9, warmupRounds = 1, concurrency = 1)
public void testJVMModulo() throws Exception {
for (int i=0; i<reps;i++)
assertTrue(modulos[i]==i % 1024);
}
@Test
@BenchmarkOptions(benchmarkRounds = 9, warmupRounds = 1, concurrency = 1)
public void testBitwiseModulo() throws Exception {
for (int i=0; i<reps;i++)
assertTrue(modulos[i]==(i &1023));
}
}
123456789101112131415161718192021222324252627282930313233343536373839
import com.carrotsearch.junitbenchmarks.BenchmarkOptions;
import com.carrotsearch.junitbenchmarks.BenchmarkRule;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import static junit.framework.Assert.assertTrue;
public class MultBenchmark {
static final int reps = 1000 * 1000 * 10 ;
static final int mult[] = new int[reps];
@BeforeClass
public static void setup(){
for (int i =0; i<reps; i++)
mult[i]= i * 64;
}
@Rule
public BenchmarkRule benchmarkRun = new BenchmarkRule();
@Test
@BenchmarkOptions(benchmarkRounds = 9, warmupRounds = 1, concurrency = 1)
public void testJVMMultiply() throws Exception {
for (int i=0; i<reps;i++)
assertTrue(mult[i]==i *64);
}
@Test
@BenchmarkOptions(benchmarkRounds = 9, warmupRounds = 1, concurrency = 1)
public void testBitwiseMultiply() throws Exception {
for (int i=0; i<reps;i++)
assertTrue(mult[i]==(i << 6));
}
}
123456789101112131415161718192021222324252627282930313233343536373839404142
import com.carrotsearch.junitbenchmarks.BenchmarkOptions;
import com.carrotsearch.junitbenchmarks.BenchmarkRule;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestRule;
import static junit.framework.Assert.assertTrue;
public class NextPowerOfTwoBenchmark {
static final int reps = 1000 * 1000 * 10 ;
static final int nxtPower[] = new int[reps];
@BeforeClass
public static void setup(){
for (int i =2; i<reps; i++) {
nxtPower[i] = (int)Math.pow(2,Math.ceil(Math.log(i) / Math.log(2)));
}
}
@Rule
public TestRule benchmarkRun = new BenchmarkRule();
@Test
@BenchmarkOptions(benchmarkRounds =9, warmupRounds = 1,callgc = false,concurrency = 1)
public void testJVMNextPowerOfTwo() throws Exception {
for (int i=2; i<reps;i++)
assertTrue(nxtPower[i]==(int)Math.pow(2,Math.ceil(Math.log(i)/Math.log(2))));
}
@Test
@BenchmarkOptions(benchmarkRounds = 9, warmupRounds = 1, callgc = false,concurrency = 1)
public void testBitwisePowerOfTwo() throws Exception {
for (int i=2; i<reps;i++)
assertTrue(nxtPower[i]==( 1 << (32 - Integer.numberOfLeadingZeros(i - 1))));
}
}
by edgblog

No comments:

Post a Comment