浏览代码

Reduce global work size by half

Adam Kelly 7 年之前
父节点
当前提交
4e0f68d994
共有 2 个文件被更改,包括 17 次插入和 7 次删除
  1. 16 2
      src/backends/opencl/kernel.cl
  2. 1 5
      src/backends/opencl/mod.rs

+ 16 - 2
src/backends/opencl/kernel.cl

@@ -46,6 +46,18 @@ static float complex_abs(complex_f a)
 static complex_f cexp(float a) {
     return (complex_f)(cos(a), sin(a));
 }
+
+/*
+ * Returns the nth non-negative integer (counting from n = 0) whose bit at
+ * position `target` is cleared in its binary representation.
+ * For example, with target = 1 the sequence is 0, 1, 4, 5, ...
+ */
+static uint nth_cleared(uint n, uint target)
+{
+    uint mask = (1 << target) - 1;
+    uint not_mask = ~mask;
+
+    return (n & mask) | ((n & not_mask) << 1);
+}
 /*
  * Applies a single qubit gate to the register.
  * The gate matrix must be given in the form:
@@ -61,9 +73,11 @@ __kernel void apply_gate(
     complex_f C,
     complex_f D)
 {
-    uint const state = get_global_id(0);
+    uint const global_id = get_global_id(0);
 
-    uint const zero_state = state & (~(1 << target));
+    uint const state = nth_cleared(global_id, target);
+
+    uint const zero_state = state & (~(1 << target)); // Could just be state
     uint const one_state = state | (1 << target);
 
     uint const target_bit_val = (((1 << target) & state) > 0) ? 1 : 0;

+ 1 - 5
src/backends/opencl/mod.rs

@@ -74,13 +74,10 @@ impl OpenCL {
 
 impl Backend for OpenCL {
     fn apply_gate(&mut self, gate: Gate, target: u8) -> Result<(), Error> {
-        // create a temporary vector with the source buffer
-        let result_buffer: Buffer<Complex32> = self.pro_que.create_buffer()?;
-
         let apply = self.pro_que
             .kernel_builder("apply_gate")
+            .global_work_size(&self.buffer.len() / 2)
             .arg(&self.buffer)
-            .arg(&result_buffer)
             .arg(i32::from(target))
             .arg(gate.a)
             .arg(gate.b)
@@ -92,7 +89,6 @@ impl Backend for OpenCL {
             apply.enq()?;
         }
 
-        self.buffer = result_buffer;
         Ok(())
     }