Browse Source

Added MPI Based State Vector Simulator

Adam Kelly 5 years ago
parent
commit
c1af6d4afe

+ 1 - 1
CMakeLists.txt

@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.9)
 project(tangle_project)
 
 
-option(DISTRIBUTED "Use MPI" OFF)
+option(DISTRIBUTED "Use MPI" ON)
 option(MULTITHREADED "Use OpenMP" OFF)
 
 

+ 61 - 23
standalone/cli.c

@@ -1,32 +1,70 @@
-#include "tangle.h"
+// #include "tangle.h"
+// #include <stdio.h>
+// // #include "mpi.h"
+
+// int main()
+// {
+//     // MPI_Init(NULL, NULL);
+
+//     printf("Welcome\n");
+
+//     count_to(10);
+
+//     // MPI_Finalize();
+
+//     return 0;
+// }
+
+// // #include <stdio.h>
+// // #include <omp.h>
+
+// // int main()
+// // {
+// // #pragma omp parallel num_threads(3)
+// //     {
+// //         int id = omp_get_thread_num();
+// //         int data = id;
+// //         int total = omp_get_num_threads();
+// //         printf("Greetings from process %d out of %d with Data %d\n", id, total, data);
+// //     }
+// //     printf("parallel for ends.\n");
+// //     return 0;
+// // }
+
 #include <stdio.h>
-// #include "mpi.h"
+#include <stdlib.h>
+#include "tangle.h"
 
-int main()
+int main(int argc, char *argv[])
 {
-    // MPI_Init(NULL, NULL);
+    TangleEnvironment env = initialize_environment();
 
-    printf("Welcome\n");
+    if (env.rank == 0 && argc <= 1)
+    {
+        printf("Please enter the number of qubits.\n");
+        exit(1);
+    }
 
-    count_to(10);
+    int num_qubits = atoi(argv[1]);
 
-    // MPI_Finalize();
+    TangleState state = create_state(num_qubits, env);
 
-    return 0;
-}
+    // Apply a hadamard gate to each qubit
+    // for (int i = 0; i < num_qubits; i++)
+    // {
+    //     H(state, i);
+    // }
+    H(state, 0);
 
-// #include <stdio.h>
-// #include <omp.h>
+    // Print the initial amplitude
+    if (env.rank == 0)
+    {
+        cfloat amp = state.amps[0];
+        printf("|0> = %f + i%f\n", creal(amp), cimag(amp));
+    }
 
-// int main()
-// {
-// #pragma omp parallel num_threads(3)
-//     {
-//         int id = omp_get_thread_num();
-//         int data = id;
-//         int total = omp_get_num_threads();
-//         printf("Greetings from process %d out of %d with Data %d\n", id, total, data);
-//     }
-//     printf("parallel for ends.\n");
-//     return 0;
-// }
+    // Finish the computation
+    destroy_environment(env);
+
+    return 0;
+}

+ 5 - 2
tangle/CMakeLists.txt

@@ -7,9 +7,10 @@ endif()
 
 add_subdirectory(src)
 message("Tangle Source: ${TANGLE_SRC}")
+message("Tangle Platform Specific Source: ${TANGLE_PLATFORM_SRC}")
 
 
-add_library(tangle STATIC ${TANGLE_SRC})
+add_library(tangle STATIC ${TANGLE_SRC} ${TANGLE_PLATFORM_SRC})
 target_include_directories(tangle PRIVATE src PUBLIC include)
 
 
@@ -18,4 +19,6 @@ if (DISTRIBUTED)
     target_link_libraries(tangle ${MPI_C_LIBRARIES})
 elseif (MULTITHREADED)
     target_link_libraries(tangle PUBLIC OpenMP::OpenMP_C)
-endif()
+endif()
+
+target_link_libraries(tangle m)

+ 11 - 0
tangle/include/communication.h

@@ -0,0 +1,11 @@
+#ifndef __TANGLE_COMMUNICATION_H
+#define __TANGLE_COMMUNICATION_H
+
+#include "tangle.h"
+
+void send_top(TangleState state, int node);
+void receive_top(TangleState state, int node);
+void send_bottom(TangleState state, int node);
+void receive_bottom(TangleState state, int node);
+
+#endif

+ 66 - 1
tangle/include/tangle.h

@@ -3,6 +3,71 @@
 #ifndef __TANGLE_H
 #define __TANGLE_H
 
-void count_to(int i);
+#include <complex.h>
+
+// Common Types
+typedef float _Complex cfloat;
+typedef long long int llint;
+
+#define cfloat(r, i) ((float)(r) + ((float)(i)) * I)
+
+// Tangle Types
+typedef struct
+{
+    int rank;
+    int nodes;
+} TangleEnvironment;
+
+typedef struct
+{
+    int num_qubits;
+    cfloat *amps;
+
+    // Distributed Information
+    int rank;
+    int nodes;
+    int k;
+    int m;
+
+    llint node_amps;
+    llint temp_amps;
+} TangleState;
+
+typedef struct
+{
+    cfloat A;
+    cfloat B;
+    cfloat C;
+    cfloat D;
+} TangleGate;
+
+// Environment/Distributed Functions
+TangleEnvironment initialize_environment();
+void destroy_environment(TangleEnvironment env);
+
+// Quantum State Functions
+TangleState create_state(int num_qubits, TangleEnvironment env);
+void print_state(TangleState state);
+
+// Quantum Gates
+void apply_gate(TangleState state, int target, TangleGate u);
+void apply_diagonal_gate(TangleState state, int target, TangleGate u);
+void apply_antidiagonal_gate(TangleState state, int target, TangleGate u);
+void apply_controlled_gate(TangleState state, int control, int target, TangleGate u);
+void apply_controlled_diagonal_gate(TangleState state, int control, int target, TangleGate u);
+void apply_controlled_antidiagonal_gate(TangleState state, int control, int target, TangleGate u);
+
+TangleGate _X;
+TangleGate _Z;
+TangleGate _H;
+
+void X(TangleState state, int target);
+void H(TangleState state, int target);
+void Z(TangleState state, int target);
+void CZ(TangleState state, int control, int target);
+void CX(TangleState state, int control, int target);
+
+// Measurement
+llint measure(TangleState state);
 
 #endif

+ 14 - 4
tangle/src/CMakeLists.txt

@@ -1,15 +1,25 @@
+# Global Code
+set(TANGLE_SRC
+    ${CMAKE_CURRENT_SOURCE_DIR}/gates.c
+    PARENT_SCOPE
+)
+
+# Platform Specific Code
 if (DISTRIBUTED)
-    set(TANGLE_SRC
-        ${CMAKE_CURRENT_SOURCE_DIR}/mpi.c
+    set(TANGLE_PLATFORM_SRC
+        ${CMAKE_CURRENT_SOURCE_DIR}/distributed/communication.c
+        ${CMAKE_CURRENT_SOURCE_DIR}/distributed/environment.c
+        ${CMAKE_CURRENT_SOURCE_DIR}/distributed/state.c
+        ${CMAKE_CURRENT_SOURCE_DIR}/distributed/gate_application.c
         PARENT_SCOPE
     )
 elseif (MULTITHREADED)
-    set(TANGLE_SRC
+    set(TANGLE_PLATFORM_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/omp.c
         PARENT_SCOPE
     )
 else()
-    set(TANGLE_SRC
+    set(TANGLE_PLATFORM_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/cpu.c
         PARENT_SCOPE
     )

+ 58 - 0
tangle/src/distributed/communication.c

@@ -0,0 +1,58 @@
+#include "mpi.h"
+#include "tangle.h"
+
+// A helper to deal with communication that goes above the
+// communication size limits in OpenMPI. This could be removed
+// by using some BigMPI implementation, but it's as easy to just
+// do this.
+void communication_helper(TangleState state, int node, llint idx_1, llint idx_2, int tag_1, int tag_2)
+{
+    // There is a limit to the amount of data that can be transferred.
+    // This limit is 28 for a double, 27 for a long quad.
+    llint message_size = 1LL << 29;
+
+    // It will always be a state.temp_amps sized blocks transferred.
+    // This is assuming the 'half and half' memory structure.
+    // If needed, this can be decomposed.
+    if (state.temp_amps < message_size)
+    {
+        message_size = state.temp_amps;
+    }
+
+    // Message size is a power of two, so breaking up the message can
+    // be done without errors.
+    for (llint i = 0; i < state.temp_amps / message_size; i++)
+    {
+        MPI_Sendrecv(&state.amps[idx_1 + (i * message_size)], message_size, MPI_COMPLEX, node, tag_1,
+                     &state.amps[idx_2 + (i * message_size)], message_size, MPI_COMPLEX, node, tag_2,
+                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    }
+}
+
+// Send the top half of the state
+// Receive into the temporary storage
+void send_top(TangleState state, int node)
+{
+    communication_helper(state, node, 0, state.node_amps, 1, 2);
+}
+
+// Send the temporary storage
+// Receive into the top half of the state
+void receive_top(TangleState state, int node)
+{
+    communication_helper(state, node, state.node_amps, 0, 3, 4);
+}
+
+// Send the bottom half of the state
+// Receive into the temporary state
+void send_bottom(TangleState state, int node)
+{
+    communication_helper(state, node, state.temp_amps, state.node_amps, 2, 1);
+}
+
+// Send the temporary storage
+// Receive into the bottom half of the state
+void receive_bottom(TangleState state, int node)
+{
+    communication_helper(state, node, state.node_amps, state.temp_amps, 4, 3);
+}

+ 34 - 0
tangle/src/distributed/environment.c

@@ -0,0 +1,34 @@
+#include "tangle.h"
+#include "mpi.h"
+
+TangleEnvironment initialize_environment()
+{
+    TangleEnvironment env;
+
+    int rank, nodes, initialized;
+    MPI_Initialized(&initialized);
+
+    if (!initialized)
+    {
+        MPI_Init(0, 0);
+    }
+
+    MPI_Comm_size(MPI_COMM_WORLD, &nodes);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    env.rank = rank;
+    env.nodes = nodes;
+
+    return env;
+}
+
+void destroy_environment(TangleEnvironment env)
+{
+    int finalized;
+    MPI_Finalized(&finalized);
+
+    if (!finalized)
+    {
+        MPI_Finalize();
+    }
+}

+ 320 - 0
tangle/src/distributed/gate_application.c

@@ -0,0 +1,320 @@
+#include "mpi.h"
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "communication.h"
+#include "tangle.h"
+
+llint is_bit_clear(llint n, llint pos) { return n & (1 << pos); }
+
+void apply_gate(TangleState state, int target, TangleGate u)
+{
+    if (target < state.m)
+    {
+        // Communication isn't required
+        llint stride = 1LL << (target + 1LL);
+        for (llint i = 0; i < state.node_amps; i += stride)
+        {
+            for (llint j = i; j < i + (1LL << target); j++)
+            {
+                llint zero = j;
+                llint one = j + (1LL << target);
+
+                cfloat zero_amp = state.amps[zero];
+                cfloat one_amp = state.amps[one];
+
+                state.amps[zero] = (u.A * zero_amp) + (u.B * one_amp);
+                state.amps[one] = (u.C * zero_amp) + (u.D * one_amp);
+            }
+        }
+    }
+    else
+    {
+        // Communication is required
+        llint stride = 1LL << (target - state.m);
+        llint global_state = state.rank * state.node_amps;
+
+        llint node_i, node_j;
+        if (is_bit_clear(global_state, target) == 0)
+        {
+            node_i = state.rank;
+            node_j = node_i + stride;
+        }
+        else
+        {
+            node_j = state.rank;
+            node_i = node_j - stride;
+        }
+
+        if (node_i == state.rank)
+        {
+            // s1 --> dt
+            // d2 --> st
+            send_top(state, node_j);
+
+            for (llint i = 0; i < state.temp_amps; i++)
+            {
+                llint zero = i + state.temp_amps;
+                llint one = i;
+
+                cfloat zero_amp = (&state.amps[0])[zero];
+                cfloat one_amp = (&state.amps[state.node_amps])[one];
+
+                (&state.amps[0])[zero] = (u.A * zero_amp) + (u.B * one_amp);
+                (&state.amps[state.node_amps])[one] =
+                    (u.C * zero_amp) + (u.D * one_amp);
+            }
+
+            // st --> d2
+            // dt --> s1
+            receive_top(state, node_j);
+        }
+        else
+        {
+            // s2 --> dt
+            // d1 --> st
+            send_bottom(state, node_i);
+
+            for (llint i = 0; i < state.temp_amps; i++)
+            {
+                llint zero = i;
+                llint one = i;
+
+                cfloat zero_amp = (&state.amps[state.node_amps])[zero];
+                cfloat one_amp = (&state.amps[0])[one];
+
+                (&state.amps[state.node_amps])[zero] =
+                    (u.A * zero_amp) + (u.B * one_amp);
+                (&state.amps[0])[one] = (u.C * zero_amp) + (u.D * one_amp);
+            }
+
+            // st --> d1
+            // dt --> s2
+            receive_bottom(state, node_i);
+        }
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+}
+
+void apply_diagonal_gate(TangleState state, int target, TangleGate u)
+{
+    // Communication can always be avoided
+    for (llint i = 0; i < state.node_amps; i++)
+    {
+        if (is_bit_clear(i, target) == 0)
+        {
+            state.amps[i] = u.A * state.amps[i];
+        }
+        else
+        {
+            state.amps[i] = u.D * state.amps[i];
+        }
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+}
+
+void apply_antidiagonal_gate(TangleState state, int target, TangleGate u)
+{
+    if (target < state.m)
+    {
+        // Communication isn't required
+        llint stride = 1LL << (target + 1LL);
+        for (llint i = 0; i < state.node_amps; i += stride)
+        {
+            for (llint j = i; j < i + (1LL << target); j++)
+            {
+                llint zero = j;
+                llint one = j + (1LL << target);
+
+                cfloat zero_amp = state.amps[zero];
+                cfloat one_amp = state.amps[one];
+
+                state.amps[zero] = u.B * one_amp;
+                state.amps[one] = u.C * zero_amp;
+            }
+        }
+    }
+    else
+    {
+        // Communication is required
+        llint stride = 1LL << (target - state.m);
+        llint global_state = state.rank * state.node_amps;
+
+        llint node_i, node_j;
+        if (is_bit_clear(global_state, target) == 0)
+        {
+            node_i = state.rank;
+            node_j = node_i + stride;
+        }
+        else
+        {
+            node_j = state.rank;
+            node_i = node_j - stride;
+        }
+
+        if (node_i == state.rank)
+        {
+            // s1 --> dt
+            // d2 --> st
+            send_top(state, node_j);
+
+            for (llint i = 0; i < state.temp_amps; i++)
+            {
+                llint zero = i + state.temp_amps;
+                llint one = i;
+
+                cfloat zero_amp = (&state.amps[0])[zero];
+                cfloat one_amp = (&state.amps[state.node_amps])[one];
+
+                (&state.amps[0])[zero] = u.B * one_amp;
+                (&state.amps[state.node_amps])[one] = u.C * zero_amp;
+            }
+
+            // st --> d2
+            // dt --> s1
+            receive_top(state, node_j);
+        }
+        else
+        {
+            // s2 --> dt
+            // d1 --> st
+            send_bottom(state, node_i);
+
+            for (llint i = 0; i < state.temp_amps; i++)
+            {
+                llint zero = i;
+                llint one = i;
+
+                cfloat zero_amp = (&state.amps[state.node_amps])[zero];
+                cfloat one_amp = (&state.amps[0])[one];
+
+                (&state.amps[state.node_amps])[zero] = u.B * one_amp;
+                (&state.amps[0])[one] = u.C * zero_amp;
+            }
+
+            // st --> d1
+            // dt --> s2
+            receive_bottom(state, node_i);
+        }
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+}
+
+void apply_controlled_gate(TangleState state, int control, int target,
+                           TangleGate u)
+{
+    if (target < state.m)
+    {
+        // Communication isn't required
+        llint stride = 1LL << (target + 1LL);
+        // TODO: Better stride for controlled gates
+        for (llint i = 0; i < state.node_amps; i += stride)
+        {
+            for (llint j = i; j < i + (1LL << target); j++)
+            {
+                if (is_bit_clear(j, control) != 0)
+                {
+                    llint zero = j;
+                    llint one = j + (1LL << target);
+
+                    cfloat zero_amp = state.amps[zero];
+                    cfloat one_amp = state.amps[one];
+
+                    state.amps[zero] = (u.A * zero_amp) + (u.B * one_amp);
+                    state.amps[one] = (u.C * zero_amp) + (u.D * one_amp);
+                }
+            }
+        }
+    }
+    else
+    {
+        // Communication is required
+        llint stride = 1LL << (target - state.m);
+        llint global_state = state.rank * state.node_amps;
+
+        llint node_i, node_j;
+
+        if (is_bit_clear(global_state, target) == 0)
+        {
+            node_i = state.rank;
+            node_j = node_i + stride;
+        }
+        else
+        {
+            node_j = state.rank;
+            node_i = node_j - stride;
+        }
+
+        if (node_i == state.rank)
+        {
+            // s1 --> dt
+            // d2 --> st
+            send_top(state, node_j);
+
+            for (llint i = 0; i < state.temp_amps; i++)
+            {
+                llint zero = i + state.temp_amps;
+                llint one = i;
+
+                if (is_bit_clear(zero, control) != 0)
+                {
+                    cfloat zero_amp = (&state.amps[0])[zero];
+                    cfloat one_amp = (&state.amps[state.node_amps])[one];
+
+                    (&state.amps[0])[zero] = (u.A * zero_amp) + (u.B * one_amp);
+                    (&state.amps[state.node_amps])[one] =
+                        (u.C * zero_amp) + (u.D * one_amp);
+                }
+            }
+
+            // st --> d2
+            // dt --> s1
+            receive_top(state, node_j);
+        }
+        else
+        {
+            // s2 --> dt
+            // d1 --> st
+            send_bottom(state, node_i);
+
+            for (llint i = 0; i < state.temp_amps; i++)
+            {
+                llint zero = i;
+                llint one = i;
+
+                if (is_bit_clear(zero, control) != 0)
+                {
+
+                    cfloat zero_amp = (&state.amps[state.node_amps])[zero];
+                    cfloat one_amp = (&state.amps[0])[one];
+
+                    (&state.amps[state.node_amps])[zero] =
+                        (u.A * zero_amp) + (u.B * one_amp);
+                    (&state.amps[0])[one] = (u.C * zero_amp) + (u.D * one_amp);
+                }
+            }
+            // st --> d1
+            // dt --> s2
+            receive_bottom(state, node_i);
+        }
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+}
+
+// There is some faster way of doing this
+// TODO: implement
+void apply_controlled_diagonal_gate(TangleState state, int control, int target,
+                                    TangleGate u)
+{
+    apply_controlled_gate(state, control, target, u);
+}
+
+// There is some faster way of doing this
+// TODO: implement
+void apply_controlled_antidiagonal_gate(TangleState state, int control,
+                                        int target, TangleGate u)
+{
+    apply_controlled_gate(state, control, target, u);
+}

+ 55 - 0
tangle/src/distributed/state.c

@@ -0,0 +1,55 @@
+#include "mpi.h"
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "tangle.h"
+
+TangleState create_state(int num_qubits, TangleEnvironment env)
+{
+    TangleState state;
+
+    state.num_qubits = num_qubits;
+    state.rank = env.rank;
+    state.nodes = env.nodes;
+    state.k = log2(env.nodes);
+    state.m = num_qubits - state.k;
+
+    state.node_amps = 1LL << state.m;
+    state.temp_amps = state.node_amps / 2;
+
+    // Allocate Memory
+    llint num_amplitudes = state.node_amps + state.temp_amps;
+    state.amps = malloc(num_amplitudes * sizeof(cfloat));
+
+    for (llint i = 0; i < num_amplitudes; i++)
+    {
+        state.amps[i] = cfloat(0.0, 0.0);
+    }
+
+    if (state.rank == 0)
+    {
+        state.amps[0] = cfloat(1.0, 0.0);
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    return state;
+}
+
+void print_state(TangleState state)
+{
+    for (int id = 0; id < state.nodes; id++)
+    {
+        if (state.rank == id)
+        {
+            for (llint i = 0; i < (1LL << state.m); i++)
+            {
+                cfloat amp = state.amps[i];
+                printf("%lld: %f + i%f\n", state.rank * (1 << state.m) + i, creal(amp), cimag(amp));
+            }
+        }
+
+        MPI_Barrier(MPI_COMM_WORLD);
+    }
+}

+ 23 - 0
tangle/src/gates.c

@@ -0,0 +1,23 @@
+#include <math.h>
+#include "tangle.h"
+
+// Predefined Gates
+TangleGate _X = {.A = 0, .B = 1, .C = 1, .D = 0};
+TangleGate _Z = {.A = 1, .B = 0, .C = 0, .D = -1};
+TangleGate _H = {.A = M_SQRT1_2, .B = M_SQRT1_2, .C = M_SQRT1_2, .D = -M_SQRT1_2};
+
+// Shorthand Gate Application Functions
+void X(TangleState state, int target)
+{
+    apply_antidiagonal_gate(state, target, _X);
+}
+
+void Z(TangleState state, int target)
+{
+    apply_diagonal_gate(state, target, _Z);
+}
+
+void H(TangleState state, int target)
+{
+    apply_gate(state, target, _H);
+}