5 lat temu · c1af6d4afe
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.9)
 
				 project(tangle_project)
			
 
				 
			
 
				 
			
 
				-option(DISTRIBUTED "Use MPI" OFF)
			
 
				+option(DISTRIBUTED "Use MPI" ON)
			
 
				 option(MULTITHREADED "Use OpenMP" OFF)
			
 
				 
			
 
				 
			
--- a/standalone/cli.c
+++ b/standalone/cli.c
@@ -1,32 +1,70 @@
 
				-#include "tangle.h"
			
 
				+// #include "tangle.h"
			
 
				+// #include <stdio.h>
			
 
				+// // #include "mpi.h"
			
 
				+
			
 
				+// int main()
			
 
				+// {
			
 
				+//     // MPI_Init(NULL, NULL);
			
 
				+
			
 
				+//     printf("Welcome\n");
			
 
				+
			
 
				+//     count_to(10);
			
 
				+
			
 
				+//     // MPI_Finalize();
			
 
				+
			
 
				+//     return 0;
			
 
				+// }
			
 
				+
			
 
				+// // #include <stdio.h>
			
 
				+// // #include <omp.h>
			
 
				+
			
 
				+// // int main()
			
 
				+// // {
			
 
				+// // #pragma omp parallel num_threads(3)
			
 
				+// //     {
			
 
				+// //         int id = omp_get_thread_num();
			
 
				+// //         int data = id;
			
 
				+// //         int total = omp_get_num_threads();
			
 
				+// //         printf("Greetings from process %d out of %d with Data %d\n", id, total, data);
			
 
				+// //     }
			
 
				+// //     printf("parallel for ends.\n");
			
 
				+// //     return 0;
			
 
				+// // }
			
 
				+
			
 
				 #include <stdio.h>
			
 
				-// #include "mpi.h"
			
 
				+#include <stdlib.h>
			
 
				+#include "tangle.h"
			
 
				 
			
 
				-int main()
			
 
				+int main(int argc, char *argv[])
			
 
				 {
			
 
				-    // MPI_Init(NULL, NULL);
			
 
				+    TangleEnvironment env = initialize_environment();
			
 
				 
			
 
				-    printf("Welcome\n");
			
 
				+    if (env.rank == 0 && argc <= 1)
			
 
				+    {
			
 
				+        printf("Please enter the number of qubits.\n");
			
 
				+        exit(1);
			
 
				+    }
			
 
				 
			
 
				-    count_to(10);
			
 
				+    int num_qubits = atoi(argv[1]);
			
 
				 
			
 
				-    // MPI_Finalize();
			
 
				+    TangleState state = create_state(num_qubits, env);
			
 
				 
			
 
				-    return 0;
			
 
				-}
			
 
				+    // Apply a hadamard gate to each qubit
			
 
				+    // for (int i = 0; i < num_qubits; i++)
			
 
				+    // {
			
 
				+    //     H(state, i);
			
 
				+    // }
			
 
				+    H(state, 0);
			
 
				 
			
 
				-// #include <stdio.h>
			
 
				-// #include <omp.h>
			
 
				+    // Print the initial amplitude
			
 
				+    if (env.rank == 0)
			
 
				+    {
			
 
				+        cfloat amp = state.amps[0];
			
 
				+        printf("|0> = %f + i%f\n", creal(amp), cimag(amp));
			
 
				+    }
			
 
				 
			
 
				-// int main()
			
 
				-// {
			
 
				-// #pragma omp parallel num_threads(3)
			
 
				-//     {
			
 
				-//         int id = omp_get_thread_num();
			
 
				-//         int data = id;
			
 
				-//         int total = omp_get_num_threads();
			
 
				-//         printf("Greetings from process %d out of %d with Data %d\n", id, total, data);
			
 
				-//     }
			
 
				-//     printf("parallel for ends.\n");
			
 
				-//     return 0;
			
 
				-// }
			
 
				+    // Finish the computation
			
 
				+    destroy_environment(env);
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
--- a/tangle/CMakeLists.txt
+++ b/tangle/CMakeLists.txt
@@ -7,9 +7,10 @@ endif()
 
				 
			
 
				 add_subdirectory(src)
			
 
				 message("Tangle Source: ${TANGLE_SRC}")
			
 
				+message("Tangle Platform Specific Source: ${TANGLE_PLATFORM_SRC}")
			
 
				 
			
 
				 
			
 
				-add_library(tangle STATIC ${TANGLE_SRC})
			
 
				+add_library(tangle STATIC ${TANGLE_SRC} ${TANGLE_PLATFORM_SRC})
			
 
				 target_include_directories(tangle PRIVATE src PUBLIC include)
			
 
				 
			
 
				 
			
@@ -18,4 +19,6 @@ if (DISTRIBUTED)
 
				     target_link_libraries(tangle ${MPI_C_LIBRARIES})
			
 
				 elseif (MULTITHREADED)
			
 
				     target_link_libraries(tangle PUBLIC OpenMP::OpenMP_C)
			
 
				-endif()
			
 
				+endif()
			
 
				+
			
 
				+target_link_libraries(tangle m)
			
--- a/tangle/include/communication.h
+++ b/tangle/include/communication.h
@@ -0,0 +1,11 @@
 
				+#ifndef __TANGLE_COMMUNICATION_H
			
 
				+#define __TANGLE_COMMUNICATION_H
			
 
				+
			
 
				+#include "tangle.h"
			
 
				+
			
 
				+void send_top(TangleState state, int node);
			
 
				+void receive_top(TangleState state, int node);
			
 
				+void send_bottom(TangleState state, int node);
			
 
				+void receive_bottom(TangleState state, int node);
			
 
				+
			
 
				+#endif
			
--- a/tangle/include/tangle.h
+++ b/tangle/include/tangle.h
@@ -3,6 +3,71 @@
 
				 #ifndef __TANGLE_H
			
 
				 #define __TANGLE_H
			
 
				 
			
 
				-void count_to(int i);
			
 
				+#include <complex.h>
			
 
				+
			
 
				+// Common Types
			
 
				+typedef float _Complex cfloat;
			
 
				+typedef long long int llint;
			
 
				+
			
 
				+#define cfloat(r, i) ((float)(r) + ((float)(i)) * I)
			
 
				+
			
 
				+// Tangle Types
			
 
				+typedef struct
			
 
				+{
			
 
				+    int rank;
			
 
				+    int nodes;
			
 
				+} TangleEnvironment;
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+    int num_qubits;
			
 
				+    cfloat *amps;
			
 
				+
			
 
				+    // Distributed Information
			
 
				+    int rank;
			
 
				+    int nodes;
			
 
				+    int k;
			
 
				+    int m;
			
 
				+
			
 
				+    llint node_amps;
			
 
				+    llint temp_amps;
			
 
				+} TangleState;
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+    cfloat A;
			
 
				+    cfloat B;
			
 
				+    cfloat C;
			
 
				+    cfloat D;
			
 
				+} TangleGate;
			
 
				+
			
 
				+// Environment/Distributed Functions
			
 
				+TangleEnvironment initialize_environment();
			
 
				+void destroy_environment(TangleEnvironment env);
			
 
				+
			
 
				+// Quantum State Functions
			
 
				+TangleState create_state(int num_qubits, TangleEnvironment env);
			
 
				+void print_state(TangleState state);
			
 
				+
			
 
				+// Quantum Gates
			
 
				+void apply_gate(TangleState state, int target, TangleGate u);
			
 
				+void apply_diagonal_gate(TangleState state, int target, TangleGate u);
			
 
				+void apply_antidiagonal_gate(TangleState state, int target, TangleGate u);
			
 
				+void apply_controlled_gate(TangleState state, int control, int target, TangleGate u);
			
 
				+void apply_controlled_diagonal_gate(TangleState state, int control, int target, TangleGate u);
			
 
				+void apply_controlled_antidiagonal_gate(TangleState state, int control, int target, TangleGate u);
			
 
				+
			
 
				+TangleGate _X;
			
 
				+TangleGate _Z;
			
 
				+TangleGate _H;
			
 
				+
			
 
				+void X(TangleState state, int target);
			
 
				+void H(TangleState state, int target);
			
 
				+void Z(TangleState state, int target);
			
 
				+void CZ(TangleState state, int control, int target);
			
 
				+void CX(TangleState state, int control, int target);
			
 
				+
			
 
				+// Measurement
			
 
				+llint measure(TangleState state);
			
 
				 
			
 
				 #endif
			
--- a/tangle/src/CMakeLists.txt
+++ b/tangle/src/CMakeLists.txt
@@ -1,15 +1,25 @@
 
				+# Global Code
			
 
				+set(TANGLE_SRC
			
 
				+    ${CMAKE_CURRENT_SOURCE_DIR}/gates.c
			
 
				+    PARENT_SCOPE
			
 
				+)
			
 
				+
			
 
				+# Platform Specific Code
			
 
				 if (DISTRIBUTED)
			
 
				-    set(TANGLE_SRC
			
 
				-        ${CMAKE_CURRENT_SOURCE_DIR}/mpi.c
			
 
				+    set(TANGLE_PLATFORM_SRC
			
 
				+        ${CMAKE_CURRENT_SOURCE_DIR}/distributed/communication.c
			
 
				+        ${CMAKE_CURRENT_SOURCE_DIR}/distributed/environment.c
			
 
				+        ${CMAKE_CURRENT_SOURCE_DIR}/distributed/state.c
			
 
				+        ${CMAKE_CURRENT_SOURCE_DIR}/distributed/gate_application.c
			
 
				         PARENT_SCOPE
			
 
				     )
			
 
				 elseif (MULTITHREADED)
			
 
				-    set(TANGLE_SRC
			
 
				+    set(TANGLE_PLATFORM_SRC
			
 
				         ${CMAKE_CURRENT_SOURCE_DIR}/omp.c
			
 
				         PARENT_SCOPE
			
 
				     )
			
 
				 else()
			
 
				-    set(TANGLE_SRC
			
 
				+    set(TANGLE_PLATFORM_SRC
			
 
				         ${CMAKE_CURRENT_SOURCE_DIR}/cpu.c
			
 
				         PARENT_SCOPE
			
 
				     )
			
--- a/tangle/src/distributed/communication.c
+++ b/tangle/src/distributed/communication.c
@@ -0,0 +1,58 @@
 
				+#include "mpi.h"
			
 
				+#include "tangle.h"
			
 
				+
			
 
				+// A helper to deal with communication that goes above the
			
 
				+// communication size limits in OpenMPI. This could be removed
			
 
				+// by using some BigMPI implementation, but it's as easy to just
			
 
				+// do this.
			
 
				+void communication_helper(TangleState state, int node, llint idx_1, llint idx_2, int tag_1, int tag_2)
			
 
				+{
			
 
				+    // There is a limit to the amount of data that can be transferred.
			
 
				+    // This limit is 28 for a double, 27 for a long quad.
			
 
				+    llint message_size = 1LL << 29;
			
 
				+
			
 
				+    // It will always be a state.temp_amps sized blocks transferred.
			
 
				+    // This is assuming the 'half and half' memory structure.
			
 
				+    // If needed, this can be decomposed.
			
 
				+    if (state.temp_amps < message_size)
			
 
				+    {
			
 
				+        message_size = state.temp_amps;
			
 
				+    }
			
 
				+
			
 
				+    // Message size is a power of two, so breaking up the message can
			
 
				+    // be done without errors.
			
 
				+    for (llint i = 0; i < state.temp_amps / message_size; i++)
			
 
				+    {
			
 
				+        MPI_Sendrecv(&state.amps[idx_1 + (i * message_size)], message_size, MPI_COMPLEX, node, tag_1,
			
 
				+                     &state.amps[idx_2 + (i * message_size)], message_size, MPI_COMPLEX, node, tag_2,
			
 
				+                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// Send the top half of the state
			
 
				+// Receive into the temporary storage
			
 
				+void send_top(TangleState state, int node)
			
 
				+{
			
 
				+    communication_helper(state, node, 0, state.node_amps, 1, 2);
			
 
				+}
			
 
				+
			
 
				+// Send the temporary storage
			
 
				+// Receive into the top half of the state
			
 
				+void receive_top(TangleState state, int node)
			
 
				+{
			
 
				+    communication_helper(state, node, state.node_amps, 0, 3, 4);
			
 
				+}
			
 
				+
			
 
				+// Send the bottom half of the state
			
 
				+// Receive into the temporary state
			
 
				+void send_bottom(TangleState state, int node)
			
 
				+{
			
 
				+    communication_helper(state, node, state.temp_amps, state.node_amps, 2, 1);
			
 
				+}
			
 
				+
			
 
				+// Send the temporary storage
			
 
				+// Receive into the bottom half of the state
			
 
				+void receive_bottom(TangleState state, int node)
			
 
				+{
			
 
				+    communication_helper(state, node, state.node_amps, state.temp_amps, 4, 3);
			
 
				+}
			
--- a/tangle/src/distributed/environment.c
+++ b/tangle/src/distributed/environment.c
@@ -0,0 +1,34 @@
 
				+#include "tangle.h"
			
 
				+#include "mpi.h"
			
 
				+
			
 
				+TangleEnvironment initialize_environment()
			
 
				+{
			
 
				+    TangleEnvironment env;
			
 
				+
			
 
				+    int rank, nodes, initialized;
			
 
				+    MPI_Initialized(&initialized);
			
 
				+
			
 
				+    if (!initialized)
			
 
				+    {
			
 
				+        MPI_Init(0, 0);
			
 
				+    }
			
 
				+
			
 
				+    MPI_Comm_size(MPI_COMM_WORLD, &nodes);
			
 
				+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
			
 
				+
			
 
				+    env.rank = rank;
			
 
				+    env.nodes = nodes;
			
 
				+
			
 
				+    return env;
			
 
				+}
			
 
				+
			
 
				+void destroy_environment(TangleEnvironment env)
			
 
				+{
			
 
				+    int finalized;
			
 
				+    MPI_Finalized(&finalized);
			
 
				+
			
 
				+    if (!finalized)
			
 
				+    {
			
 
				+        MPI_Finalize();
			
 
				+    }
			
 
				+}
			
--- a/tangle/src/distributed/gate_application.c
+++ b/tangle/src/distributed/gate_application.c
@@ -0,0 +1,320 @@
 
				+#include "mpi.h"
			
 
				+#include <math.h>
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#include "communication.h"
			
 
				+#include "tangle.h"
			
 
				+
			
 
				+llint is_bit_clear(llint n, llint pos) { return n & (1 << pos); }
			
 
				+
			
 
				+void apply_gate(TangleState state, int target, TangleGate u)
			
 
				+{
			
 
				+    if (target < state.m)
			
 
				+    {
			
 
				+        // Communication isn't required
			
 
				+        llint stride = 1LL << (target + 1LL);
			
 
				+        for (llint i = 0; i < state.node_amps; i += stride)
			
 
				+        {
			
 
				+            for (llint j = i; j < i + (1LL << target); j++)
			
 
				+            {
			
 
				+                llint zero = j;
			
 
				+                llint one = j + (1LL << target);
			
 
				+
			
 
				+                cfloat zero_amp = state.amps[zero];
			
 
				+                cfloat one_amp = state.amps[one];
			
 
				+
			
 
				+                state.amps[zero] = (u.A * zero_amp) + (u.B * one_amp);
			
 
				+                state.amps[one] = (u.C * zero_amp) + (u.D * one_amp);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        // Communication is required
			
 
				+        llint stride = 1LL << (target - state.m);
			
 
				+        llint global_state = state.rank * state.node_amps;
			
 
				+
			
 
				+        llint node_i, node_j;
			
 
				+        if (is_bit_clear(global_state, target) == 0)
			
 
				+        {
			
 
				+            node_i = state.rank;
			
 
				+            node_j = node_i + stride;
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            node_j = state.rank;
			
 
				+            node_i = node_j - stride;
			
 
				+        }
			
 
				+
			
 
				+        if (node_i == state.rank)
			
 
				+        {
			
 
				+            // s1 --> dt
			
 
				+            // d2 --> st
			
 
				+            send_top(state, node_j);
			
 
				+
			
 
				+            for (llint i = 0; i < state.temp_amps; i++)
			
 
				+            {
			
 
				+                llint zero = i + state.temp_amps;
			
 
				+                llint one = i;
			
 
				+
			
 
				+                cfloat zero_amp = (&state.amps[0])[zero];
			
 
				+                cfloat one_amp = (&state.amps[state.node_amps])[one];
			
 
				+
			
 
				+                (&state.amps[0])[zero] = (u.A * zero_amp) + (u.B * one_amp);
			
 
				+                (&state.amps[state.node_amps])[one] =
			
 
				+                    (u.C * zero_amp) + (u.D * one_amp);
			
 
				+            }
			
 
				+
			
 
				+            // st --> d2
			
 
				+            // dt --> s1
			
 
				+            receive_top(state, node_j);
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            // s2 --> dt
			
 
				+            // d1 --> st
			
 
				+            send_bottom(state, node_i);
			
 
				+
			
 
				+            for (llint i = 0; i < state.temp_amps; i++)
			
 
				+            {
			
 
				+                llint zero = i;
			
 
				+                llint one = i;
			
 
				+
			
 
				+                cfloat zero_amp = (&state.amps[state.node_amps])[zero];
			
 
				+                cfloat one_amp = (&state.amps[0])[one];
			
 
				+
			
 
				+                (&state.amps[state.node_amps])[zero] =
			
 
				+                    (u.A * zero_amp) + (u.B * one_amp);
			
 
				+                (&state.amps[0])[one] = (u.C * zero_amp) + (u.D * one_amp);
			
 
				+            }
			
 
				+
			
 
				+            // st --> d1
			
 
				+            // dt --> s2
			
 
				+            receive_bottom(state, node_i);
			
 
				+        }
			
 
				+    }
			
 
				+    MPI_Barrier(MPI_COMM_WORLD);
			
 
				+}
			
 
				+
			
 
				+void apply_diagonal_gate(TangleState state, int target, TangleGate u)
			
 
				+{
			
 
				+    // Communication can always be avoided
			
 
				+    for (llint i = 0; i < state.node_amps; i++)
			
 
				+    {
			
 
				+        if (is_bit_clear(i, target) == 0)
			
 
				+        {
			
 
				+            state.amps[i] = u.A * state.amps[i];
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            state.amps[i] = u.D * state.amps[i];
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    MPI_Barrier(MPI_COMM_WORLD);
			
 
				+}
			
 
				+
			
 
				+void apply_antidiagonal_gate(TangleState state, int target, TangleGate u)
			
 
				+{
			
 
				+    if (target < state.m)
			
 
				+    {
			
 
				+        // Communication isn't required
			
 
				+        llint stride = 1LL << (target + 1LL);
			
 
				+        for (llint i = 0; i < state.node_amps; i += stride)
			
 
				+        {
			
 
				+            for (llint j = i; j < i + (1LL << target); j++)
			
 
				+            {
			
 
				+                llint zero = j;
			
 
				+                llint one = j + (1LL << target);
			
 
				+
			
 
				+                cfloat zero_amp = state.amps[zero];
			
 
				+                cfloat one_amp = state.amps[one];
			
 
				+
			
 
				+                state.amps[zero] = u.B * one_amp;
			
 
				+                state.amps[one] = u.C * zero_amp;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        // Communication is required
			
 
				+        llint stride = 1LL << (target - state.m);
			
 
				+        llint global_state = state.rank * state.node_amps;
			
 
				+
			
 
				+        llint node_i, node_j;
			
 
				+        if (is_bit_clear(global_state, target) == 0)
			
 
				+        {
			
 
				+            node_i = state.rank;
			
 
				+            node_j = node_i + stride;
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            node_j = state.rank;
			
 
				+            node_i = node_j - stride;
			
 
				+        }
			
 
				+
			
 
				+        if (node_i == state.rank)
			
 
				+        {
			
 
				+            // s1 --> dt
			
 
				+            // d2 --> st
			
 
				+            send_top(state, node_j);
			
 
				+
			
 
				+            for (llint i = 0; i < state.temp_amps; i++)
			
 
				+            {
			
 
				+                llint zero = i + state.temp_amps;
			
 
				+                llint one = i;
			
 
				+
			
 
				+                cfloat zero_amp = (&state.amps[0])[zero];
			
 
				+                cfloat one_amp = (&state.amps[state.node_amps])[one];
			
 
				+
			
 
				+                (&state.amps[0])[zero] = u.B * one_amp;
			
 
				+                (&state.amps[state.node_amps])[one] = u.C * zero_amp;
			
 
				+            }
			
 
				+
			
 
				+            // st --> d2
			
 
				+            // dt --> s1
			
 
				+            receive_top(state, node_j);
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            // s2 --> dt
			
 
				+            // d1 --> st
			
 
				+            send_bottom(state, node_i);
			
 
				+
			
 
				+            for (llint i = 0; i < state.temp_amps; i++)
			
 
				+            {
			
 
				+                llint zero = i;
			
 
				+                llint one = i;
			
 
				+
			
 
				+                cfloat zero_amp = (&state.amps[state.node_amps])[zero];
			
 
				+                cfloat one_amp = (&state.amps[0])[one];
			
 
				+
			
 
				+                (&state.amps[state.node_amps])[zero] = u.B * one_amp;
			
 
				+                (&state.amps[0])[one] = u.C * zero_amp;
			
 
				+            }
			
 
				+
			
 
				+            // st --> d1
			
 
				+            // dt --> s2
			
 
				+            receive_bottom(state, node_i);
			
 
				+        }
			
 
				+    }
			
 
				+    MPI_Barrier(MPI_COMM_WORLD);
			
 
				+}
			
 
				+
			
 
				+void apply_controlled_gate(TangleState state, int control, int target,
			
 
				+                           TangleGate u)
			
 
				+{
			
 
				+    if (target < state.m)
			
 
				+    {
			
 
				+        // Communication isn't required
			
 
				+        llint stride = 1LL << (target + 1LL);
			
 
				+        // TODO: Better stride for controlled gates
			
 
				+        for (llint i = 0; i < state.node_amps; i += stride)
			
 
				+        {
			
 
				+            for (llint j = i; j < i + (1LL << target); j++)
			
 
				+            {
			
 
				+                if (is_bit_clear(j, control) != 0)
			
 
				+                {
			
 
				+                    llint zero = j;
			
 
				+                    llint one = j + (1LL << target);
			
 
				+
			
 
				+                    cfloat zero_amp = state.amps[zero];
			
 
				+                    cfloat one_amp = state.amps[one];
			
 
				+
			
 
				+                    state.amps[zero] = (u.A * zero_amp) + (u.B * one_amp);
			
 
				+                    state.amps[one] = (u.C * zero_amp) + (u.D * one_amp);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        // Communication is required
			
 
				+        llint stride = 1LL << (target - state.m);
			
 
				+        llint global_state = state.rank * state.node_amps;
			
 
				+
			
 
				+        llint node_i, node_j;
			
 
				+
			
 
				+        if (is_bit_clear(global_state, target) == 0)
			
 
				+        {
			
 
				+            node_i = state.rank;
			
 
				+            node_j = node_i + stride;
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            node_j = state.rank;
			
 
				+            node_i = node_j - stride;
			
 
				+        }
			
 
				+
			
 
				+        if (node_i == state.rank)
			
 
				+        {
			
 
				+            // s1 --> dt
			
 
				+            // d2 --> st
			
 
				+            send_top(state, node_j);
			
 
				+
			
 
				+            for (llint i = 0; i < state.temp_amps; i++)
			
 
				+            {
			
 
				+                llint zero = i + state.temp_amps;
			
 
				+                llint one = i;
			
 
				+
			
 
				+                if (is_bit_clear(zero, control) != 0)
			
 
				+                {
			
 
				+                    cfloat zero_amp = (&state.amps[0])[zero];
			
 
				+                    cfloat one_amp = (&state.amps[state.node_amps])[one];
			
 
				+
			
 
				+                    (&state.amps[0])[zero] = (u.A * zero_amp) + (u.B * one_amp);
			
 
				+                    (&state.amps[state.node_amps])[one] =
			
 
				+                        (u.C * zero_amp) + (u.D * one_amp);
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            // st --> d2
			
 
				+            // dt --> s1
			
 
				+            receive_top(state, node_j);
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            // s2 --> dt
			
 
				+            // d1 --> st
			
 
				+            send_bottom(state, node_i);
			
 
				+
			
 
				+            for (llint i = 0; i < state.temp_amps; i++)
			
 
				+            {
			
 
				+                llint zero = i;
			
 
				+                llint one = i;
			
 
				+
			
 
				+                if (is_bit_clear(zero, control) != 0)
			
 
				+                {
			
 
				+
			
 
				+                    cfloat zero_amp = (&state.amps[state.node_amps])[zero];
			
 
				+                    cfloat one_amp = (&state.amps[0])[one];
			
 
				+
			
 
				+                    (&state.amps[state.node_amps])[zero] =
			
 
				+                        (u.A * zero_amp) + (u.B * one_amp);
			
 
				+                    (&state.amps[0])[one] = (u.C * zero_amp) + (u.D * one_amp);
			
 
				+                }
			
 
				+            }
			
 
				+            // st --> d1
			
 
				+            // dt --> s2
			
 
				+            receive_bottom(state, node_i);
			
 
				+        }
			
 
				+    }
			
 
				+    MPI_Barrier(MPI_COMM_WORLD);
			
 
				+}
			
 
				+
			
 
				+// There is some faster way of doing this
			
 
				+// TODO: implement
			
 
				+void apply_controlled_diagonal_gate(TangleState state, int control, int target,
			
 
				+                                    TangleGate u)
			
 
				+{
			
 
				+    apply_controlled_gate(state, control, target, u);
			
 
				+}
			
 
				+
			
 
				+// There is some faster way of doing this
			
 
				+// TODO: implement
			
 
				+void apply_controlled_antidiagonal_gate(TangleState state, int control,
			
 
				+                                        int target, TangleGate u)
			
 
				+{
			
 
				+    apply_controlled_gate(state, control, target, u);
			
 
				+}
			
--- a/tangle/src/distributed/state.c
+++ b/tangle/src/distributed/state.c
@@ -0,0 +1,55 @@
 
				+#include "mpi.h"
			
 
				+#include <math.h>
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#include "tangle.h"
			
 
				+
			
 
				+TangleState create_state(int num_qubits, TangleEnvironment env)
			
 
				+{
			
 
				+    TangleState state;
			
 
				+
			
 
				+    state.num_qubits = num_qubits;
			
 
				+    state.rank = env.rank;
			
 
				+    state.nodes = env.nodes;
			
 
				+    state.k = log2(env.nodes);
			
 
				+    state.m = num_qubits - state.k;
			
 
				+
			
 
				+    state.node_amps = 1LL << state.m;
			
 
				+    state.temp_amps = state.node_amps / 2;
			
 
				+
			
 
				+    // Allocate Memory
			
 
				+    llint num_amplitudes = state.node_amps + state.temp_amps;
			
 
				+    state.amps = malloc(num_amplitudes * sizeof(cfloat));
			
 
				+
			
 
				+    for (llint i = 0; i < num_amplitudes; i++)
			
 
				+    {
			
 
				+        state.amps[i] = cfloat(0.0, 0.0);
			
 
				+    }
			
 
				+
			
 
				+    if (state.rank == 0)
			
 
				+    {
			
 
				+        state.amps[0] = cfloat(1.0, 0.0);
			
 
				+    }
			
 
				+
			
 
				+    MPI_Barrier(MPI_COMM_WORLD);
			
 
				+
			
 
				+    return state;
			
 
				+}
			
 
				+
			
 
				+void print_state(TangleState state)
			
 
				+{
			
 
				+    for (int id = 0; id < state.nodes; id++)
			
 
				+    {
			
 
				+        if (state.rank == id)
			
 
				+        {
			
 
				+            for (llint i = 0; i < (1LL << state.m); i++)
			
 
				+            {
			
 
				+                cfloat amp = state.amps[i];
			
 
				+                printf("%lld: %f + i%f\n", state.rank * (1 << state.m) + i, creal(amp), cimag(amp));
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        MPI_Barrier(MPI_COMM_WORLD);
			
 
				+    }
			
 
				+}
			
--- a/tangle/src/gates.c
+++ b/tangle/src/gates.c
@@ -0,0 +1,23 @@
 
				+#include <math.h>
			
 
				+#include "tangle.h"
			
 
				+
			
 
				+// Predefined Gates
			
 
				+TangleGate _X = {.A = 0, .B = 1, .C = 1, .D = 0};
			
 
				+TangleGate _Z = {.A = 1, .B = 0, .C = 0, .D = -1};
			
 
				+TangleGate _H = {.A = M_SQRT1_2, .B = M_SQRT1_2, .C = M_SQRT1_2, .D = -M_SQRT1_2};
			
 
				+
			
 
				+// Shorthand Gate Application Functions
			
 
				+void X(TangleState state, int target)
			
 
				+{
			
 
				+    apply_antidiagonal_gate(state, target, _X);
			
 
				+}
			
 
				+
			
 
				+void Z(TangleState state, int target)
			
 
				+{
			
 
				+    apply_diagonal_gate(state, target, _Z);
			
 
				+}
			
 
				+
			
 
				+void H(TangleState state, int target)
			
 
				+{
			
 
				+    apply_gate(state, target, _H);
			
 
				+}