miniWeather_openacc.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650
  1. //////////////////////////////////////////////////////////////////////////////////////////
  2. // miniWeather
  3. // Author: Matt Norman <normanmr@ornl.gov> , Oak Ridge National Laboratory
  4. // This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
  5. // For documentation, please see the attached documentation in the "documentation" folder
  6. //////////////////////////////////////////////////////////////////////////////////////////
  7. /*
  8. ** Copyright (c) 2018, National Center for Computational Sciences, Oak Ridge National Laboratory. All rights reserved.
  9. **
  10. ** Portions Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
  11. */
  12. #include <stdlib.h>
  13. #include <math.h>
  14. #include <stdio.h>
  15. #include <nvtx3/nvToolsExt.h>
  16. const double pi = 3.14159265358979323846264338327; //Pi
  17. const double grav = 9.8; //Gravitational acceleration (m / s^2)
  18. const double cp = 1004.; //Specific heat of dry air at constant pressure
  19. const double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T)
  20. const double p0 = 1.e5; //Standard pressure at the surface in Pascals
  21. const double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
  22. const double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much)
  23. //Define domain and stability-related constants
  24. const double xlen = 2.e4; //Length of the domain in the x-direction (meters)
  25. const double zlen = 1.e4; //Length of the domain in the z-direction (meters)
  26. const double hv_beta = 0.25; //How strong to diffuse the solution: hv_beta \in [0:1]
  27. const double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
  28. const double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
  29. const int hs = 2; //"Halo" size: number of cells needed for a full "stencil" of information for reconstruction
  30. const int sten_size = 4; //Size of the stencil used for interpolation
  31. //Parameters for indexing and flags
  32. const int NUM_VARS = 4; //Number of fluid state variables
  33. const int ID_DENS = 0; //index for density ("rho")
  34. const int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u")
  35. const int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w")
  36. const int ID_RHOT = 3; //index for density * potential temperature ("rho * theta")
  37. const int DIR_X = 1; //Integer constant to express that this operation is in the x-direction
  38. const int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction
  39. const int nqpoints = 3;
  40. double qpoints[] = {0.112701665379258311482073460022E0, 0.500000000000000000000000000000E0, 0.887298334620741688517926539980E0};
  41. double qweights[] = {0.277777777777777777777777777779E0, 0.444444444444444444444444444444E0, 0.277777777777777777777777777779E0};
  42. ///////////////////////////////////////////////////////////////////////////////////////
  43. // Variables that are initialized but remain static over the course of the simulation
  44. ///////////////////////////////////////////////////////////////////////////////////////
  45. double sim_time; //total simulation time in seconds
  46. double output_freq; //frequency to perform output in seconds
  47. double dt; //Model time step (seconds)
  48. int nx, nz; //Number of local grid cells in the x- and z- dimensions
  49. double dx, dz; //Grid space length in x- and z-dimension (meters)
  50. int nx_glob, nz_glob; //Number of total grid cells in the x- and z- dimensions
  51. int i_beg, k_beg; //beginning index in the x- and z-directions
  52. int nranks, myrank; //my rank id
  53. int left_rank, right_rank; //Rank IDs that exist to my left and right in the global domain
  54. double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
  55. double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
  56. double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
  57. double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
  58. double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
  59. ///////////////////////////////////////////////////////////////////////////////////////
  60. // Variables that are dynamics over the course of the simulation
  61. ///////////////////////////////////////////////////////////////////////////////////////
  62. double etime; //Elapsed model time
  63. double output_counter; //Helps determine when it's time to do output
  64. //Runtime variable arrays
  65. double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
  66. double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
  67. double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
  68. double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
  69. int num_out = 0; //The number of outputs performed so far
  70. int direction_switch = 1;
  71. //How is this not in the standard?!
  72. double dmin(double a, double b)
  73. {
  74. if (a < b)
  75. {
  76. return a;
  77. }
  78. else
  79. {
  80. return b;
  81. }
  82. };
  83. //Declaring the functions defined after "main"
  84. void init();
  85. void finalize();
  86. void injection(double x, double z, double &r, double &u, double &w, double &t, double &hr, double &ht);
  87. void hydro_const_theta(double z, double &r, double &t);
  88. void output(double *state, double etime);
  89. void ncwrap(int ierr, int line);
  90. void perform_timestep(double *state, double *state_tmp, double *flux, double *tend, double dt);
  91. void semi_discrete_step(double *state_init, double *state_forcing, double *state_out, double dt, int dir, double *flux, double *tend);
  92. void compute_tendencies_x(double *state, double *flux, double *tend);
  93. void compute_tendencies_z(double *state, double *flux, double *tend);
  94. void set_halo_values_x(double *state);
  95. void set_halo_values_z(double *state);
  96. ///////////////////////////////////////////////////////////////////////////////////////
  97. // THE MAIN PROGRAM STARTS HERE
  98. ///////////////////////////////////////////////////////////////////////////////////////
  99. int main(int argc, char **argv)
  100. {
  101. ///////////////////////////////////////////////////////////////////////////////////////
  102. // BEGIN USER-CONFIGURABLE PARAMETERS
  103. ///////////////////////////////////////////////////////////////////////////////////////
  104. //The x-direction length is twice as long as the z-direction length
  105. //So, you'll want to have nx_glob be twice as large as nz_glob
  106. nx_glob = 40; //Number of total cells in the x-dirction
  107. nz_glob = 20; //Number of total cells in the z-dirction
  108. sim_time = 1000; //How many seconds to run the simulation
  109. output_freq = 100; //How frequently to output data to file (in seconds)
  110. ///////////////////////////////////////////////////////////////////////////////////////
  111. // END USER-CONFIGURABLE PARAMETERS
  112. ///////////////////////////////////////////////////////////////////////////////////////
  113. if (argc == 4)
  114. {
  115. printf("The arguments supplied are %s %s %s\n", argv[1], argv[2], argv[3]);
  116. nx_glob = atoi(argv[1]);
  117. nz_glob = atoi(argv[2]);
  118. sim_time = atoi(argv[3]);
  119. }
  120. else
  121. {
  122. printf("Using default values ...\n");
  123. }
  124. nvtxRangePushA("Total");
  125. init();
  126. #pragma acc data copyin(state_tmp[(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS], hy_dens_cell[nz + 2 * hs], hy_dens_theta_cell[nz + 2 * hs], hy_dens_int[nz + 1], hy_dens_theta_int[nz + 1], hy_pressure_int[nz + 1]) \
  127. create(flux[(nz + 1) * (nx + 1) * NUM_VARS], tend[nz * nx * NUM_VARS]) \
  128. copy(state [0:(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
  129. {
  130. //Output the initial state
  131. //output(state, etime);
  132. ////////////////////////////////////////////////////
  133. // MAIN TIME STEP LOOP
  134. ////////////////////////////////////////////////////
  135. nvtxRangePushA("while");
  136. while (etime < sim_time)
  137. {
  138. //If the time step leads to exceeding the simulation time, shorten it for the last step
  139. if (etime + dt > sim_time)
  140. {
  141. dt = sim_time - etime;
  142. }
  143. //Perform a single time step
  144. nvtxRangePushA("perform_timestep");
  145. perform_timestep(state, state_tmp, flux, tend, dt);
  146. nvtxRangePop();
  147. //Inform the user
  148. printf("Elapsed Time: %lf / %lf\n", etime, sim_time);
  149. //Update the elapsed time and output counter
  150. etime = etime + dt;
  151. output_counter = output_counter + dt;
  152. //If it's time for output, reset the counter, and do output
  153. if (output_counter >= output_freq)
  154. {
  155. output_counter = output_counter - output_freq;
  156. #pragma acc update host(state[(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
  157. //output(state, etime);
  158. }
  159. }
  160. nvtxRangePop();
  161. }
  162. finalize();
  163. nvtxRangePop();
  164. }
  165. //Performs a single dimensionally split time step using a simple low-storate three-stage Runge-Kutta time integrator
  166. //The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
  167. //order of directions is alternated each time step.
  168. //The Runge-Kutta method used here is defined as follows:
  169. // q* = q[n] + dt/3 * rhs(q[n])
  170. // q** = q[n] + dt/2 * rhs(q* )
  171. // q[n+1] = q[n] + dt/1 * rhs(q** )
  172. void perform_timestep(double *state, double *state_tmp, double *flux, double *tend, double dt)
  173. {
  174. if (direction_switch)
  175. {
  176. //x-direction first
  177. semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
  178. semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
  179. semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
  180. //z-direction second
  181. semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
  182. semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
  183. semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
  184. }
  185. else
  186. {
  187. //z-direction second
  188. semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
  189. semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
  190. semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
  191. //x-direction first
  192. semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
  193. semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
  194. semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
  195. }
  196. if (direction_switch)
  197. {
  198. direction_switch = 0;
  199. }
  200. else
  201. {
  202. direction_switch = 1;
  203. }
  204. }
  205. //Perform a single semi-discretized step in time with the form:
  206. //state_out = state_init + dt * rhs(state_forcing)
  207. //Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
  208. void semi_discrete_step(double *state_init, double *state_forcing, double *state_out, double dt, int dir, double *flux, double *tend)
  209. {
  210. int i, k, ll, inds, indt;
  211. if (dir == DIR_X)
  212. {
  213. //Set the halo values in the x-direction
  214. set_halo_values_x(state_forcing);
  215. //Compute the time tendencies for the fluid state in the x-direction
  216. compute_tendencies_x(state_forcing, flux, tend);
  217. }
  218. else if (dir == DIR_Z)
  219. {
  220. //Set the halo values in the z-direction
  221. set_halo_values_z(state_forcing);
  222. //Compute the time tendencies for the fluid state in the z-direction
  223. compute_tendencies_z(state_forcing, flux, tend);
  224. }
  225. /////////////////////////////////////////////////
  226. // TODO: THREAD ME
  227. /////////////////////////////////////////////////
  228. //Apply the tendencies to the fluid state
  229. #pragma acc parallel loop collapse(3) private(inds, indt) default(present)
  230. for (ll = 0; ll < NUM_VARS; ll++)
  231. {
  232. for (k = 0; k < nz; k++)
  233. {
  234. for (i = 0; i < nx; i++)
  235. {
  236. inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
  237. indt = ll * nz * nx + k * nx + i;
  238. state_out[inds] = state_init[inds] + dt * tend[indt];
  239. }
  240. }
  241. }
  242. }
  243. //Compute the time tendencies of the fluid state using forcing in the x-direction
  244. //First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity)
  245. //Then, compute the tendencies using those fluxes
  246. void compute_tendencies_x(double *state, double *flux, double *tend)
  247. {
  248. int i, k, ll, s, inds, indf1, indf2, indt;
  249. double r, u, w, t, p, stencil[4], d3_vals[NUM_VARS], vals[NUM_VARS], hv_coef;
  250. //Compute the hyperviscosity coeficient
  251. hv_coef = -hv_beta * dx / (16 * dt);
  252. /////////////////////////////////////////////////
  253. // TODO: THREAD ME
  254. /////////////////////////////////////////////////
  255. //Compute fluxes in the x-direction for each cell
  256. #pragma acc parallel loop collapse(2) private(ll, s, inds, stencil, vals, d3_vals, r, u, w, t, p) default(present)
  257. for (k = 0; k < nz; k++)
  258. {
  259. for (i = 0; i < nx + 1; i++)
  260. {
  261. //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
  262. for (ll = 0; ll < NUM_VARS; ll++)
  263. {
  264. for (s = 0; s < sten_size; s++)
  265. {
  266. inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + s;
  267. stencil[s] = state[inds];
  268. }
  269. //Fourth-order-accurate interpolation of the state
  270. vals[ll] = -stencil[0] / 12 + 7 * stencil[1] / 12 + 7 * stencil[2] / 12 - stencil[3] / 12;
  271. //First-order-accurate interpolation of the third spatial derivative of the state (for artificial viscosity)
  272. d3_vals[ll] = -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3];
  273. }
  274. //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively)
  275. r = vals[ID_DENS] + hy_dens_cell[k + hs];
  276. u = vals[ID_UMOM] / r;
  277. w = vals[ID_WMOM] / r;
  278. t = (vals[ID_RHOT] + hy_dens_theta_cell[k + hs]) / r;
  279. p = C0 * pow((r * t), gamm);
  280. //Compute the flux vector
  281. flux[ID_DENS * (nz + 1) * (nx + 1) + k * (nx + 1) + i] = r * u - hv_coef * d3_vals[ID_DENS];
  282. flux[ID_UMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] = r * u * u + p - hv_coef * d3_vals[ID_UMOM];
  283. flux[ID_WMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] = r * u * w - hv_coef * d3_vals[ID_WMOM];
  284. flux[ID_RHOT * (nz + 1) * (nx + 1) + k * (nx + 1) + i] = r * u * t - hv_coef * d3_vals[ID_RHOT];
  285. }
  286. }
  287. /////////////////////////////////////////////////
  288. // TODO: THREAD ME
  289. /////////////////////////////////////////////////
  290. //Use the fluxes to compute tendencies for each cell
  291. #pragma acc parallel loop collapse(3) private(indt, indf1, indf2) default(present)
  292. for (ll = 0; ll < NUM_VARS; ll++)
  293. {
  294. for (k = 0; k < nz; k++)
  295. {
  296. for (i = 0; i < nx; i++)
  297. {
  298. indt = ll * nz * nx + k * nx + i;
  299. indf1 = ll * (nz + 1) * (nx + 1) + k * (nx + 1) + i;
  300. indf2 = ll * (nz + 1) * (nx + 1) + k * (nx + 1) + i + 1;
  301. tend[indt] = -(flux[indf2] - flux[indf1]) / dx;
  302. }
  303. }
  304. }
  305. }
  306. //Compute the time tendencies of the fluid state using forcing in the z-direction
  307. //First, compute the flux vector at each cell interface in the z-direction (including hyperviscosity)
  308. //Then, compute the tendencies using those fluxes
  309. void compute_tendencies_z(double *state, double *flux, double *tend)
  310. {
  311. int i, k, ll, s, inds, indf1, indf2, indt;
  312. double r, u, w, t, p, stencil[4], d3_vals[NUM_VARS], vals[NUM_VARS], hv_coef;
  313. //Compute the hyperviscosity coeficient
  314. hv_coef = -hv_beta * dx / (16 * dt);
  315. /////////////////////////////////////////////////
  316. // TODO: THREAD ME
  317. /////////////////////////////////////////////////
  318. //Compute fluxes in the x-direction for each cell
  319. #pragma acc parallel loop collapse(2) private(ll, s, inds, stencil, vals, d3_vals, r, u, w, t, p) default(present)
  320. for (k = 0; k < nz + 1; k++)
  321. {
  322. for (i = 0; i < nx; i++)
  323. {
  324. //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
  325. for (ll = 0; ll < NUM_VARS; ll++)
  326. {
  327. for (s = 0; s < sten_size; s++)
  328. {
  329. inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + s) * (nx + 2 * hs) + i + hs;
  330. stencil[s] = state[inds];
  331. }
  332. //Fourth-order-accurate interpolation of the state
  333. vals[ll] = -stencil[0] / 12 + 7 * stencil[1] / 12 + 7 * stencil[2] / 12 - stencil[3] / 12;
  334. //First-order-accurate interpolation of the third spatial derivative of the state
  335. d3_vals[ll] = -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3];
  336. }
  337. //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively)
  338. r = vals[ID_DENS] + hy_dens_int[k];
  339. u = vals[ID_UMOM] / r;
  340. w = vals[ID_WMOM] / r;
  341. t = (vals[ID_RHOT] + hy_dens_theta_int[k]) / r;
  342. p = C0 * pow((r * t), gamm) - hy_pressure_int[k];
  343. //Compute the flux vector with hyperviscosity
  344. flux[ID_DENS * (nz + 1) * (nx + 1) + k * (nx + 1) + i] = r * w - hv_coef * d3_vals[ID_DENS];
  345. flux[ID_UMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] = r * w * u - hv_coef * d3_vals[ID_UMOM];
  346. flux[ID_WMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] = r * w * w + p - hv_coef * d3_vals[ID_WMOM];
  347. flux[ID_RHOT * (nz + 1) * (nx + 1) + k * (nx + 1) + i] = r * w * t - hv_coef * d3_vals[ID_RHOT];
  348. }
  349. }
  350. /////////////////////////////////////////////////
  351. // TODO: THREAD ME
  352. /////////////////////////////////////////////////
  353. //Use the fluxes to compute tendencies for each cell
  354. #pragma acc parallel loop collapse(3) private(indt, indf1, indf2) default(present)
  355. for (ll = 0; ll < NUM_VARS; ll++)
  356. {
  357. for (k = 0; k < nz; k++)
  358. {
  359. for (i = 0; i < nx; i++)
  360. {
  361. indt = ll * nz * nx + k * nx + i;
  362. indf1 = ll * (nz + 1) * (nx + 1) + (k) * (nx + 1) + i;
  363. indf2 = ll * (nz + 1) * (nx + 1) + (k + 1) * (nx + 1) + i;
  364. tend[indt] = -(flux[indf2] - flux[indf1]) / dz;
  365. if (ll == ID_WMOM)
  366. {
  367. inds = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
  368. tend[indt] = tend[indt] - state[inds] * grav;
  369. }
  370. }
  371. }
  372. }
  373. }
  374. void set_halo_values_x(double *state)
  375. {
  376. int k, ll, ind_r, ind_u, ind_t, i;
  377. double z;
  378. #pragma acc parallel loop collapse(2) default(present)
  379. for (ll = 0; ll < NUM_VARS; ll++)
  380. {
  381. for (k = 0; k < nz; k++)
  382. {
  383. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + 0] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + nx + hs - 2];
  384. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + 1] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + nx + hs - 1];
  385. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + nx + hs] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + hs];
  386. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + nx + hs + 1] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + hs + 1];
  387. }
  388. }
  389. ////////////////////////////////////////////////////
  390. if (myrank == 0)
  391. {
  392. for (k = 0; k < nz; k++)
  393. {
  394. for (i = 0; i < hs; i++)
  395. {
  396. z = (k_beg + k + 0.5) * dz;
  397. if (abs(z - 3 * zlen / 4) <= zlen / 16)
  398. {
  399. ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i;
  400. ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i;
  401. ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i;
  402. state[ind_u] = (state[ind_r] + hy_dens_cell[k + hs]) * 50.;
  403. state[ind_t] = (state[ind_r] + hy_dens_cell[k + hs]) * 298. - hy_dens_theta_cell[k + hs];
  404. }
  405. }
  406. }
  407. }
  408. }
  409. //Set this task's halo values in the z-direction.
  410. //decomposition in the vertical direction.
  411. void set_halo_values_z(double *state)
  412. {
  413. int i, ll;
  414. const double mnt_width = xlen / 8;
  415. double x, xloc, mnt_deriv;
  416. /////////////////////////////////////////////////
  417. // TODO: THREAD ME
  418. /////////////////////////////////////////////////
  419. #pragma acc parallel loop private(x, xloc, mnt_deriv) default(present)
  420. for (ll = 0; ll < NUM_VARS; ll++)
  421. {
  422. for (i = 0; i < nx + 2 * hs; i++)
  423. {
  424. if (ll == ID_WMOM)
  425. {
  426. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] = 0.;
  427. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] = 0.;
  428. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) + i] = 0.;
  429. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs + 1) * (nx + 2 * hs) + i] = 0.;
  430. }
  431. else
  432. {
  433. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) + i];
  434. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) + i];
  435. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) + i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs - 1) * (nx + 2 * hs) + i];
  436. state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs + 1) * (nx + 2 * hs) + i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs - 1) * (nx + 2 * hs) + i];
  437. }
  438. }
  439. }
  440. }
  441. void init()
  442. {
  443. int i, k, ii, kk, ll, inds, i_end;
  444. double x, z, r, u, w, t, hr, ht, nper;
  445. //Set the cell grid size
  446. dx = xlen / nx_glob;
  447. dz = zlen / nz_glob;
  448. nranks = 1;
  449. myrank = 0;
  450. // For simpler version, replace i_beg = 0, nx = nx_glob, left_rank = 0, right_rank = 0;
  451. nper = ((double)nx_glob) / nranks;
  452. i_beg = round(nper * (myrank));
  453. i_end = round(nper * ((myrank) + 1)) - 1;
  454. nx = i_end - i_beg + 1;
  455. left_rank = myrank - 1;
  456. if (left_rank == -1)
  457. left_rank = nranks - 1;
  458. right_rank = myrank + 1;
  459. if (right_rank == nranks)
  460. right_rank = 0;
  461. ////////////////////////////////////////////////////////////////////////////////
  462. ////////////////////////////////////////////////////////////////////////////////
  463. // YOU DON'T NEED TO ALTER ANYTHING BELOW THIS POINT IN THE CODE
  464. ////////////////////////////////////////////////////////////////////////////////
  465. ////////////////////////////////////////////////////////////////////////////////
  466. k_beg = 0;
  467. nz = nz_glob;
  468. //Allocate the model data
  469. state = (double *)malloc((nx + 2 * hs) * (nz + 2 * hs) * NUM_VARS * sizeof(double));
  470. state_tmp = (double *)malloc((nx + 2 * hs) * (nz + 2 * hs) * NUM_VARS * sizeof(double));
  471. flux = (double *)malloc((nx + 1) * (nz + 1) * NUM_VARS * sizeof(double));
  472. tend = (double *)malloc(nx * nz * NUM_VARS * sizeof(double));
  473. hy_dens_cell = (double *)malloc((nz + 2 * hs) * sizeof(double));
  474. hy_dens_theta_cell = (double *)malloc((nz + 2 * hs) * sizeof(double));
  475. hy_dens_int = (double *)malloc((nz + 1) * sizeof(double));
  476. hy_dens_theta_int = (double *)malloc((nz + 1) * sizeof(double));
  477. hy_pressure_int = (double *)malloc((nz + 1) * sizeof(double));
  478. //Define the maximum stable time step based on an assumed maximum wind speed
  479. dt = dmin(dx, dz) / max_speed * cfl;
  480. //Set initial elapsed model time and output_counter to zero
  481. etime = 0.;
  482. output_counter = 0.;
  483. // Display grid information
  484. printf("nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob);
  485. printf("dx,dz: %lf %lf\n", dx, dz);
  486. printf("dt: %lf\n", dt);
  487. //////////////////////////////////////////////////////////////////////////
  488. // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature
  489. //////////////////////////////////////////////////////////////////////////
  490. for (k = 0; k < nz + 2 * hs; k++)
  491. {
  492. for (i = 0; i < nx + 2 * hs; i++)
  493. {
  494. //Initialize the state to zero
  495. for (ll = 0; ll < NUM_VARS; ll++)
  496. {
  497. inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
  498. state[inds] = 0.;
  499. }
  500. //Use Gauss-Legendre quadrature to initialize a hydrostatic balance + temperature perturbation
  501. for (kk = 0; kk < nqpoints; kk++)
  502. {
  503. for (ii = 0; ii < nqpoints; ii++)
  504. {
  505. //Compute the x,z location within the global domain based on cell and quadrature index
  506. x = (i_beg + i - hs + 0.5) * dx + (qpoints[ii] - 0.5) * dx;
  507. z = (k_beg + k - hs + 0.5) * dz + (qpoints[kk] - 0.5) * dz;
  508. //Set the fluid state based on the user's specification (default is injection in this example)
  509. injection(x, z, r, u, w, t, hr, ht);
  510. //Store into the fluid state array
  511. inds = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
  512. state[inds] = state[inds] + r * qweights[ii] * qweights[kk];
  513. inds = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
  514. state[inds] = state[inds] + (r + hr) * u * qweights[ii] * qweights[kk];
  515. inds = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
  516. state[inds] = state[inds] + (r + hr) * w * qweights[ii] * qweights[kk];
  517. inds = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
  518. state[inds] = state[inds] + ((r + hr) * (t + ht) - hr * ht) * qweights[ii] * qweights[kk];
  519. }
  520. }
  521. for (ll = 0; ll < NUM_VARS; ll++)
  522. {
  523. inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
  524. state_tmp[inds] = state[inds];
  525. }
  526. }
  527. }
  528. //Compute the hydrostatic background state over vertical cell averages
  529. for (k = 0; k < nz + 2 * hs; k++)
  530. {
  531. hy_dens_cell[k] = 0.;
  532. hy_dens_theta_cell[k] = 0.;
  533. for (kk = 0; kk < nqpoints; kk++)
  534. {
  535. z = (k_beg + k - hs + 0.5) * dz;
  536. //Set the fluid state based on the user's specification (default is injection in this example)
  537. injection(0., z, r, u, w, t, hr, ht);
  538. hy_dens_cell[k] = hy_dens_cell[k] + hr * qweights[kk];
  539. hy_dens_theta_cell[k] = hy_dens_theta_cell[k] + hr * ht * qweights[kk];
  540. }
  541. }
  542. //Compute the hydrostatic background state at vertical cell interfaces
  543. for (k = 0; k < nz + 1; k++)
  544. {
  545. z = (k_beg + k) * dz;
  546. //Set the fluid state based on the user's specification (default is injection in this example)
  547. injection(0., z, r, u, w, t, hr, ht);
  548. hy_dens_int[k] = hr;
  549. hy_dens_theta_int[k] = hr * ht;
  550. hy_pressure_int[k] = C0 * pow((hr * ht), gamm);
  551. }
  552. }
  553. //This test case is initially balanced but injects fast, cold air from the left boundary near the model top
  554. //x and z are input coordinates at which to sample
  555. //r,u,w,t are output density, u-wind, w-wind, and potential temperature at that location
  556. //hr and ht are output background hydrostatic density and potential temperature at that location
  557. void injection(double x, double z, double &r, double &u, double &w, double &t, double &hr, double &ht)
  558. {
  559. hydro_const_theta(z, hr, ht);
  560. r = 0.;
  561. t = 0.;
  562. u = 0.;
  563. w = 0.;
  564. }
  565. //Establish hydrstatic balance using constant potential temperature (thermally neutral atmosphere)
  566. //z is the input coordinate
  567. //r and t are the output background hydrostatic density and potential temperature
  568. void hydro_const_theta(double z, double &r, double &t)
  569. {
  570. const double theta0 = 300.; //Background potential temperature
  571. const double exner0 = 1.; //Surface-level Exner pressure
  572. double p, exner, rt;
  573. //Establish hydrostatic balance first using Exner pressure
  574. t = theta0; //Potential Temperature at z
  575. exner = exner0 - grav * z / (cp * theta0); //Exner pressure at z
  576. p = p0 * pow(exner, (cp / rd)); //Pressure at z
  577. rt = pow((p / C0), (1. / gamm)); //rho*theta at z
  578. r = rt / t; //Density at z
  579. }
  580. void finalize()
  581. {
  582. free(state);
  583. free(state_tmp);
  584. free(flux);
  585. free(tend);
  586. free(hy_dens_cell);
  587. free(hy_dens_theta_cell);
  588. free(hy_dens_int);
  589. free(hy_dens_theta_int);
  590. free(hy_pressure_int);
  591. }