blas_level_1.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. /*****************************************************************************
  2. *
  3. * MODULE: Grass numerical math interface
  4. * AUTHOR(S): Soeren Gebbert, Berlin (GER) Dec 2006
  5. * soerengebbert <at> googlemail <dot> com
  6. *
  7. * PURPOSE: grass blas implementation
  8. * part of the gmath library
  9. *
  10. * COPYRIGHT: (C) 2010 by the GRASS Development Team
  11. *
  12. * This program is free software under the GNU General Public
  13. * License (>=v2). Read the file COPYING that comes with GRASS
  14. * for details.
  15. *
  16. *****************************************************************************/
  17. #include <math.h>
  18. #include <unistd.h>
  19. #include <stdio.h>
  20. #include <string.h>
  21. #include <stdlib.h>
  22. #include <grass/gis.h>
  23. #include <grass/gmath.h>
  24. /* **************************************************************** */
  25. /* *************** D O U B L E ************************************ */
  26. /* **************************************************************** */
  27. /*!
  28. * \brief Compute the dot product of vector x and y
  29. *
  30. * \f[ a = {\bf x}^T {\bf y} \f]
  31. *
  32. * The functions creates its own parallel OpenMP region.
  33. * It can be called within a parallel OpenMP region if nested parallelism is supported
  34. * by the compiler.
  35. *
  36. * \param x (double *)
  37. * \param y (double *)
  38. * \param value (double *) -- the return value
  39. * \param rows (int)
  40. * \return (void)
  41. *
  42. * */
  43. void G_math_d_x_dot_y(double *x, double *y, double *value, int rows)
  44. {
  45. int i;
  46. double s = 0.0;
  47. #pragma omp parallel for schedule (static) reduction(+:s)
  48. for (i = rows - 1; i >= 0; i--) {
  49. s += x[i] * y[i];
  50. }
  51. #pragma omp single
  52. {
  53. *value = s;
  54. }
  55. return;
  56. }
  57. /*!
  58. * \brief Compute the euclid norm of vector x
  59. *
  60. * \f[ a = ||{\bf x}||_2 \f]
  61. *
  62. * The functions creates its own parallel OpenMP region.
  63. * It can be called within a parallel OpenMP region if nested parallelism is supported
  64. * by the compiler.
  65. *
  66. * \param x (double *) -- the vector
  67. * \param value (double *) -- the return value
  68. * \param rows (int)
  69. * \return (void)
  70. *
  71. * */
  72. void G_math_d_euclid_norm(double *x, double *value, int rows)
  73. {
  74. int i;
  75. double s = 0.0;
  76. #pragma omp parallel for schedule (static) reduction(+:s)
  77. for (i = rows - 1; i >= 0; i--) {
  78. s += x[i] * x[i];
  79. }
  80. #pragma omp single
  81. {
  82. *value = sqrt(s);
  83. }
  84. return;
  85. }
  86. /*!
  87. * \brief Compute the asum norm of vector x
  88. *
  89. * \f[ a = ||{\bf x}||_1 \f]
  90. *
  91. * The functions creates its own parallel OpenMP region.
  92. * It can be called within a parallel OpenMP region if nested parallelism is supported
  93. * by the compiler.
  94. *
  95. * \param x (double *)-- the vector
  96. * \param value (double *) -- the return value
  97. * \param rows (int)
  98. * \return (void)
  99. *
  100. * */
  101. void G_math_d_asum_norm(double *x, double *value, int rows)
  102. {
  103. int i = 0;
  104. double s = 0.0;
  105. #pragma omp parallel for schedule (static) reduction(+:s)
  106. for (i = rows - 1; i >= 0; i--) {
  107. s += fabs(x[i]);
  108. }
  109. #pragma omp single
  110. {
  111. *value = s;
  112. }
  113. return;
  114. }
  115. /*!
  116. * \brief Compute the maximum norm of vector x
  117. *
  118. * \f[ a = ||{\bf x}||_\infty \f]
  119. *
  120. * This function is not multi-threaded
  121. *
  122. * \param x (double *)-- the vector
  123. * \param value (double *) -- the return value
  124. * \param rows (int)
  125. * \return (void)
  126. *
  127. * */
  128. void G_math_d_max_norm(double *x, double *value, int rows)
  129. {
  130. int i;
  131. double max = 0.0;
  132. max = fabs(x[rows - 1]);
  133. for (i = rows - 2; i >= 0; i--) {
  134. if (max < fabs(x[i]))
  135. max = fabs(x[i]);
  136. }
  137. *value = max;
  138. }
  139. /*!
  140. * \brief Scales vectors x and y with the scalars a and b and adds them
  141. *
  142. * \f[ {\bf z} = a{\bf x} + b{\bf y} \f]
  143. *
  144. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  145. *
  146. * \param x (double *)
  147. * \param y (double *)
  148. * \param z (double *)
  149. * \param a (double)
  150. * \param b (double)
  151. * \param rows (int)
  152. * \return (void)
  153. *
  154. * */
  155. void G_math_d_ax_by(double *x, double *y, double *z, double a, double b,
  156. int rows)
  157. {
  158. int i;
  159. /*find specific cases */
  160. if (b == 0.0) {
  161. #pragma omp for schedule (static)
  162. for (i = rows - 1; i >= 0; i--) {
  163. z[i] = a * x[i];
  164. }
  165. }
  166. else if ((a == 1.0) && (b == 1.0)) {
  167. #pragma omp for schedule (static)
  168. for (i = rows - 1; i >= 0; i--) {
  169. z[i] = x[i] + y[i];
  170. }
  171. }
  172. else if ((a == 1.0) && (b == -1.0)) {
  173. #pragma omp for schedule (static)
  174. for (i = rows - 1; i >= 0; i--) {
  175. z[i] = x[i] - y[i];
  176. }
  177. }
  178. else if (a == b) {
  179. #pragma omp for schedule (static)
  180. for (i = rows - 1; i >= 0; i--) {
  181. z[i] = a * (x[i] + y[i]);
  182. }
  183. }
  184. else if (b == -1.0) {
  185. #pragma omp for schedule (static)
  186. for (i = rows - 1; i >= 0; i--) {
  187. z[i] = a * x[i] - y[i];
  188. }
  189. }
  190. else if (b == 1.0) {
  191. #pragma omp for schedule (static)
  192. for (i = rows - 1; i >= 0; i--) {
  193. z[i] = a * x[i] + y[i];
  194. }
  195. }
  196. else {
  197. #pragma omp for schedule (static)
  198. for (i = rows - 1; i >= 0; i--) {
  199. z[i] = a * x[i] + b * y[i];
  200. }
  201. }
  202. return;
  203. }
  204. /*!
  205. * \brief Copy the vector x to y
  206. *
  207. * \f[ {\bf y} = {\bf x} \f]
  208. *
  209. * This function is not multi-threaded
  210. *
  211. * \param x (double *)
  212. * \param y (double *)
  213. * \param rows (int)
  214. *
  215. * */
  216. void G_math_d_copy(double *x, double *y, int rows)
  217. {
  218. y = memcpy(y, x, rows * sizeof(double));
  219. return;
  220. }
  221. /* **************************************************************** */
  222. /* *************** F L O A T ************************************** */
  223. /* **************************************************************** */
  224. /*!
  225. * \brief Compute the dot product of vector x and y
  226. *
  227. * \f[ a = {\bf x}^T {\bf y} \f]
  228. *
  229. * The functions creates its own parallel OpenMP region.
  230. * It can be called within a parallel OpenMP region if nested parallelism is supported
  231. * by the compiler.
  232. *
  233. * \param x (float *)
  234. * \param y (float *)
  235. * \param value (float *) -- the return value
  236. * \param rows (int)
  237. * \return (void)
  238. *
  239. * */
  240. void G_math_f_x_dot_y(float *x, float *y, float *value, int rows)
  241. {
  242. int i;
  243. float s = 0.0;
  244. #pragma omp parallel for schedule (static) reduction(+:s)
  245. for (i = rows - 1; i >= 0; i--) {
  246. s += x[i] * y[i];
  247. }
  248. #pragma omp single
  249. {
  250. *value = s;
  251. }
  252. return;
  253. }
  254. /*!
  255. * \brief Compute the euclid norm of vector x
  256. *
  257. * \f[ a = ||{\bf x}||_2 \f]
  258. *
  259. * The functions creates its own parallel OpenMP region.
  260. * It can be called within a parallel OpenMP region if nested parallelism is supported
  261. * by the compiler.
  262. *
  263. * \param x (double *) -- the vector
  264. * \param value (float *) -- the return value
  265. * \param rows (int)
  266. * \return (void)
  267. *
  268. * */
  269. void G_math_f_euclid_norm(float *x, float *value, int rows)
  270. {
  271. int i;
  272. float s = 0.0;
  273. #pragma omp parallel for schedule (static) reduction(+:s)
  274. for (i = rows - 1; i >= 0; i--) {
  275. s += x[i] * x[i];
  276. }
  277. #pragma omp single
  278. {
  279. *value = sqrt(s);
  280. }
  281. return;
  282. }
  283. /*!
  284. * \brief Compute the asum norm of vector x
  285. *
  286. * \f[ a = ||{\bf x}||_1 \f]
  287. *
  288. * The functions creates its own parallel OpenMP region.
  289. * It can be called within a parallel OpenMP region if nested parallelism is supported
  290. * by the compiler.
  291. *
  292. * \param x (float *)-- the vector
  293. * \param value (float *) -- the return value
  294. * \param rows (int)
  295. * \return (void)
  296. *
  297. * */
  298. void G_math_f_asum_norm(float *x, float *value, int rows)
  299. {
  300. int i;
  301. int count = 0;
  302. float s = 0.0;
  303. #pragma omp parallel for schedule (static) private(i) reduction(+:s, count)
  304. for (i = 0; i < rows; i++) {
  305. s += fabs(x[i]);
  306. count++;
  307. }
  308. #pragma omp single
  309. {
  310. *value = s;
  311. }
  312. return;
  313. }
  314. /*!
  315. * \brief Compute the maximum norm of vector x
  316. *
  317. * \f[ a = ||{\bf x}||_\infty \f]
  318. *
  319. * This function is not multi-threaded
  320. *
  321. * \param x (float *)-- the vector
  322. * \param value (float *) -- the return value
  323. * \param rows (int)
  324. * \return (void)
  325. *
  326. * */
  327. void G_math_f_max_norm(float *x, float *value, int rows)
  328. {
  329. int i;
  330. float max = 0.0;
  331. max = fabs(x[rows - 1]);
  332. for (i = rows - 2; i >= 0; i--) {
  333. if (max < fabs(x[i]))
  334. max = fabs(x[i]);
  335. }
  336. *value = max;
  337. return;
  338. }
  339. /*!
  340. * \brief Scales vectors x and y with the scalars a and b and adds them
  341. *
  342. * \f[ {\bf z} = a{\bf x} + b{\bf y} \f]
  343. *
  344. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  345. *
  346. * \param x (float *)
  347. * \param y (float *)
  348. * \param z (float *)
  349. * \param a (float)
  350. * \param b (float)
  351. * \param rows (int)
  352. * \return (void)
  353. *
  354. * */
  355. void G_math_f_ax_by(float *x, float *y, float *z, float a, float b, int rows)
  356. {
  357. int i;
  358. /*find specific cases */
  359. if (b == 0.0) {
  360. #pragma omp for schedule (static)
  361. for (i = rows - 1; i >= 0; i--) {
  362. z[i] = a * x[i];
  363. }
  364. }
  365. else if ((a == 1.0) && (b == 1.0)) {
  366. #pragma omp for schedule (static)
  367. for (i = rows - 1; i >= 0; i--) {
  368. z[i] = x[i] + y[i];
  369. }
  370. }
  371. else if ((a == 1.0) && (b == -1.0)) {
  372. #pragma omp for schedule (static)
  373. for (i = rows - 1; i >= 0; i--) {
  374. z[i] = x[i] - y[i];
  375. }
  376. }
  377. else if (a == b) {
  378. #pragma omp for schedule (static)
  379. for (i = rows - 1; i >= 0; i--) {
  380. z[i] = a * (x[i] + y[i]);
  381. }
  382. }
  383. else if (b == -1.0) {
  384. #pragma omp for schedule (static)
  385. for (i = rows - 1; i >= 0; i--) {
  386. z[i] = a * x[i] - y[i];
  387. }
  388. }
  389. else if (b == 1.0) {
  390. #pragma omp for schedule (static)
  391. for (i = rows - 1; i >= 0; i--) {
  392. z[i] = a * x[i] + y[i];
  393. }
  394. }
  395. else {
  396. #pragma omp for schedule (static)
  397. for (i = rows - 1; i >= 0; i--) {
  398. z[i] = a * x[i] + b * y[i];
  399. }
  400. }
  401. return;
  402. }
  403. /*!
  404. * \brief Copy the vector x to y
  405. *
  406. * \f[ {\bf y} = {\bf x} \f]
  407. *
  408. * This function is not multi-threaded
  409. *
  410. * \param x (float *)
  411. * \param y (float *)
  412. * \param rows (int)
  413. *
  414. * */
  415. void G_math_f_copy(float *x, float *y, int rows)
  416. {
  417. y = memcpy(y, x, rows * sizeof(float));
  418. return;
  419. }
  420. /* **************************************************************** */
  421. /* *************** I N T E G E R ********************************** */
  422. /* **************************************************************** */
  423. /*!
  424. * \brief Compute the dot product of vector x and y
  425. *
  426. * \f[ a = {\bf x}^T {\bf y} \f]
  427. *
  428. * The functions creates its own parallel OpenMP region.
  429. * It can be called within a parallel OpenMP region if nested parallelism is supported
  430. * by the compiler.
  431. *
  432. * \param x (int *)
  433. * \param y (int *)
  434. * \param value (double *) -- the return value
  435. * \param rows (int)
  436. * \return (void)
  437. *
  438. * */
  439. void G_math_i_x_dot_y(int *x, int *y, double *value, int rows)
  440. {
  441. int i;
  442. double s = 0.0;
  443. #pragma omp parallel for schedule (static) reduction(+:s)
  444. for (i = rows - 1; i >= 0; i--) {
  445. s += x[i] * y[i];
  446. }
  447. #pragma omp single
  448. {
  449. *value = s;
  450. }
  451. return;
  452. }
  453. /*!
  454. * \brief Compute the euclid norm of vector x
  455. *
  456. * \f[ a = ||{\bf x}||_2 \f]
  457. *
  458. * The functions creates its own parallel OpenMP region.
  459. * It can be called within a parallel OpenMP region if nested parallelism is supported
  460. * by the compiler.
  461. *
  462. * \param x (int *) -- the vector
  463. * \param value (double *) -- the return value
  464. * \param rows (int)
  465. * \return (void)
  466. *
  467. * */
  468. void G_math_i_euclid_norm(int *x, double *value, int rows)
  469. {
  470. int i;
  471. double s = 0.0;
  472. #pragma omp parallel for schedule (static) reduction(+:s)
  473. for (i = rows - 1; i >= 0; i--) {
  474. s += x[i] * x[i];
  475. }
  476. #pragma omp single
  477. {
  478. *value = sqrt(s);
  479. }
  480. return;
  481. }
  482. /*!
  483. * \brief Compute the asum norm of vector x
  484. *
  485. * \f[ a = ||{\bf x}||_1 \f]
  486. *
  487. * The functions creates its own parallel OpenMP region.
  488. * It can be called within a parallel OpenMP region if nested parallelism is supported
  489. * by the compiler.
  490. *
  491. * \param x (int *)-- the vector
  492. * \param value (double *) -- the return value
  493. * \param rows (int)
  494. * \return (void)
  495. *
  496. * */
  497. void G_math_i_asum_norm(int *x, double *value, int rows)
  498. {
  499. int i;
  500. double s = 0.0;
  501. #pragma omp parallel for schedule (static) reduction(+:s)
  502. for (i = rows - 1; i >= 0; i--) {
  503. s += (double)abs(x[i]);
  504. }
  505. #pragma omp single
  506. {
  507. *value = s;
  508. }
  509. return;
  510. }
  511. /*!
  512. * \brief Compute the maximum norm of vector x
  513. *
  514. * \f[ a = ||{\bf x}||_\infty \f]
  515. *
  516. * This function is not multi-threaded
  517. *
  518. * \param x (int *)-- the vector
  519. * \param value (int *) -- the return value
  520. * \param rows (int)
  521. * \return (void)
  522. *
  523. * */
  524. void G_math_i_max_norm(int *x, int *value, int rows)
  525. {
  526. int i;
  527. int max = 0.0;
  528. max = abs(x[rows - 1]);
  529. for (i = rows - 2; i >= 0; i--) {
  530. if (max < abs(x[i]))
  531. max = abs(x[i]);
  532. }
  533. *value = max;
  534. }
  535. /*!
  536. * \brief Scales vectors x and y with the scalars a and b and adds them
  537. *
  538. * \f[ {\bf z} = a{\bf x} + b{\bf y} \f]
  539. *
  540. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  541. *
  542. * \param x (int *)
  543. * \param y (int *)
  544. * \param z (int *)
  545. * \param a (int)
  546. * \param b (int)
  547. * \param rows (int)
  548. * \return (void)
  549. *
  550. * */
  551. void G_math_i_ax_by(int *x, int *y, int *z, int a, int b, int rows)
  552. {
  553. int i;
  554. /*find specific cases */
  555. if (b == 0.0) {
  556. #pragma omp for schedule (static)
  557. for (i = rows - 1; i >= 0; i--) {
  558. z[i] = a * x[i];
  559. }
  560. }
  561. else if ((a == 1.0) && (b == 1.0)) {
  562. #pragma omp for schedule (static)
  563. for (i = rows - 1; i >= 0; i--) {
  564. z[i] = x[i] + y[i];
  565. }
  566. }
  567. else if ((a == 1.0) && (b == -1.0)) {
  568. #pragma omp for schedule (static)
  569. for (i = rows - 1; i >= 0; i--) {
  570. z[i] = x[i] - y[i];
  571. }
  572. }
  573. else if (a == b) {
  574. #pragma omp for schedule (static)
  575. for (i = rows - 1; i >= 0; i--) {
  576. z[i] = a * (x[i] + y[i]);
  577. }
  578. }
  579. else if (b == -1.0) {
  580. #pragma omp for schedule (static)
  581. for (i = rows - 1; i >= 0; i--) {
  582. z[i] = a * x[i] - y[i];
  583. }
  584. }
  585. else if (b == 1.0) {
  586. #pragma omp for schedule (static)
  587. for (i = rows - 1; i >= 0; i--) {
  588. z[i] = a * x[i] + y[i];
  589. }
  590. }
  591. else {
  592. #pragma omp for schedule (static)
  593. for (i = rows - 1; i >= 0; i--) {
  594. z[i] = a * x[i] + b * y[i];
  595. }
  596. }
  597. return;
  598. }
  599. /*!
  600. * \brief Copy the vector x to y
  601. *
  602. * \f[ {\bf y} = {\bf x} \f]
  603. *
  604. * This function is not multi-threaded
  605. *
  606. * \param x (int *)
  607. * \param y (int *)
  608. * \param rows (int)
  609. *
  610. * */
  611. void G_math_i_copy(int *x, int *y, int rows)
  612. {
  613. y = memcpy(y, x, rows * sizeof(int));
  614. return;
  615. }