blas_level_2.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. /*****************************************************************************
  2. *
  3. * MODULE: Grass numerical math interface
  4. * AUTHOR(S): Soeren Gebbert, Berlin (GER) Dec 2006
  5. * soerengebbert <at> googlemail <dot> com
  6. *
  7. * PURPOSE: gras blas implementation
  8. * part of the gmath library
  9. *
  10. * COPYRIGHT: (C) 2010 by the GRASS Development Team
  11. *
  12. * This program is free software under the GNU General Public
  13. * License (>=v2). Read the file COPYING that comes with GRASS
  14. * for details.
  15. *
  16. *****************************************************************************/
  17. #include <math.h>
  18. #include <unistd.h>
  19. #include <stdio.h>
  20. #include <string.h>
  21. #include <stdlib.h>
  22. #include <grass/gmath.h>
  23. #include <grass/gis.h>
  24. #include <grass/gisdefs.h>
  25. #define EPSILON 0.00000000000000001
  26. /*!
  27. * \brief Compute the matrix - vector product
  28. * of matrix A and vector x.
  29. *
  30. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  31. *
  32. * y = A * x
  33. *
  34. *
  35. * \param A (double ** )
  36. * \param x (double *)
  37. * \param y (double *)
  38. * \param rows (int)
  39. * \param cols (int)
  40. * \return (void)
  41. *
  42. * */
  43. void G_math_d_Ax(double **A, double *x, double *y, int rows, int cols)
  44. {
  45. int i, j;
  46. double tmp;
  47. #pragma omp for schedule (static) private(i, j, tmp)
  48. for (i = 0; i < rows; i++) {
  49. tmp = 0;
  50. for (j = cols - 1; j >= 0; j--) {
  51. tmp += A[i][j] * x[j];
  52. }
  53. y[i] = tmp;
  54. }
  55. return;
  56. }
  57. /*!
  58. * \brief Compute the matrix - vector product
  59. * of matrix A and vector x.
  60. *
  61. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  62. *
  63. * y = A * x
  64. *
  65. *
  66. * \param A (float ** )
  67. * \param x (float *)
  68. * \param y (float *)
  69. * \param rows (int)
  70. * \param cols (int)
  71. * \return (void)
  72. *
  73. * */
  74. void G_math_f_Ax(float **A, float *x, float *y, int rows, int cols)
  75. {
  76. int i, j;
  77. float tmp;
  78. #pragma omp for schedule (static) private(i, j, tmp)
  79. for (i = 0; i < rows; i++) {
  80. tmp = 0;
  81. for (j = cols - 1; j >= 0; j--) {
  82. tmp += A[i][j] * x[j];
  83. }
  84. y[i] = tmp;
  85. }
  86. return;
  87. }
  88. /*!
  89. * \brief Compute the dyadic product of two vectors.
  90. * The result is stored in the matrix A.
  91. *
  92. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  93. *
  94. * A = x * y^T
  95. *
  96. *
  97. * \param x (double *)
  98. * \param y (double *)
  99. * \param A (float **) -- matrix of size rows*cols
  100. * \param rows (int) -- length of vector x
  101. * \param cols (int) -- lengt of vector y
  102. * \return (void)
  103. *
  104. * */
  105. void G_math_d_x_dyad_y(double *x, double *y, double **A, int rows, int cols)
  106. {
  107. int i, j;
  108. #pragma omp for schedule (static) private(i, j)
  109. for (i = 0; i < rows; i++) {
  110. for (j = cols - 1; j >= 0; j--) {
  111. A[i][j] = x[i] * y[j];
  112. }
  113. }
  114. return;
  115. }
  116. /*!
  117. * \brief Compute the dyadic product of twMo vectors.
  118. * The result is stored in the matrix A.
  119. *
  120. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  121. *
  122. * A = x * y^T
  123. *
  124. *
  125. * \param x (float *)
  126. * \param y (float *)
  127. * \param A (float **= -- matrix of size rows*cols
  128. * \param rows (int) -- length of vector x
  129. * \param cols (int) -- lengt of vector y
  130. * \return (void)
  131. *
  132. * */
  133. void G_math_f_x_dyad_y(float *x, float *y, float **A, int rows, int cols)
  134. {
  135. int i, j;
  136. #pragma omp for schedule (static) private(i, j)
  137. for (i = 0; i < rows; i++) {
  138. for (j = cols - 1; j >= 0; j--) {
  139. A[i][j] = x[i] * y[j];
  140. }
  141. }
  142. return;
  143. }
  144. /*!
  145. * \brief Compute the scaled matrix - vector product
  146. * of matrix double **A and vector x and y.
  147. *
  148. * z = a * A * x + b * y
  149. *
  150. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  151. *
  152. *
  153. * \param A (double **)
  154. * \param x (double *)
  155. * \param y (double *)
  156. * \param a (double)
  157. * \param b (double)
  158. * \param z (double *)
  159. * \param rows (int)
  160. * \param cols (int)
  161. * \return (void)
  162. *
  163. * */
  164. void G_math_d_aAx_by(double **A, double *x, double *y, double a, double b,
  165. double *z, int rows, int cols)
  166. {
  167. int i, j;
  168. double tmp;
  169. /*catch specific cases */
  170. if (a == b) {
  171. #pragma omp for schedule (static) private(i, j, tmp)
  172. for (i = 0; i < rows; i++) {
  173. tmp = 0;
  174. for (j = cols - 1; j >= 0; j--) {
  175. tmp += A[i][j] * x[j] + y[j];
  176. }
  177. z[i] = a * tmp;
  178. }
  179. }
  180. else if (b == -1.0) {
  181. #pragma omp for schedule (static) private(i, j, tmp)
  182. for (i = 0; i < rows; i++) {
  183. tmp = 0;
  184. for (j = cols - 1; j >= 0; j--) {
  185. tmp += a * A[i][j] * x[j] - y[j];
  186. }
  187. z[i] = tmp;
  188. }
  189. }
  190. else if (b == 0.0) {
  191. #pragma omp for schedule (static) private(i, j, tmp)
  192. for (i = 0; i < rows; i++) {
  193. tmp = 0;
  194. for (j = cols - 1; j >= 0; j--) {
  195. tmp += A[i][j] * x[j];
  196. }
  197. z[i] = a * tmp;
  198. }
  199. }
  200. else if (a == -1.0) {
  201. #pragma omp for schedule (static) private(i, j, tmp)
  202. for (i = 0; i < rows; i++) {
  203. tmp = 0;
  204. for (j = cols - 1; j >= 0; j--) {
  205. tmp += b * y[j] - A[i][j] * x[j];
  206. }
  207. z[i] = tmp;
  208. }
  209. }
  210. else {
  211. #pragma omp for schedule (static) private(i, j, tmp)
  212. for (i = 0; i < rows; i++) {
  213. tmp = 0;
  214. for (j = cols - 1; j >= 0; j--) {
  215. tmp += a * A[i][j] * x[j] + b * y[j];
  216. }
  217. z[i] = tmp;
  218. }
  219. }
  220. return;
  221. }
  222. /*!
  223. * \brief Compute the scaled matrix - vector product
  224. * of matrix A and vectors x and y.
  225. *
  226. * z = a * A * x + b * y
  227. *
  228. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  229. *
  230. *
  231. * \param A (float **)
  232. * \param x (float *)
  233. * \param y (float *)
  234. * \param a (float)
  235. * \param b (float)
  236. * \param z (float *)
  237. * \param rows (int)
  238. * \param cols (int)
  239. * \return (void)
  240. *
  241. * */
  242. void G_math_f_aAx_by(float **A, float *x, float *y, float a, float b,
  243. float *z, int rows, int cols)
  244. {
  245. int i, j;
  246. float tmp;
  247. /*catch specific cases */
  248. if (a == b) {
  249. #pragma omp for schedule (static) private(i, j, tmp)
  250. for (i = 0; i < rows; i++) {
  251. tmp = 0;
  252. for (j = cols - 1; j >= 0; j--) {
  253. tmp += A[i][j] * x[j] + y[j];
  254. }
  255. z[i] = a * tmp;
  256. }
  257. }
  258. else if (b == -1.0) {
  259. #pragma omp for schedule (static) private(i, j, tmp)
  260. for (i = 0; i < rows; i++) {
  261. tmp = 0;
  262. for (j = cols - 1; j >= 0; j--) {
  263. tmp += a * A[i][j] * x[j] - y[j];
  264. }
  265. z[i] = tmp;
  266. }
  267. }
  268. else if (b == 0.0) {
  269. #pragma omp for schedule (static) private(i, j, tmp)
  270. for (i = 0; i < rows; i++) {
  271. tmp = 0;
  272. for (j = cols - 1; j >= 0; j--) {
  273. tmp += A[i][j] * x[j];
  274. }
  275. z[i] = a * tmp;
  276. }
  277. }
  278. else if (a == -1.0) {
  279. #pragma omp for schedule (static) private(i, j, tmp)
  280. for (i = 0; i < rows; i++) {
  281. tmp = 0;
  282. for (j = cols - 1; j >= 0; j--) {
  283. tmp += b * y[j] - A[i][j] * x[j];
  284. }
  285. z[i] = tmp;
  286. }
  287. }
  288. else {
  289. #pragma omp for schedule (static) private(i, j, tmp)
  290. for (i = 0; i < rows; i++) {
  291. tmp = 0;
  292. for (j = cols - 1; j >= 0; j--) {
  293. tmp += a * A[i][j] * x[j] + b * y[j];
  294. }
  295. z[i] = tmp;
  296. }
  297. }
  298. return;
  299. }
  300. /*!
  301. * \fn int G_math_d_A_T(double **A, int rows)
  302. *
  303. * \brief Compute the transposition of matrix A.
  304. * Matrix A will be overwritten.
  305. *
  306. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  307. *
  308. * Returns 0.
  309. *
  310. * \param A (double **)
  311. * \param rows (int)
  312. * \return int
  313. */
  314. int G_math_d_A_T(double **A, int rows)
  315. {
  316. int i, j;
  317. double tmp;
  318. #pragma omp for schedule (static) private(i, j, tmp)
  319. for (i = 0; i < rows; i++)
  320. for (j = 0; j < i; j++) {
  321. tmp = A[i][j];
  322. A[i][j] = A[j][i];
  323. A[j][i] = tmp;
  324. }
  325. return 0;
  326. }
  327. /*!
  328. * \fn int G_math_f_A_T(float **A, int rows)
  329. *
  330. * \brief Compute the transposition of matrix A.
  331. * Matrix A will be overwritten.
  332. *
  333. * This function is multi-threaded with OpenMP and can be called within a parallel OpenMP region.
  334. *
  335. * Returns 0.
  336. *
  337. * \param A (float **)
  338. * \param rows (int)
  339. * \return int
  340. */
  341. int G_math_f_A_T(float **A, int rows)
  342. {
  343. int i, j;
  344. float tmp;
  345. #pragma omp for schedule (static) private(i, j, tmp)
  346. for (i = 0; i < rows; i++)
  347. for (j = 0; j < i; j++) {
  348. tmp = A[i][j];
  349. A[i][j] = A[j][i];
  350. A[j][i] = tmp;
  351. }
  352. return 0;
  353. }