소스 검색

HPCC-17543 Work around misaligned double access in eclblas::dgemm

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 8년 전
부모
커밋
bcf397408e
1개의 변경된 파일, 2줄 추가 및 1줄 삭제
  1. 2 1
      plugins/eclblas/dgemm.cpp

+ 2 - 1
plugins/eclblas/dgemm.cpp

@@ -28,6 +28,7 @@ ECLBLAS_CALL void dgemm(bool & __isAllResult, size32_t & __lenResult,
                         double alpha, bool isAllA, size32_t lenA, const void* A,
                         bool isAllB, size32_t lenB, const void* B, double beta,
                         bool isAllC, size32_t lenC, const void* C) {
+  typedef double __attribute__((aligned(1))) misaligned_double; // prevent gcc from assuming the data is correctly aligned.
   unsigned int lda = transposeA==0 ? m  : k;
   unsigned int ldb = transposeB==0 ? k  : n;
   unsigned int ldc = m;
@@ -35,7 +36,7 @@ ECLBLAS_CALL void dgemm(bool & __isAllResult, size32_t & __lenResult,
   __lenResult = m * n * sizeof(double);
   double *result = (double*) rtlMalloc(__lenResult);
   // populate if provided
-  for(uint32_t i=0; i<m*n; i++) result[i] = (__lenResult==lenC) ?((double*)C)[i] :0.0;
+  for(uint32_t i=0; i<m*n; i++) result[i] = (__lenResult==lenC) ?((misaligned_double*)C)[i] :0.0;
   cblas_dgemm(CblasColMajor,
               transposeA ? CblasTrans : CblasNoTrans,
               transposeB ? CblasTrans : CblasNoTrans,