浏览代码

Merge pull request #10006 from ghalliday/issue17641

HPCC-17641 Implement constant folding of external functions for WIN64

Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 8 年之前
父节点
当前提交
449dd83976
共有 7 个文件被更改,包括 100 次插入和 15 次删除
  1. 2 2
      common/thorhelper/roxierow.cpp
  2. 11 0
      ecl/hql/CMakeLists.txt
  3. 9 3
      ecl/hql/hqlfold.cpp
  4. 55 0
      ecl/hql/hqlfoldasm.asm
  5. 16 7
      ecl/hql/hqlstack.hpp
  6. 1 1
      system/include/platform.h
  7. 6 2
      system/jlib/jexcept.cpp

+ 2 - 2
common/thorhelper/roxierow.cpp

@@ -342,7 +342,7 @@ public:
         //This test should get constant folded to avoid the decrement when not checked.
         if (CHECKER::extraSize)
             newCapacity -= CHECKER::extraSize;
-        allocatedSize = newCapacity;
+        allocatedSize = (size32_t)newCapacity;
         return row;
     }
 
@@ -353,7 +353,7 @@ public:
         void * newrow = heap->resizeRow(row, oldsize, newSize+CHECKER::extraSize, newCapacity);
         if (CHECKER::extraSize)
             newCapacity -= CHECKER::extraSize;
-        size = newCapacity;
+        size = (size32_t)newCapacity;
         return newrow;
     }
 

+ 11 - 0
ecl/hql/CMakeLists.txt

@@ -26,6 +26,15 @@
 
 project( hql ) 
 
+set(extrasources)
+if (WIN32)
+  if (ARCH64BIT)
+    # 64-bit Windows builds do not support inline assembler, so the folding call stub is defined in a separate MASM file (hqlfoldasm.asm) and added to the sources via ${extrasources}
+    enable_language(ASM_MASM)
+    set(extrasources hqlfoldasm.asm)
+  endif()
+endif()
+
 set (   SRCS 
         hqlatoms.cpp
         hqlattr.cpp
@@ -57,6 +66,8 @@ set (   SRCS
         hqlxmldb.cpp
         reservedwords.cpp
 
+        ${extrasources}
+
         hqlgram.y
         hqllex.l
          

+ 9 - 3
ecl/hql/hqlfold.cpp

@@ -710,6 +710,10 @@ void *loadExternalEntryPoint(IHqlExpression* expr, unsigned foldOptions, ITempla
     return fh;
 }
 
+#if defined(_WIN32) && defined(_ARCH_X86_64_)
+extern __int64 foldExternalCallStub(void * fh, double * doubleresult, size_t len, void * params); // Implemented in hqlfoldasm.asm: copies len bytes from params to the stack, calls fh, returns its integer result and stores the raw xmm0 bits in *doubleresult
+#endif
+
 IValue * doFoldExternalCall(IHqlExpression* expr, unsigned foldOptions, ITemplateContext *templateContext, const char *library, const char *entrypoint, void *fh)
 {
     // NOTE - on OSX there are compiler bugs that prevent exceptions thrown from within this function from properly unwinding.
@@ -870,7 +874,9 @@ IValue * doFoldExternalCall(IHqlExpression* expr, unsigned foldOptions, ITemplat
 #ifdef _WIN32
  // Note - we assume X86/X86_64 Procedure Call Standard
  #if defined (_ARCH_X86_64_)
-        UNIMPLEMENTED;
+
+        int64result = foldExternalCallStub(fh, &doubleresult, len, strbuf);
+        intresult = (int)int64result;
  #elif defined (_ARCH_X86_)
         _asm{
         ;save registers that will be used
@@ -1278,7 +1284,7 @@ IValue * doFoldExternalCall(IHqlExpression* expr, unsigned foldOptions, ITemplat
 #else
             tgt = (char *)intresult;
 #endif
-            tlen = retUCharStar ? rtlUnicodeStrlen((UChar *)tgt) : strlen(tgt);
+            tlen = retUCharStar ? rtlUnicodeStrlen((UChar *)tgt) : (size32_t)strlen(tgt);
         }
         
         Linked<ITypeInfo> resultType = retType;
@@ -1556,7 +1562,7 @@ IHqlExpression *deserializeConstantSet(ITypeInfo *type, bool isAll, size32_t len
             case type_varstring:
                 values.append(*createConstant(data));
                 if (size==UNKNOWN_LENGTH)
-                    size = strlen(data)+1;
+                    size = (size32_t)(strlen(data)+1);
                 break;
             case type_string:
                 if (size==UNKNOWN_LENGTH)

+ 55 - 0
ecl/hql/hqlfoldasm.asm

@@ -0,0 +1,55 @@
+; extern __int64 foldExternalCallStub(void * func, double * doubleresult, size_t len, void * params);
+; default calling convention:  rcx, rdx, r8, r9
+; func= rcx, doubleresult = rdx, len = r8, params = r9
+; rax, r10, r11, xmm4 and xmm5 are considered volatile
+; result returned in xmm0 or rax
+; Copies len bytes of marshalled arguments from params to the stack, calls func, stores the bits of xmm0
+; in *doubleresult and returns the callee's rax.  len must be >= 32 (the four register home slots) and a
+; multiple of 16 so that rsp is still 16 byte aligned at the call.
+
+.code
+?foldExternalCallStub@@YA_JPEAXPEAN_K0@Z proc
+
+        push   rdx          ; address for double results saved until after the call
+        push   rbx
+        sub    rsp, 8       ; ensure the stack is 16byte aligned
+        mov    rbx, r8      ; save the length in rbx so we can restore the stack after the call
+
+        ; reserve the whole parameter area first and copy into it using an offset, so that no copied
+        ; data ever sits below rsp (memory below the stack pointer may be overwritten asynchronously)
+        sub    rsp, r8
+        xor    r10, r10     ; r10 = byte offset of the next 8 byte slot to copy
+
+    loop1:
+        mov    rax, [r9+r10]
+        mov    [rsp+r10], rax
+        add    r10, 8
+        cmp    r10, r8
+        jb     loop1        ; unsigned compare: terminates even if len is not a multiple of 8
+
+        ; move the function pointer to a volatile register
+        mov  r10, rcx
+
+        ; the 1st four integer arguments are passed in registers, but the space is still reserved on the stack
+        mov rcx, [rsp]
+        mov rdx, [rsp+8]
+        mov r8, [rsp+16]
+        mov r9, [rsp+24]
+
+        call   r10
+
+        ; release the parameter area (rbx is preserved by the callee)
+        add rsp, rbx
+
+        ; restore registers
+        add rsp, 8
+        pop rbx
+        pop rdx
+
+        ; save any floating point result - rax is left untouched so the integer result is returned as is
+        movd r8, xmm0
+        mov [rdx], r8
+        ret
+
+?foldExternalCallStub@@YA_JPEAXPEAN_K0@Z endp
+end

+ 16 - 7
ecl/hql/hqlstack.hpp

@@ -30,11 +30,19 @@
 #define MAXARGS          32
 
 #if defined (_ARCH_X86_64_)
- #define ALIGNMENT 8
- #define REGSIZE 8
- #define MAXFPREGS 8
- #define REGPARAMS 6
- #define EVEN_STACK_ALIGNMENT
+ #if defined(_WIN32)
+  #define ALIGNMENT 8
+  #define REGSIZE 8
+  #define MAXFPREGS 0       // Not yet implemented - fix if someone complains
+  #define REGPARAMS 4
+  #define EVEN_STACK_ALIGNMENT
+#else
+  #define ALIGNMENT 8
+  #define REGSIZE 8
+  #define MAXFPREGS 8
+  #define REGPARAMS 6
+  #define EVEN_STACK_ALIGNMENT
+#endif
 #elif defined (_ARCH_X86_)
  #define ALIGNMENT 4
  #define REGSIZE 4
@@ -72,6 +80,7 @@
  #endif
 #endif
 
+#define NUMFPREGS (MAXFPREGS ? MAXFPREGS : 1) // Dimension for the fpRegs/fpSizes arrays: at least 1 so they are never zero length when MAXFPREGS is 0
 class FuncCallStack {
 private:
     unsigned   tos;
@@ -86,8 +95,8 @@ private:
     union {
         double d;
         float f;
-    } fpRegs[MAXFPREGS];
-    unsigned fpSizes[MAXFPREGS];
+    } fpRegs[NUMFPREGS];
+    unsigned fpSizes[NUMFPREGS];
  #endif
     unsigned    numFpRegs;
 #endif

+ 1 - 1
system/include/platform.h

@@ -179,7 +179,7 @@ typedef memsize_t rowsize_t;
 #define LibraryExtension           ".lib"
 #define ProcessExtension           ".exe"
 #define GetSharedProcedure(h,name) GetProcAddress(h,(char *)name)
-#define LoadSucceeded(h)           ((unsigned)h >= 32)
+#define LoadSucceeded(h)           ((memsize_t)h >= 32)
 #define GetSharedObjectError()     GetLastError()
 #define strtok_r(a,b,c)            j_strtok_r(a,b,c)
 #define __builtin_prefetch(addr)   _mm_prefetch((const char *)(addr), _MM_HINT_T0)

+ 6 - 2
system/jlib/jexcept.cpp

@@ -666,7 +666,7 @@ static BOOL GetLogicalAddress( PVOID addr, PTSTR szModule, DWORD len, DWORD& sec
         if ( (rva >= sectionStart) && (rva <= sectionEnd) )
         {
             section = i+1;
-            offset = rva - sectionStart;
+            offset = (DWORD)(rva - sectionStart);
             return TRUE;
         }
     }
@@ -848,7 +848,11 @@ static void PrintExceptionReport( PEXCEPTION_POINTERS pExceptionInfo)
 #ifdef _ARCH_X86_64_
     PrintLog("RAX:%016" I64F "X  RBX:%016" I64F "X  RCX:%016" I64F "X  RDX:%016" I64F "X  RSI:%016" I64F "X  RDI:%016" I64F "X",
         pCtx->Rax, pCtx->Rbx, pCtx->Rcx, pCtx->Rdx, pCtx->Rsi, pCtx->Rdi );
-    
+    PrintLog("R8: %016" I64F "X  R9: %016" I64F "X  R10:%016" I64F "X  R11:%016" I64F "X  R12:%016" I64F "X  R13:%016" I64F "X",
+        pCtx->R8, pCtx->R9, pCtx->R10, pCtx->R11, pCtx->R12, pCtx->R13);
+    PrintLog("R14:%016" I64F "X  R15:%016" I64F "X",
+        pCtx->R14, pCtx->R15);
+
     PrintLog( "CS:RIP:%04X:%016" I64F "X", pCtx->SegCs, pCtx->Rip );
     PrintLog( "SS:PSP:%04X:%016" I64F "X  PBP:%016" I64F "X",
         pCtx->SegSs, pCtx->Rsp, pCtx->Rbp );