Jelajahi Sumber

HPCC-25577 Capture extra info. when regression query times out

Signed-off-by: Jake Smith <jake.smith@lexisnexisrisk.com>
Jake Smith 4 tahun lalu
induk
melakukan
b5d7e2e3cf
2 mengubah file dengan 27 tambahan dan 2 penghapusan
  1. 24 0
      .github/workflows/smoketest-preabort.sh
  2. 3 2
      .github/workflows/smoketest.yml

+ 24 - 0
.github/workflows/smoketest-preabort.sh

@@ -0,0 +1,24 @@
+#!/bin/bash
+
+dumpstacks()  # $1 = process name; prints an all-thread gdb backtrace for every running instance of it
+{
+  local processName="$1" p  # p is declared local so the loop variable does not leak to the caller
+  for p in $(pidof "${processName}"); do  # one iteration per PID printed by pidof; none if nothing matches
+    echo "${processName}[${p}] stacks:"
+    sudo gdb --batch --quiet -ex "set interactive-mode off" -ex "thread apply all bt" -ex "quit" -p "${p}"  # attach by PID, so no PATH lookup of the binary is needed
+    echo '==============='
+  done
+}
+
+printf '%s\n' 'List of processes:'
+ps aux  # process listing printed under the heading above

+for proc in daserver esp ecl eclcc  # process names whose stacks are dumped, in this order
+do
+  dumpstacks "${proc}"
+done
+
+printf '%s\n' 'job queues meta data:'
+daliadmin . export /JobQueues jq.xml  # jq.xml is a relative path, so it is written to the current directory
+cat jq.xml
+printf '%s\n' '***************'

+ 3 - 2
.github/workflows/smoketest.yml

@@ -284,6 +284,7 @@ jobs:
         # configure environment.xml to slavesPerNode=2, channelsPerSlave=1
         xmlstarlet ed -L -u 'Environment/Software/ThorCluster/@slavesPerNode' -v 2 -u 'Environment/Software/ThorCluster/@channelsPerSlave' -v 1 install/etc/HPCCSystems/environment.xml
         cp ${{ github.workspace }}/src/.github/workflows/timeoutcmd install/opt/HPCCSystems/bin/
+        cp ${{ github.workspace }}/src/.github/workflows/smoketest-preabort.sh install/opt/HPCCSystems/bin/
         mkdir copy
         cp -rp install copy/
         rm -rf copy/install/var/lib/HPCCSystems/hpcc-data
@@ -301,7 +302,7 @@ jobs:
         cd install/opt/HPCCSystems/testing/regress
         # force regression suite to timeout after 8 minutes, so it captures ZAP report of any inflight hung queries
         timeoutcmd $((8 * 60)) \
-          ./ecl-test setup --pq 2 --generateStackTrace
+          ./ecl-test setup --preAbort 'smoketest-preabort.sh' --pq 2 --generateStackTrace
         ${{ github.workspace }}/install/opt/HPCCSystems/etc/init.d/hpcc-init stop
 
     - name: regression-setup-logs-artifact
@@ -407,7 +408,7 @@ jobs:
 
         # force regression suite to timeout after 28 minutes, so it captures ZAP report of any inflight hung queries
         timeoutcmd $((28 * 60)) \
-          ./ecl-test query --pq 2 --target ${{ matrix.engine }} --excludeclass python2,embedded-r,embedded-js,3rdpartyservice,spray --generateStackTrace ${queries}
+          ./ecl-test query --preAbort 'smoketest-preabort.sh' --pq 2 --target ${{ matrix.engine }} --excludeclass python2,embedded-r,embedded-js,3rdpartyservice,spray --generateStackTrace ${queries}
         grep Failure: ${{ github.workspace }}/regress/log/${{ matrix.engine }}.*.log
         if [[ "$(grep -oP '(?<=^Failure: )[0-9]+$' ${{ github.workspace }}/regress/log/${{ matrix.engine }}.*.log)" -gt 0 ]]; then exit 1; fi