소스 검색

Fix gh-851 Thor graph execute rate condition

The graph executor thread could cause a pending graph to execute too
early. The consquence was that on the slave this early subgraph was
queued, but nothing polled to run it and master stalled waiting for it
to initialize

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith 13 년 전
부모
커밋
d5985473a1
2개의 변경된 파일15개의 추가작업 그리고 13개의 파일을 삭제
  1. 14 12
      thorlcr/graph/thgraph.cpp
  2. 1 1
      thorlcr/slave/slavmain.cpp

+ 14 - 12
thorlcr/graph/thgraph.cpp

@@ -2235,20 +2235,22 @@ public:
                     break;
                 }
                 if (job.queryPausing()) return; // pending graphs will re-run on resubmission
-                assertex(running.ordinality() <= limit);
                 bool added = false;
-                while (toRun.ordinality())
+                if (running.ordinality() < limit)
                 {
-                    Linked<CGraphExecutorGraphInfo> graphInfo = &toRun.item(0);
-                    toRun.remove(0);
-                    running.append(*LINK(graphInfo));
-                    CGraphBase *subGraph = graphInfo->subGraph;
-                    PROGLOG("Wait: Launching graph thread for graphId=%"GIDPF"d", subGraph->queryGraphId());
-                    added = true;
-                    PooledThreadHandle h = graphPool->start(graphInfo.getClear());
-                    subGraph->poolThreadHandle = h;
-                    if (running.ordinality() >= limit)
-                        break;
+                    while (toRun.ordinality())
+                    {
+                        Linked<CGraphExecutorGraphInfo> graphInfo = &toRun.item(0);
+                        toRun.remove(0);
+                        running.append(*LINK(graphInfo));
+                        CGraphBase *subGraph = graphInfo->subGraph;
+                        PROGLOG("Wait: Launching graph thread for graphId=%"GIDPF"d", subGraph->queryGraphId());
+                        added = true;
+                        PooledThreadHandle h = graphPool->start(graphInfo.getClear());
+                        subGraph->poolThreadHandle = h;
+                        if (running.ordinality() >= limit)
+                            break;
+                    }
                 }
                 if (!added)
                     Sleep(1000); // still more to come

+ 1 - 1
thorlcr/slave/slavmain.cpp

@@ -258,10 +258,10 @@ public:
                         CJobSlave *job = jobs.find(jobKey.get());
                         if (!job)
                             throw MakeStringException(0, "Job not found: %s", jobKey.get());
-                        PROGLOG("GraphInit: %s", jobKey.get());
                         Owned<IPropertyTree> graphNode = createPTree(msg);
                         Owned<CSlaveGraph> subGraph = (CSlaveGraph *)job->createGraph();
                         subGraph->createFromXGMML(graphNode, NULL, NULL, NULL);
+                        PROGLOG("GraphInit: %s, graphId=%"GIDPF"d", jobKey.get(), subGraph->queryGraphId());
                         subGraph->setExecuteReplyTag(subGraph->queryJob().deserializeMPTag(msg));
                         unsigned len;
                         msg.read(len);