Pārlūkot izejas kodu

Fix gh-851 Thor graph execute race condition

The graph executor thread could cause a pending graph to execute too
early. The consequence was that on the slave this early subgraph was
queued, but nothing polled to run it, and the master stalled waiting
for it to initialize.

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith 13 gadi atpakaļ
vecāks
revīzija
d5985473a1
2 mainītis faili ar 15 papildinājumiem un 13 dzēšanām
  1. 14 12
      thorlcr/graph/thgraph.cpp
  2. 1 1
      thorlcr/slave/slavmain.cpp

+ 14 - 12
thorlcr/graph/thgraph.cpp

@@ -2235,20 +2235,22 @@ public:
                     break;
                 }
                 if (job.queryPausing()) return; // pending graphs will re-run on resubmission
-                assertex(running.ordinality() <= limit);
                 bool added = false;
-                while (toRun.ordinality())
+                if (running.ordinality() < limit)
                 {
-                    Linked<CGraphExecutorGraphInfo> graphInfo = &toRun.item(0);
-                    toRun.remove(0);
-                    running.append(*LINK(graphInfo));
-                    CGraphBase *subGraph = graphInfo->subGraph;
-                    PROGLOG("Wait: Launching graph thread for graphId=%"GIDPF"d", subGraph->queryGraphId());
-                    added = true;
-                    PooledThreadHandle h = graphPool->start(graphInfo.getClear());
-                    subGraph->poolThreadHandle = h;
-                    if (running.ordinality() >= limit)
-                        break;
+                    while (toRun.ordinality())
+                    {
+                        Linked<CGraphExecutorGraphInfo> graphInfo = &toRun.item(0);
+                        toRun.remove(0);
+                        running.append(*LINK(graphInfo));
+                        CGraphBase *subGraph = graphInfo->subGraph;
+                        PROGLOG("Wait: Launching graph thread for graphId=%"GIDPF"d", subGraph->queryGraphId());
+                        added = true;
+                        PooledThreadHandle h = graphPool->start(graphInfo.getClear());
+                        subGraph->poolThreadHandle = h;
+                        if (running.ordinality() >= limit)
+                            break;
+                    }
                 }
                 if (!added)
                     Sleep(1000); // still more to come

+ 1 - 1
thorlcr/slave/slavmain.cpp

@@ -258,10 +258,10 @@ public:
                         CJobSlave *job = jobs.find(jobKey.get());
                         if (!job)
                             throw MakeStringException(0, "Job not found: %s", jobKey.get());
-                        PROGLOG("GraphInit: %s", jobKey.get());
                         Owned<IPropertyTree> graphNode = createPTree(msg);
                         Owned<CSlaveGraph> subGraph = (CSlaveGraph *)job->createGraph();
                         subGraph->createFromXGMML(graphNode, NULL, NULL, NULL);
+                        PROGLOG("GraphInit: %s, graphId=%"GIDPF"d", jobKey.get(), subGraph->queryGraphId());
                         subGraph->setExecuteReplyTag(subGraph->queryJob().deserializeMPTag(msg));
                         unsigned len;
                         msg.read(len);