Browse Source

Fix gh-851 Thor graph execute race condition

The graph executor thread could cause a pending graph to execute too
early. The consequence was that on the slave this early subgraph was
queued, but nothing polled to run it, and the master stalled waiting for it
to initialize.

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith 13 years ago
parent
commit
d5985473a1
2 changed files with 15 additions and 13 deletions
  1. 14 12
      thorlcr/graph/thgraph.cpp
  2. 1 1
      thorlcr/slave/slavmain.cpp

+ 14 - 12
thorlcr/graph/thgraph.cpp

@@ -2235,20 +2235,22 @@ public:
                     break;
                 }
                 if (job.queryPausing()) return; // pending graphs will re-run on resubmission
-                assertex(running.ordinality() <= limit);
                 bool added = false;
-                while (toRun.ordinality())
+                if (running.ordinality() < limit)
                 {
-                    Linked<CGraphExecutorGraphInfo> graphInfo = &toRun.item(0);
-                    toRun.remove(0);
-                    running.append(*LINK(graphInfo));
-                    CGraphBase *subGraph = graphInfo->subGraph;
-                    PROGLOG("Wait: Launching graph thread for graphId=%"GIDPF"d", subGraph->queryGraphId());
-                    added = true;
-                    PooledThreadHandle h = graphPool->start(graphInfo.getClear());
-                    subGraph->poolThreadHandle = h;
-                    if (running.ordinality() >= limit)
-                        break;
+                    while (toRun.ordinality())
+                    {
+                        Linked<CGraphExecutorGraphInfo> graphInfo = &toRun.item(0);
+                        toRun.remove(0);
+                        running.append(*LINK(graphInfo));
+                        CGraphBase *subGraph = graphInfo->subGraph;
+                        PROGLOG("Wait: Launching graph thread for graphId=%"GIDPF"d", subGraph->queryGraphId());
+                        added = true;
+                        PooledThreadHandle h = graphPool->start(graphInfo.getClear());
+                        subGraph->poolThreadHandle = h;
+                        if (running.ordinality() >= limit)
+                            break;
+                    }
                 }
                 if (!added)
                     Sleep(1000); // still more to come

+ 1 - 1
thorlcr/slave/slavmain.cpp

@@ -258,10 +258,10 @@ public:
                         CJobSlave *job = jobs.find(jobKey.get());
                         if (!job)
                             throw MakeStringException(0, "Job not found: %s", jobKey.get());
-                        PROGLOG("GraphInit: %s", jobKey.get());
                         Owned<IPropertyTree> graphNode = createPTree(msg);
                         Owned<CSlaveGraph> subGraph = (CSlaveGraph *)job->createGraph();
                         subGraph->createFromXGMML(graphNode, NULL, NULL, NULL);
+                        PROGLOG("GraphInit: %s, graphId=%"GIDPF"d", jobKey.get(), subGraph->queryGraphId());
                         subGraph->setExecuteReplyTag(subGraph->queryJob().deserializeMPTag(msg));
                         unsigned len;
                         msg.read(len);