
Merge branch 'candidate-5.4.0'

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 10 years ago
parent
commit
b025875cb7

+ 1 - 0
common/workunit/workunit.cpp

@@ -1856,6 +1856,7 @@ mapEnums querySortFields[] =
    { WUQSFActivited, "@activated" },
    { WUQSFSuspendedByUser, "@suspended" },
    { WUQSFLibrary, "Library"},
+   { WUQSFPublishedBy, "@publishedBy" },
    { WUQSFterm, NULL }
 };
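
The mapEnums table above pairs each WUQuerySortField value with the attribute name used when filtering query entries; the new row maps WUQSFPublishedBy to "@publishedBy". A minimal sketch of how such a NULL-terminated table is typically scanned (the helper name and struct layout here are assumptions for illustration, not the platform's actual code):

    struct mapEnums { int val; const char *str; };

    // Scan until the terminator entry ({ WUQSFterm, NULL }), whose string is NULL.
    static const char *findEnumText(const mapEnums *map, int value)
    {
        for (; map->str; map++)
            if (map->val == value)
                return map->str;    // e.g. WUQSFPublishedBy -> "@publishedBy"
        return NULL;                // unknown field
    }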
 

+ 1 - 0
common/workunit/workunit.hpp

@@ -1226,6 +1226,7 @@ enum WUQuerySortField
     WUQSFActivited = 14,
     WUQSFSuspendedByUser = 15,
     WUQSFLibrary = 16,
+    WUQSFPublishedBy = 17,
     WUQSFterm = 0,
     WUQSFreverse = 256,
     WUQSFnocase = 512,

+ 17 - 0
dali/ft/daftformat.cpp

@@ -78,18 +78,34 @@ void CPartitioner::commonCalcPartitions()
     const offset_t endOffset = thisOffset + thisSize;
     unsigned firstSplit;
     unsigned lastSplit;
+    bool appendingContent=false;
     if (partSize)
     {
         firstSplit = (unsigned)((thisOffset + partSize-1)/partSize);
         lastSplit = (unsigned)((endOffset-1)/partSize);
+        appendingContent=(thisOffset % partSize)!=0;
     }
     else
     {
         firstSplit = (unsigned)((thisOffset*numParts)/totalSize);
         lastSplit = (unsigned)(((endOffset-1)*numParts)/totalSize);
+        appendingContent=((thisOffset*numParts) % totalSize)!=0;
     }
     if (endOffset == totalSize) lastSplit = numParts-1;
     if (lastSplit >= numParts) lastSplit = numParts-1;                                      // very rare with variable length records, last file is very small or copying a couple of records 50 ways.
+
+    if (!partSeparator.isEmpty() && appendingContent) //appending to existing content, add a separator if necessary
+    {
+        Owned<PartitionPoint> separator = new PartitionPoint;
+        separator->inputOffset = 0;
+        separator->inputLength = partSeparator.length();
+        separator->outputLength = partSeparator.length();
+        separator->fixedText.set(partSeparator.length(), partSeparator.get());
+        separator->whichInput = whichInput;
+        separator->whichOutput = firstSplit-1;
+        results.append(*separator.getClear());
+    }
+
     offset_t startInputOffset = thisOffset;
     offset_t startOutputOffset = 0;
 
@@ -1589,6 +1605,7 @@ CJsonInputPartitioner::CJsonInputPartitioner(const FileFormat & _format)
         openfilecache = createFileIOCache(16);
     else
         openfilecache->Link();
+    partSeparator.set(",\n");
 }
 
 IFileIOCache *CJsonInputPartitioner::openfilecache = NULL;
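
The new appendingContent flag in commonCalcPartitions() detects a chunk that begins mid-part, in which case a fixed-text separator (",\n" for JSON, set in the CJsonInputPartitioner constructor above) is queued against the part the previous chunk started, i.e. output firstSplit-1. A self-contained sketch of that arithmetic with made-up sizes (variable names mirror the code; the scenario is illustrative only):

    #include <cstdio>

    int main()
    {
        // A 1000-byte file split into 4 fixed-size parts; this worker owns
        // the chunk [300, 600), which begins 50 bytes into part 1.
        const unsigned long long totalSize = 1000, thisOffset = 300, thisSize = 300;
        const unsigned numParts = 4;
        const unsigned long long partSize = totalSize / numParts;                 // 250

        unsigned firstSplit = (unsigned)((thisOffset + partSize - 1) / partSize); // 2
        unsigned lastSplit  = (unsigned)((thisOffset + thisSize - 1) / partSize); // 2
        bool appendingContent = (thisOffset % partSize) != 0;                     // true
        if (lastSplit >= numParts) lastSplit = numParts - 1;

        // When appending, the separator is credited to the previous output part,
        // firstSplit - 1, exactly as whichOutput is set above.
        printf("first=%u last=%u appending=%d separatorPart=%u\n",
               firstSplit, lastSplit, (int)appendingContent, firstSplit - 1);
        return 0;
    }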

+ 4 - 3
dali/ft/daftformat.ipp

@@ -55,6 +55,7 @@ protected:
     unsigned                    whichInput;
     RemoteFilename              inputName;
     StringAttr                  fullPath;
+    StringAttr                  partSeparator;
     Linked<IOutputProcessor>    target;
 
     offset_t                    totalSize;
@@ -538,7 +539,7 @@ protected:
             return;
 
         offset_t prevRowEnd;
-        json->findRowEnd(splitOffset, prevRowEnd);
+        json->findRowEnd(splitOffset-thisOffset + thisHeaderSize, prevRowEnd);
         if (!json->rowStart)
             return;
         if (!json->newRowSet) //get rid of extra delimiter if we haven't closed and reopened in the meantime
@@ -547,9 +548,9 @@ protected:
             if (cursor.trimLength && json->isRootless()) //compensate for difference in rootless offset
                 cursor.trimLength--;
         }
-        cursor.inputOffset = json->getRowOffset();
+        cursor.inputOffset = json->getRowOffset() + thisOffset;
         if (json->findNextRow())
-            cursor.nextInputOffset = json->getRowOffset();
+            cursor.nextInputOffset = json->getRowOffset() + thisOffset;
         else
             cursor.nextInputOffset = cursor.inputOffset;  //eof
     }
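
All three changes in this hunk are the same fix: the JSON reader operates on a buffer that begins at file offset thisOffset, so the offsets it reports are buffer-relative and must be translated to file-global positions before being stored in the cursor (and global positions translated back, plus a header-size correction, on the way in). A hedged one-line illustration using the platform's offset_t (the helper names are mine):

    // Buffer-local <-> file-global translation for a chunk starting at thisOffset:
    offset_t toGlobal(offset_t local, offset_t thisOffset)  { return local + thisOffset; }
    offset_t toLocal(offset_t global, offset_t thisOffset)  { return global - thisOffset; }
    // e.g. cursor.inputOffset = toGlobal(json->getRowOffset(), thisOffset);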

+ 1 - 1
ecl/hql/hqlgram.y

@@ -6448,7 +6448,7 @@ primexpr1
                             if (options)
                                 $$.setExpr(createValueF(no_executewhen, expr->getType(), LINK(expr), $5.getExpr(), options.getClear(), NULL), $1);
                             else
-                                $$.setExpr(createCompound($5.getExpr(), expr), $1);
+                                $$.setExpr(createCompound($5.getExpr(), expr.getClear()), $1);
                         }
     | __COMMON__ '(' expression ')'
                         {
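
The hqlgram.y change is a reference-count fix: createCompound() consumes a reference to each argument, so passing the raw pointer out of an owning holder means the same reference is released twice, once via the compound's eventual release and once by the holder's destructor; getClear() hands the reference over and empties the holder. A hedged sketch of the convention (simplified, not the actual jlib/hql classes):

    // Minimal Owned-style holder illustrating the three ways to hand out a pointer.
    template <class T>
    class OwnedPtr
    {
        T *ptr;
    public:
        explicit OwnedPtr(T *p = nullptr) : ptr(p) {}
        ~OwnedPtr() { if (ptr) ptr->Release(); }
        T *get() const { return ptr; }                                // borrow: caller must not Release
        T *getLink()   { if (ptr) ptr->Link(); return ptr; }         // share: adds a reference
        T *getClear()  { T *ret = ptr; ptr = nullptr; return ret; }  // transfer: caller now owns it
    };
    // A consumer such as createCompound(a, b) must be given a reference it may keep:
    // either holder.getClear() (give ours away) or holder.getLink() (create a new one).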

+ 1 - 0
esp/scm/ws_workunits.ecm

@@ -1273,6 +1273,7 @@ ESPrequest [nil_remove] WUListQueriesRequest
     [min_ver("1.50")] string WUID;
     [min_ver("1.51")] string QueryID;
     [min_ver("1.51")] string QueryName;
+    [min_ver("1.56")] string PublishedBy;
 
     nonNegativeInteger PageSize(0);
     nonNegativeInteger PageStartFrom(0);

+ 3 - 0
esp/services/ws_workunits/ws_workunitsQuerySets.cpp

@@ -1299,6 +1299,8 @@ bool CWsWorkunitsEx::onWUListQueries(IEspContext &context, IEspWUListQueriesRequ
             sortOrder[0] = (WUQuerySortField) (WUQSFwarnTimeLimit | WUQSFnumeric);
         else if (strieq(sortBy, "Priority"))
             sortOrder[0] = (WUQuerySortField) (WUQSFpriority | WUQSFnumeric);
+        else if (strieq(sortBy, "PublishedBy"))
+            sortOrder[0] = WUQSFPublishedBy;
         else if (strieq(sortBy, "QuerySetId"))
             sortOrder[0] = WUQSFQuerySet;
         else
@@ -1318,6 +1320,7 @@ bool CWsWorkunitsEx::onWUListQueries(IEspContext &context, IEspWUListQueriesRequ
     addWUQSQueryFilter(filters, filterCount, filterBuf, req.getQueryName(), (WUQuerySortField) (WUQSFname | WUQSFwild));
     addWUQSQueryFilter(filters, filterCount, filterBuf, req.getWUID(), WUQSFwuid);
     addWUQSQueryFilter(filters, filterCount, filterBuf, req.getLibraryName(), (WUQuerySortField) (WUQSFLibrary | WUQSFnocase));
+    addWUQSQueryFilter(filters, filterCount, filterBuf, req.getPublishedBy(), (WUQuerySortField) (WUQSFPublishedBy | WUQSFwild));
     if (!req.getMemoryLimitLow_isNull())
         addWUQSQueryFilterInt64(filters, filterCount, filterBuf, req.getMemoryLimitLow(), (WUQuerySortField) (WUQSFmemoryLimit | WUQSFnumeric));
     if (!req.getMemoryLimitHigh_isNull())
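
WUQuerySortField doubles as a small flag word: the low bits carry the field id (WUQSFPublishedBy = 17) and modifier bits such as WUQSFreverse = 256 and WUQSFnocase = 512 are OR'ed on top, which is why the casts above combine the new field with WUQSFwild. A sketch of decoding such a value (the mask width is an assumption based on the enum values shown):

    enum { FieldMask = 0xff };  // assumed: field ids stay below the modifier bits

    inline unsigned baseField(unsigned sf)  { return sf & FieldMask; }   // e.g. 17 == WUQSFPublishedBy
    inline bool     isReversed(unsigned sf) { return (sf & 256) != 0; }  // WUQSFreverse
    inline bool     isNoCase(unsigned sf)   { return (sf & 512) != 0; }  // WUQSFnocase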

+ 101 - 8
plugins/cassandra/cassandraembed.cpp

@@ -146,6 +146,10 @@ void CassandraCluster::setOptions(const StringArray &options)
             }
             else if (stricmp(optName, "pageSize")==0)
                 pageSize = getUnsignedOption(val, "pageSize");
+            else if (stricmp(optName, "maxFutures")==0)
+                maxFutures=getUnsignedOption(val, "maxFutures");
+            else if (stricmp(optName, "maxRetries")==0)
+                maxRetries=getUnsignedOption(val, "maxRetries");
             else if (stricmp(optName, "port")==0)
             {
                 unsigned port = getUnsignedOption(val, "port");
@@ -332,20 +336,96 @@ void CassandraSession::set(CassSession *_session)
     session = _session;
 }
 
+
+class CassandraRetryingFuture : public CInterface
+{
+public:
+    CassandraRetryingFuture(CassSession *_session, CassStatement *_statement, Semaphore *_limiter = NULL, unsigned _retries = 10)
+    : session(_session), statement(_statement), retries(_retries), limiter(_limiter), future(NULL)
+    {
+        execute();
+    }
+    ~CassandraRetryingFuture()
+    {
+        if (future)
+            cass_future_free(future);
+    }
+    inline operator CassFuture *() const
+    {
+        return future;
+    }
+    void wait(const char *why)
+    {
+        cass_future_wait(future);
+        CassError rc = cass_future_error_code(future);
+        if(rc != CASS_OK)
+        {
+            switch (rc)
+            {
+            case CASS_ERROR_LIB_NO_HOSTS_AVAILABLE: // MORE - are there others we should retry?
+                if (retry(why))
+                    break;
+                // fall through
+            default:
+                const char *message;
+                size_t length;
+                cass_future_error_message(future, &message, &length);
+                VStringBuffer err("cassandra: failed to %s (%.*s)", why, (int) length, message);
+                rtlFail(0, err.str());
+            }
+        }
+    }
+private:
+    bool retry(const char *why)
+    {
+        for (unsigned i = 0; i < retries; i++)
+        {
+            cass_future_free(future);  // free the failed attempt before re-executing
+            execute();
+            cass_future_wait(future);
+            CassError rc = cass_future_error_code(future);
+            if(rc == CASS_OK)
+                return true;
+        }
+        return false;
+    }
+    void execute()
+    {
+        if (limiter)
+            limiter->wait();
+        future = cass_session_execute(session, statement);
+        if (limiter)
+            cass_future_set_callback(future, signaller, this); // Note - this will call the callback if the future has already completed
+    }
+    static void signaller(CassFuture *future, void *data)
+    {
+        CassandraRetryingFuture *self = (CassandraRetryingFuture *) data;
+        if (self && self->limiter)
+            self->limiter->signal();
+    }
+    CassandraRetryingFuture(const CassandraRetryingFuture &);  // declared but not defined, to prevent copying
+    CassFuture *future;
+    CassSession *session;
+    CassandraStatement statement;
+    unsigned retries;
+    Semaphore *limiter;
+};
+
 //----------------------
 
-CassandraStatementInfo::CassandraStatementInfo(CassandraSession *_session, CassandraPrepared *_prepared, unsigned _numBindings, CassBatchType _batchMode, unsigned pageSize)
-: session(_session), prepared(_prepared), numBindings(_numBindings), batchMode(_batchMode)
+CassandraStatementInfo::CassandraStatementInfo(CassandraSession *_session, CassandraPrepared *_prepared, unsigned _numBindings, CassBatchType _batchMode, unsigned pageSize, unsigned _maxFutures, unsigned _maxRetries)
+    : session(_session), prepared(_prepared), numBindings(_numBindings), batchMode(_batchMode), semaphore(NULL), maxFutures(_maxFutures), maxRetries(_maxRetries)
 {
     assertex(prepared && *prepared);
     statement.setown(new CassandraStatement(cass_prepared_bind(*prepared)));
     if (pageSize)
         cass_statement_set_paging_size(*statement, pageSize);
-
+    inBatch = false;
 }
 CassandraStatementInfo::~CassandraStatementInfo()
 {
     stop();
+    futures.kill();
+    delete semaphore;
 }
 void CassandraStatementInfo::stop()
 {
@@ -380,10 +460,11 @@ bool CassandraStatementInfo::next()
 void CassandraStatementInfo::startStream()
 {
     if (batchMode != (CassBatchType) -1)
-    {
         batch.setown(new CassandraBatch(cass_batch_new(batchMode)));
-        statement.setown(new CassandraStatement(cass_prepared_bind(*prepared)));
-    }
+    else
+        semaphore = new Semaphore(maxFutures ? maxFutures : 100);
+    statement.setown(new CassandraStatement(cass_prepared_bind(*prepared)));
+    inBatch = true;
 }
 void CassandraStatementInfo::endStream()
 {
@@ -392,6 +473,13 @@ void CassandraStatementInfo::endStream()
         result.setown(new CassandraFutureResult (cass_session_execute_batch(*session, *batch)));
         assertex (rowCount() == 0);
     }
+    else
+    {
+        ForEachItemIn(idx, futures)
+        {
+            futures.item(idx).wait("endStream");
+        }
+    }
 }
 void CassandraStatementInfo::execute()
 {
@@ -401,6 +489,11 @@ void CassandraStatementInfo::execute()
         check(cass_batch_add_statement(*batch, *statement));
         statement.setown(new CassandraStatement(cass_prepared_bind(*prepared)));
     }
+    else if (inBatch)
+    {
+        futures.append(*new CassandraRetryingFuture(*session, statement->getClear(), semaphore, maxRetries));
+        statement.setown(new CassandraStatement(cass_prepared_bind(*prepared)));
+    }
     else
     {
         result.setown(new CassandraFutureResult(cass_session_execute(*session, *statement)));
@@ -1138,7 +1231,7 @@ class CassandraEmbedFunctionContext : public CInterfaceOf<IEmbedFunctionContext>
 {
 public:
     CassandraEmbedFunctionContext(const IContextLogger &_logctx, unsigned _flags, const char *options)
-      : logctx(_logctx), flags(_flags), nextParam(0), numParams(0)
+      : logctx(_logctx), flags(_flags), nextParam(0), numParams(0), batchMode((CassBatchType) -1), pageSize(0)
     {
         StringArray opts;
         opts.appendList(options, ",");
@@ -1587,7 +1680,7 @@ public:
                 numParams = countBindings(script);
             else
                 numParams = 0;
-            stmtInfo.setown(new CassandraStatementInfo(session, prepared, numParams, cluster->batchMode, cluster->pageSize));
+            stmtInfo.setown(new CassandraStatementInfo(session, prepared, numParams, cluster->batchMode, cluster->pageSize, cluster->maxFutures, cluster->maxRetries));
         }
     }
     virtual void callFunction()
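
CassandraRetryingFuture throttles writes by taking a semaphore slot before each cass_session_execute() and releasing it from the future's completion callback, so at most maxFutures statements are in flight at once. A self-contained sketch of just that throttling pattern in standard C++ (the Cassandra calls themselves are omitted; this is an illustration, not the platform's Semaphore class):

    #include <condition_variable>
    #include <mutex>

    // Counting semaphore bounding the number of in-flight async requests.
    class Limiter
    {
        std::mutex m;
        std::condition_variable cv;
        unsigned slots;
    public:
        explicit Limiter(unsigned maxInFlight) : slots(maxInFlight) {}
        void wait()     // call before submitting a request (cf. execute())
        {
            std::unique_lock<std::mutex> lock(m);
            cv.wait(lock, [&]{ return slots > 0; });
            --slots;
        }
        void signal()   // call from the completion callback (cf. signaller())
        {
            { std::lock_guard<std::mutex> lock(m); ++slots; }
            cv.notify_one();
        }
    };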

+ 11 - 0
plugins/cassandra/cassandraembed.hpp

@@ -209,6 +209,12 @@ public:
             DBGLOG("Executing %s", query.str());
         return statement;
     }
+    inline CassStatement *getClear()
+    {
+        CassStatement *ret = statement;
+        statement = NULL;
+        return ret;
+    }
     void bindBool(unsigned idx, cass_bool_t value)
     {
         if (query.length())
@@ -393,6 +399,11 @@ protected:
     Owned<CassandraFutureResult> result;
     Owned<CassandraIterator> iterator;
     unsigned numBindings;
+    CIArrayOf<CassandraRetryingFuture> futures;
+    Semaphore *semaphore;
+    unsigned maxFutures;
+    unsigned maxRetries;
+    bool inBatch;
     CassBatchType batchMode;
 };
 

+ 9 - 4
system/security/LdapSecurity/ldapconnection.cpp

@@ -1402,6 +1402,11 @@ public:
                         DBGLOG("LDAP: Password Expired(1) for user %s", username);
                         user.setAuthenticateStatus(AS_PASSWORD_VALID_BUT_EXPIRED);
                     }
+                    else if (strstr(ldap_errstring, "data 773"))//User must reset password "80090308: LdapErr: DSID-0C0903A9, comment: AcceptSecurityContext error, data 773, v1db1"
+                    {
+                        DBGLOG("LDAP: User %s Must Reset Password", username);
+                        user.setAuthenticateStatus(AS_PASSWORD_VALID_BUT_EXPIRED);
+                    }
                     else
                     {
                         DBGLOG("LDAP: Authentication(1) for user %s failed - %s", username, ldap_err2string(rc));
@@ -2608,7 +2613,7 @@ public:
         StringBuffer filter;
         filter.append("sAMAccountName=").append(username);
 
-        char        *attrs[] = {"cn", NULL};
+        char        *attrs[] = {"distinguishedName", NULL};
         CLDAPMessage searchResult;
         int rc = ldap_search_ext_s(ld, (char*)m_ldapconfig->getUserBasedn(), LDAP_SCOPE_SUBTREE, (char*)filter.str(), attrs, 0, NULL, NULL, &timeOut, LDAP_NO_LIMIT,    &searchResult.msg );
 
@@ -2627,11 +2632,11 @@ public:
                   attribute != NULL;
                   attribute = atts.getNext())
             {
-                if(0 == stricmp(attribute, "cn"))
+                if(0 == stricmp(attribute, "distinguishedName"))
                 {
                     CLDAPGetValuesWrapper vals(ld, message, attribute);
                     if (vals.hasValues())
-                        userdn.append("cn=").append(vals.queryValues()[0]).append(",").append(m_ldapconfig->getUserBasedn());
+                        userdn.set(vals.queryValues()[0]);
                     break;
                 }
             }
@@ -2701,7 +2706,7 @@ public:
 
 //Error string "80090308: LdapErr: DSID-0C0903A9, comment: AcceptSecurityContext error, data 532, v1db0."
         //is returned if pw valid but expired
-        if(rc == LDAP_SUCCESS || strstr(ldap_errstring, "data 532"))//
+        if(rc == LDAP_SUCCESS || strstr(ldap_errstring, "data 532") || strstr(ldap_errstring, "data 773"))//
             return true;
         else
             return false;
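
The "data NNN" tokens in these Active Directory bind errors are hex Windows error sub-codes: 0x532 is ERROR_PASSWORD_EXPIRED and 0x773 is ERROR_PASSWORD_MUST_CHANGE, which is why both are treated as "password valid but expired/reset required". A hedged sketch of the substring classification the code relies on:

    #include <cstring>

    // Classify an AD AcceptSecurityContext error string by its "data" sub-code,
    // matched as literal text exactly as the code above does.
    const char *classifyAdBindError(const char *err)
    {
        if (strstr(err, "data 532")) return "password valid but expired";
        if (strstr(err, "data 773")) return "password valid but must be reset";
        return "authentication failed";
    }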

+ 16 - 7
testing/regress/ecl/cassandra-simple.ecl

@@ -24,7 +24,7 @@ IMPORT cassandra;
 This example illustrates various calls to embedded Cassandra CQL code
  */
 
-// This is the record structure in ECL that will correspond to the rows in the Cassabdra dataset
+// This is the record structure in ECL that will correspond to the rows in the Cassandra dataset
 // Note that the default values specified in the fields will be used when a NULL value is being
 // returned from Cassandra
 
@@ -93,16 +93,25 @@ createTables() := EMBED(cassandra : server(server),user('rchapman'),keyspace('te
 ENDEMBED;
 
 // Initialize the Cassandra table, passing in the ECL dataset to provide the rows
-// Note the batch option to control how cassandra inserts are batched
-// If not supplied, each insert is executed individually - because Cassandra
-// has restrictions about what can be done in a batch, we can't default to using batch
-// unless told to...
+// When not using batch mode, maxFutures controls how many simultaneous writes to Cassandra are allowed before
+// we start to throttle, and maxRetries controls how many times inserts that fail because Cassandra is too busy
+// will be retried.
 
-initialize(dataset(childrec) values) := EMBED(cassandra : server(server),user('rchapman'),keyspace('test'),batch('unlogged'))
+initialize(dataset(childrec) values) := EMBED(cassandra : server(server),user('rchapman'),keyspace('test'),maxFutures(100),maxRetries(10))
   INSERT INTO tbl1 (name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1) values (?,?,?,?,?,?,?,?,?,?,?,?,?);
 ENDEMBED;
 
-initialize2(row(childrec) values) := EMBED(cassandra : server(server),user('rchapman'),keyspace('test'))
+// Note the batch option to control how cassandra inserts are batched
+// If not supplied, each insert is executed individually - because Cassandra
+// has restrictions about what can be done in a batch, we can't default to using batch
+// unless told to... Also, Cassandra 2.2 and later will fail if a batch gets too large. In general it is
+// best NOT to use batch for performance unless you know that
+//   (a) the resulting batch will be small (default limit is 50k bytes) and
+//   (b) all the records in the batch share the same partition key and
+//   (c) you use 'unlogged' mode
+// Use of batch to ensure all transactions either fail together or pass together is OK, but subject to the same size restrictions
+
+initialize2(row(childrec) values) := EMBED(cassandra : server(server),user('rchapman'),keyspace('test'),batch('unlogged'))
   INSERT INTO tbl1 (name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1) values (?,?,?,?,?,?,?,?,?,?,?,?,?);
 ENDEMBED;