
Merge pull request #13244 from dcamper/hpcc-23202-kafka-updates

HPCC-23202 Update Kafka plugin to use latest released Kafka client library

Reviewed-By: Gavin Halliday <gavin.halliday@lexisnexis.com>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 5 years ago
parent
commit
5da7a39bf2
4 changed files with 49 additions and 53 deletions
  1. plugins/kafka/kafka.cpp (+42 -32)
  2. plugins/kafka/kafka.ecllib (+2 -2)
  3. plugins/kafka/kafka.hpp (+4 -18)
  4. plugins/kafka/librdkafka (+1 -1)

+ 42 - 32
plugins/kafka/kafka.cpp

@@ -130,12 +130,15 @@ namespace KafkaPlugin
 
     KafkaStreamedDataset::~KafkaStreamedDataset()
     {
-        if (consumedRecCount > 0)
+        if (consumerPtr)
         {
-            consumerPtr->commitOffset(lastMsgOffset);
-        }
+            if (consumedRecCount > 0)
+            {
+                consumerPtr->commitOffset(lastMsgOffset);
+            }
 
-        delete(consumerPtr);
+            delete(consumerPtr);
+        }
     }
 
     const void* KafkaStreamedDataset::nextRow()
@@ -145,7 +148,7 @@ namespace KafkaPlugin
         __int32 timeoutWait = 100;  //!< Amount of time to wait between retries
         __int32 attemptNum = 0;
 
-        if (maxRecords <= 0 || consumedRecCount < maxRecords)
+        if (consumerPtr && (maxRecords <= 0 || consumedRecCount < maxRecords))
         {
             RdKafka::Message* messageObjPtr = NULL;
             bool messageConsumed = false;
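
Both this hunk and the destructor hunk above add the same guard: consumerPtr can legitimately be NULL if setup failed, so nextRow() now returns nothing rather than dereferencing it. For context, the surrounding code retries the consume call with a timeoutWait pause between attempts; a minimal sketch of that pattern follows, where consumeWithRetries and fetchOnce are hypothetical stand-ins rather than plugin code:

    #include <chrono>
    #include <cstdint>
    #include <functional>
    #include <thread>

    // fetchOnce stands in for the librdkafka consume call; it returns
    // true when a message was successfully consumed.
    bool consumeWithRetries(const std::function<bool()> &fetchOnce,
                            int32_t maxAttempts, int32_t timeoutWaitMs)
    {
        for (int32_t attemptNum = 0; attemptNum < maxAttempts; attemptNum++)
        {
            if (fetchOnce())
                return true;    // message consumed
            // Pause between retries, as nextRow() does with timeoutWait
            std::this_thread::sleep_for(std::chrono::milliseconds(timeoutWaitMs));
        }
        return false;           // gave up after maxAttempts
    }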
@@ -497,6 +500,22 @@ namespace KafkaPlugin
     {
         consumerPtr = NULL;
         topicPtr = NULL;
+
+        char cpath[_MAX_DIR];
+
+        GetCurrentDirectory(_MAX_DIR, cpath);
+        offsetPath.append(cpath);
+        addPathSepChar(offsetPath);
+
+        offsetPath.append(topic.c_str());
+        offsetPath.append("-");
+        offsetPath.append(partitionNum);
+        if (!consumerGroup.empty())
+        {
+            offsetPath.append("-");
+            offsetPath.append(consumerGroup.c_str());
+        }
+        offsetPath.append(".offset");
     }
 
     Consumer::~Consumer()
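
The constructor now precomputes offsetPath once, as <cwd>/<topic>-<partition>[-<consumerGroup>].offset, replacing the offsetFilePath() helper removed further down. A sketch of the same composition using only the standard library; buildOffsetPath and the sample path are illustrative, not plugin code:

    #include <string>

    std::string buildOffsetPath(const std::string &cwd, const std::string &topic,
                                int partitionNum, const std::string &consumerGroup)
    {
        std::string path = cwd + "/" + topic + "-" + std::to_string(partitionNum);
        if (!consumerGroup.empty())
            path += "-" + consumerGroup;
        return path + ".offset";  // e.g. "/tmp/MyTopic-0-MyGroup.offset"
    }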
@@ -562,6 +581,10 @@ namespace KafkaPlugin
                         // Ensure that some items are set a certain way
                         // by setting them after loading the external conf
                         topicConfPtr->set("auto.commit.enable", "false", errStr);
+                        // Additional settings for updated librdkafka
+                        topicConfPtr->set("enable.auto.commit", "false", errStr);
+                        topicConfPtr->set("offset.store.method", "file", errStr);
+                        topicConfPtr->set("offset.store.path", offsetPath.str(), errStr);
 
                         // Create the topic
                         topicPtr.store(RdKafka::Topic::create(consumerPtr, topic, topicConfPtr, errStr), std::memory_order_release);
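
The added settings force auto-commit off under both the legacy ("auto.commit.enable") and current ("enable.auto.commit") property names, and direct the updated librdkafka client to file-based offset storage at the precomputed path. A standalone sketch of setting topic-level properties through the librdkafka C++ API; the values mirror the hunk, while setOrWarn and the store path are only examples:

    #include <iostream>
    #include <string>
    #include <librdkafka/rdkafkacpp.h>

    static void setOrWarn(RdKafka::Conf *conf, const std::string &name,
                          const std::string &value)
    {
        std::string errStr;
        if (conf->set(name, value, errStr) != RdKafka::Conf::CONF_OK)
            std::cerr << name << ": " << errStr << std::endl;
    }

    int main()
    {
        RdKafka::Conf *topicConf = RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC);

        setOrWarn(topicConf, "auto.commit.enable", "false");  // legacy name
        setOrWarn(topicConf, "enable.auto.commit", "false");  // current name
        setOrWarn(topicConf, "offset.store.method", "file");
        setOrWarn(topicConf, "offset.store.path", "/tmp/offsets");  // example path

        delete topicConf;
        return 0;
    }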
@@ -589,7 +612,7 @@ namespace KafkaPlugin
         return consumerPtr->consume(topicPtr, partitionNum, POLL_TIMEOUT);
     }
 
-    KafkaStreamedDataset* Consumer::getMessageDataset(IEngineRowAllocator* allocator, __int64 maxRecords)
+    void Consumer::prepForMessageFetch()
     {
         // Make sure we have a valid connection to the Kafka cluster
         ensureSetup();
@@ -608,28 +631,11 @@ namespace KafkaPlugin
         {
             throw MakeStringException(-1, "Kafka: Failed to start Consumer read for %s:%d @ %s; error: %d", topic.c_str(), partitionNum, brokers.c_str(), startErr);
         }
-
-        return new KafkaStreamedDataset(this, allocator, traceLevel, maxRecords);
-    }
-
-    StringBuffer &Consumer::offsetFilePath(StringBuffer &offsetPath) const
-    {
-        offsetPath.append(topic.c_str());
-        offsetPath.append("-");
-        offsetPath.append(partitionNum);
-        if (!consumerGroup.empty())
-        {
-            offsetPath.append("-");
-            offsetPath.append(consumerGroup.c_str());
-        }
-        offsetPath.append(".offset");
-
-        return offsetPath;
     }
 
     void Consumer::commitOffset(__int64 offset) const
     {
-        if (offset >= -1)
+        if (offset >= 0)
         {
             // Not using librdkafka's offset_store because it seems to be broken
             // topicPtr->offset_store(partitionNum, offset);
@@ -639,9 +645,6 @@ namespace KafkaPlugin
             // we left off; NOTE:  librdkafka does not clean the topic name
             // or consumer group name when constructing this path
             // (which is actually a security concern), so we can't clean, either
-            StringBuffer offsetPath;
-            offsetFilePath(offsetPath);
-
             std::ofstream outFile(offsetPath.str(), std::ofstream::trunc);
             outFile << offset;
 
@@ -654,12 +657,9 @@ namespace KafkaPlugin
 
     void Consumer::initFileOffsetIfNotExist() const
     {
-        StringBuffer offsetPath;
-        offsetFilePath(offsetPath);
-
         if (!checkFileExists(offsetPath.str()))
         {
-            commitOffset(-1);
+            commitOffset(0);
 
             if (traceLevel > 4)
             {
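
With the sentinel change from -1 to 0, commitOffset() now refuses negative offsets and a missing offset file is seeded with 0. The file format itself is just a plain-text integer; a minimal round-trip sketch, where the file name and offset value are examples only:

    #include <fstream>
    #include <iostream>

    int main()
    {
        const char *offsetPath = "example.offset";

        // Write: truncate the file and store the latest offset,
        // as commitOffset() does.
        std::ofstream outFile(offsetPath, std::ofstream::trunc);
        outFile << 42;
        outFile.close();

        // Read it back, falling back to 0 when the file is absent or
        // empty, matching the new seed-with-0 behaviour.
        long long offset = 0;
        std::ifstream inFile(offsetPath);
        if (!(inFile >> offset))
            offset = 0;

        std::cout << "resuming from offset " << offset << std::endl;
        return 0;
    }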
@@ -1000,7 +1000,17 @@ namespace KafkaPlugin
     {
         Consumer* consumerObjPtr = new Consumer(brokers, topic, consumerGroup, partitionNum, ctx->queryContextLogger().queryTraceLevel());
 
-        return consumerObjPtr->getMessageDataset(allocator, maxRecords);
+        try
+        {
+            consumerObjPtr->prepForMessageFetch();
+        }
+        catch(...)
+        {
+            delete(consumerObjPtr);
+            throw;
+        }
+
+        return new KafkaStreamedDataset(consumerObjPtr, allocator, ctx->queryContextLogger().queryTraceLevel(), maxRecords);
     }
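
The try/catch ensures the raw Consumer is deleted if prepForMessageFetch() throws before KafkaStreamedDataset takes ownership. The same guarantee can be sketched with std::unique_ptr, releasing only after the throwing call succeeds; Widget, prep, and Holder are hypothetical stand-ins, not plugin types:

    #include <memory>

    struct Widget { void prep() { /* may throw */ } };
    struct Holder { explicit Holder(Widget *w) : w(w) {} Widget *w; };

    Holder *makeHolder()
    {
        auto widget = std::make_unique<Widget>();
        widget->prep();                       // if this throws, widget is freed
        return new Holder(widget.release());  // ownership passes to Holder
    }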
 
     ECL_KAFKA_API __int64 ECL_KAFKA_CALL setMessageOffset(ICodeContext* ctx, const char* brokers, const char* topic, const char* consumerGroup, __int32 partitionNum, __int64 newOffset)

+ 2 - 2
plugins/kafka/kafka.ecllib

@@ -225,7 +225,7 @@ EXPORT KafkaConsumer(VARSTRING topic, VARSTRING brokers = 'localhost', VARSTRING
                 LOCAL
             );
 
-        RETURN COUNT(result(offset >= -1));
+        RETURN COUNT(result(offset >= 0));
     END;
 
     /**
@@ -245,7 +245,7 @@ EXPORT KafkaConsumer(VARSTRING topic, VARSTRING brokers = 'localhost', VARSTRING
                     (
                         KafkaMessageOffset,
                         SELF.partitionNum := COUNTER - 1,
-                        SELF.offset := -1
+                        SELF.offset := 0
                     )
             );
 

+ 4 - 18
plugins/kafka/kafka.hpp

@@ -262,24 +262,10 @@ extern "C++"
                 RdKafka::Message* getOneMessage();
 
                 /**
-                 * Retrieves many messages from the inbound Kafka topic and
-                 * returns them as a streamed dataset.  Note that this is a
-                 * per-brokers/per-topic/per-partition retrieval.
-                 *
-                 * @param   allocator       The allocator to use with RowBuilder
-                 * @param   maxRecords      The maximum number of records
-                 *                          to retrieved
-                 *
-                 * @return  An IRowStream streamed dataset object pointer
-                 */
-                KafkaStreamedDataset* getMessageDataset(IEngineRowAllocator* allocator, __int64 maxRecords = 1);
-
-                /**
-                 * @param offsetPath  StringBuffer object to contain the path to this
-                 *                    consumer's offset file
-                 * @return            Reference to pass-in buffer
+                 * Initializes the object and prepares it to receive
+                 * messages from a specific broker/topic/partition.
                  */
-                StringBuffer &offsetFilePath(StringBuffer &offsetPath) const;
+                void prepForMessageFetch();
 
                 /**
                  * Commits the given offset to storage so we can pick up
@@ -309,11 +295,11 @@ extern "C++"
                 std::string                     brokers;        //!< One or more Kafka bootstrap brokers; comma-delimited; NameOrIP[:port]
                 std::string                     topic;          //!< The name of the topic to consume from
                 std::string                     consumerGroup;  //!< The name of the consumer group for this consumer object
+                StringBuffer                    offsetPath;     //!< Full path to the Kafka topic offset file
                 RdKafka::Consumer*              consumerPtr;    //!< Pointer to librdkafka consumer object
                 std::atomic<RdKafka::Topic*>    topicPtr;       //!< Pointer to librdkafka topic object
                 CriticalSection                 lock;           //!< Mutex to ensure that only one thread creates the librdkafka object pointers or starts/stops the queue
                 __int32                         partitionNum;   //!< The partition within the topic from which we will be pulling messages
-                bool                            queueStarted;   //!< If true, we have started the process of reading from the queue
                 int                             traceLevel;     //!< The current logging level
         };
 

+ 1 - 1
plugins/kafka/librdkafka

@@ -1 +1 @@
-Subproject commit 3e1babf4f26a7d12bbd272c1cdf4aa6a44000d4a
+Subproject commit 4ffe54b4f59ee5ae3767f9f25dc14651a3384d62