logthread.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2014 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "jmisc.hpp"
  14. #include "jexcept.hpp"
  15. #include "jdebug.hpp"
  16. #include "LoggingErrors.hpp"
  17. #include "LogSerializer.hpp"
  18. #include "logthread.hpp"
  19. #include "compressutil.hpp"
  20. const char* const PropMaxLogQueueLength = "MaxLogQueueLength";
  21. const char* const PropQueueSizeSignal = "QueueSizeSignal";
  22. const char* const PropMaxTriesRS = "MaxTriesRS";
  23. const char* const PropFailSafe = "FailSafe";
  24. #define MaxLogQueueLength 500000 //Write a warning into log when queue length is greater than 500000
  25. #define QueueSizeSignal 10000 //Write a warning into log when queue length is increased by 10000
  26. const int DefaultMaxTriesRS = -1; // Max. # of attempts to send log message to WsReportService. Default: infinite
  27. extern LOGGINGCOMMON_API IUpdateLogThread* createUpdateLogThread(IPropertyTree* _cfg, const char* _service, const char* _agentName, IEspLogAgent* _logAgent)
  28. {
  29. if (!_cfg)
  30. return NULL;
  31. IUpdateLogThread* loggingThread = new CLogThread(_cfg, _service, _agentName, _logAgent);
  32. loggingThread->start();
  33. return loggingThread;
  34. }
  35. CLogThread::CLogThread(IPropertyTree* _cfg , const char* _service, const char* _agentName, IEspLogAgent* _logAgent)
  36. : stopping(false), agentName(_agentName)
  37. {
  38. if(!_agentName || !*_agentName)
  39. throw MakeStringException(-1,"No Logging agent name defined");
  40. if(!_cfg)
  41. throw MakeStringException(-1,"No Logging agent Configuration for %s", _agentName);
  42. if(!_service || !*_service)
  43. throw MakeStringException(-1,"No service name defined for %s", _agentName);
  44. if(!_logAgent)
  45. throw MakeStringException(-1,"No Logging agent interface for %s", _agentName);
  46. logAgent.setown(_logAgent);
  47. maxLogQueueLength = _cfg->getPropInt(PropMaxLogQueueLength, MaxLogQueueLength);
  48. signalGrowingQueueAt = _cfg->getPropInt(PropQueueSizeSignal, QueueSizeSignal);
  49. maxLogRetries = _cfg->getPropInt(PropMaxTriesRS, DefaultMaxTriesRS);
  50. ensureFailSafe = _cfg->getPropBool(PropFailSafe);
  51. if(ensureFailSafe)
  52. logFailSafe.setown(createFailSafeLogger(_cfg, _service, _agentName));
  53. time_t tNow;
  54. time(&tNow);
  55. localtime_r(&tNow, &m_startTime);
  56. }
  57. CLogThread::~CLogThread()
  58. {
  59. ESPLOG(LogMax, "CLogThread::~CLogThread()");
  60. }
  61. void CLogThread::start()
  62. {
  63. Thread::start();
  64. }
  65. int CLogThread::run()
  66. {
  67. Link();
  68. if(logFailSafe.get())
  69. checkPendingLogs(false);
  70. while(!stopping)
  71. {
  72. m_sem.wait(UPDATELOGTHREADWAITINGTIME);
  73. sendLog();
  74. if(logFailSafe.get())
  75. {
  76. checkPendingLogs(true);
  77. checkRollOver();
  78. }
  79. }
  80. Release();
  81. return 0;
  82. }
  83. void CLogThread::stop()
  84. {
  85. try
  86. {
  87. CriticalBlock b(logQueueCrit);
  88. if (!logQueue.ordinality() && logFailSafe.get() && logFailSafe->canRollCurrentLog())
  89. logFailSafe->RollCurrentLog();
  90. //If logQueue is not empty, the log files are rolled over so that queued jobs can be read
  91. //when the CLogThread is restarted.
  92. }
  93. catch(...)
  94. {
  95. DBGLOG("Exception");
  96. }
  97. stopping = true;
  98. m_sem.signal();
  99. join();
  100. }
  101. bool CLogThread::queueLog(IEspUpdateLogRequest* logRequest)
  102. {
  103. if (!logRequest)
  104. return false;
  105. Owned<IEspUpdateLogRequestWrap> logRequestWrap = new CUpdateLogRequestWrap(NULL, logRequest->getOption(), logRequest->getLogContent());
  106. return enqueue(logRequestWrap, nullptr);
  107. }
  108. bool CLogThread::queueLog(IEspUpdateLogRequestWrap* logRequest)
  109. {
  110. unsigned startTime = (getEspLogLevel()>=LogNormal) ? msTick() : 0;
  111. Owned<IEspUpdateLogRequestWrap> logRequestFiltered = logAgent->filterLogContent(logRequest);
  112. ESPLOG(LogNormal, "LThread:filterLog: %dms\n", msTick() - startTime);
  113. return enqueue(logRequestFiltered, nullptr);
  114. }
  115. bool CLogThread::enqueue(IEspUpdateLogRequestWrap* logRequest, const char* guid)
  116. {
  117. if (logFailSafe.get())
  118. {
  119. StringBuffer GUID, reqBuf;
  120. unsigned startTime = (getEspLogLevel()>=LogNormal) ? msTick() : 0;
  121. if (isEmptyString(guid))
  122. logFailSafe->GenerateGUID(GUID, nullptr);
  123. else
  124. GUID.set(guid);
  125. logRequest->setGUID(GUID.str());
  126. if (serializeLogRequestContent(logRequest, reqBuf))
  127. logFailSafe->Add(GUID, reqBuf.str(), nullptr);
  128. ESPLOG(LogNormal, "LThread:addToFailSafe: %dms\n", msTick() - startTime);
  129. }
  130. writeJobQueue(logRequest);
  131. m_sem.signal();
  132. return true;
  133. }
  134. void CLogThread::sendLog()
  135. {
  136. try
  137. {
  138. if(stopping)
  139. return;
  140. int recSend = 0;
  141. while(true)
  142. {
  143. IEspUpdateLogRequestWrap* logRequest = readJobQueue();
  144. if (!logRequest)
  145. break;
  146. const char* GUID= logRequest->getGUID();
  147. if ((!GUID || !*GUID) && ensureFailSafe && logFailSafe.get())
  148. continue;
  149. Owned<IEspUpdateLogRequestWrap> logRequestInFile = checkAndReadLogRequestFromSharedTankFile(logRequest);
  150. try
  151. {
  152. unsigned startTime = (getEspLogLevel()>=LogNormal) ? msTick() : 0;
  153. Owned<IEspUpdateLogResponse> logResponse = createUpdateLogResponse();
  154. if (logRequestInFile)
  155. logAgent->updateLog(*logRequestInFile, *logResponse);
  156. else
  157. logAgent->updateLog(*logRequest, *logResponse);
  158. if (!logResponse)
  159. throw MakeStringException(EspLoggingErrors::UpdateLogFailed, "no response");
  160. if (logResponse->getStatusCode())
  161. {
  162. const char* statusMessage = logResponse->getStatusMessage();
  163. if(statusMessage && *statusMessage)
  164. throw MakeStringException(EspLoggingErrors::UpdateLogFailed, "%s", statusMessage);
  165. else
  166. throw MakeStringException(EspLoggingErrors::UpdateLogFailed, "Unknown error");
  167. }
  168. ESPLOG(LogNormal, "LThread:updateLog: %dms\n", msTick() - startTime);
  169. if(ensureFailSafe && logFailSafe.get())
  170. {
  171. unsigned startTime1 = (getEspLogLevel()>=LogNormal) ? msTick() : 0;
  172. logFailSafe->AddACK(GUID);
  173. ESPLOG(LogNormal, "LThread:AddACK: %dms\n", msTick() - startTime1);
  174. }
  175. logRequest->Release();//Make sure that no data (such as GUID) is needed before releasing the logRequest.
  176. }
  177. catch(IException* e)
  178. {
  179. StringBuffer errorStr, errorMessage;
  180. errorMessage.appendf("Failed to update log for %s: error code %d, error message %s", GUID, e->errorCode(), e->errorMessage(errorStr).str());
  181. e->Release();
  182. if (logRequestInFile)
  183. logRequest->setNoResend(logRequestInFile->getNoResend());
  184. bool willRetry = false;
  185. if (!logRequest->getNoResend() && (maxLogRetries != 0))
  186. {
  187. unsigned retry = logRequest->incrementRetryCount();
  188. if (retry > maxLogRetries)
  189. errorMessage.append(" Max logging retries exceeded.");
  190. else
  191. {
  192. willRetry = true;
  193. writeJobQueue(logRequest);
  194. errorMessage.appendf(" Adding back to logging queue for retrying %d.", retry);
  195. }
  196. }
  197. if (!willRetry)
  198. {
  199. logRequest->Release();
  200. }
  201. IERRLOG("%s", errorMessage.str());
  202. }
  203. }
  204. }
  205. catch(IException* e)
  206. {
  207. StringBuffer errorStr, errorMessage;
  208. errorMessage.append("Exception thrown within update log thread: error code ").append(e->errorCode()).append(", error message ").append(e->errorMessage(errorStr));
  209. IERRLOG("%s", errorMessage.str());
  210. e->Release();
  211. }
  212. catch(...)
  213. {
  214. IERRLOG("Unknown exception thrown within update log thread");
  215. }
  216. return;
  217. }
  218. //At first, we check whether the logRequest contains the information about original log Request
  219. //in shared tank file created by logging manager. If yes, try to read the original log Request
  220. //based on the information. If the original log Request is found and unserialized, return new
  221. //IEspUpdateLogRequestWrap which contains original log Request.
  222. IEspUpdateLogRequestWrap* CLogThread::checkAndReadLogRequestFromSharedTankFile(IEspUpdateLogRequestWrap* logRequest)
  223. {
  224. //Read LogRequestInFile info if exists.
  225. Owned<IPropertyTree> logInFle = createPTreeFromXMLString(logRequest->getUpdateLogRequest());
  226. if (!logInFle)
  227. return nullptr;
  228. const char* GUID = logInFle->queryProp(LOGREQUEST_GUID);
  229. if (isEmptyString(GUID))
  230. return nullptr;
  231. const char* fileName = logInFle->queryProp(LOGCONTENTINFILE_FILENAME);
  232. if (isEmptyString(fileName))
  233. return nullptr;
  234. __int64 pos = logInFle->getPropInt64(LOGCONTENTINFILE_FILEPOS, -1);
  235. if (pos < 0)
  236. return nullptr;
  237. int size = logInFle->getPropInt64(LOGCONTENTINFILE_FILESIZE, -1);
  238. if (size < 0)
  239. return nullptr;
  240. Owned<CLogRequestInFile> reqInFile = new CLogRequestInFile();
  241. reqInFile->setGUID(GUID);
  242. reqInFile->setFileName(fileName);
  243. reqInFile->setPos(pos);
  244. reqInFile->setSize(size);
  245. //Read Log Request from the file
  246. StringBuffer logRequestStr;
  247. CLogSerializer logSerializer;
  248. if (!logSerializer.readLogRequest(reqInFile, logRequestStr))
  249. {
  250. ERRLOG("Failed to read Log Request from %s", fileName);
  251. return nullptr;
  252. }
  253. try
  254. {
  255. Owned<IPropertyTree> logRequestTree = createPTreeFromXMLString(logRequestStr.str());
  256. if (!logRequestTree)
  257. return nullptr;
  258. const char* guid = logRequestTree->queryProp("GUID");
  259. const char* opt = logRequestTree->queryProp("Option");
  260. const char* reqBuf = logRequestTree->queryProp("LogRequest");
  261. if (isEmptyString(reqBuf))
  262. return nullptr;
  263. StringBuffer decoded, req;
  264. JBASE64_Decode(reqBuf, decoded);
  265. LZWExpand(decoded, decoded.length(), req);
  266. return new CUpdateLogRequestWrap(guid, opt, req.str());
  267. }
  268. catch(IException* e)
  269. {
  270. StringBuffer errorStr;
  271. ERRLOG("Exception when unserializing Log Request Content: %d %s", e->errorCode(), e->errorMessage(errorStr).str());
  272. e->Release();
  273. }
  274. return nullptr;
  275. }
  276. //////////////////////////FailSafe////////////////////////////
  277. void CLogThread::checkRollOver()
  278. {
  279. try
  280. {
  281. bool bRollover = false;
  282. time_t tNow;
  283. time(&tNow);
  284. struct tm ltNow;
  285. localtime_r(&tNow, &ltNow);
  286. if ((ltNow.tm_year != m_startTime.tm_year || ltNow.tm_yday != m_startTime.tm_yday))
  287. {
  288. bRollover = true;
  289. localtime_r(&tNow, &m_startTime); // reset the start time for next rollover check
  290. }
  291. if (!bRollover)
  292. return;
  293. //Rename .log files to .old files
  294. logFailSafe->SafeRollover();
  295. CriticalBlock b(logQueueCrit);
  296. //Check and add queued requests to tank(.log) files
  297. unsigned numNewArrivals = logQueue.ordinality();
  298. if(numNewArrivals <= 0)
  299. return;
  300. ESPLOG(LogMax, "writing %d requests in the queue to the rolled over tank file.", numNewArrivals);
  301. unsigned startTime = (getEspLogLevel()>=LogNormal) ? msTick() : 0;
  302. for(unsigned i = 0; i < numNewArrivals; i++)
  303. {
  304. IInterface* pRequest = logQueue.item(i);
  305. if (!pRequest)
  306. continue;
  307. IEspUpdateLogRequestWrap* pEspRequest = dynamic_cast<IEspUpdateLogRequestWrap*>(pRequest);
  308. if(!pEspRequest)
  309. continue;
  310. StringBuffer reqBuf;
  311. const char* GUID = pEspRequest->getGUID();
  312. if(GUID && *GUID && serializeLogRequestContent(pEspRequest, reqBuf))
  313. logFailSafe->Add(GUID, reqBuf.str(), nullptr);
  314. }
  315. ESPLOG(LogNormal, "LThread:AddFailSafe: %dms\n", msTick() - startTime);
  316. }
  317. catch(IException* Ex)
  318. {
  319. StringBuffer str;
  320. Ex->errorMessage(str);
  321. IERRLOG("Exception thrown during tank file rollover: %s",str.str());
  322. Ex->Release();
  323. }
  324. catch(...)
  325. {
  326. IERRLOG("Unknown exception thrown during tank file rollover.");
  327. }
  328. }
  329. unsigned CLogThread::serializeLogRequestContent(IEspUpdateLogRequestWrap* pRequest, StringBuffer& logData)
  330. {
  331. const char* GUID = pRequest->getGUID();
  332. const char* option = pRequest->getOption();
  333. const char* logRequest = pRequest->getUpdateLogRequest();
  334. if (GUID && *GUID)
  335. logData.append("<GUID>").append(GUID).append("</GUID>");
  336. if (option && *option)
  337. logData.append("<Option>").append(option).append("</Option>");
  338. if (logRequest && *logRequest)
  339. {
  340. StringBuffer buffer;
  341. JBASE64_Encode(logRequest, strlen(logRequest), buffer, true);
  342. logData.append("<LogRequest>").append(buffer.str()).append("</LogRequest>");
  343. }
  344. return logData.length();
  345. }
  346. void CLogThread::checkPendingLogs(bool bOneRecOnly)
  347. {
  348. try
  349. {
  350. bool queueLogError = false;
  351. bool bFirst = true;
  352. StringBuffer GUID, logData;
  353. while (logFailSafe->PopPendingLogRecord(GUID, logData))
  354. {
  355. if (bFirst && !bOneRecOnly)
  356. {
  357. DBGLOG("We have old logs!. Will now try and recover the lost log messages");
  358. bFirst = false;
  359. }
  360. Owned<IEspUpdateLogRequestWrap> logRequest = unserializeLogRequestContent(logData.str());
  361. if (!logRequest)
  362. IERRLOG("checkPendingLogs: failed to unserialize: %s", logData.str());
  363. else if (!enqueue(logRequest, GUID))
  364. {
  365. OERRLOG("checkPendingLogs: failed to add a log request to queue");
  366. queueLogError=true;
  367. }
  368. if (bOneRecOnly)
  369. break;
  370. }
  371. //if everything went ok then we should be able to rollover the old logs.
  372. if (!queueLogError && !bOneRecOnly)
  373. logFailSafe->RollOldLogs();
  374. }
  375. catch(IException* ex)
  376. {
  377. StringBuffer errorStr;
  378. ex->errorMessage(errorStr);
  379. IERRLOG("CheckPendingLogs: %s:" ,errorStr.str());
  380. ex->Release();
  381. }
  382. catch(...)
  383. {
  384. IERRLOG("Unknown exception thrown in CheckPendingLogs");
  385. }
  386. }
  387. IEspUpdateLogRequestWrap* CLogThread::unserializeLogRequestContent(const char* logData)
  388. {
  389. if (!logData && *logData)
  390. return NULL;
  391. Owned<IPropertyTree> pLogTree = createPTreeFromXMLString(logData);
  392. if (!pLogTree)
  393. return NULL;
  394. const char* guid = pLogTree->queryProp("GUID");
  395. const char* opt = pLogTree->queryProp("Option");
  396. const char* logRequest = pLogTree->queryProp("LogRequest");
  397. if (!logRequest || !*logRequest)
  398. return NULL;
  399. StringBuffer buffer;
  400. JBASE64_Decode(logRequest, buffer);
  401. return new CUpdateLogRequestWrap(guid, opt, buffer.str());
  402. };
  403. void CLogThread::writeJobQueue(IEspUpdateLogRequestWrap* jobToWrite)
  404. {
  405. if (jobToWrite)
  406. {
  407. unsigned startTime = (getEspLogLevel()>=LogNormal) ? msTick() : 0;
  408. CriticalBlock b(logQueueCrit);
  409. ESPLOG(LogNormal, "LThread:waitWQ: %dms\n", msTick() - startTime);
  410. int QueueSize = logQueue.ordinality();
  411. if(QueueSize > maxLogQueueLength)
  412. OERRLOG("LOGGING QUEUE SIZE %d EXCEEDED MaxLogQueueLength %d, check the logging server.",QueueSize, maxLogQueueLength);
  413. if(QueueSize!=0 && QueueSize % signalGrowingQueueAt == 0)
  414. OERRLOG("Logging Queue at %d records. Check the logging server.",QueueSize);
  415. logQueue.enqueue(LINK(jobToWrite));
  416. }
  417. }
  418. IEspUpdateLogRequestWrap* CLogThread::readJobQueue()
  419. {
  420. #define LOG_LEVEL LogNormal
  421. unsigned startTime = (getEspLogLevel()>=LOG_LEVEL) ? msTick() : 0;
  422. CriticalBlock b(logQueueCrit);
  423. unsigned delta = (getEspLogLevel()>=LOG_LEVEL) ? msTick() - startTime : 0;
  424. if (delta > 1) // <=1ms is not indicative of an unexpected delay
  425. ESPLOG(LOG_LEVEL, "LThread:waitRQ: %dms", delta);
  426. return (IEspUpdateLogRequestWrap*)logQueue.dequeue();
  427. #undef LOG_LEVEL
  428. }