Browse Source

Merge pull request #11143 from AttilaVamos/HPCC-19267-improvement-7.0.0

HPCC-19267 Add a flushDiskCache option, switchable by //version, to the regression suite

Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 7 years ago
parent
commit
1e680c0e93

+ 50 - 24
testing/regress/README.rst

@@ -28,6 +28,7 @@ Result:
 |                       [--runclass class[,class,...]]
 |                       [--excludeclass class[,class,...]]
 |                       [--handleEclccWarningFile]
+|                       [--flushDiskCache]
 |                       {list,setup,run,query} ...
 | 
 |       HPCC Platform Regression suite
@@ -63,6 +64,8 @@ Result:
 |        --handleEclccWarningFile, -w
 |                                 Create/overwrite/delete ECLCC warning file.
 |        --jobnamesuffix suffix   Specify workunit job name suffix.
+|        --flushDiskCache         Flush OS (Linux) Disk Cache before execute ECL code
+|                                 (sudo privileges needed). Ignored when --pq <n> > 1
 |
 
 Important!
@@ -91,10 +94,10 @@ Result:
 |
 |       usage: ecl-test list [-h] [--config [CONFIG]] [--loglevel [{info,debug}]]
 |                            [--runclass class[,class,...]]
-|                     [--excludeclass class[,class,...]]
-|                     [--jobnamesuffix suffix] [--clusters] [--setup] [--run]
-|                     [--target [target_cluster_list | all]]
-|                     [--createEclRunArg]
+|                            [--excludeclass class[,class,...]]
+|                            [--jobnamesuffix suffix] [--flushDiskCache] [--clusters]
+|                            [--setup] [--run] [--target [target_cluster_list | all]]
+|                            [--createEclRunArg]
 |
 |       optional arguments:
 |        -h, --help               Show this help message and exit
@@ -109,6 +112,8 @@ Result:
 |                                 'none'
 |        --jobnamesuffix suffix
 |                                 Specify workunit job name suffix.
+|        --flushDiskCache         Flush OS (Linux) Disk Cache before execute ECL code
+|                                 (sudo privileges needed). Ignored when --pq <n> > 1
 |        --clusters               Print target clusters from config (ecl-test.json by
 |                                 default).
 |        --setup                  Print testcases executed in setup.
@@ -143,7 +148,9 @@ Result:
 |                             [--noversion]
 |                             [--runclass class[,class,...]]
 |                             [--excludeclass class[,class,...]]
+|                             [--jobnamesuffix suffix] [--flushDiskCache]
 |                             [--target [target_cluster_list | all]]
+|                             [--handleEclccWarningFile]
 |
 |       optional arguments:
 |        -h, --help               Show this help message and exit
@@ -166,11 +173,15 @@ Result:
 |                                 Run subclass(es) of the suite. Default value is 'all'
 |        --excludeclass class[,class,...], -e class[,class,...]
 |                                 Exclude subclass(es) of the suite. Default value is 'none'
-|        --handleEclccWarningFile, -w
-|                                 Create/overwrite/delete ECLCC warning file.
 |        --jobnamesuffix suffix   Specify workunit job name suffix.
+|        --flushDiskCache         Flush OS (Linux) Disk Cache before execute ECL code
+|                                 (sudo privileges needed). Ignored when --pq <n> > 1
 |        --target [target_cluster_list | all], -t [target_cluster_list | all]
-|                                 Run the setup on target cluster(s). If target = 'all' then run setup on all clusters. If undefined the config 'defaultSetupClusters' value will be used.
+|                                 Run the setup on target cluster(s). If target = 'all'
+|                                 then run setup on all clusters. If not defined then
+|                                 default value(s) come from config (ecl-test.json by default).
+|        --handleEclccWarningFile, -w
+|                                 Create/overwrite/delete ECLCC warning file
 |
 
 Parameters of Regression Suite run sub-command:
@@ -191,12 +202,12 @@ Result:
 |                           [--ignoreResult]
 |                           [-X name1=value1[,name2=value2...]]
 |                           [-f optionA=valueA[,optionB=valueB...]]
-|                           [--pq threadNumber]
-|                           [--noversion]
-|                           [--runclass class[,class,...]]
+|                           [--pq threadNumber] [--noversion]
+|                           [--server [networkAddress]] [--runclass class[,class,...]]
 |                           [--excludeclass class[,class,...]]
-|                           [--target [target_cluster_list | all]]
-|                           [--publish]
+|                           [--jobnamesuffix suffix] [--flushDiskCache]
+|                           [--target [target_cluster_list | all]] [--publish]
+|                           [--handleEclccWarningFile]
 |
 |       optional arguments:
 |        -h, --help               Show this help message and exit
@@ -215,16 +226,23 @@ Result:
 |                                 Set an ECL option (equivalent to #option).
 |        --pq threadNumber        Parallel query execution with threadNumber threads. (If threadNumber is '-1' on a single node system then threadNumber = numberOfLocalCore * 2)
 |        --noversion              Avoid version expansion of queries. Execute them as a standard test.
+|        --server [networkAddress]
+|                                 ESP server address. Default value (espIp) defined in
+|                                 ecl-test.json config file.
 |        --runclass class[,class,...], -r class[,class,...]
 |                                 Run subclass(es) of the suite. Default value is 'all'
 |        --excludeclass class[,class,...], -e class[,class,...]
 |                                 Exclude subclass(es) of the suite. Default value is 'none'
-|        --handleEclccWarningFile, -w
-|                                 Create/overwrite/delete ECLCC warning file.
 |        --jobnamesuffix suffix   Specify workunit job name suffix.
+|        --flushDiskCache         Flush OS (Linux) Disk Cache before execute ECL code
+|                                 (sudo privileges needed). Ignored when --pq <n> > 1
 |        --target [target_cluster_list | all], -t [target_cluster_list | all]
-|                                 Run the setup on target cluster(s). If target = 'all' then run setup on all clusters. If undefined the config 'defaultSetupClusters' value will be used.
+|                                 Run the cluster(s) suite. If target = 'all' then run
+|                                 suite on all clusters. If not defined then default
+|                                 value(s) come from config (ecl-test.json by default).
 |        --publish, -p            Publish compiled query instead of run.
+|        --handleEclccWarningFile, -w
+|                                 Create/overwrite/delete ECLCC warning file.
 |
 
 
@@ -246,12 +264,13 @@ Result:
 |                             [--ignoreResult]
 |                             [-X name1=value1[,name2=value2...]]
 |                             [-f optionA=valueA[,optionB=valueB...]]
-|                             [--pq threadNumber]
-|                             [--noversion]
+|                             [--pq threadNumber]  [--noversion]
+|                             [--server [networkAddress]]
 |                             [--runclass class[,class,...]]
 |                             [--excludeclass class[,class,...]]
-|                             [--target [target_cluster_list | all]]
-|                             [--publish]
+|                             [--jobnamesuffix suffix] [--flushDiskCache]
+|                             [--target [target_cluster_list | all]] [--publish]
+|                             [--handleEclccWarningFile]
 |                             ECL_query [ECL_query ...]
 |
 |       positional arguments:
@@ -274,16 +293,22 @@ Result:
 |                                 Set an ECL option (equivalent to #option).
 |        --pq threadNumber        Parallel query execution with threadNumber threads. (If threadNumber is '-1' on a single node system then threadNumber = numberOfLocalCore * 2)
 |        --noversion              Avoid version expansion of queries. Execute them as a standard test.
+|        --server [networkAddress]
+|                                 ESP server address. Default value (espIp) defined in ecl-test.json config file.
 |        --runclass class[,class,...], -r class[,class,...]
 |                                 Run subclass(es) of the suite. Default value is 'all'
 |        --excludeclass class[,class,...], -e class[,class,...]
 |                                 Exclude subclass(es) of the suite. Default value is 'none'
-|        --handleEclccWarningFile, -w
-|                                 Create/overwrite/delete ECLCC warning file.
-|        --jobnamesuffix suffix   Specify workunit job name suffix.
+|        --jobnamesuffix suffix
+|                                 Specify workunit job name suffix.
+|        --flushDiskCache         Flush OS (Linux) Disk Cache before execute ECL code (sudo privileges needed). Ignored when --pq <n> > 1
 |        --target [target_cluster_list | all], -t [target_cluster_list | all]
-|                                 Run the setup on target cluster(s). If target = 'all' then run setup on all clusters. If undefined the config 'defaultSetupClusters' value will be used.
-|        --publish, -p            Publish compiled query instead of run.
+|                                 Target cluster(s) for query to run. If target = 'all'
+|                                 then run query on all clusters. If not defined then
+|                                 default value(s) come from config (ecl-test.json by default).
+|         --publish, -p           Publish compiled query instead of run.
+|         --handleEclccWarningFile, -w
+|                                 Create/overwrite/delete ECLCC warning file.
 |
 
 Steps to run Regression Suite
@@ -626,6 +651,7 @@ The format of the output is the same as 'run', except there is a log, result and
     The regression suite engine executes the file once for each //version line in the file. It is compiled with command line option -Dn1=v1 -Dn2=v2 etc.
     The string value should be quoted with \'.
     Optionally, 'no<target>' exclusion info can be added at the end of the tag.
+    The special variable 'flushDiskCache' with the value 'true' can be used to force an OS (Linux) disk cache flush before executing the ECL code.
 //version <n1>=<v1>,<n2>=<v2>,...[,no<target>[,no<target>]]
 
     This tag should be used when a test case intentionally fails, so that the failure is handled as a pass.

+ 25 - 5
testing/regress/ecl-test

@@ -27,11 +27,13 @@ import glob
 
 from hpcc.util import argparse
 from hpcc.regression.regress import Regression
-from hpcc.util.ecl.file import ECLFile
-from hpcc.util.util import setConfig, checkPqParam, getVersionNumbers, checkXParam, convertPath, getRealIPAddress, checkClusters, checkHpccStatus, getEclRunArgs
+from hpcc.util.util import setConfig, checkPqParam, getVersionNumbers, checkXParam, convertPath
+from hpcc.util.util import getRealIPAddress, checkClusters, checkHpccStatus
+from hpcc.util.util import getEclRunArgs, isSudoer
 from hpcc.util.expandcheck import ExpandCheck
 from hpcc.common.error import Error
 from hpcc.common.config import Config
+from hpcc.common.logger import Logger
 
 # For coverage
 if ('coverage' in os.environ) and (os.environ['coverage'] == '1'):
@@ -116,7 +118,7 @@ class RegressMain:
                     logging.error("%s. No ECL file match for cluster:'%s'!" % (1,  self.args.target))
                     raise Error("4001")
             except IOError:
-                logging.error("%s. Query %s does not exist!" % (1,  eclfile.getBaseEcl()))
+                logging.error("%s. Some query does not exist!" % (0))
                 exit()
 
     def setup(self):
@@ -143,6 +145,7 @@ class RegressMain:
         description = 'HPCC Platform Regression suite'
         pythonVer = getVersionNumbers()
         defaultConfigFile="ecl-test.json"
+        self.log = Logger('info')
 
         if (pythonVer['main'] <= 2) and (pythonVer['minor'] <=6) and (pythonVer['patch'] <6):
             print "\nError!"
@@ -193,6 +196,9 @@ class RegressMain:
                                 nargs=1,  default = ['none'],   metavar="class[,class,...]")
         executionParser.add_argument('--jobnamesuffix', help="Specify workunit job name suffix.", default='',
                                 metavar="suffix")
+        executionParser.add_argument('--flushDiskCache', help="Flush OS (Linux) Disk Cache before execute ECL code (sudo privileges needed). Ignored when --pq <n>  > 1"
+                                , action = 'store_true')
+
 
         parser = argparse.ArgumentParser(prog=prog, description=description,  parents=[helperParser, commonParser,  executionParser])
 
@@ -262,6 +268,17 @@ class RegressMain:
         if ('server' in self.args) and (self.args.server != None):
             self.config.set('espIp',  self.args.server)
             pass
+
+        if self.args.flushDiskCache and not isSudoer():
+            err = Error("7000")
+            logging.error("%s. clearOSCache error:%s" % (-1,  err))
+            exit(err.getErrorCode())
+
+        # There is no point in clearing the disk cache if the same test is running in parallel by versioning
+        if self.args.pq > 1:
+            self.args.flushDiskCache = False
+
+        self.config.set('log',  self.log)
         setConfig(self.config)
 
         # Process target parameter
@@ -329,11 +346,14 @@ class RegressMain:
                 self.run()
         except Error as e:
             logging.critical(e)
-            exit(e.getErrorCode());
+            logging.critical(traceback.format_exc())
+            exit(e.getErrorCode())
         except Exception as e:
-            logging.critical("Regression Test Engine internal error")
+            err = Error("6007")
+            logging.critical(" RegressMain error:%s" % (-1,  err))
             logging.critical(e)
             logging.critical(traceback.format_exc())
+            exit(e.getErrorCode())
         except KeyboardInterrupt:
             logging.critical("Keyboard Interrupt Caught.")
         finally:

+ 3 - 1
testing/regress/hpcc/common/error.py

@@ -35,7 +35,9 @@ ERROR = {
     "6003": "Parameter error when try to call ecl command!",
     "6004": "Can't connect to remote HPCC System!",
     "6005": "Syntax error in //skip tag!",
-    "6006": "Error in build suite"
+    "6006": "Error in build suite",
+    "6007": "Regression Test Engine internal error",
+    "7000": "You have not enough privilege to use '--flushDiskCache' parameter. Check sudoer settings or try it with sudo."
 }
 
 

+ 1 - 1
testing/regress/hpcc/common/logger.py

@@ -115,7 +115,6 @@ class Logger(object):
                 msg = self.format(record)
                 stream = self.stream
                 isBuffer = hasattr(record, 'filebuffer')
-                toSort = hasattr(record,  'filesort')
                 taskId = 0
                 if hasattr(record, 'taskId'):
                     taskId = getattr(record,  'taskId')
@@ -155,6 +154,7 @@ class Logger(object):
             except (KeyboardInterrupt, SystemExit):
                 raise
             except Exception as ex:
+                self.handleError(str(ex))
                 self.handleError(record)
 
     def addHandler(self, fd, level='info'):

+ 12 - 4
testing/regress/hpcc/regression/regress.py

@@ -26,7 +26,6 @@ import threading
 import inspect
 
 from ..common.error import Error
-from ..common.logger import Logger
 from ..common.report import Report
 from ..regression.suite import Suite
 from ..util.ecl.cc import ECLCC
@@ -50,7 +49,11 @@ class Regression:
         self.args = args
         self.config = getConfig()
         self.suites = {}
-        self.log = Logger(args.loglevel)
+
+        # Use the existing logger instance
+        self.log = self.config.log
+        self.log.setLevel(args.loglevel)
+
         if args.timeout == '0':
             self.timeout = int(self.config.timeout);
         else:
@@ -249,7 +252,7 @@ class Regression:
                             self.taskParam[startThreadId]['jobName'] = query.getJobname()
                             self.taskParam[startThreadId]['retryCount'] = int(self.config.maxAttemptCount)
                             self.exitmutexes[startThreadId].acquire()
-                            sysThreadId = thread.start_new_thread(self.runQuery, (cluster, query, report, cnt, suite.testPublish(query.ecl),  startThreadId))
+                            thread.start_new_thread(self.runQuery, (cluster, query, report, cnt, suite.testPublish(query.ecl),  startThreadId))
                             started = True
                             break
 
@@ -438,6 +441,11 @@ class Regression:
             suite.close()
             self.closeLogging()
 
+        except Error as e:
+            self.StopTimeoutThread()
+            suite.close()
+            raise(e)
+
         except Exception as e:
             self.StopTimeoutThread()
             suite.close()
@@ -474,7 +482,7 @@ class Regression:
                 self.timeouts[threadId] = self.timeout
             self.retryCount = int(self.config.maxAttemptCount)
             self.exitmutexes[threadId].acquire()
-            sysThreadId = thread.start_new_thread(self.runQuery, (cluster, eclfile, report, cnt, eclfile.testPublish(),  threadId))
+            thread.start_new_thread(self.runQuery, (cluster, eclfile, report, cnt, eclfile.testPublish(),  threadId))
             time.sleep(0.1)
             self.CheckTimeout(cnt, threadId,  eclfile)
 

+ 10 - 1
testing/regress/hpcc/util/ecl/command.py

@@ -21,10 +21,11 @@ import logging
 import os
 import sys
 import inspect
+import traceback
 
 from ...common.shell import Shell
 from ...common.error import Error
-from ...util.util import queryWuid, getConfig
+from ...util.util import queryWuid, getConfig, clearOSCache
 
 import xml.etree.ElementTree as ET
 
@@ -96,6 +97,9 @@ class ECLcmd(Shell):
         state = ""
         results=''
         try:
+            if eclfile.flushDiskCache():
+                clearOSCache()
+                pass
             #print "runCmd:", args
             results, stderr = self.__ECLcmd()(*args)
             logging.debug("%3d. results:'%s'", eclfile.getTaskId(),  results)
@@ -140,6 +144,11 @@ class ECLcmd(Shell):
             data = str(err)
             logging.error("------" + err + "------")
             raise err
+        except:
+            err = Error("6007")
+            logging.critical(err)
+            logging.critical(traceback.format_exc())
+            raise err
         finally:
             res = queryWuid(eclfile.getJobname(), eclfile.getTaskId())
             logging.debug("%3d. in finally -> 'wuid':'%s', 'state':'%s', data':'%s', ", eclfile.getTaskId(), res['wuid'], res['state'], data)

+ 15 - 0
testing/regress/hpcc/util/ecl/file.py

@@ -89,6 +89,10 @@ class ECLFile:
         if self.jobNameSuffix != '':
             self.jobNameSuffix = '-' + self.jobNameSuffix
 
+        self.isFlushDiskCache = False
+        if self.args.flushDiskCache:
+            self.isFlushDiskCache = True
+
         #If there is a --publish CL parameter then force publish this ECL file
         self.forcePublish=False
         if 'publish' in self.args:
@@ -498,6 +502,13 @@ class ECLFile:
         self.elapsTime = time
 
     def setJobnameVersion(self,  version):
+        # Overrides the global flushDiskCache parameter if --pq <= 1
+        # There is no sense to clear disk cache if same test running parallel by versioning
+        if ('flushDiskCache=true' in version) and (self.args.pq in (0, 1)):
+            self.isFlushDiskCache = True
+        if 'flushDiskCache=false' in version:
+            self.isFlushDiskCache = False
+
         # convert this kind of version string
         #  'multiPart=false,useSequential=true'
         # to this
@@ -626,3 +637,7 @@ class ECLFile:
     def getEclccWarningChanges(self):
         # return with self.eclccWarningChanges
         return self.eclccWarningChanges+"\n"
+
+    def flushDiskCache(self):
+        logging.debug("%3d. isFlushDiskCache (ecl:'%s'): '%s')" % (self.taskId,  self.ecl, str(self.isFlushDiskCache)))
+        return self.isFlushDiskCache

+ 30 - 0
testing/regress/hpcc/util/util.py

@@ -22,6 +22,8 @@ import platform
 import logging
 import os
 import subprocess
+import sys
+import traceback
 
 from ..common.error import Error
 from ..common.shell import Shell
@@ -297,3 +299,31 @@ def checkHpccStatus():
 
     finally:
         pass
+
+def isSudoer():
+    retVal = False
+    if 'linux' in sys.platform :
+        myProc = subprocess.Popen(["timeout -k 2 2 sudo id && echo Access granted || echo Access denied"], shell=True, bufsize=8192, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        (myStdout,  myStderr) = myProc.communicate()
+        result = "returncode:" + str(myProc.returncode) + ", stdout:\n'" + myStdout + "', stderr:\n'" + myStderr + "'."
+        logging.debug("%3d. isSudoer() result is: '%s'",  -1, result)
+        if 'Access denied' not in myStdout:
+            retVal = True
+
+    return retVal
+
+def clearOSCache():
+    if 'linux' in sys.platform :
+        if isSudoer():
+            myProc = subprocess.Popen(["free; sudo -S sync; echo 3 | sudo tee /proc/sys/vm/drop_caches; free"], shell=True, bufsize=8192, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            (myStdout,  myStderr) = myProc.communicate()
+            result = "returncode:" + str(myProc.returncode) + ", stdout:\n'" + myStdout + "', stderr:\n'" + myStderr + "'."
+            logging.debug("%3d. clearOSCache() result is: '%s'",  -1, result)
+        else:
+            err = Error("7000")
+            logging.error("%s. clearOSCache error:%s" % (-1,  err))
+            logging.critical(traceback.format_exc())
+            raise Error(err)
+    else:
+        logging.debug("%3d. clearOSCache() not supported on %s.",  -1, sys.platform)
+    pass