Browse Source

HPCC-9629 New language features to support bundles

Add evaluate mode to ECL compiler (to extract bundle information) and various
flags to allow bundles to be parsed without picking up extraneous files.

Option to suppress logging by eclcc (otherwise sudo ecl bundle install
tends to leave locked logfiles around...)

Fix trivial memory leak in file hook plugins.

Add ecl-bundle program for bundle file manipulation

Add BundleBase to Std library, and provide an example bundle
implementation.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 12 năm trước cách đây
mục cha
commit
51f204fb42

+ 1 - 0
common/remote/hooks/git/gitfile.cpp

@@ -453,6 +453,7 @@ extern GITFILE_API void removeFileHook()
         if (gitRepositoryFileHook)
         {
             removeContainedFileHook(gitRepositoryFileHook);
+            delete gitRepositoryFileHook;
             gitRepositoryFileHook = NULL;
         }
     }

+ 34 - 1
common/remote/hooks/libarchive/archive.cpp

@@ -110,6 +110,9 @@ public:
         mode = archive_entry_filetype(entry);
         filesize = archive_entry_size(entry);
         path.set(archive_entry_pathname(entry));
+        accessTime = archive_entry_atime(entry);
+        createTime = archive_entry_ctime(entry);
+        modifiedTime = archive_entry_mtime(entry);
     }
     bool isDir() const
     {
@@ -123,10 +126,28 @@ public:
     {
         return path.get();
     }
+    CDateTime &getAccessTime(CDateTime &t) // Copies the stored accessTime value into t
+    {
+        t.set(accessTime);
+        return t; // the caller's object is handed back so the call can be chained
+    }
+    CDateTime &getCreateTime(CDateTime &t) // Copies the stored createTime value into t
+    {
+        t.set(createTime);
+        return t;
+    }
+    CDateTime &getModifiedTime(CDateTime &t) // Copies the stored modifiedTime value into t
+    {
+        t.set(modifiedTime);
+        return t;
+    }
 private:
     unsigned mode;
     offset_t filesize;
     StringAttr path;
+    time_t accessTime;
+    time_t createTime;
+    time_t modifiedTime;
 };
 
 // IFileIO implementation for reading out of libarchive-supported archives
@@ -267,7 +288,18 @@ public:
     }
     virtual bool getTime(CDateTime * createTime, CDateTime * modifiedTime, CDateTime * accessedTime)
     {
-        UNIMPLEMENTED; // MORE - maybe could implement if required
+        if (entry) // every out-parameter is optional: only non-NULL pointers are filled in
+        {
+            if (accessedTime)
+                entry->getAccessTime(*accessedTime);
+            if (createTime)
+                entry->getCreateTime(*createTime);
+            if (modifiedTime) // must test the pointer dereferenced on the next line, not accessedTime
+                entry->getModifiedTime(*modifiedTime);
+            return true;
+        }
+        else
+            return false; // no entry to read the times from
     }
     virtual fileBool isDirectory()
     {
@@ -550,6 +582,7 @@ extern ARCHIVEFILE_API void removeFileHook()
         if (archiveFileHook)
         {
             removeContainedFileHook(archiveFileHook);
+            delete archiveFileHook;
             archiveFileHook = NULL;
         }
     }

+ 1 - 0
common/remote/hooks/libarchive/archive.hpp

@@ -36,4 +36,5 @@ extern "C" {
   extern ARCHIVEFILE_API void installFileHook();
   extern ARCHIVEFILE_API void removeFileHook();
 };
+
 #endif

+ 1 - 0
ecl/CMakeLists.txt

@@ -21,6 +21,7 @@ HPCC_ADD_SUBDIRECTORY (eclcmd "CLIENTTOOLS")
 HPCC_ADD_SUBDIRECTORY (eclscheduler "PLATFORM")
 HPCC_ADD_SUBDIRECTORY (eclplus "CLIENTTOOLS")
 HPCC_ADD_SUBDIRECTORY (ecl-package "CLIENTTOOLS")
+HPCC_ADD_SUBDIRECTORY (ecl-bundle "CLIENTTOOLS")
 HPCC_ADD_SUBDIRECTORY (hql)
 HPCC_ADD_SUBDIRECTORY (hqlcpp)
 HPCC_ADD_SUBDIRECTORY (hthor)

+ 69 - 0
ecl/ecl-bundle/CMakeLists.txt

@@ -0,0 +1,69 @@
+################################################################################
+#    HPCC SYSTEMS software Copyright (C) 2013 HPCC Systems.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+################################################################################
+
+
+# Component: ecl-bundle
+#####################################################
+# Description:
+# ------------
+#    Cmake Input File for ecl-bundle
+#####################################################
+
+
+project( ecl-bundle )
+
+include(${HPCC_SOURCE_DIR}/esp/scm/smcscm.cmake)
+
+set (    SRCS
+         ${ESPSCM_GENERATED_DIR}/common_esp.cpp
+         ${ESPSCM_GENERATED_DIR}/ws_smc_esp.cpp
+         ecl-bundle.cpp
+         ${HPCC_SOURCE_DIR}/ecl/eclcmd/eclcmd_shell.cpp
+         ${HPCC_SOURCE_DIR}/ecl/eclcmd/eclcmd_common.hpp
+         ${HPCC_SOURCE_DIR}/ecl/eclcmd/eclcmd_common.cpp
+    )
+
+include_directories (
+         ${CMAKE_BINARY_DIR}
+         ${CMAKE_BINARY_DIR}/oss
+         ${HPCC_SOURCE_DIR}/system/include
+         ${HPCC_SOURCE_DIR}/system/jlib
+         ${HPCC_SOURCE_DIR}/common/workunit
+         ${HPCC_SOURCE_DIR}/common/remote
+         ${HPCC_SOURCE_DIR}/esp/clients
+         ${HPCC_SOURCE_DIR}/esp/bindings
+         ${HPCC_SOURCE_DIR}/esp/bindings/SOAP/xpp
+         ${HPCC_SOURCE_DIR}/esp/platform
+         ${HPCC_SOURCE_DIR}/system/security/shared
+         ${HPCC_SOURCE_DIR}/system/include
+         ${HPCC_SOURCE_DIR}/system/xmllib
+         ${HPCC_SOURCE_DIR}/ecl/eclcmd
+    )
+
+ADD_DEFINITIONS( -D_CONSOLE )
+
+HPCC_ADD_EXECUTABLE ( ecl-bundle ${SRCS} )
+install ( TARGETS ecl-bundle RUNTIME DESTINATION ${EXEC_DIR} )
+target_link_libraries ( ecl-bundle
+        jlib
+        esphttp
+        workunit
+    )
+
+if ( UNIX )
+    install ( PROGRAMS ecl-bundle.install DESTINATION etc/init.d/install COMPONENT Runtime )
+    install ( PROGRAMS ecl-bundle.uninstall DESTINATION etc/init.d/uninstall COMPONENT Runtime )
+endif ( UNIX )

Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 1277 - 0
ecl/ecl-bundle/ecl-bundle.cpp


+ 1 - 0
ecl/ecl-bundle/ecl-bundle.install

@@ -0,0 +1 @@
+installFile "$binPath/ecl-bundle" "/usr/bin/ecl-bundle" 1 || exit 1

+ 1 - 0
ecl/ecl-bundle/ecl-bundle.uninstall

@@ -0,0 +1 @@
+removeSymlink "/usr/bin/ecl-bundle"

+ 144 - 12
ecl/eclcc/eclcc.cpp

@@ -26,6 +26,7 @@
 #include "workunit.hpp"
 
 #include "hqlecl.hpp"
+#include "hqlir.hpp"
 #include "hqlerrors.hpp"
 #include "hqlwuerr.hpp"
 #include "hqlfold.hpp"
@@ -203,6 +204,7 @@ public:
         logTimings = false;
         optArchive = false;
         optCheckEclVersion = true;
+        optEvaluateResult = false;
         optGenerateMeta = false;
         optGenerateDepend = false;
         optIncludeMeta = false;
@@ -210,10 +212,14 @@ public:
         optShared = false;
         optWorkUnit = false;
         optNoCompile = false;
+        optNoLogFile = false;
+        optNoStdInc = false;
+        optNoBundles = false;
         optOnlyCompile = false;
         optBatchMode = false;
         optSaveQueryText = false;
         optGenerateHeader = false;
+        optShowPaths = false;
         optTargetClusterType = HThorCluster;
         optTargetCompiler = DEFAULT_COMPILER;
         optThreads = 0;
@@ -241,6 +247,7 @@ protected:
     bool checkWithinRepository(StringBuffer & attributePath, const char * sourcePathname);
     IFileIO * createArchiveOutputFile(EclCompileInstance & instance);
     ICppCompiler *createCompiler(const char * coreName, const char * sourceDir = NULL, const char * targetDir = NULL);
+    void evaluateResult(EclCompileInstance & instance);
     bool generatePrecompiledHeader();
     void generateOutput(EclCompileInstance & instance);
     void instantECL(EclCompileInstance & instance, IWorkUnit *wu, const char * queryFullName, IErrorReceiver *errs, const char * outputFile);
@@ -264,6 +271,7 @@ protected:
 protected:
     Owned<IEclRepository> pluginsRepository;
     Owned<IEclRepository> libraryRepository;
+    Owned<IEclRepository> bundlesRepository;
     Owned<IEclRepository> includeRepository;
     const char * programName;
 
@@ -272,8 +280,12 @@ protected:
     StringBuffer hooksPath;
     StringBuffer templatePath;
     StringBuffer eclLibraryPath;
+    StringBuffer eclBundlePath;
     StringBuffer stdIncludeLibraryPath;
     StringBuffer includeLibraryPath;
+    StringBuffer compilerPath;
+    StringBuffer libraryPath;
+
     StringBuffer cclogFilename;
     StringAttr optLogfile;
     StringAttr optIniFilename;
@@ -304,17 +316,22 @@ protected:
     bool logTimings;
     bool optArchive;
     bool optCheckEclVersion;
+    bool optEvaluateResult;
     bool optGenerateMeta;
     bool optGenerateDepend;
     bool optIncludeMeta;
     bool optWorkUnit;
     bool optNoCompile;
+    bool optNoLogFile;
+    bool optNoStdInc;
+    bool optNoBundles;
     bool optBatchMode;
     bool optShared;
     bool optOnlyCompile;
     bool optSaveQueryText;
     bool optLegacy;
     bool optGenerateHeader;
+    bool optShowPaths;
     int argc;
     const char **argv;
 };
@@ -364,7 +381,6 @@ static int doMain(int argc, const char *argv[])
     EclCC processor(argc, argv);
     if (!processor.parseCommandLineOptions(argc, argv))
         return 1;
-
     try
     {
         if (!processor.processFiles())
@@ -481,8 +497,6 @@ void EclCC::loadOptions()
 
     globals.setown(createProperties(optIniFilename, true));
 
-    StringBuffer compilerPath, libraryPath;
-
     if (globals->hasProp("targetGcc"))
         optTargetCompiler = globals->getPropBool("targetGcc") ? GccCppCompiler : Vs6CppCompiler;
 
@@ -501,13 +515,14 @@ void EclCC::loadOptions()
         extractOption(hooksPath, globals, "HPCC_FILEHOOKS_PATH", "filehooks", syspath, "filehooks");
         extractOption(templatePath, globals, "ECLCC_TPL_PATH", "templatePath", syspath, "componentfiles");
         extractOption(eclLibraryPath, globals, "ECLCC_ECLLIBRARY_PATH", "eclLibrariesPath", syspath, "share/ecllibrary/");
+        extractOption(eclBundlePath, globals, "ECLCC_ECLBUNDLE_PATH", "eclBundlesPath", syspath, "share/bundles/");
     }
     extractOption(stdIncludeLibraryPath, globals, "ECLCC_ECLINCLUDE_PATH", "eclIncludePath", ".", NULL);
 
-    if (!optLogfile.length() && !optBatchMode)
+    if (!optLogfile.length() && !optBatchMode && !optNoLogFile)
         extractOption(optLogfile, globals, "ECLCC_LOGFILE", "logfile", "eclcc.log", NULL);
 
-    if (logVerbose || optLogfile)
+    if ((logVerbose || optLogfile) && !optNoLogFile)
     {
         if (optLogfile.length())
         {
@@ -820,7 +835,12 @@ bool EclCC::checkWithinRepository(StringBuffer & attributePath, const char * sou
         return false;
 
     StringBuffer searchPath;
-    searchPath.append(eclLibraryPath).append(ENVSEPCHAR).append(stdIncludeLibraryPath).append(ENVSEPCHAR).append(includeLibraryPath);
+    searchPath.append(eclLibraryPath).append(ENVSEPCHAR);
+    if (!optNoBundles)
+        searchPath.append(eclBundlePath).append(ENVSEPCHAR);
+    if (!optNoStdInc)
+        searchPath.append(stdIncludeLibraryPath).append(ENVSEPCHAR);
+    searchPath.append(includeLibraryPath);
 
     StringBuffer expandedSourceName;
     makeAbsolutePath(sourcePathname, expandedSourceName);
@@ -828,6 +848,72 @@ bool EclCC::checkWithinRepository(StringBuffer & attributePath, const char * sou
     return findFilenameInSearchPath(attributePath, searchPath, expandedSourceName);
 }
 
+void EclCC::evaluateResult(EclCompileInstance & instance) // Folds instance.query and prints the resulting value as ECL text on stdout; throws if it cannot be reduced to a value
+{
+    IHqlExpression *query = instance.query;
+    if (query->getOperator()==no_output) // peel off wrapper operators, each tested once and in this fixed order
+        query = query->queryChild(0);
+    if (query->getOperator()==no_datasetfromdictionary)
+        query = query->queryChild(0);
+    if (query->getOperator()==no_selectfields)
+        query = query->queryChild(0);
+    if (query->getOperator()==no_createdictionary)
+        query = query->queryChild(0);
+    OwnedHqlExpr folded = foldHqlExpression(query, NULL, HFOthrowerror|HFOloseannotations|HFOforcefold|HFOfoldfilterproject|HFOconstantdatasets);
+    StringBuffer out; // text is accumulated here and printed only once everything has been generated
+    IValue *result = folded->queryValue();
+    if (result) // case 1: the folded expression itself has a value
+        result->generateECL(out);
+    else if (folded->getOperator()==no_list) // case 2: a list - every child must have a value
+    {
+        out.append('[');
+        ForEachChild(idx, folded)
+        {
+            IHqlExpression *child = folded->queryChild(idx);
+            if (idx) // separator before every element except the first
+                out.append(", ");
+            result = child->queryValue();
+            if (result)
+                result->generateECL(out);
+            else
+                throw MakeStringException(1, "Expression cannot be evaluated");
+        }
+        out.append(']');
+    }
+    else if (folded->getOperator()==no_inlinetable) // case 3: an inline table - only child 0 of its transform list is examined
+    {
+        IHqlExpression *transformList = folded->queryChild(0);
+        if (transformList && transformList->getOperator()==no_transformlist)
+        {
+            IHqlExpression *transform = transformList->queryChild(0);
+            assertex(transform && transform->getOperator()==no_transform);
+            out.append('[');
+            ForEachChild(idx, transform)
+            {
+                IHqlExpression *child = transform->queryChild(idx);
+                assertex(child->getOperator()==no_assign);
+                if (idx)
+                    out.append(", ");
+                result = child->queryChild(1)->queryValue(); // child 1 of the assignment is the operand whose value is emitted
+                if (result)
+                    result->generateECL(out);
+                else
+                    throw MakeStringException(1, "Expression cannot be evaluated");
+            }
+            out.append(']');
+        }
+        else
+            throw MakeStringException(1, "Expression cannot be evaluated");
+    }
+    else
+    {
+#ifdef _DEBUG
+        EclIR::dump_ir(folded); // debug builds only: dump the expression that could not be handled before failing
+#endif
+        throw MakeStringException(1, "Expression cannot be evaluated");
+    }
+    printf("%s\n", out.str()); // reached only when no exception was thrown above
+}
 
 void EclCC::processSingleQuery(EclCompileInstance & instance,
                                IFileContents * queryContents,
@@ -924,7 +1010,7 @@ void EclCC::processSingleQuery(EclCompileInstance & instance,
 
             gatherWarnings(ctx.errs, instance.query);
 
-            if (instance.query && !syntaxChecking && !optGenerateMeta)
+            if (instance.query && !syntaxChecking && !optGenerateMeta && !optEvaluateResult)
                 instance.query.setown(convertAttributeToQuery(instance.query, ctx));
 
             if (instance.wu->getDebugValueBool("addTimingToWorkunit", true))
@@ -932,6 +1018,9 @@ void EclCC::processSingleQuery(EclCompileInstance & instance,
 
             if (optIncludeMeta || optGenerateMeta)
                 instance.generatedMeta.setown(parseCtx.getMetaTree());
+
+            if (optEvaluateResult && !errs->errCount() && instance.query)
+                evaluateResult(instance);
         }
         catch (IException *e)
         {
@@ -954,7 +1043,7 @@ void EclCC::processSingleQuery(EclCompileInstance & instance,
     if (instance.archive)
         return;
 
-    if (syntaxChecking || optGenerateMeta)
+    if (syntaxChecking || optGenerateMeta || optEvaluateResult)
         return;
 
     StringBuffer targetFilename;
@@ -1139,6 +1228,8 @@ void EclCC::processFile(EclCompileInstance & instance)
         EclRepositoryArray repositories;
         repositories.append(*LINK(pluginsRepository));
         repositories.append(*LINK(libraryRepository));
+        if (bundlesRepository)
+            repositories.append(*LINK(bundlesRepository));
 
         //Ensure that this source file is used as the definition (in case there are potential clashes)
         //Note, this will not override standard library files.
@@ -1313,7 +1404,14 @@ void EclCC::processReference(EclCompileInstance & instance, const char * queryAt
     if (optArchive || optGenerateDepend)
         instance.archive.setown(createAttributeArchive());
 
-    instance.dataServer.setown(createCompoundRepositoryF(pluginsRepository.get(), libraryRepository.get(), includeRepository.get(), NULL));
+    EclRepositoryArray repositories;
+    repositories.append(*LINK(pluginsRepository));
+    repositories.append(*LINK(libraryRepository));
+    if (bundlesRepository)
+        repositories.append(*LINK(bundlesRepository));
+    repositories.append(*LINK(includeRepository));
+    instance.dataServer.setown(createCompoundRepository(repositories));
+
     processSingleQuery(instance, NULL, queryAttributePath);
 
     if (instance.reportErrorSummary())
@@ -1378,6 +1476,20 @@ bool EclCC::processFiles()
     {
         processArgvFilename(inputFiles, inputFileNames.item(idx));
     }
+    if (optShowPaths)
+    {
+        loadOptions();
+        printf("CL_PATH=%s\n", compilerPath.str());
+        printf("ECLCC_ECLBUNDLE_PATH=%s\n", eclBundlePath.str());
+        printf("ECLCC_ECLINCLUDE_PATH=%s\n", stdIncludeLibraryPath.str());
+        printf("ECLCC_ECLLIBRARY_PATH=%s\n", eclLibraryPath.str());
+        printf("ECLCC_INCLUDE_PATH=%s\n", cppIncludePath.str());
+        printf("ECLCC_LIBRARY_PATH=%s\n", libraryPath.str());
+        printf("ECLCC_PLUGIN_PATH=%s\n", pluginsPath.str());
+        printf("ECLCC_TPL_PATH=%s\n", templatePath.str());
+        printf("HPCC_FILEHOOKS_PATH=%s\n", hooksPath.str());
+        return true;
+    }
     if (optGenerateHeader)
     {
         return generatePrecompiledHeader();
@@ -1393,10 +1505,14 @@ bool EclCC::processFiles()
 
 
     StringBuffer searchPath;
-    searchPath.append(stdIncludeLibraryPath).append(ENVSEPCHAR).append(includeLibraryPath);
+    if (!optNoStdInc)
+        searchPath.append(stdIncludeLibraryPath).append(ENVSEPCHAR);
+    searchPath.append(includeLibraryPath);
 
     Owned<IErrorReceiver> errs = createFileErrorReceiver(stderr);
     pluginsRepository.setown(createNewSourceFileEclRepository(errs, pluginsPath.str(), ESFallowplugins, logVerbose ? PLUGIN_DLL_MODULE : 0));
+    if (!optNoBundles)
+        bundlesRepository.setown(createNewSourceFileEclRepository(errs, eclBundlePath.str(), 0, 0));
     libraryRepository.setown(createNewSourceFileEclRepository(errs, eclLibraryPath.str(), 0, 0));
     includeRepository.setown(createNewSourceFileEclRepository(errs, searchPath.str(), 0, 0));
 
@@ -1563,6 +1679,15 @@ bool EclCC::parseCommandLineOptions(int argc, const char* argv[])
         else if (iter.matchOption(optLogfile, "--logfile"))
         {
         }
+        else if (iter.matchFlag(optNoLogFile, "--nologfile"))
+        {
+        }
+        else if (iter.matchFlag(optNoStdInc, "--nostdinc"))
+        {
+        }
+        else if (iter.matchFlag(optNoBundles, "--nobundles"))
+        {
+        }
         else if (iter.matchOption(optLogDetail, "--logdetail"))
         {
         }
@@ -1581,6 +1706,9 @@ bool EclCC::parseCommandLineOptions(int argc, const char* argv[])
         else if (iter.matchFlag(optGenerateDepend, "-Md"))
         {
         }
+        else if (iter.matchFlag(optEvaluateResult, "-Me"))
+        {
+        }
         else if (iter.matchFlag(optOutputFilename, "-o"))
         {
         }
@@ -1611,6 +1739,9 @@ bool EclCC::parseCommandLineOptions(int argc, const char* argv[])
                 return false;
             }
         }
+        else if (iter.matchFlag(optShowPaths, "-showpaths"))
+        {
+        }
         else if (iter.matchOption(optManifestFilename, "-manifest"))
         {
             if (!isManifestFileValid(optManifestFilename))
@@ -1686,14 +1817,14 @@ bool EclCC::parseCommandLineOptions(int argc, const char* argv[])
     }
 
     // Option post processing follows:
-    if (optArchive || optWorkUnit || optGenerateMeta || optGenerateDepend)
+    if (optArchive || optWorkUnit || optGenerateMeta || optGenerateDepend || optShowPaths)
         optNoCompile = true;
 
     loadManifestOptions();
 
     if (inputFileNames.ordinality() == 0)
     {
-        if (optGenerateHeader || (!optBatchMode && optQueryRepositoryReference))
+        if (optGenerateHeader || optShowPaths || (!optBatchMode && optQueryRepositoryReference))
             return true;
         ERRLOG("No input filenames supplied");
         return false;
@@ -1765,6 +1896,7 @@ const char * const helpText[] = {
     "!   -legacy       Use legacy import semantics (deprecated)",
     "    --logfile <file> Write log to specified file",
     "!   --logdetail=n Set the level of detail in the log file",
+    "!   --nologfile   Do not write any logfile",
 #ifdef _WIN32
     "!   -m            Enable leak checking",
 #endif

+ 10 - 7
ecl/eclcmd/eclcmd_common.hpp

@@ -204,17 +204,20 @@ public:
     virtual eclCmdOptionMatchIndicator matchCommandLineOption(ArgvIterator &iter, bool finalAttempt=false);
     virtual bool finalizeOptions(IProperties *globals);
 
-    virtual void usage()
+    virtual void usage(bool includeESP = true)
     {
         fprintf(stdout,
             "   --help                 display usage information for the given command\n"
             "   -v, --verbose          output additional tracing information\n"
-            "   -s, --server=<ip>      ip of server running ecl services (eclwatch)\n"
-            "   -ssl, --ssl            use SSL to secure the connection to the server\n"
-            "   --port=<port>          ecl services port\n"
-            "   -u, --username=<name>  username for accessing ecl services\n"
-            "   -pw, --password=<pw>   password for accessing ecl services\n"
-        );
+          );
+        if (includeESP)
+            fprintf(stdout,
+                "   -s, --server=<ip>      ip of server running ecl services (eclwatch)\n"
+                "   -ssl, --ssl            use SSL to secure the connection to the server\n"
+                "   --port=<port>          ecl services port\n"
+                "   -u, --username=<name>  username for accessing ecl services\n"
+                "   -pw, --password=<pw>   password for accessing ecl services\n"
+              );
     }
 public:
     StringAttr optServer;

+ 66 - 0
ecllibrary/std/BundleBase.ecl

@@ -0,0 +1,66 @@
+/*##############################################################################
+## HPCC SYSTEMS software Copyright (C) 2013 HPCC Systems.  All rights reserved.
+############################################################################## */
+
+EXPORT BundleBase := MODULE,VIRTUAL
+  /*
+   * Record format for Properties dictionary.
+   * @return Record format for Properties dictionary.
+   */
+  EXPORT PropertyRecord := { STRING key => STRING value };
+
+  /*
+   * Name of this bundle.
+   * @return Name
+   */
+  EXPORT STRING Name := '';
+
+  /*
+   * Description of this bundle.
+   * @return Description
+   */
+  EXPORT UTF8 Description := 'ECL Bundle';
+
+  /*
+   * List of strings containing author name(s).
+   * @return Authors list
+   */
+  /*
+   * List of strings containing author name(s).
+   * @return Authors list
+   */
+  EXPORT SET OF UTF8 Authors := [];
+
+  /*
+   * URL or text of license for this bundle. If not overridden by a bundle, the Apache
+   * license is assumed.
+   * @return License
+   */
+  EXPORT UTF8 License := 'http://www.apache.org/licenses/LICENSE-2.0';
+
+  /*
+   * Copyright message for this bundle.
+   * @return Copyright message
+   */
+  EXPORT UTF8 Copyright := '';
+
+  /*
+   * Dependencies. A set of strings containing names of any bundles that this bundle depends
+   * on. One or more versions or version ranges may be specified after the name, separated
+   * by spaces.
+   * @return Dependency list
+   */
+  EXPORT SET OF STRING DependsOn := [];
+
+  /*
+   * Version of this bundle. This should be of the form X.Y.Z, where X, Y and Z are integers.
+   * @return Version string
+   */
+  EXPORT STRING Version := '1.0.0';
+
+  /*
+   * Additional properties, represented as key-value pairs. Not presently used by the bundle system.
+   * @return Properties dictionary
+   */
+  EXPORT Properties := DICTIONARY([], PropertyRecord);
+END;

+ 192 - 0
initfiles/examples/Bundles/Bloom.ecl

@@ -0,0 +1,192 @@
+EXPORT Bloom := MODULE,FORWARD
+  IMPORT Std;
+  EXPORT Bundle := MODULE(Std.BundleBase)
+    EXPORT Name := 'Bloom';
+    EXPORT Description := 'Bloom filter implementation, and example of ECL bundle layout';
+    EXPORT Authors := ['Richard Chapman','Charles Kaminsky'];
+    EXPORT License := 'http://www.apache.org/licenses/LICENSE-2.0';
+    EXPORT Copyright := 'Copyright (C) 2013 HPCC Systems';
+    EXPORT DependsOn := [];
+    EXPORT Version := '1.0.0';
+  END;
+
+  /*
+   * Create a bloom filter. The parameters will determine the size of the hash table used and
+   * the number of hashes required to give the answer. Expect at times up to 7 hashes and
+   * consequently 7 lookups per key. If this number of lookups degrades performance, use
+   * forceNumHashes, but expect that the bloom filter table size will increase (how much
+   * larger depends on the false positive probability).
+   *
+   * @param falsePositiveProbability   A value between 0.05 and 0.3 representing the desired probability of false positives
+   * @param cardinality                The expected approximate number of values to be added to the bloom hash table
+   * @param forceNumHashes             Optional parameter to force the number of hashes per lookup
+   * @param forceNumBits               Optional parameter to force the number of bits in the hash table
+   * @return                           A module exporting bloom filter helper attributes
+   */
+
+  EXPORT bloomFilter(UNSIGNED DECIMAL6_3 falsePositiveProbability,
+                     UNSIGNED INTEGER8 cardinality,
+                     UNSIGNED integer4 forceNumHashes = 0,
+                     UNSIGNED integer4 forceNumBits = 0
+                     ) := MODULE
+
+    UNSIGNED DECIMAL6_3 fpProb  := IF (falsePositiveProbability >=0.3, 0.3,
+                                       IF (falsePositiveProbability <=0.050, 0.05,
+                                           falsePositiveProbability));
+    UNSIGNED _numBits := IF (forceNumBits = 0, ROUNDUP(-(cardinality*ln((REAL4) fpProb))/POWER(ln(2),2)), forceNumBits); // derived from cardinality and fpProb unless forceNumBits is supplied
+
+    /*
+     * Return the actual size of the table used, calculated from the parameters to the module
+     * @return Actual table size, in bytes
+     */
+    EXPORT UNSIGNED tableSize := (_numBits + 7) / 8;
+
+    /*
+     * Return the actual size of the table used, calculated from the parameters to the module
+     * @return Actual table size, in bits
+     */
+    EXPORT UNSIGNED numBits := tableSize*8;
+
+    /*
+     * Return the actual number of hashes used, calculated from the parameters to the module
+     * @return Actual number of hashes
+     */
+    EXPORT UNSIGNED numHashes := IF (forceNumHashes = 0, (numBits/cardinality)*ln(2), forceNumHashes);
+
+    /*
+     * The resulting bloom table
+     * @return Bloom table
+     */
+    EXPORT bloomrec := RECORD
+      DATA bits { maxlength(tablesize) };
+    END;
+
+    EXPORT TRANSFORM(bloomrec) addBloom(UNSIGNED4 hash1, UNSIGNED4 hash2, UNSIGNED4 _numhashes = numHashes, UNSIGNED _tablesize=tableSize) := BEGINC++
+      byte * self = __self.ensureCapacity(_tablesize + sizeof(unsigned), NULL);
+      if (*(unsigned *) self == 0)
+      {
+         *(unsigned *) self = _tablesize;
+         memset(self+sizeof(unsigned), 0, _tablesize);
+      }
+      unsigned long long bit  = 0;
+      unsigned long long slot = 0;
+      unsigned int shift      = 0;
+      unsigned int mask       = 0;
+      unsigned int test       = 0;
+      const int slotsize = 8;
+      unsigned long long numbits = _tablesize * slotsize;
+      byte * outbits = self + sizeof(unsigned);
+      for (int i=0; i< _numhashes; i++)
+      {
+        // Kirsch and Mitzenmacher technique (Harvard U)
+        bit = (hash1 + (i * hash2)) % numbits;
+        slot = bit / slotsize;
+        shift = bit % slotsize;
+        mask = 1 << shift;
+        outbits[slot] |= mask;
+      }
+      return _tablesize+sizeof(unsigned);
+    ENDC++;
+
+    EXPORT transform(bloomrec) addBloom64(UNSIGNED8 hashVal) := addBloom(hashVal >> 32, hashVal & 0xffffffff);
+
+    TRANSFORM(bloomrec) _mergeBloom(bloomrec r, UNSIGNED _tablesize=tableSize) := BEGINC++
+      if (!r || !__self.row())
+        rtlFail(0, "Unexpected error in _mergeBloom");
+
+      byte * self = __self.ensureCapacity(_tablesize + sizeof(unsigned), NULL);
+      unsigned lenR = *(unsigned *) r;
+      unsigned lenS = *(unsigned *) self;
+      if (lenS != lenR || lenS != _tablesize)
+        rtlFail(0, "Unexpected error in _mergeBloom");
+
+      self += sizeof(unsigned);
+      r += sizeof(unsigned);
+      while (lenR--)
+        *self++ |= *r++;
+      return _tablesize+sizeof(unsigned);
+    ENDC++;
+
+    EXPORT TRANSFORM(bloomrec) mergeBloom(bloomrec r) := _mergeBloom(r);
+
+    EXPORT BOOLEAN testBloom(DATA bits, unsigned4 hash1, unsigned4 hash2, unsigned4 _numhashes = numHashes) := BEGINC++
+      #option pure
+      const char *bitarray = (const char *) bits;
+      unsigned long long bit  = 0;
+      unsigned long long slot = 0;
+      unsigned int shift      = 0;
+      unsigned int mask       = 0;
+      unsigned int test       = 0;
+
+      const int slotsize = 8;
+      unsigned long long numbits = lenBits * slotsize;
+
+      bool retval = true;
+      // Test each bit in the char array
+      for (int i=0; i< _numhashes; i++)
+      {
+        // Kirsch and Mitzenmacher technique (Harvard U)
+        bit =  (hash1 + (i * hash2)) % numbits;
+        slot  = bit / 8;
+        shift = bit % 8;
+        mask = 1 << shift;
+        test = bitarray[slot] & mask;
+        // If a bit isn't on,
+        // return false
+        if (test == 0)
+        {
+          retval = false;
+          break;
+        }
+      }
+      return retval;
+    ENDC++;
+
+    EXPORT boolean testBloom64(DATA bits, unsigned8 hashVal) := testBloom(bits, hashVal >> 32, hashVal & 0xffffffff);
+  END;
+
+  EXPORT buildBloomFilter(UNSIGNED DECIMAL6_3 fpProb,
+                          UNSIGNED INTEGER8 cardinality,
+                          VIRTUAL DATASET ds, <?> ANY keyfields) := MODULE
+
+    SHARED myBloomFilter := bloomFilter(fpProb, cardinality);
+    SHARED myBloomRec := myBloomFilter.bloomrec;
+    EXPORT UNSIGNED numBits := myBloomFilter.numBits;
+    EXPORT UNSIGNED numHashes := myBloomFilter.numHashes;
+    EXPORT UNSIGNED tableSize := myBloomFilter.tableSize;
+
+    TRANSFORM(myBloomRec) addTransform(ds L) := myBloomFilter.addBloom64(hash64(L.<keyfields>));
+    EXPORT buildDS := AGGREGATE(ds, myBloomRec, addTransform(LEFT), myBloomFilter.mergeBloom(ROWS(RIGHT)[NOBOUNDCHECK 2]));
+    EXPORT buildbits := buildDS[1].bits;
+
+    EXPORT lookupBits(DATA bloomFilterData, typeof(ds.<keyfields>) keyval) := FUNCTION
+      RETURN myBloomFilter.testBloom64(bloomFilterData, HASH64(keyval));
+    END;
+
+    EXPORT lookup(STRING filename, typeof(ds.<keyfields>) keyval) := FUNCTION
+      bloomFile := DATASET(filename, myBloomRec, FLAT);
+      bloomFilterData := bloomFile[1].bits : ONCE;
+      RETURN lookupBits(bloomFilterData, keyval);
+    END;
+  END;
+
+  EXPORT __selfTest := MODULE
+    SHARED testrec := RECORD
+      STRING20 name;
+    END;
+
+    testdata := DATASET([{'Richard'}], testrec);
+    theFilter := buildBloomFilter(0.3, 100, testdata, testdata.name);
+    filterBits := theFilter.buildBits;
+
+    EXPORT __selfTest := [
+      // OUTPUT('NumBits is ' + theFilter.numBits + '\n');
+      // OUTPUT('NumHashes is ' + theFilter.numHashes + '\n');
+      ASSERT(theFilter.numBits = 256);
+      ASSERT(theFilter.numHashes = 1);
+      ASSERT(theFilter.lookupBits(filterBits, 'Richard') = TRUE);
+      ASSERT(theFilter.lookupBits(filterBits, 'Lorraine') = FALSE)
+    ];
+  END;
+
+END;

+ 1 - 1
system/jlib/jfile.cpp

@@ -3066,7 +3066,7 @@ bool ensureFileExtension(StringBuffer& filename, const char* desiredExtension)
     return true;
 }
 
-/* Get full file name. If noExtension is true, the extesion (if any) will be trimmed */ 
+/* Get full file name. If noExtension is true, the extension (if any) will be trimmed */
 StringBuffer& getFullFileName(StringBuffer& filename, bool noExtension)
 {
     char drive[_MAX_DRIVE];