Browse Source

HPCC-26479 Improve the git authentication for remote eclccserver

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 3 years ago
parent
commit
721bf4ff16

+ 1 - 1
common/workunit/workunit.cpp

@@ -14312,7 +14312,7 @@ static void runKubectlCommand(const char *title, const char *cmd, const char *in
     StringBuffer _output, error;
     if (!output)
         output = &_output;
-    unsigned ret = runExternalCommand(title, *output, error, cmd, input, ".");
+    unsigned ret = runExternalCommand(title, *output, error, cmd, input, ".", nullptr);
     if (output->length())
         MLOG(MCExtraneousInfo, unknownJob, "%s: ret=%u, stdout=%s", cmd, ret, output->trimRight().str());
     if (error.length())

+ 108 - 0
devdoc/GitAuthenticate.md

@@ -0,0 +1,108 @@
+# HPCC git support
+
+Version 8.4 of the HPCC platform allows package files to define dependencies between git repositories and also allows you to compile directly from a git repository.
+
+E.g.
+
+```
+ecl run hthor --main demo.main@ghalliday/gch-ecldemo-d#version1 --server=...
+```
+
+There are no further requirements if the repositories are public, but private repositories have the additional complication of supplying authentication information.  Git provides various methods for providing the credentials...
+
+## Credentials for local development
+
+The following are the recommended approaches for configuring the credentials on a local development system interacting with github:
+
+1) ssh key.
+
+In this scenario, the ssh key associated with the local developer machine is registered with the github account.  For more details see https://docs.github.com/en/authentication/connecting-to-github-with-ssh/about-ssh
+
+This is used when the github reference is of the form ssh://github.com.  The sshkey can be protected with a passcode, and there are various options to avoid having to enter the passcode each time.
+
+It is preferable to use the https:// protocol instead of ssh:// for links in package-lock.json files.  If ssh:// is used it requires any machine that processes the dependency to have access to a registered ssh key.
+
+2) github authentication
+
+Download the GitHub command line tool (https://github.com/cli/cli).  You can then use it to authenticate all git access with
+```
+gh auth login
+```
+
+This is probably the simplest option if you are using github.  More details can be found at https://cli.github.com/manual/gh_auth_login
+
+3) Use a personal access token
+
+These are similar to a password, but with additional restrictions on their lifetime and the resources that can be accessed.
+
+Details on how to create them can be found at: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token
+
+These can then be used with the various git credential caching options.  E.g. see https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage
+
+
+## Configuring eclccserver
+
+All of the options above are likely to involve some user interaction - passphrases for ssh keys, web interaction with github authentication, and initial entry for cached access tokens.  This is problematic for eclccserver - which cannot support user interaction, and it is preferable not to pass credentials around.
+
+The solution is to use a personal access token securely stored as a secret.  (This would generally be associated with a special service account.)  This avoids the need to pass credentials and allows the keys to be rotated.
+
+The following describes the support in the different versions:
+
+## Kubernetes
+
+In Kubernetes you need to take the following steps:
+
+a) add the gitUsername property to the eclccserver component in the values.yaml file:
+
+```
+eclccserver:
+- name: myeclccserver
+  gitUsername: ghalliday
+```
+
+b) add a secret to the values.yaml file, with a key that matches the username:
+
+```
+secrets:
+  git:
+    ghalliday: my-git-secret
+```
+
+note: this cannot currently use a vault - probably need to rethink that.  (Possibly extract from secret and supply as an optional environment variable to be picked up by the bash script.)
+
+c) add a secret to Kubernetes containing the personal access token:
+
+```
+apiVersion: v1
+kind: Secret
+metadata:
+  name: my-git-secret
+type: Opaque
+stringData:
+  password: ghp_eZLHeuoHxxxxxxxxxxxxxxxxxxxxol3986sS=
+```
+
+```
+kubectl apply -f ~/dev/hpcc/helm/secrets/my-git-secret
+```
+
+When a query is submitted to eclccserver, any git repositories are accessed using the user name and password.
+
+## Bare-metal
+
+Bare-metal systems require some similar configuration steps:
+
+a) Define the environment variable HPCC_GIT_USERNAME
+
+```
+export HPCC_GIT_USERNAME=ghalliday
+```
+
+b) Store the access token in /opt/HPCCSystems/secrets/git/$HPCC_GIT_USERNAME/password
+
+E.g.
+
+```
+$cat /opt/HPCCSystems/secrets/git/ghalliday/password
+ghp_eZLHeuoHxxxxxxxxxxxxxxxxxxxxol3986sS=
+```

+ 1 - 1
ecl/eclcc/eclcc.cpp

@@ -253,7 +253,7 @@ public:
 #endif
         const char * defaultGitPrefix = getenv("ECLCC_DEFAULT_GITPREFIX");
         if (isEmptyString(defaultGitPrefix))
-            defaultGitPrefix = "git+ssh://github.com/";
+            defaultGitPrefix = "https://github.com/";
         optDefaultGitPrefix.set(defaultGitPrefix);
     }
     ~EclCC()

+ 1 - 0
ecl/eclccserver/eclccserver.cpp

@@ -601,6 +601,7 @@ class EclccCompileThread : implements IPooledThread, implements IErrorReporter,
         eclccCmd.appendf(" -o%s", wuid);
         eclccCmd.appendf(" -platform=%s", target);
         eclccCmd.appendf(" --component=%s", queryStatisticsComponentName());
+        eclccCmd.appendf(" --fetchrepos=1 --updaterepos=1");  // Default these options on in eclccserver (can be overridden in debug options)
 
         Owned<IStringIterator> debugValues = &workunit->getDebugValues();
         ForEach (*debugValues)

+ 1 - 0
ecl/eclccserver/vchooks/CMakeLists.txt

@@ -16,6 +16,7 @@
 
 FOREACH( iFILES
     ${CMAKE_CURRENT_SOURCE_DIR}/git.sh
+    ${CMAKE_CURRENT_SOURCE_DIR}/hpccaskpass.sh
 )
     install ( PROGRAMS ${iFILES} DESTINATION ${EXEC_DIR} COMPONENT Runtime )
 ENDFOREACH ( iFILES )

+ 36 - 0
ecl/eclccserver/vchooks/hpccaskpass.sh

@@ -0,0 +1,36 @@
+#!/bin/bash
+################################################################################
+#    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+################################################################################
+
+################################################################################
+# Script to allow username and password to be securely passed to git when
+# compiling source from multiple directories
+#
+# This hook expects environment variables to be set as follows:
+# HPCC_GIT_USERNAME   - The name of the service account used to read the repos
+# HPCC_GIT_SECRETPATH - The path to the root of the secret directory
+#
+# The filename providing the password is git/<username>/password within the
+# HPCC_GIT_SECRETPATH directory
+################################################################################
+
+#!/bin/bash
+secretPath="${HPCC_GIT_SECRETPATH:-/opt/HPCCSystems/secrets}"
+if [[ $1 =~ ^[Uu]sername ]]; then
+  echo $HPCC_GIT_USERNAME
+elif [[ $1 =~ ^[Pp]assword ]]; then
+  cat $secretPath/git/$HPCC_GIT_USERNAME/password
+fi

+ 16 - 6
ecl/hql/hqlrepository.cpp

@@ -629,7 +629,7 @@ IEclPackage * EclRepositoryManager::queryDependentRepository(IIdAtom * name, con
             {
                 if (options.updateRepos)
                 {
-                    unsigned retCode = runGitCommand(nullptr, "fetch origin", repoPath);
+                    unsigned retCode = runGitCommand(nullptr, "fetch origin", repoPath, true);
                     if (retCode != 0)
                         DBGLOG("Failed to download the latest version of %s", defaultUrl);
                 }
@@ -645,7 +645,7 @@ IEclPackage * EclRepositoryManager::queryDependentRepository(IIdAtom * name, con
                         throw makeStringExceptionV(99, "Failed to create directory %s'", options.eclRepoPath.str());
 
                     VStringBuffer params("clone %s \"%s\" --no-checkout", repoUrn.str(), repo.str());
-                    unsigned retCode = runGitCommand(nullptr, params, options.eclRepoPath);
+                    unsigned retCode = runGitCommand(nullptr, params, options.eclRepoPath, true);
                     if (retCode != 0)
                         throw makeStringExceptionV(99, "Failed to clone dependency '%s'", defaultUrl);
                     ok = true;
@@ -662,7 +662,7 @@ IEclPackage * EclRepositoryManager::queryDependentRepository(IIdAtom * name, con
             // Check for a sha/tag and map it to a version.  If that does not work see if it is a branch.
             VStringBuffer params("rev-parse --short %s", version.str());
             StringBuffer sha;
-            unsigned retCode = runGitCommand(&sha, params, repoPath);
+            unsigned retCode = runGitCommand(&sha, params, repoPath, false);
             if (retCode == 0)
             {
                 if (requireSHA)
@@ -682,7 +682,7 @@ IEclPackage * EclRepositoryManager::queryDependentRepository(IIdAtom * name, con
 
                 //Check for a branch origin/<version>
                 params.clear().appendf("rev-parse --short origin/%s", version.str());
-                unsigned retCode = runGitCommand(&sha.clear(), params, repoPath);
+                unsigned retCode = runGitCommand(&sha.clear(), params, repoPath, false);
                 if (retCode == 0)
                     version.set(sha);
             }
@@ -740,16 +740,26 @@ void EclRepositoryManager::kill()
     allSources.kill();
 }
 
-unsigned EclRepositoryManager::runGitCommand(StringBuffer * output, const char *args, const char * cwd)
+unsigned EclRepositoryManager::runGitCommand(StringBuffer * output, const char *args, const char * cwd, bool needCredentials)
 {
     StringBuffer tempOutput;
     if (!output)
         output= &tempOutput;
 
+    EnvironmentVector env;
+    //If fetching from git and the username is specified then use the script file to provide the username/password
+    if (needCredentials && getenv("HPCC_GIT_USERNAME"))
+    {
+        StringBuffer scriptPath;
+        getPackageFolder(scriptPath);
+        addPathSepChar(scriptPath).append("bin/hpccaskpass.sh");
+        env.emplace_back("GIT_ASKPASS", scriptPath);
+    }
+
     const char * cmd = "git";
     VStringBuffer runcmd("%s %s", cmd, args);
     StringBuffer error;
-    unsigned ret = runExternalCommand(cmd, *output, error, runcmd, nullptr, cwd);
+    unsigned ret = runExternalCommand(cmd, *output, error, runcmd, nullptr, cwd, &env);
     if (options.optVerbose)
     {
         if (ret > 0)

+ 1 - 1
ecl/hql/hqlrepository.hpp

@@ -69,7 +69,7 @@ protected:
     IEclRepository * createSingleDefinitionEclRepository(const char * moduleName, const char * attrName, IFileContents * contents, bool includeInArchive);
     IEclRepository * createRepository(IEclSourceCollection * source, const char * rootScopeFullName, bool includeInArchive);
 
-    unsigned runGitCommand(StringBuffer * output, const char *args, const char * cwd);
+    unsigned runGitCommand(StringBuffer * output, const char *args, const char * cwd, bool needCredentials);
 
 private:
     using DependencyInfo = std::pair<std::string, Shared<IEclPackage>>;

+ 8 - 2
helm/hpcc/templates/eclccserver.yaml

@@ -82,9 +82,11 @@ data:
 {{- $postJobCommand := $misc.postJobCommand | default "" }}
 {{- $eclccserverCmd := printf "eclccserver %s %s _HPCC_ARGS_" (include "hpcc.configArg" .me) (include "hpcc.daliArg" .root) }}
 {{ include "hpcc.addCommandAndLifecycle" (. | merge (dict "command" $eclccserverCmd)) | indent 12 }}
-{{- if .env }}
             env:
 {{ include "hpcc.mergeEnvironments" .env | indent 12 }}
+{{- if .me.gitUsername }}
+            - name: HPCC_GIT_USERNAME
+              value: {{ .me.gitUsername | quote }}
 {{- end }}
             workingDir: /tmp
             volumeMounts:
@@ -112,7 +114,7 @@ data:
 {{ range $.Values.eclccserver -}}
 {{- if not .disabled -}}
 {{- $env := concat ($.Values.global.env | default list) (.env | default list) -}}
-{{- $secretsCategories := list "system" "codeVerify" }}
+{{- $secretsCategories := list "system" "codeVerify" "git" }}
 {{- $commonCtx := dict "root" $ "me" . "includeCategories" (list "dll") "secretsCategories" $secretsCategories "env" $env }}
 {{- $configSHA := include "hpcc.getConfigSHA" ($commonCtx | merge (dict "configMapHelper" "hpcc.eclccServerConfigMap" "component" "eclccserver" "excludeKeys" "global,eclccserver.queues")) }}
 apiVersion: apps/v1
@@ -158,6 +160,10 @@ spec:
 {{ include "hpcc.mergeEnvironments" $env | indent 8 -}}
         - name: "SENTINEL"
           value: "/tmp/{{ .name }}.sentinel"
+ {{- if .gitUsername }}
+        - name: HPCC_GIT_USERNAME
+          value: {{ .gitUsername | quote }}
+ {{- end }}
 {{ include "hpcc.addSentinelProbes" . | indent 8 }}
 {{- include "hpcc.addSecurityContext" $commonCtx | indent 8 }}
 {{- if .useChildProcesses }}

+ 7 - 0
helm/hpcc/values.schema.json

@@ -77,6 +77,9 @@
         "codeVerify": {
           "$ref": "#/definitions/secrets"
         },
+        "git": {
+          "$ref": "#/definitions/secrets"
+        },
         "system": {
           "$ref": "#/definitions/secrets"
         }
@@ -942,6 +945,10 @@
           "type": "array",
           "items": { "$ref": "#/definitions/compileOption" }
         },
+        "gitUsername": {
+          "type": "string",
+          "description": "The username to use for all remote repository access"
+        },
         "resources": {
           "$ref": "#/definitions/resources"
         }

+ 6 - 0
helm/hpcc/values.yaml

@@ -275,6 +275,9 @@ secrets:
   system: {}
     ## Category for secrets published to all components for system level useage
 
+  git: {}
+    ## Category to provide passwords for eclccserver to access private git repos
+
 ## The vaults section mirrors the secret section but leverages vault for the storage of secrets.
 ## There is an additional category for vaults named "ecl-user".  In the future "ecl-user" vault
 ## secrets will be readable directly from ECL code.  Other secret categories are read internally
@@ -443,6 +446,9 @@ eclccserver:
   #  value: false
   #  cluster: name   # optional cluster this is applied to
 
+  # used to configure the authentication for git when using the option to compile from a repo.  Also requires an associated secret.
+  #gitUsername: <git-service-account>
+
   ## The following resources apply to child compile pods when useChildProcesses=false, otherwise they apply to eclccserver pod.
   #resources:
   #  cpu: "1"

+ 1 - 1
roxie/ccd/ccdfile.cpp

@@ -1959,7 +1959,7 @@ public:
 #ifndef _WIN32
                 StringBuffer output;
                 VStringBuffer command("ccdcache %s -t %u", cacheFileName.str(), cacheWarmTraceLevel);
-                unsigned retcode = runExternalCommand(nullptr, output, output, command, nullptr, ".");
+                unsigned retcode = runExternalCommand(nullptr, output, output, command, nullptr, ".", nullptr);
                 if (output.length())
                 {
                     StringArray outputLines;

+ 8 - 1
system/jlib/jthread.cpp

@@ -2212,7 +2212,14 @@ public:
         if (_title)
         {
             title.set(_title);
-            PROGLOG("%s: Creating PIPE program process : '%s' - hasinput=%d, hasoutput=%d stderrbufsize=%d", title.get(), prog.get(),(int)hasinput, (int)hasoutput, stderrbufsize);
+            StringBuffer envText;
+            ForEachItemIn(idx, env)
+            {
+                const auto & cur = env.item(idx);
+                envText.append(" ").append(cur);
+            }
+
+            PROGLOG("%s: Creating PIPE program process : '%s' - hasinput=%d, hasoutput=%d stderrbufsize=%d [%s]", title.get(), prog.get(),(int)hasinput, (int)hasoutput, stderrbufsize, envText.str());
         }
         CheckAllowedProgram(prog,allowedprogs);
         retcode = 0;

+ 7 - 2
system/jlib/jutil.cpp

@@ -1912,10 +1912,10 @@ static const char *findExtension(const char *fn)
 
 unsigned runExternalCommand(StringBuffer &output, StringBuffer &error, const char *cmd, const char *input)
 {
-    return runExternalCommand(cmd, output, error, cmd, input, ".");
+    return runExternalCommand(cmd, output, error, cmd, input, ".", nullptr);
 }
 
-unsigned runExternalCommand(const char *title, StringBuffer &output, StringBuffer &error, const char *cmd, const char *input, const char * cwd)
+unsigned runExternalCommand(const char *title, StringBuffer &output, StringBuffer &error, const char *cmd, const char *input, const char * cwd, const EnvironmentVector * optEnvironment)
 {
     try
     {
@@ -1923,6 +1923,11 @@ unsigned runExternalCommand(const char *title, StringBuffer &output, StringBuffe
             cwd = ".";
 
         Owned<IPipeProcess> pipe = createPipeProcess();
+        if (optEnvironment)
+        {
+            for (const auto & cur : *optEnvironment)
+                pipe->setenv(cur.first.c_str(), cur.second.c_str());
+        }
         int ret = START_FAILURE;
         if (pipe->run(title, cmd, cwd, input != NULL, true, true, 1024*1024))
         {

+ 3 - 1
system/jlib/jutil.hpp

@@ -27,6 +27,7 @@
 #include <algorithm> 
 #include <iterator>
 #include <functional>
+#include <vector>
 
 #if defined (__APPLE__)
 #include <mach/mach_time.h>
@@ -308,8 +309,9 @@ inline constexpr bool isContainerized() { return false; }
 #define arraysize(T) (sizeof(T)/sizeof(*T))
 #endif
 
+using EnvironmentVector = std::vector<std::pair<std::string, std::string>>;
 extern jlib_decl unsigned runExternalCommand(StringBuffer &output, StringBuffer &error, const char *cmd, const char *input);
-extern jlib_decl unsigned runExternalCommand(const char *title, StringBuffer &output, StringBuffer &error, const char *cmd, const char *input, const char * cwd);
+extern jlib_decl unsigned runExternalCommand(const char *title, StringBuffer &output, StringBuffer &error, const char *cmd, const char *input, const char * cwd, const EnvironmentVector * optEnvironment);
 
 extern jlib_decl unsigned __int64 greatestCommonDivisor(unsigned __int64 left, unsigned __int64 right);