123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591 |
- /*##############################################################################
- HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ############################################################################## */
- #include "platform.h"
- #include "jlib.hpp"
- #include "jio.hpp"
- #include "jmutex.hpp"
- #include "jfile.hpp"
- #include "jregexp.hpp"
- #include "gitfile.hpp"
- #include "jlog.hpp"
- #include "git2.h"
- /*
- * Direct access to files in git repositories, by revision, without needing to check them out first
- * Installs hooks into createIFile, spotting filenames of the form /my/directory/.git/{revision}/path/within/git
- * Bare repositories of the form /my/directory.git/{revision}/path/within/git also supported
- */
- IDirectoryIterator *createGitRepositoryDirectoryIterator(const char *gitFileName, const char *mask=NULL, bool sub=false,bool includedirs=false);
- static void splitGitFileName(const char *fullName, StringAttr &gitDir, StringAttr &revision, StringAttr &relPath)
- {
- assertex(fullName);
- const char *git = strstr(fullName, ".git" PATHSEPSTR "{" );
- assertex(git);
- const char *tail = git+5;
- gitDir.set(fullName, tail-fullName);
- assertex (*tail=='{');
- tail++;
- const char *end = strchr(tail, '}');
- if (!end)
- throw MakeStringException(0, "Invalid git repository filename - no matching } found");
- revision.set(tail, end - tail);
- tail = end+1;
- if (*tail==PATHSEPCHAR || *tail == '/')
- tail++;
- else if (*tail != 0)
- throw MakeStringException(0, "Invalid git repository filename - " PATHSEPSTR " expected after }");
- if (tail && *tail)
- {
- StringBuffer s(tail);
- s.replace(PATHSEPCHAR, '/');
- relPath.set(s);
- }
- else
- relPath.clear();
- // Check it's a valid git repository
- StringBuffer configName(gitDir);
- configName.append("config");
- if (!checkFileExists(configName.str()))
- throw MakeStringException(0, "Invalid git repository - config file %s not found", configName.str());
- }
- static StringBuffer & buildGitFileName(StringBuffer &fullname, const char *gitDir, const char *revision, const char *relPath)
- {
- fullname.append(gitDir);
- fullname.append('{').append(revision).append('}').append('/');
- if (relPath && *relPath)
- fullname.append(relPath);
- return fullname;
- }
- //--------------------------------------------------------------------------------------------------------------------
- // New implementation using libgit2
- //--------------------------------------------------------------------------------------------------------------------
- static git_oid nullOid;
- #define GIT_CHECK(x) check(x, #x)
- class GitCommitTree : public CInterface
- {
- public:
- GitCommitTree(const char * directory, const char * version)
- {
- GIT_CHECK(git_repository_open(&gitRepo, directory));
- if (gitRepo)
- {
- //Check to see if the version is a tag/branch etc. - these take precedence if they happen to match a sha prefix
- git_reference * ref = nullptr;
- if (git_reference_dwim(&ref, gitRepo, version) == 0)
- {
- //Map the symbolic reference to the underlying object
- git_reference * resolvedRef = nullptr;
- if (git_reference_resolve(&resolvedRef, ref) == 0)
- {
- const git_oid * oid = git_reference_target(resolvedRef);
- GIT_CHECK(git_commit_lookup(&gitCommit, gitRepo, oid));
- git_reference_free(resolvedRef);
- }
- git_reference_free(ref);
- }
- if (!gitCommit)
- {
- git_oid gitOid;
- if (git_oid_fromstrp(&gitOid, version) == 0)
- {
- //User provided a SHA (possibly shorted) -> resolve it. Error will be reported later if it does not exist.
- GIT_CHECK(git_commit_lookup_prefix(&gitCommit, gitRepo, &gitOid, strlen(version)));
- }
- }
- if (gitCommit)
- GIT_CHECK(git_commit_tree(&gitRoot, gitCommit));
- }
- }
- ~GitCommitTree()
- {
- git_tree_free(gitRoot);
- git_commit_free(gitCommit);
- git_repository_free(gitRepo);
- }
- const git_tree * queryTree() const { return gitRoot; }
- protected:
- void check(int code, const char * func)
- {
- if (code != 0)
- {
- const git_error * err = git_error_last();
- const char * errmsg = err ? err->message : "<unknown>";
- WARNLOG("libgit %s returned %u: %s", func, code, errmsg);
- }
- }
- protected:
- git_repository * gitRepo = nullptr;
- git_commit * gitCommit = nullptr;
- git_tree * gitRoot = nullptr;
- };
- class GitRepositoryFileIO : implements CSimpleInterfaceOf<IFileIO>
- {
- public:
- GitRepositoryFileIO(GitCommitTree * commitTree, const git_oid * oid)
- {
- git_blob *blob = nullptr;
- int error = git_blob_lookup(&blob, git_tree_owner(commitTree->queryTree()), oid);
- if (error)
- throw MakeStringException(0, "git git_blob_lookup returned exit status %d", error);
- git_object_size_t blobsize = git_blob_rawsize(blob);
- const void * data = git_blob_rawcontent(blob);
- buf.append(blobsize, data);
- git_blob_free(blob);
- }
- virtual size32_t read(offset_t pos, size32_t len, void * data)
- {
- if (pos >= buf.length())
- return 0;
- if (pos+len > buf.length())
- len = buf.length()-pos;
- memcpy_iflen(data, buf.toByteArray()+pos, len);
- return len;
- }
- virtual offset_t size()
- {
- return buf.length();
- }
- virtual void close()
- {
- }
- // Write methods not implemented - this is a read-only file
- virtual size32_t write(offset_t pos, size32_t len, const void * data)
- {
- throwUnexpected();
- }
- virtual offset_t appendFile(IFile *file,offset_t pos=0,offset_t len=(offset_t)-1)
- {
- throwUnexpected();
- }
- virtual void setSize(offset_t size)
- {
- throwUnexpected();
- }
- virtual void flush()
- {
- throwUnexpected();
- }
- unsigned __int64 getStatistic(StatisticKind kind)
- {
- //This could be implemented, but not likely to be useful so currently return nothing.
- return 0;
- }
- protected:
- MemoryBuffer buf;
- };
- class GitRepositoryFile : implements IFile, public CInterface
- {
- public:
- IMPLEMENT_IINTERFACE;
- GitRepositoryFile(const char *_gitFileName, offset_t _fileSize, bool _isDir, bool _isExisting, GitCommitTree * _commitTree, const git_oid & _oid)
- : commitTree(_commitTree), oid(_oid), fullName(_gitFileName),fileSize(_fileSize), isDir(_isDir), isExisting(_isExisting)
- {
- splitGitFileName(fullName, gitDirectory, revision, relFileName);
- }
- virtual bool exists()
- {
- return isExisting;
- }
- virtual bool getTime(CDateTime * createTime, CDateTime * modifiedTime, CDateTime * accessedTime)
- {
- if (createTime)
- createTime->clear();
- if (modifiedTime)
- modifiedTime->clear();
- if (accessedTime)
- accessedTime->clear();
- return false;
- }
- virtual fileBool isDirectory()
- {
- if (!isExisting)
- return fileBool::notFound;
- return isDir ? fileBool::foundYes : fileBool::foundNo;
- }
- virtual fileBool isFile()
- {
- if (!isExisting)
- return fileBool::notFound;
- return !isDir ? fileBool::foundYes : fileBool::foundNo;
- }
- virtual fileBool isReadOnly()
- {
- if (!isExisting)
- return fileBool::notFound;
- return fileBool::foundYes;
- }
- virtual IFileAsyncIO * openAsync(IFOmode mode)
- {
- UNIMPLEMENTED;
- }
- virtual const char * queryFilename()
- {
- return fullName.str();
- }
- virtual offset_t size()
- {
- if (!isExisting)
- return (offset_t) -1;
- return fileSize;
- }
- // Directory functions
- virtual IDirectoryIterator *directoryFiles(const char *mask, bool sub, bool includeDirs)
- {
- if (!isDir || (mask && !*mask)) // Empty mask string means matches nothing - NULL means matches everything
- return createNullDirectoryIterator();
- else
- {
- StringBuffer dirName(fullName);
- dirName.append(PATHSEPCHAR);
- return createGitRepositoryDirectoryIterator(dirName, mask, sub, includeDirs);
- }
- }
- virtual bool getInfo(bool &isdir,offset_t &size,CDateTime &modtime)
- {
- isdir = isDir;
- size = fileSize;
- modtime.clear();
- return true;
- }
- virtual IFileIO * open(IFOmode mode, IFEflags extraFlags) override
- {
- assertex(mode==IFOread && isExisting && !isDir);
- return new GitRepositoryFileIO(commitTree, &oid);
- }
- virtual IFileIO * openShared(IFOmode mode, IFSHmode shmode, IFEflags extraFlags) override
- {
- assertex(mode==IFOread && isExisting && !isDir);
- return new GitRepositoryFileIO(commitTree, &oid);
- }
- // Not going to be implemented - this IFile interface is too big..
- virtual bool setTime(const CDateTime * createTime, const CDateTime * modifiedTime, const CDateTime * accessedTime) { UNIMPLEMENTED; }
- virtual bool remove() { UNIMPLEMENTED; }
- virtual void rename(const char *newTail) { UNIMPLEMENTED; }
- virtual void move(const char *newName) { UNIMPLEMENTED; }
- virtual void setReadOnly(bool ro) { UNIMPLEMENTED; }
- virtual void setFilePermissions(unsigned fPerms) { UNIMPLEMENTED; }
- virtual bool setCompression(bool set) { UNIMPLEMENTED; }
- virtual offset_t compressedSize() { UNIMPLEMENTED; }
- virtual unsigned getCRC() { UNIMPLEMENTED; }
- virtual void setCreateFlags(unsigned short cflags) { UNIMPLEMENTED; }
- virtual void setShareMode(IFSHmode shmode) { UNIMPLEMENTED; }
- virtual bool createDirectory() { UNIMPLEMENTED; }
- virtual IDirectoryDifferenceIterator *monitorDirectory(
- IDirectoryIterator *prev=NULL, // in (NULL means use current as baseline)
- const char *mask=NULL,
- bool sub=false,
- bool includedirs=false,
- unsigned checkinterval=60*1000,
- unsigned timeout=(unsigned)-1,
- Semaphore *abortsem=NULL) { UNIMPLEMENTED; }
- virtual void copySection(const RemoteFilename &dest, offset_t toOfs=(offset_t)-1, offset_t fromOfs=0, offset_t size=(offset_t)-1, ICopyFileProgress *progress=NULL, CFflags copyFlags=CFnone) { UNIMPLEMENTED; }
- virtual void copyTo(IFile *dest, size32_t buffersize=DEFAULT_COPY_BLKSIZE, ICopyFileProgress *progress=NULL, bool usetmp=false, CFflags copyFlags=CFnone) { UNIMPLEMENTED; }
- virtual IMemoryMappedFile *openMemoryMapped(offset_t ofs=0, memsize_t len=(memsize_t)-1, bool write=false) { UNIMPLEMENTED; }
- protected:
- Linked<GitCommitTree> commitTree;
- const git_oid oid;
- StringAttr gitDirectory;
- StringAttr revision;
- StringAttr relFileName;
- StringBuffer fullName;
- offset_t fileSize;
- bool isDir;
- bool isExisting;
- };
- class GitRepositoryDirectoryIterator : implements IDirectoryIterator, public CInterface
- {
- static int treeCallback(const char *root, const git_tree_entry *entry, void *payload)
- {
- GitRepositoryDirectoryIterator * self = reinterpret_cast<GitRepositoryDirectoryIterator *>(payload);
- return self->noteEntry(root, entry);
- }
- public:
- IMPLEMENT_IINTERFACE;
- GitRepositoryDirectoryIterator(const char *_gitFileName, const char *_mask, bool _sub, bool _includeDirs)
- : mask(_mask), sub(_sub), includeDirs(_includeDirs)
- {
- splitGitFileName(_gitFileName, gitDirectory, revision, relDir);
- curIndex = 0;
- const char * version = revision.length() ? revision.get() : "HEAD";
- commitTree.setown(new GitCommitTree(gitDirectory, version));
- if (!commitTree->queryTree())
- throw makeStringExceptionV(9900 , "Cannot resolve git revision %s", _gitFileName);
- }
- virtual StringBuffer &getName(StringBuffer &buf)
- {
- assertex(curFile);
- return buf.append(curFile->queryFilename());
- }
- virtual bool isDir()
- {
- assertex(curFile);
- return curFile->isDirectory()==fileBool::foundYes;
- }
- virtual __int64 getFileSize()
- {
- assertex(curFile);
- return curFile->size();
- }
- virtual bool getModifiedTime(CDateTime &ret)
- {
- UNIMPLEMENTED;
- }
- virtual bool first()
- {
- files.kill();
- curFile.clear();
- curIndex = 0;
- matchedPath = 0;
- git_tree_walk(commitTree->queryTree(), GIT_TREEWALK_PRE, treeCallback, this);
- open();
- return isValid();
- }
- virtual bool next()
- {
- curIndex++;
- open();
- return isValid();
- }
- virtual bool isValid() { return curFile != NULL; }
- virtual IFile & query() { return *curFile; }
- protected:
- int noteEntry(const char *root, const git_tree_entry *entry)
- {
- const char * filename = git_tree_entry_name(entry);
- // BLOB is a file revision object, TREE is a nested directory, COMMIT seems to be used for an empty directory.
- git_object_t kind = git_tree_entry_type(entry);
- bool isDirectory = kind != GIT_OBJECT_BLOB;
- if (matchedPath >= relDir.length())
- {
- if (relDir)
- {
- //Check for the root directory changing - if it does we have finished all the matches => abort recursion
- size_t lenRoot = strlen(root);
- if (lenRoot != relDir.length())
- return -1;
- if (!strieq(root, relDir.str()))
- return -1;
- }
- //Currently avoid de-referencing the file sizes - may need to revisit if it is required
- sizes.append(isDirectory ? (offset_t) -1 : 0);
- files.append(filename);
- oids.emplace_back(*git_tree_entry_id(entry));
- return 1; // do not recurse - only expand a single level of the directory tree
- }
- unsigned lenFilename = strlen(filename);
- unsigned remaining = relDir.length() - matchedPath;
- if (lenFilename <= remaining)
- {
- const char * next = relDir.str() + matchedPath;
- if (strnicmp(next, filename, lenFilename) == 0)
- {
- if (lenFilename == remaining)
- {
- sizes.append(isDirectory ? (offset_t) -1 : 0);
- files.append(filename);
- oids.emplace_back(*git_tree_entry_id(entry));
- return -1; // found the single match
- }
- unsigned nextChar = next[lenFilename];
- if (isPathSepChar(nextChar))
- {
- matchedPath += (lenFilename + 1);
- return 0; // recurse
- }
- // filename only matches a substring of the next directory that needs to match
- }
- }
- return 1; // skip
- }
- protected:
- StringAttr gitDirectory;
- StringAttr revision;
- StringAttr relDir;
- StringAttr mask;
- Owned<IFile> curFile;
- unsigned curIndex = 0;
- StringArray files;
- UInt64Array sizes;
- std::vector<git_oid> oids;
- Owned<GitCommitTree> commitTree;
- bool includeDirs = true;
- bool sub = false;
- unsigned matchedPath = 0;
- void open()
- {
- if (files.isItem(curIndex))
- {
- const char *filename = files.item(curIndex);
- offset_t size = sizes.item(curIndex);
- const git_oid & oid = oids[curIndex];
- StringBuffer gitFileName;
- buildGitFileName(gitFileName, gitDirectory, revision, relDir);
- // Git ls-tree behaves differently according to whether you put the trailing / on the path you supply.
- // With /, it gets all files in that directory
- // Without, it will return just a single match (for the file or dir with that name)
- // So we are effectively in two different modes according to which we used.
- char lastChar = gitFileName.charAt(gitFileName.length()-1);
- // NOTE: / or PATHSEPCHAR - we translated to git representation, but root directory is .git{x}<pathsep>
- if ((lastChar == '/') || (lastChar == PATHSEPCHAR))
- gitFileName.append(filename);
- if (size==(offset_t) -1)
- curFile.setown(new GitRepositoryFile(gitFileName, 0, true, true, commitTree, oid));
- else
- curFile.setown(new GitRepositoryFile(gitFileName, size, false, true, commitTree, oid));
- }
- else
- curFile.clear();
- }
- };
- //--------------------------------------------------------------------------------------------------------------------
- IDirectoryIterator *createGitRepositoryDirectoryIterator(const char *gitFileName, const char *mask, bool sub, bool includeDirs)
- {
- assertex(sub==false); // I don't know what it means!
- return new GitRepositoryDirectoryIterator(gitFileName, mask, sub, includeDirs);
- }
- static IFile *createGitFile(const char *gitFileName)
- {
- StringBuffer fname(gitFileName);
- assertex(fname.length());
- removeTrailingPathSepChar(fname);
- StringAttr gitDirectory, revision, relDir;
- splitGitFileName(fname, gitDirectory, revision, relDir);
- if (relDir.isEmpty())
- {
- // Special case the root - ugly but apparently necessary
- return new GitRepositoryFile(fname, 0, true, true, nullptr, nullOid);
- }
- Owned<IDirectoryIterator> dir = createGitRepositoryDirectoryIterator(fname, NULL, false, true);
- if (dir->first())
- {
- Linked<IFile> file = &dir->query();
- assertex(!dir->next());
- return file.getClear();
- }
- else
- return new GitRepositoryFile(gitFileName, (offset_t) -1, false, false, nullptr, nullOid);
- }
- class CGitRepositoryFileHook : public CInterface, implements IContainedFileHook
- {
- public:
- IMPLEMENT_IINTERFACE;
- virtual IFile * createIFile(const char *fileName)
- {
- if (isGitFileName(fileName))
- return createGitFile(fileName);
- else
- return NULL;
- }
- protected:
- static bool isGitFileName(const char *fileName)
- {
- if (fileName && strstr(fileName, ".git" PATHSEPSTR "{"))
- return true;
- return false;
- }
- } *gitRepositoryFileHook;
- static CriticalSection *cs;
- extern GITFILE_API void installFileHook()
- {
- CriticalBlock b(*cs); // Probably overkill!
- if (!gitRepositoryFileHook)
- {
- gitRepositoryFileHook = new CGitRepositoryFileHook;
- addContainedFileHook(gitRepositoryFileHook);
- }
- }
- extern GITFILE_API void removeFileHook()
- {
- if (cs)
- {
- CriticalBlock b(*cs); // Probably overkill!
- if (gitRepositoryFileHook)
- {
- removeContainedFileHook(gitRepositoryFileHook);
- delete gitRepositoryFileHook;
- gitRepositoryFileHook = NULL;
- }
- }
- }
- MODULE_INIT(INIT_PRIORITY_STANDARD)
- {
- git_libgit2_init();
- cs = new CriticalSection;
- gitRepositoryFileHook = NULL; // Not really needed, but you have to have a modinit to match a modexit
- return true;
- }
- MODULE_EXIT()
- {
- if (gitRepositoryFileHook)
- {
- removeContainedFileHook(gitRepositoryFileHook);
- gitRepositoryFileHook = NULL;
- }
- ::Release(gitRepositoryFileHook);
- delete cs;
- cs = NULL;
- git_libgit2_shutdown();
- }
|