gitfile.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include "jlib.hpp"
  15. #include "jio.hpp"
  16. #include "jmutex.hpp"
  17. #include "jfile.hpp"
  18. #include "jregexp.hpp"
  19. #include "gitfile.hpp"
  20. #include "jlog.hpp"
  21. #include "git2.h"
  22. /*
  23. * Direct access to files in git repositories, by revision, without needing to check them out first
  24. * Installs hooks into createIFile, spotting filenames of the form /my/directory/.git/{revision}/path/within/git
  25. * Bare repositories of the form /my/directory.git/{revision}/path/within/git also supported
  26. */
  27. IDirectoryIterator *createGitRepositoryDirectoryIterator(const char *gitFileName, const char *mask=NULL, bool sub=false,bool includedirs=false);
  28. static void splitGitFileName(const char *fullName, StringAttr &gitDir, StringAttr &revision, StringAttr &relPath)
  29. {
  30. assertex(fullName);
  31. const char *git = strstr(fullName, ".git" PATHSEPSTR "{" );
  32. assertex(git);
  33. const char *tail = git+5;
  34. gitDir.set(fullName, tail-fullName);
  35. assertex (*tail=='{');
  36. tail++;
  37. const char *end = strchr(tail, '}');
  38. if (!end)
  39. throw MakeStringException(0, "Invalid git repository filename - no matching } found");
  40. revision.set(tail, end - tail);
  41. tail = end+1;
  42. if (*tail==PATHSEPCHAR || *tail == '/')
  43. tail++;
  44. else if (*tail != 0)
  45. throw MakeStringException(0, "Invalid git repository filename - " PATHSEPSTR " expected after }");
  46. if (tail && *tail)
  47. {
  48. StringBuffer s(tail);
  49. s.replace(PATHSEPCHAR, '/');
  50. relPath.set(s);
  51. }
  52. else
  53. relPath.clear();
  54. // Check it's a valid git repository
  55. StringBuffer configName(gitDir);
  56. configName.append("config");
  57. if (!checkFileExists(configName.str()))
  58. throw MakeStringException(0, "Invalid git repository - config file %s not found", configName.str());
  59. }
  60. static StringBuffer & buildGitFileName(StringBuffer &fullname, const char *gitDir, const char *revision, const char *relPath)
  61. {
  62. fullname.append(gitDir);
  63. fullname.append('{').append(revision).append('}').append('/');
  64. if (relPath && *relPath)
  65. fullname.append(relPath);
  66. return fullname;
  67. }
  68. //--------------------------------------------------------------------------------------------------------------------
  69. // New implementation using libgit2
  70. //--------------------------------------------------------------------------------------------------------------------
  71. static git_oid nullOid;
  72. #define GIT_CHECK(x) check(x, #x)
  73. class GitCommitTree : public CInterface
  74. {
  75. public:
  76. GitCommitTree(const char * directory, const char * version)
  77. {
  78. GIT_CHECK(git_repository_open(&gitRepo, directory));
  79. if (gitRepo)
  80. {
  81. //Check to see if the version is a tag/branch etc. - these take precedence if they happen to match a sha prefix
  82. git_reference * ref = nullptr;
  83. if (git_reference_dwim(&ref, gitRepo, version) == 0)
  84. {
  85. //Map the symbolic reference to the underlying object
  86. git_reference * resolvedRef = nullptr;
  87. if (git_reference_resolve(&resolvedRef, ref) == 0)
  88. {
  89. const git_oid * oid = git_reference_target(resolvedRef);
  90. GIT_CHECK(git_commit_lookup(&gitCommit, gitRepo, oid));
  91. git_reference_free(resolvedRef);
  92. }
  93. git_reference_free(ref);
  94. }
  95. if (!gitCommit)
  96. {
  97. git_oid gitOid;
  98. if (git_oid_fromstrp(&gitOid, version) == 0)
  99. {
  100. //User provided a SHA (possibly shorted) -> resolve it. Error will be reported later if it does not exist.
  101. GIT_CHECK(git_commit_lookup_prefix(&gitCommit, gitRepo, &gitOid, strlen(version)));
  102. }
  103. }
  104. if (gitCommit)
  105. GIT_CHECK(git_commit_tree(&gitRoot, gitCommit));
  106. }
  107. }
  108. ~GitCommitTree()
  109. {
  110. git_tree_free(gitRoot);
  111. git_commit_free(gitCommit);
  112. git_repository_free(gitRepo);
  113. }
  114. const git_tree * queryTree() const { return gitRoot; }
  115. protected:
  116. void check(int code, const char * func)
  117. {
  118. if (code != 0)
  119. {
  120. const git_error * err = git_error_last();
  121. const char * errmsg = err ? err->message : "<unknown>";
  122. WARNLOG("libgit %s returned %u: %s", func, code, errmsg);
  123. }
  124. }
  125. protected:
  126. git_repository * gitRepo = nullptr;
  127. git_commit * gitCommit = nullptr;
  128. git_tree * gitRoot = nullptr;
  129. };
  130. class GitRepositoryFileIO : implements CSimpleInterfaceOf<IFileIO>
  131. {
  132. public:
  133. GitRepositoryFileIO(GitCommitTree * commitTree, const git_oid * oid)
  134. {
  135. git_blob *blob = nullptr;
  136. int error = git_blob_lookup(&blob, git_tree_owner(commitTree->queryTree()), oid);
  137. if (error)
  138. throw MakeStringException(0, "git git_blob_lookup returned exit status %d", error);
  139. git_object_size_t blobsize = git_blob_rawsize(blob);
  140. const void * data = git_blob_rawcontent(blob);
  141. buf.append(blobsize, data);
  142. git_blob_free(blob);
  143. }
  144. virtual size32_t read(offset_t pos, size32_t len, void * data)
  145. {
  146. if (pos >= buf.length())
  147. return 0;
  148. if (pos+len > buf.length())
  149. len = buf.length()-pos;
  150. memcpy_iflen(data, buf.toByteArray()+pos, len);
  151. return len;
  152. }
  153. virtual offset_t size()
  154. {
  155. return buf.length();
  156. }
  157. virtual void close()
  158. {
  159. }
  160. // Write methods not implemented - this is a read-only file
  161. virtual size32_t write(offset_t pos, size32_t len, const void * data)
  162. {
  163. throwUnexpected();
  164. }
  165. virtual offset_t appendFile(IFile *file,offset_t pos=0,offset_t len=(offset_t)-1)
  166. {
  167. throwUnexpected();
  168. }
  169. virtual void setSize(offset_t size)
  170. {
  171. throwUnexpected();
  172. }
  173. virtual void flush()
  174. {
  175. throwUnexpected();
  176. }
  177. unsigned __int64 getStatistic(StatisticKind kind)
  178. {
  179. //This could be implemented, but not likely to be useful so currently return nothing.
  180. return 0;
  181. }
  182. protected:
  183. MemoryBuffer buf;
  184. };
  185. class GitRepositoryFile : implements IFile, public CInterface
  186. {
  187. public:
  188. IMPLEMENT_IINTERFACE;
  189. GitRepositoryFile(const char *_gitFileName, offset_t _fileSize, bool _isDir, bool _isExisting, GitCommitTree * _commitTree, const git_oid & _oid)
  190. : commitTree(_commitTree), oid(_oid), fullName(_gitFileName),fileSize(_fileSize), isDir(_isDir), isExisting(_isExisting)
  191. {
  192. splitGitFileName(fullName, gitDirectory, revision, relFileName);
  193. }
  194. virtual bool exists()
  195. {
  196. return isExisting;
  197. }
  198. virtual bool getTime(CDateTime * createTime, CDateTime * modifiedTime, CDateTime * accessedTime)
  199. {
  200. if (createTime)
  201. createTime->clear();
  202. if (modifiedTime)
  203. modifiedTime->clear();
  204. if (accessedTime)
  205. accessedTime->clear();
  206. return false;
  207. }
  208. virtual fileBool isDirectory()
  209. {
  210. if (!isExisting)
  211. return fileBool::notFound;
  212. return isDir ? fileBool::foundYes : fileBool::foundNo;
  213. }
  214. virtual fileBool isFile()
  215. {
  216. if (!isExisting)
  217. return fileBool::notFound;
  218. return !isDir ? fileBool::foundYes : fileBool::foundNo;
  219. }
  220. virtual fileBool isReadOnly()
  221. {
  222. if (!isExisting)
  223. return fileBool::notFound;
  224. return fileBool::foundYes;
  225. }
  226. virtual IFileAsyncIO * openAsync(IFOmode mode)
  227. {
  228. UNIMPLEMENTED;
  229. }
  230. virtual const char * queryFilename()
  231. {
  232. return fullName.str();
  233. }
  234. virtual offset_t size()
  235. {
  236. if (!isExisting)
  237. return (offset_t) -1;
  238. return fileSize;
  239. }
  240. // Directory functions
  241. virtual IDirectoryIterator *directoryFiles(const char *mask, bool sub, bool includeDirs)
  242. {
  243. if (!isDir || (mask && !*mask)) // Empty mask string means matches nothing - NULL means matches everything
  244. return createNullDirectoryIterator();
  245. else
  246. {
  247. StringBuffer dirName(fullName);
  248. dirName.append(PATHSEPCHAR);
  249. return createGitRepositoryDirectoryIterator(dirName, mask, sub, includeDirs);
  250. }
  251. }
  252. virtual bool getInfo(bool &isdir,offset_t &size,CDateTime &modtime)
  253. {
  254. isdir = isDir;
  255. size = fileSize;
  256. modtime.clear();
  257. return true;
  258. }
  259. virtual IFileIO * open(IFOmode mode, IFEflags extraFlags) override
  260. {
  261. assertex(mode==IFOread && isExisting && !isDir);
  262. return new GitRepositoryFileIO(commitTree, &oid);
  263. }
  264. virtual IFileIO * openShared(IFOmode mode, IFSHmode shmode, IFEflags extraFlags) override
  265. {
  266. assertex(mode==IFOread && isExisting && !isDir);
  267. return new GitRepositoryFileIO(commitTree, &oid);
  268. }
  269. // Not going to be implemented - this IFile interface is too big..
  270. virtual bool setTime(const CDateTime * createTime, const CDateTime * modifiedTime, const CDateTime * accessedTime) { UNIMPLEMENTED; }
  271. virtual bool remove() { UNIMPLEMENTED; }
  272. virtual void rename(const char *newTail) { UNIMPLEMENTED; }
  273. virtual void move(const char *newName) { UNIMPLEMENTED; }
  274. virtual void setReadOnly(bool ro) { UNIMPLEMENTED; }
  275. virtual void setFilePermissions(unsigned fPerms) { UNIMPLEMENTED; }
  276. virtual bool setCompression(bool set) { UNIMPLEMENTED; }
  277. virtual offset_t compressedSize() { UNIMPLEMENTED; }
  278. virtual unsigned getCRC() { UNIMPLEMENTED; }
  279. virtual void setCreateFlags(unsigned short cflags) { UNIMPLEMENTED; }
  280. virtual void setShareMode(IFSHmode shmode) { UNIMPLEMENTED; }
  281. virtual bool createDirectory() { UNIMPLEMENTED; }
  282. virtual IDirectoryDifferenceIterator *monitorDirectory(
  283. IDirectoryIterator *prev=NULL, // in (NULL means use current as baseline)
  284. const char *mask=NULL,
  285. bool sub=false,
  286. bool includedirs=false,
  287. unsigned checkinterval=60*1000,
  288. unsigned timeout=(unsigned)-1,
  289. Semaphore *abortsem=NULL) { UNIMPLEMENTED; }
  290. virtual void copySection(const RemoteFilename &dest, offset_t toOfs=(offset_t)-1, offset_t fromOfs=0, offset_t size=(offset_t)-1, ICopyFileProgress *progress=NULL, CFflags copyFlags=CFnone) { UNIMPLEMENTED; }
  291. virtual void copyTo(IFile *dest, size32_t buffersize=DEFAULT_COPY_BLKSIZE, ICopyFileProgress *progress=NULL, bool usetmp=false, CFflags copyFlags=CFnone) { UNIMPLEMENTED; }
  292. virtual IMemoryMappedFile *openMemoryMapped(offset_t ofs=0, memsize_t len=(memsize_t)-1, bool write=false) { UNIMPLEMENTED; }
  293. protected:
  294. Linked<GitCommitTree> commitTree;
  295. const git_oid oid;
  296. StringAttr gitDirectory;
  297. StringAttr revision;
  298. StringAttr relFileName;
  299. StringBuffer fullName;
  300. offset_t fileSize;
  301. bool isDir;
  302. bool isExisting;
  303. };
  304. class GitRepositoryDirectoryIterator : implements IDirectoryIterator, public CInterface
  305. {
  306. static int treeCallback(const char *root, const git_tree_entry *entry, void *payload)
  307. {
  308. GitRepositoryDirectoryIterator * self = reinterpret_cast<GitRepositoryDirectoryIterator *>(payload);
  309. return self->noteEntry(root, entry);
  310. }
  311. public:
  312. IMPLEMENT_IINTERFACE;
  313. GitRepositoryDirectoryIterator(const char *_gitFileName, const char *_mask, bool _sub, bool _includeDirs)
  314. : mask(_mask), sub(_sub), includeDirs(_includeDirs)
  315. {
  316. splitGitFileName(_gitFileName, gitDirectory, revision, relDir);
  317. curIndex = 0;
  318. const char * version = revision.length() ? revision.get() : "HEAD";
  319. commitTree.setown(new GitCommitTree(gitDirectory, version));
  320. if (!commitTree->queryTree())
  321. throw makeStringExceptionV(9900 , "Cannot resolve git revision %s", _gitFileName);
  322. }
  323. virtual StringBuffer &getName(StringBuffer &buf)
  324. {
  325. assertex(curFile);
  326. return buf.append(curFile->queryFilename());
  327. }
  328. virtual bool isDir()
  329. {
  330. assertex(curFile);
  331. return curFile->isDirectory()==fileBool::foundYes;
  332. }
  333. virtual __int64 getFileSize()
  334. {
  335. assertex(curFile);
  336. return curFile->size();
  337. }
  338. virtual bool getModifiedTime(CDateTime &ret)
  339. {
  340. UNIMPLEMENTED;
  341. }
  342. virtual bool first()
  343. {
  344. files.kill();
  345. curFile.clear();
  346. curIndex = 0;
  347. matchedPath = 0;
  348. git_tree_walk(commitTree->queryTree(), GIT_TREEWALK_PRE, treeCallback, this);
  349. open();
  350. return isValid();
  351. }
  352. virtual bool next()
  353. {
  354. curIndex++;
  355. open();
  356. return isValid();
  357. }
  358. virtual bool isValid() { return curFile != NULL; }
  359. virtual IFile & query() { return *curFile; }
  360. protected:
  361. int noteEntry(const char *root, const git_tree_entry *entry)
  362. {
  363. const char * filename = git_tree_entry_name(entry);
  364. // BLOB is a file revision object, TREE is a nested directory, COMMIT seems to be used for an empty directory.
  365. git_object_t kind = git_tree_entry_type(entry);
  366. bool isDirectory = kind != GIT_OBJECT_BLOB;
  367. if (matchedPath >= relDir.length())
  368. {
  369. if (relDir)
  370. {
  371. //Check for the root directory changing - if it does we have finished all the matches => abort recursion
  372. size_t lenRoot = strlen(root);
  373. if (lenRoot != relDir.length())
  374. return -1;
  375. if (!strieq(root, relDir.str()))
  376. return -1;
  377. }
  378. //Currently avoid de-referencing the file sizes - may need to revisit if it is required
  379. sizes.append(isDirectory ? (offset_t) -1 : 0);
  380. files.append(filename);
  381. oids.emplace_back(*git_tree_entry_id(entry));
  382. return 1; // do not recurse - only expand a single level of the directory tree
  383. }
  384. unsigned lenFilename = strlen(filename);
  385. unsigned remaining = relDir.length() - matchedPath;
  386. if (lenFilename <= remaining)
  387. {
  388. const char * next = relDir.str() + matchedPath;
  389. if (strnicmp(next, filename, lenFilename) == 0)
  390. {
  391. if (lenFilename == remaining)
  392. {
  393. sizes.append(isDirectory ? (offset_t) -1 : 0);
  394. files.append(filename);
  395. oids.emplace_back(*git_tree_entry_id(entry));
  396. return -1; // found the single match
  397. }
  398. unsigned nextChar = next[lenFilename];
  399. if (isPathSepChar(nextChar))
  400. {
  401. matchedPath += (lenFilename + 1);
  402. return 0; // recurse
  403. }
  404. // filename only matches a substring of the next directory that needs to match
  405. }
  406. }
  407. return 1; // skip
  408. }
  409. protected:
  410. StringAttr gitDirectory;
  411. StringAttr revision;
  412. StringAttr relDir;
  413. StringAttr mask;
  414. Owned<IFile> curFile;
  415. unsigned curIndex = 0;
  416. StringArray files;
  417. UInt64Array sizes;
  418. std::vector<git_oid> oids;
  419. Owned<GitCommitTree> commitTree;
  420. bool includeDirs = true;
  421. bool sub = false;
  422. unsigned matchedPath = 0;
  423. void open()
  424. {
  425. if (files.isItem(curIndex))
  426. {
  427. const char *filename = files.item(curIndex);
  428. offset_t size = sizes.item(curIndex);
  429. const git_oid & oid = oids[curIndex];
  430. StringBuffer gitFileName;
  431. buildGitFileName(gitFileName, gitDirectory, revision, relDir);
  432. // Git ls-tree behaves differently according to whether you put the trailing / on the path you supply.
  433. // With /, it gets all files in that directory
  434. // Without, it will return just a single match (for the file or dir with that name)
  435. // So we are effectively in two different modes according to which we used.
  436. char lastChar = gitFileName.charAt(gitFileName.length()-1);
  437. // NOTE: / or PATHSEPCHAR - we translated to git representation, but root directory is .git{x}<pathsep>
  438. if ((lastChar == '/') || (lastChar == PATHSEPCHAR))
  439. gitFileName.append(filename);
  440. if (size==(offset_t) -1)
  441. curFile.setown(new GitRepositoryFile(gitFileName, 0, true, true, commitTree, oid));
  442. else
  443. curFile.setown(new GitRepositoryFile(gitFileName, size, false, true, commitTree, oid));
  444. }
  445. else
  446. curFile.clear();
  447. }
  448. };
  449. //--------------------------------------------------------------------------------------------------------------------
  450. IDirectoryIterator *createGitRepositoryDirectoryIterator(const char *gitFileName, const char *mask, bool sub, bool includeDirs)
  451. {
  452. assertex(sub==false); // I don't know what it means!
  453. return new GitRepositoryDirectoryIterator(gitFileName, mask, sub, includeDirs);
  454. }
  455. static IFile *createGitFile(const char *gitFileName)
  456. {
  457. StringBuffer fname(gitFileName);
  458. assertex(fname.length());
  459. removeTrailingPathSepChar(fname);
  460. StringAttr gitDirectory, revision, relDir;
  461. splitGitFileName(fname, gitDirectory, revision, relDir);
  462. if (relDir.isEmpty())
  463. {
  464. // Special case the root - ugly but apparently necessary
  465. return new GitRepositoryFile(fname, 0, true, true, nullptr, nullOid);
  466. }
  467. Owned<IDirectoryIterator> dir = createGitRepositoryDirectoryIterator(fname, NULL, false, true);
  468. if (dir->first())
  469. {
  470. Linked<IFile> file = &dir->query();
  471. assertex(!dir->next());
  472. return file.getClear();
  473. }
  474. else
  475. return new GitRepositoryFile(gitFileName, (offset_t) -1, false, false, nullptr, nullOid);
  476. }
  477. class CGitRepositoryFileHook : public CInterface, implements IContainedFileHook
  478. {
  479. public:
  480. IMPLEMENT_IINTERFACE;
  481. virtual IFile * createIFile(const char *fileName)
  482. {
  483. if (isGitFileName(fileName))
  484. return createGitFile(fileName);
  485. else
  486. return NULL;
  487. }
  488. protected:
  489. static bool isGitFileName(const char *fileName)
  490. {
  491. if (fileName && strstr(fileName, ".git" PATHSEPSTR "{"))
  492. return true;
  493. return false;
  494. }
  495. } *gitRepositoryFileHook;
  496. static CriticalSection *cs;
  497. extern GITFILE_API void installFileHook()
  498. {
  499. CriticalBlock b(*cs); // Probably overkill!
  500. if (!gitRepositoryFileHook)
  501. {
  502. gitRepositoryFileHook = new CGitRepositoryFileHook;
  503. addContainedFileHook(gitRepositoryFileHook);
  504. }
  505. }
  506. extern GITFILE_API void removeFileHook()
  507. {
  508. if (cs)
  509. {
  510. CriticalBlock b(*cs); // Probably overkill!
  511. if (gitRepositoryFileHook)
  512. {
  513. removeContainedFileHook(gitRepositoryFileHook);
  514. delete gitRepositoryFileHook;
  515. gitRepositoryFileHook = NULL;
  516. }
  517. }
  518. }
  519. MODULE_INIT(INIT_PRIORITY_STANDARD)
  520. {
  521. git_libgit2_init();
  522. cs = new CriticalSection;
  523. gitRepositoryFileHook = NULL; // Not really needed, but you have to have a modinit to match a modexit
  524. return true;
  525. }
  526. MODULE_EXIT()
  527. {
  528. if (gitRepositoryFileHook)
  529. {
  530. removeContainedFileHook(gitRepositoryFileHook);
  531. gitRepositoryFileHook = NULL;
  532. }
  533. ::Release(gitRepositoryFileHook);
  534. delete cs;
  535. cs = NULL;
  536. git_libgit2_shutdown();
  537. }