gitfile.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include "jlib.hpp"
  15. #include "jio.hpp"
  16. #include "jmutex.hpp"
  17. #include "jfile.hpp"
  18. #include "jregexp.hpp"
  19. #include "gitfile.hpp"
  20. /*
  21. * Direct access to files in git repositories, by revision, without needing to check them out first
  22. * Installs hooks into createIFile, spotting filenames of the form /my/directory/.git/{revision}/path/within/git
  23. * Bare repositories of the form /my/directory.git/{revision}/path/within/git also supported
  24. */
  25. IDirectoryIterator *createGitRepositoryDirectoryIterator(const char *gitFileName, const char *mask=NULL, bool sub=false,bool includedirs=false);
  26. static void splitGitFileName(const char *fullName, StringAttr &gitDir, StringAttr &revision, StringAttr &relPath)
  27. {
  28. assertex(fullName);
  29. const char *git = strstr(fullName, ".git" PATHSEPSTR "{" );
  30. assertex(git);
  31. const char *tail = git+5;
  32. gitDir.set(fullName, tail-fullName);
  33. assertex (*tail=='{');
  34. tail++;
  35. const char *end = strchr(tail, '}');
  36. if (!end)
  37. throw MakeStringException(0, "Invalid git repository filename - no matching } found");
  38. revision.set(tail, end - tail);
  39. tail = end+1;
  40. if (*tail==PATHSEPCHAR)
  41. tail++;
  42. else if (*tail != 0)
  43. throw MakeStringException(0, "Invalid git repository filename - " PATHSEPSTR " expected after }");
  44. if (tail && *tail)
  45. {
  46. StringBuffer s(tail);
  47. s.replace(PATHSEPCHAR, '/');
  48. relPath.set(s);
  49. }
  50. else
  51. relPath.clear();
  52. // Check it's a valid git repository
  53. StringBuffer configName(gitDir);
  54. configName.append("config");
  55. if (!checkFileExists(configName.str()))
  56. throw MakeStringException(0, "Invalid git repository - config file %s not found", configName.str());
  57. }
  58. static StringBuffer & buildGitFileName(StringBuffer &fullname, const char *gitDir, const char *revision, const char *relPath)
  59. {
  60. fullname.append(gitDir);
  61. fullname.append('{').append(revision).append('}').append(PATHSEPCHAR);
  62. if (relPath && *relPath)
  63. fullname.append(relPath);
  64. return fullname;
  65. }
  66. class GitRepositoryFileIO : public CInterface, implements IFileIO
  67. {
  68. public:
  69. IMPLEMENT_IINTERFACE;
  70. GitRepositoryFileIO(const char * gitDirectory, const char * revision, const char * relFileName)
  71. {
  72. VStringBuffer gitcmd("git --git-dir=%s show %s:%s", gitDirectory, (revision && *revision) ? revision : "HEAD", relFileName);
  73. Owned<IPipeProcess> pipe = createPipeProcess();
  74. if (pipe->run("git", gitcmd, ".", false, true, false, 0))
  75. {
  76. Owned<ISimpleReadStream> pipeReader = pipe->getOutputStream();
  77. const size32_t chunkSize = 8192;
  78. for (;;)
  79. {
  80. size32_t sizeRead = pipeReader->read(chunkSize, buf.reserve(chunkSize));
  81. if (sizeRead < chunkSize)
  82. {
  83. buf.setLength(buf.length() - (chunkSize - sizeRead));
  84. break;
  85. }
  86. }
  87. pipe->closeOutput();
  88. }
  89. int retcode = pipe->wait();
  90. if (retcode)
  91. {
  92. buf.clear(); // Can't rely on destructor to clean this for me
  93. throw MakeStringException(0, "git show returned exit status %d", retcode);
  94. }
  95. }
  96. virtual size32_t read(offset_t pos, size32_t len, void * data)
  97. {
  98. if (pos >= buf.length())
  99. return 0;
  100. if (pos+len > buf.length())
  101. len = buf.length()-pos;
  102. memcpy(data, buf.toByteArray()+pos, len);
  103. return len;
  104. }
  105. virtual offset_t size()
  106. {
  107. return buf.length();
  108. }
  109. virtual void close()
  110. {
  111. }
  112. // Write methods not implemented - this is a read-only file
  113. virtual size32_t write(offset_t pos, size32_t len, const void * data)
  114. {
  115. throwUnexpected();
  116. }
  117. virtual offset_t appendFile(IFile *file,offset_t pos=0,offset_t len=(offset_t)-1)
  118. {
  119. throwUnexpected();
  120. }
  121. virtual void setSize(offset_t size)
  122. {
  123. throwUnexpected();
  124. }
  125. virtual void flush()
  126. {
  127. throwUnexpected();
  128. }
  129. protected:
  130. MemoryBuffer buf;
  131. };
  132. class GitRepositoryFile : public CInterface, implements IFile
  133. {
  134. public:
  135. IMPLEMENT_IINTERFACE;
  136. GitRepositoryFile(const char *_gitFileName, offset_t _fileSize, bool _isDir, bool _isExisting)
  137. : fullName(_gitFileName),fileSize(_fileSize), isDir(_isDir), isExisting(_isExisting)
  138. {
  139. splitGitFileName(fullName, gitDirectory, revision, relFileName);
  140. }
  141. virtual bool exists()
  142. {
  143. return isExisting;
  144. }
  145. virtual bool getTime(CDateTime * createTime, CDateTime * modifiedTime, CDateTime * accessedTime)
  146. {
  147. if (createTime)
  148. createTime->clear();
  149. if (modifiedTime)
  150. modifiedTime->clear();
  151. if (accessedTime)
  152. accessedTime->clear();
  153. return false;
  154. }
  155. virtual fileBool isDirectory()
  156. {
  157. if (!isExisting)
  158. return notFound;
  159. return isDir ? foundYes : foundNo;
  160. }
  161. virtual fileBool isFile()
  162. {
  163. if (!isExisting)
  164. return notFound;
  165. return !isDir ? foundYes : foundNo;
  166. }
  167. virtual fileBool isReadOnly()
  168. {
  169. if (!isExisting)
  170. return notFound;
  171. return foundYes;
  172. }
  173. virtual IFileIO * open(IFOmode mode, IFEflags extraFlags=IFEnone)
  174. {
  175. assertex(mode==IFOread && isExisting);
  176. return new GitRepositoryFileIO(gitDirectory, revision, relFileName);
  177. }
  178. virtual IFileAsyncIO * openAsync(IFOmode mode)
  179. {
  180. UNIMPLEMENTED;
  181. }
  182. virtual IFileIO * openShared(IFOmode mode, IFSHmode shmode, IFEflags extraFlags=IFEnone)
  183. {
  184. assertex(mode==IFOread && isExisting);
  185. return new GitRepositoryFileIO(gitDirectory, revision, relFileName);
  186. }
  187. virtual const char * queryFilename()
  188. {
  189. return fullName.str();
  190. }
  191. virtual offset_t size()
  192. {
  193. return fileSize;
  194. }
  195. // Directory functions
  196. virtual IDirectoryIterator *directoryFiles(const char *mask, bool sub, bool includeDirs)
  197. {
  198. if (!isDir || (mask && !*mask)) // Empty mask string means matches nothing - NULL means matches everything
  199. return createNullDirectoryIterator();
  200. else
  201. {
  202. StringBuffer dirName(fullName);
  203. dirName.append(PATHSEPCHAR);
  204. return createGitRepositoryDirectoryIterator(dirName, mask, sub, includeDirs);
  205. }
  206. }
  207. virtual bool getInfo(bool &isdir,offset_t &size,CDateTime &modtime)
  208. {
  209. isdir = isDir;
  210. size = fileSize;
  211. modtime.clear();
  212. return true;
  213. }
  214. // Not going to be implemented - this IFile interface is too big..
  215. virtual bool setTime(const CDateTime * createTime, const CDateTime * modifiedTime, const CDateTime * accessedTime) { UNIMPLEMENTED; }
  216. virtual bool remove() { UNIMPLEMENTED; }
  217. virtual void rename(const char *newTail) { UNIMPLEMENTED; }
  218. virtual void move(const char *newName) { UNIMPLEMENTED; }
  219. virtual void setReadOnly(bool ro) { UNIMPLEMENTED; }
  220. virtual bool setCompression(bool set) { UNIMPLEMENTED; }
  221. virtual offset_t compressedSize() { UNIMPLEMENTED; }
  222. virtual unsigned getCRC() { UNIMPLEMENTED; }
  223. virtual void setCreateFlags(unsigned cflags) { UNIMPLEMENTED; }
  224. virtual void setShareMode(IFSHmode shmode) { UNIMPLEMENTED; }
  225. virtual bool createDirectory() { UNIMPLEMENTED; }
  226. virtual IDirectoryDifferenceIterator *monitorDirectory(
  227. IDirectoryIterator *prev=NULL, // in (NULL means use current as baseline)
  228. const char *mask=NULL,
  229. bool sub=false,
  230. bool includedirs=false,
  231. unsigned checkinterval=60*1000,
  232. unsigned timeout=(unsigned)-1,
  233. Semaphore *abortsem=NULL) { UNIMPLEMENTED; }
  234. virtual void copySection(const RemoteFilename &dest, offset_t toOfs=(offset_t)-1, offset_t fromOfs=0, offset_t size=(offset_t)-1, ICopyFileProgress *progress=NULL, CFflags copyFlags=CFnone) { UNIMPLEMENTED; }
  235. virtual void copyTo(IFile *dest, size32_t buffersize=DEFAULT_COPY_BLKSIZE, ICopyFileProgress *progress=NULL, bool usetmp=false, CFflags copyFlags=CFnone) { UNIMPLEMENTED; }
  236. virtual IMemoryMappedFile *openMemoryMapped(offset_t ofs=0, memsize_t len=(memsize_t)-1, bool write=false) { UNIMPLEMENTED; }
  237. virtual void treeCopyTo(IFile *dest,IpSubNet &subnet,IpAddress &resfrom,bool usetmp=false,CFflags copyFlags=CFnone) { UNIMPLEMENTED; }
  238. protected:
  239. StringAttr gitDirectory;
  240. StringAttr revision;
  241. StringAttr relFileName;
  242. StringBuffer fullName;
  243. offset_t fileSize;
  244. bool isDir;
  245. bool isExisting;
  246. };
  247. static IFile *createGitFile(const char *gitFileName)
  248. {
  249. StringBuffer fname(gitFileName);
  250. assertex(fname.length());
  251. removeTrailingPathSepChar(fname);
  252. StringAttr gitDirectory, revision, relDir;
  253. splitGitFileName(fname, gitDirectory, revision, relDir);
  254. if (relDir.isEmpty())
  255. return new GitRepositoryFile(fname, 0, true, true); // Special case the root - ugly but apparently necessary
  256. Owned<IDirectoryIterator> dir = createGitRepositoryDirectoryIterator(fname, NULL, false, true);
  257. if (dir->first())
  258. {
  259. Linked<IFile> file = &dir->query();
  260. assertex(!dir->next());
  261. return file.getClear();
  262. }
  263. else
  264. return new GitRepositoryFile(gitFileName, 0, false, false);
  265. }
  266. class GitRepositoryDirectoryIterator : public CInterface, implements IDirectoryIterator
  267. {
  268. public:
  269. IMPLEMENT_IINTERFACE;
  270. GitRepositoryDirectoryIterator(const char *_gitFileName, const char *_mask, bool _sub, bool _includeDirs)
  271. : mask(_mask), sub(_sub), includeDirs(_includeDirs)
  272. {
  273. splitGitFileName(_gitFileName, gitDirectory, revision, relDir);
  274. curIndex = 0;
  275. }
  276. virtual StringBuffer &getName(StringBuffer &buf)
  277. {
  278. assertex(curFile);
  279. return buf.append(curFile->queryFilename());
  280. }
  281. virtual bool isDir()
  282. {
  283. assertex(curFile);
  284. return curFile->isDirectory()==foundYes;
  285. }
  286. virtual __int64 getFileSize()
  287. {
  288. assertex(curFile);
  289. return curFile->size();
  290. }
  291. virtual bool getModifiedTime(CDateTime &ret)
  292. {
  293. UNIMPLEMENTED;
  294. }
  295. virtual bool first()
  296. {
  297. files.kill();
  298. curFile.clear();
  299. curIndex = 0;
  300. VStringBuffer gitcmd("git --git-dir=%s ls-tree --long -z %s %s", gitDirectory.get(), revision.length() ? revision.get() : "HEAD", relDir.length() ? relDir.get() : "");
  301. Owned<IPipeProcess> pipe = createPipeProcess();
  302. if (pipe->run("git", gitcmd, ".", false, true, false, 0))
  303. {
  304. Owned<ISimpleReadStream> pipeReader = pipe->getOutputStream();
  305. char c;
  306. StringBuffer thisLine;
  307. while (pipeReader->read(sizeof(c), &c))
  308. {
  309. if (!c)
  310. {
  311. if (thisLine.length())
  312. {
  313. // info from Git looks like this:
  314. // 100644 blob 6c131b5954 36323 sourcedoc.xml
  315. // 040000 tree 6c131b5954 - subdir
  316. char size[32];
  317. char filename[1024];
  318. int ret= sscanf(thisLine, "%*s %*s %*s %31s %1023s", &size[0], &filename[0]);
  319. if (ret != 2)
  320. throw MakeStringException(0, "Unexpected data returned from git ls-tree: %s", thisLine.str());
  321. if (includeDirs || size[0]!='-')
  322. {
  323. const char *tail = strrchr(filename, '/'); // Git uses / even on Windows
  324. if (tail)
  325. tail += 1;
  326. else
  327. tail = filename;
  328. if (!mask.length() || WildMatch(tail, mask, false))
  329. {
  330. files.append(tail);
  331. sizes.append(size[0]=='-' ? (offset_t) -1 : _atoi64(size));
  332. }
  333. }
  334. }
  335. thisLine.clear();
  336. }
  337. else
  338. thisLine.append(c);
  339. }
  340. }
  341. unsigned retCode = pipe->wait();
  342. if (retCode)
  343. {
  344. files.kill();
  345. return false; // Or an exception?
  346. }
  347. open();
  348. return isValid();
  349. }
  350. virtual bool next()
  351. {
  352. curIndex++;
  353. open();
  354. return isValid();
  355. }
  356. virtual bool isValid() { return curFile != NULL; }
  357. virtual IFile & query() { return *curFile; }
  358. protected:
  359. StringAttr gitDirectory;
  360. StringAttr revision;
  361. StringAttr relDir;
  362. StringAttr mask;
  363. Owned<IFile> curFile;
  364. unsigned curIndex;
  365. StringArray files;
  366. UInt64Array sizes;
  367. bool includeDirs;
  368. bool sub;
  369. void open()
  370. {
  371. if (files.isItem(curIndex))
  372. {
  373. const char *filename = files.item(curIndex);
  374. offset_t size = sizes.item(curIndex);
  375. StringBuffer gitFileName;
  376. buildGitFileName(gitFileName, gitDirectory, revision, relDir);
  377. // Git ls-tree behaves differently according to whether you put the trailing / on the path you supply.
  378. // With /, it gets all files in that directory
  379. // Without, it will return just a single match (for the file or dir with that name)
  380. // So we are effectively in two different modes according to which we used.
  381. if (gitFileName.charAt(gitFileName.length()-1)=='/') // NOTE: / not PATHSEPCHAR - we translated to git representation
  382. gitFileName.append(filename);
  383. if (size==(offset_t) -1)
  384. curFile.setown(new GitRepositoryFile(gitFileName, 0, true, true));
  385. else
  386. curFile.setown(new GitRepositoryFile(gitFileName, size, false, true));
  387. }
  388. else
  389. curFile.clear();
  390. }
  391. };
  392. IDirectoryIterator *createGitRepositoryDirectoryIterator(const char *gitFileName, const char *mask, bool sub, bool includeDirs)
  393. {
  394. assertex(sub==false); // I don't know what it means!
  395. return new GitRepositoryDirectoryIterator(gitFileName, mask, sub, includeDirs);
  396. }
  397. class CGitRepositoryFileHook : public CInterface, implements IContainedFileHook
  398. {
  399. public:
  400. IMPLEMENT_IINTERFACE;
  401. virtual IFile * createIFile(const char *fileName)
  402. {
  403. if (isGitFileName(fileName))
  404. return createGitFile(fileName);
  405. else
  406. return NULL;
  407. }
  408. protected:
  409. static bool isGitFileName(const char *fileName)
  410. {
  411. if (fileName && strstr(fileName, ".git" PATHSEPSTR "{"))
  412. return true;
  413. return false;
  414. }
  415. } *gitRepositoryFileHook;
  416. static CriticalSection *cs;
  417. extern GITFILE_API void installFileHook()
  418. {
  419. CriticalBlock b(*cs); // Probably overkill!
  420. if (!gitRepositoryFileHook)
  421. {
  422. gitRepositoryFileHook = new CGitRepositoryFileHook;
  423. addContainedFileHook(gitRepositoryFileHook);
  424. }
  425. }
  426. extern GITFILE_API void removeFileHook()
  427. {
  428. if (cs)
  429. {
  430. CriticalBlock b(*cs); // Probably overkill!
  431. if (gitRepositoryFileHook)
  432. {
  433. removeContainedFileHook(gitRepositoryFileHook);
  434. delete gitRepositoryFileHook;
  435. gitRepositoryFileHook = NULL;
  436. }
  437. }
  438. }
  439. MODULE_INIT(INIT_PRIORITY_STANDARD)
  440. {
  441. cs = new CriticalSection;
  442. gitRepositoryFileHook = NULL; // Not really needed, but you have to have a modinit to match a modexit
  443. return true;
  444. }
  445. MODULE_EXIT()
  446. {
  447. if (gitRepositoryFileHook)
  448. {
  449. removeContainedFileHook(gitRepositoryFileHook);
  450. gitRepositoryFileHook = NULL;
  451. }
  452. ::Release(gitRepositoryFileHook);
  453. delete cs;
  454. cs = NULL;
  455. }