/*##############################################################################
Copyright (C) 2011 HPCC Systems.
All rights reserved. This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
############################################################################## */
#include "platform.h"
#include "jlib.hpp"
#include "jio.hpp"
#include "jmutex.hpp"
#include "jfile.hpp"
#include "jlog.hpp"
#include "jregexp.hpp"
#include "gitfile.hpp"
#include "archive.hpp"
#ifdef _USE_LIBARCHIVE
#include <sys/stat.h>
#include <archive.h>
#include <archive_entry.h>
/*
* Direct access to files in zip archives (and other libarchive-supported formats), without needing to extract them first
* Installs hooks into createIFile, spotting filenames of the form /my/directory/myfile.zip/{password}/path/within/archive
*/
// Pattern used to spot the archive-container part of a filename: an archive extension
// (.zip, .tar, .tar.gz or .tgz) followed either by end-of-string or by a path separator.
// NOTE: the spaces around PATHSEPSTR are required - since C++11 a string literal immediately
// followed by an identifier is parsed as a user-defined literal suffix rather than as
// adjacent-literal concatenation.
#define ARCHIVE_SIGNATURE "[.]{zip|tar|tar[.]gz|tgz}{$|" PATHSEPSTR "}"

// Compiled form of ARCHIVE_SIGNATURE - created in MODULE_INIT, destroyed in MODULE_EXIT
static RegExpr *signature;
// Held while using 'signature' (see splitName) and while installing/removing the archive file hook
static SpinLock *lock;
// Find where the archive-container part of a filename ends.
// Returns a pointer to the character following the matched archive signature, or NULL if
// fileName is NULL or contains no archive signature.
// The shared RegExpr object is used under the module spin lock.
static const char *splitName(const char *fileName)
{
    const char *afterSignature = NULL;
    if (fileName)
    {
        SpinBlock b(*lock);
        const char *match = signature->find(fileName);
        if (match)
            afterSignature = match + signature->findlen();
    }
    return afterSignature;
}
static void splitArchivedFileName(const char *fullName, StringAttr &container, StringAttr &option, StringAttr &relPath)
{
const char *tail = splitName(fullName);
assertex(tail);
size_t containerLen = tail-fullName;
if (fullName[containerLen-1]==PATHSEPCHAR)
containerLen--;
container.set(fullName, containerLen);
if (*tail=='{')
{
tail++;
const char *end = strchr(tail, '}');
if (!end)
throw MakeStringException(0, "Invalid archive-embedded filename - no matching } found");
option.set(tail, end - tail);
tail = end+1;
if (*tail==PATHSEPCHAR)
tail++;
else if (*tail != 0)
throw MakeStringException(0, "Invalid archive-embedded filename - " PATHSEPSTR " expected after }");
}
else
option.clear();
if (tail && *tail)
{
StringBuffer s(tail);
s.replace(PATHSEPCHAR, '/');
relPath.set(s);
}
else
relPath.clear();
}
// Inverse of splitArchivedFileName: appends <archiveFile>[/{option}][/relPath] to fullname.
// The option and relPath parts are omitted when NULL or empty. Returns fullname.
static StringBuffer & buildArchivedFileName(StringBuffer &fullname, const char *archiveFile, const char *option, const char *relPath)
{
    const bool hasOption = option && *option;
    const bool hasRelPath = relPath && *relPath;

    fullname.append(archiveFile);
    if (hasOption)
    {
        fullname.append(PATHSEPCHAR);
        fullname.append('{');
        fullname.append(option);
        fullname.append('}');
    }
    if (hasRelPath)
    {
        fullname.append(PATHSEPCHAR);
        fullname.append(relPath);
    }
    return fullname;
}
// Forward declaration - the definition follows ArchiveDirectoryIterator later in this file, but
// ArchiveFile::directoryFiles and createIFileInArchive need to call it before then.
IDirectoryIterator *createArchiveDirectoryIterator(const char *gitFileName, const char *mask, bool sub, bool includeDirs);
// Wrapper around libarchive's archive_entry struct to ensure we free them at right time
// Because not clear whether safe to use a struct archive_entry object after the archive has been closed,
// we copy the info we need out of them into something we CAN be sure of the lifespan of
class ArchiveEntry : public CInterface, implements IInterface
{
public:
IMPLEMENT_IINTERFACE;
ArchiveEntry(struct archive_entry *entry)
{
mode = archive_entry_filetype(entry);
filesize = archive_entry_size(entry);
path.set(archive_entry_pathname(entry));
}
bool isDir() const
{
return S_ISDIR(mode);
}
inline offset_t size()
{
return filesize;
}
const char *pathname()
{
return path.get();
}
private:
unsigned mode;
offset_t filesize;
StringAttr path;
};
// IFileIO implementation for reading out of libarchive-supported archives
// Because of the nature of the libarchive this may not be efficient for some archive formats
// Have to read through the entire archive directory to find the bit you want, it seems
// It's possible that we could add some seek support to at least avoid having to do so twice?
class ArchiveFileIO : public CInterface, implements IFileIO
{
public:
IMPLEMENT_IINTERFACE;
ArchiveFileIO(const char *_fullName) : fullName(_fullName)
{
// Sadly it seems we can't use a saved entry to read data from an archive. We have to open a new archive
// object and scan through until we find the matching file, in order to extract it.
StringAttr container, option, relpath;
splitArchivedFileName(_fullName, container, option, relpath);
curPos = 0;
lastPos = 0;
curBuffSize = 0;
curBuff = NULL;
archive = archive_read_new();
archive_read_support_format_all(archive);
archive_read_support_compression_all(archive);
int retcode = archive_read_open_filename(archive, container, 10240);
if (retcode == ARCHIVE_OK)
{
struct archive_entry *entry = archive_entry_new();
while (archive_read_next_header2(archive, entry) == ARCHIVE_OK)
{
const char *filename = archive_entry_pathname(entry);
if (strcmp(filename, relpath.get())==0)
{
fileSize = archive_entry_size(entry);
break;
}
}
archive_entry_free(entry);
}
}
~ArchiveFileIO()
{
archive_read_finish(archive);
}
virtual size32_t read(offset_t pos, size32_t len, void * _data)
{
// NOTE - we don't support multithreaded access (the sequential-only restriction would make that tricky anyway)
if (pos < lastPos)
throw MakeStringException(0, "Only sequential access to contained file %s supported", fullName.get());
byte *data = (byte *) _data;
lastPos = pos;
size32_t lenRequested = len;
while (len > 0 & pos < fileSize)
{
if (pos >= curPos+curBuffSize)
{
int ret = archive_read_data_block(archive, &curBuff, &curBuffSize, &curPos);
if (ret != ARCHIVE_OK)
{
if (ret == ARCHIVE_EOF)
break; // This shouldn't happen if the quoted fileSize was accurate...
else
throw MakeStringException(0, "Read error reading contained file %s", fullName.get());
}
}
else
{
// Copy as much of the current request as we can fulfil from this block
offset_t buffOffset = (pos - curPos);
size_t copyLen = (curBuffSize - buffOffset) > len ? len : curBuffSize - buffOffset; // careful for overflows, we are mixing 64/32bit values
if (curBuff)
memcpy(data, ((const byte *) curBuff) + buffOffset, copyLen);
else
memset(data, 0, copyLen); // Sparse areas of compressed files may be represented with NULL buffers
data += copyLen;
len -= copyLen;
pos += copyLen;
}
}
return lenRequested - len;
}
virtual offset_t size()
{
return fileSize;
}
virtual void close()
{
}
// Write methods not implemented - this is a read-only file
virtual size32_t write(offset_t pos, size32_t len, const void * data)
{
throwUnexpected();
}
virtual offset_t appendFile(IFile *file,offset_t pos=0,offset_t len=(offset_t)-1)
{
throwUnexpected();
}
virtual void setSize(offset_t size)
{
throwUnexpected();
}
virtual void flush()
{
throwUnexpected();
}
protected:
struct archive *archive;
offset_t fileSize;
#if ARCHIVE_VERSION_NUMBER < 3000000
off_t curPos;
#else
unsigned __int64 curPos;
#endif
offset_t lastPos;
size_t curBuffSize;
const void *curBuff;
StringAttr fullName;
};
// IFile implementation for reading out of libarchive-supported archives
// These use the ArchiveEntry objects allocated in the directory iterator
// in the hope they might be useful for directly seeking to the file to be extracted
// at some point.
//
// An ArchiveFile constructed with a NULL entry represents a file that does not exist in
// the archive. The file is read-only: anything that would modify it is UNIMPLEMENTED.
class ArchiveFile : public CInterface, implements IFile
{
public:
    IMPLEMENT_IINTERFACE;

    // _fileName is the full archive-embedded name; _entry (may be NULL) is linked, not owned
    ArchiveFile(const char *_fileName, ArchiveEntry *_entry)
        : fullName(_fileName),entry(_entry)
    {
    }

    virtual bool exists()
    {
        return entry != NULL;
    }
    virtual bool getTime(CDateTime * createTime, CDateTime * modifiedTime, CDateTime * accessedTime)
    {
        UNIMPLEMENTED; // MORE - maybe could implement if required
    }
    virtual fileBool isDirectory()
    {
        if (!entry)
            return notFound;
        return entry->isDir() ? foundYes : foundNo;
    }
    virtual fileBool isFile()
    {
        if (!entry)
            return notFound;
        return entry->isDir() ? foundNo : foundYes;
    }
    // Everything that exists inside an archive is reported as read-only
    virtual fileBool isReadOnly()
    {
        if (!entry)
            return notFound;
        return foundYes;
    }

    // Only read access to an existing entry is supported
    virtual IFileIO * open(IFOmode mode)
    {
        assertex(mode==IFOread && entry != NULL);
        return new ArchiveFileIO(fullName.str());
    }
    virtual IFileAsyncIO * openAsync(IFOmode mode)
    {
        UNIMPLEMENTED;
    }
    // The share mode is ignored - behaves exactly like open()
    virtual IFileIO * openShared(IFOmode mode, IFSHmode shmode)
    {
        assertex(mode==IFOread && entry != NULL);
        return new ArchiveFileIO(fullName.str());
    }

    virtual const char * queryFilename()
    {
        return fullName.str();
    }
    // Size recorded in the archive entry, or 0 if the file does not exist
    virtual offset_t size()
    {
        if (!entry)
            return 0;
        return entry->size();
    }

    // Directory functions
    virtual IDirectoryIterator *directoryFiles(const char *mask, bool sub, bool includeDirs)
    {
        if (isDirectory() != foundYes || (mask && !*mask))   // Empty mask string means matches nothing - NULL means matches everything
            return createNullDirectoryIterator();
        else
        {
            StringBuffer dirName(fullName);
            dirName.append(PATHSEPCHAR);
            return createArchiveDirectoryIterator(dirName, mask, sub, includeDirs);
        }
    }
    virtual bool getInfo(bool &_isdir,offset_t &_size,CDateTime &_modtime)
    {
        _isdir = isDirectory()==foundYes;
        _size = size();
        _modtime.clear(); // MORE could probably do better
        return true; // MORE should this be false if not existing?
    }

    // Not going to be implemented - this IFile interface is too big..
    virtual bool setTime(const CDateTime * createTime, const CDateTime * modifiedTime, const CDateTime * accessedTime) { UNIMPLEMENTED; }
    virtual bool remove() { UNIMPLEMENTED; }
    virtual void rename(const char *newTail) { UNIMPLEMENTED; }
    virtual void move(const char *newName) { UNIMPLEMENTED; }
    virtual void setReadOnly(bool ro) { UNIMPLEMENTED; }
    virtual bool setCompression(bool set) { UNIMPLEMENTED; }
    virtual offset_t compressedSize() { UNIMPLEMENTED; }
    virtual unsigned getCRC() { UNIMPLEMENTED; }
    virtual void setCreateFlags(unsigned cflags) { UNIMPLEMENTED; }
    virtual void setShareMode(IFSHmode shmode) { UNIMPLEMENTED; }
    virtual bool createDirectory() { UNIMPLEMENTED; }
    virtual IDirectoryDifferenceIterator *monitorDirectory(
                                  IDirectoryIterator *prev=NULL,    // in (NULL means use current as baseline)
                                  const char *mask=NULL,
                                  bool sub=false,
                                  bool includedirs=false,
                                  unsigned checkinterval=60*1000,
                                  unsigned timeout=(unsigned)-1,
                                  Semaphore *abortsem=NULL)  { UNIMPLEMENTED; }
    virtual void copySection(const RemoteFilename &dest, offset_t toOfs=(offset_t)-1, offset_t fromOfs=0, offset_t size=(offset_t)-1, ICopyFileProgress *progress=NULL) { UNIMPLEMENTED; }
    virtual void copyTo(IFile *dest, size32_t buffersize=0x100000, ICopyFileProgress *progress=NULL, bool usetmp=false) { UNIMPLEMENTED; }
    virtual IMemoryMappedFile *openMemoryMapped(offset_t ofs=0, memsize_t len=(memsize_t)-1, bool write=false) { UNIMPLEMENTED; }
    virtual void treeCopyTo(IFile *dest,IpSubNet &subnet,IpAddress &resfrom,bool usetmp=false) { UNIMPLEMENTED; }

protected:
    StringBuffer fullName;          // full archive-embedded name of this file
    Linked<ArchiveEntry> entry;     // details copied from the archive, NULL if the file does not exist
};
// Create an IFile for a file held inside an archive.
// containedFileName has the form <archive>[/{option}]/path/within/archive and must not be empty.
// The name is split into directory and tail, and the archive is scanned for an entry in that
// directory matching the tail (directories included). If one is found the IFile describing it
// is returned, otherwise an IFile that reports itself as not existing. The caller owns the result.
extern REMOTE_API IFile *createIFileInArchive(const char *containedFileName)
{
    StringBuffer fname(containedFileName);
    assertex(fname.length());
    removeTrailingPathSepChar(fname);
    // Drive and directory are both gathered into dirPath, tail and extension into dirTail
    StringBuffer dirPath, dirTail;
    splitFilename(fname.str(), &dirPath, &dirPath, &dirTail, &dirTail);
    Owned<IDirectoryIterator> dir = createArchiveDirectoryIterator(dirPath.str(), dirTail.str(), false, true);
    if (dir->first())
    {
        // Take our own link before advancing - next() replaces the iterator's current file
        Linked<IFile> file = &dir->query();
        assertex(!dir->next());
        return file.getClear();
    }
    else
        return new ArchiveFile(containedFileName, NULL);
}
class ArchiveDirectoryIterator : public CInterface, implements IDirectoryIterator
{
public:
IMPLEMENT_IINTERFACE;
ArchiveDirectoryIterator(const char *_containedFileName, const char *_mask, bool _sub, bool _includeDirs)
: mask(_mask), sub(_sub), includeDirs(_includeDirs)
{
splitArchivedFileName(_containedFileName, container, option, relDir);
curIndex = 0;
}
virtual StringBuffer &getName(StringBuffer &buf)
{
assertex(curFile);
return buf.append(curFile->queryFilename());
}
virtual bool isDir()
{
assertex(curFile);
return curFile->isDirectory();
}
virtual __int64 getFileSize()
{
assertex(curFile);
return curFile->size();
}
virtual bool getModifiedTime(CDateTime &ret)
{
UNIMPLEMENTED;
}
virtual bool first()
{
curFile.clear();
entries.kill();
curIndex = 0;
struct archive *archive = archive_read_new();
archive_read_support_format_all(archive);
archive_read_support_compression_all(archive);
int retcode = archive_read_open_filename(archive, container, 10240);
if (retcode == ARCHIVE_OK)
{
struct archive_entry *entry = archive_entry_new();
while (archive_read_next_header2(archive, entry) == ARCHIVE_OK)
{
unsigned mode = archive_entry_filetype(entry);
bool isDir = S_ISDIR(mode);
if (includeDirs || !isDir)
{
const char *filename = archive_entry_pathname(entry);
if (memcmp(filename, relDir.get(), relDir.length())==0)
{
StringBuffer tail(filename + relDir.length());
if (tail.length())
{
if (tail.charAt(tail.length()-1)=='/' || tail.charAt(tail.length()-1)==PATHSEPCHAR)
tail.remove(tail.length()-1, 1);
}
else
{
assert(isDir);
tail.append(".");
}
// Strip off a trailing /, then check that there is no / in the tail
if (strchr(tail, PATHSEPCHAR) == NULL && (!mask.length() || WildMatch(tail, mask, false)))
{
DBGLOG("found file %s %s %s", container.get(), relDir.get(), tail.str());
entries.append(*new ArchiveEntry(entry));
}
}
}
}
archive_entry_free(entry);
}
archive_read_finish(archive);
return next();
}
virtual bool next()
{
if (entries.isItem(curIndex))
{
ArchiveEntry &entry = entries.item(curIndex);
curIndex++;
const char *filename = entry.pathname();
StringBuffer containedFileName;
buildArchivedFileName(containedFileName, container, option, filename);
removeTrailingPathSepChar(containedFileName);
curFile.setown(new ArchiveFile(containedFileName, &entry));
return true;
}
else
{
curFile.clear();
return false;
}
}
virtual bool isValid() { return curFile != NULL; }
virtual IFile & query() { return *curFile; }
protected:
StringAttr container;
StringAttr option;
StringAttr relDir;
StringAttr mask;
Owned curFile;
unsigned curIndex;
IArrayOf entries; // The entries that matched
bool includeDirs;
bool sub;
};
// Factory for ArchiveDirectoryIterator. gitFileName names the directory within an archive to list
// (the parameter name is historical). Listing subdirectories is not supported, so sub must be false.
// The caller owns the returned iterator.
IDirectoryIterator *createArchiveDirectoryIterator(const char *gitFileName, const char *mask, bool sub, bool includeDirs)
{
    assertex(sub==false); // I don't know what it means!
    IDirectoryIterator *iter = new ArchiveDirectoryIterator(gitFileName, mask, sub, includeDirs);
    return iter;
}
// Hook registered via addContainedFileHook: claims any filename that contains an
// archive signature and supplies an archive-backed IFile for it.
class CArchiveFileHook : public CInterface, implements IContainedFileHook
{
public:
    IMPLEMENT_IINTERFACE;

    // Returns an IFile for names that refer into an archive, NULL for any other name
    virtual IFile * createIFile(const char *fileName)
    {
        if (!isArchiveFileName(fileName))
            return NULL;
        return createIFileInArchive(fileName);
    }

protected:
    // True if fileName is non-NULL and contains an archive signature
    static bool isArchiveFileName(const char *fileName)
    {
        return fileName && splitName(fileName) != NULL;
    }
} *archiveFileHook;
// Create and register the archive file hook. Does nothing if it is already installed.
extern REMOTE_API void installArchiveFileHook()
{
    SpinBlock b(*lock); // Probably overkill!
    if (archiveFileHook)
        return;
    archiveFileHook = new CArchiveFileHook;
    addContainedFileHook(archiveFileHook);
}
// Unregister the archive file hook and drop this module's reference to it.
// Does nothing if the hook is not installed, or if the module lock does not exist.
extern REMOTE_API void removeArchiveFileHook()
{
    if (!lock)
        return;
    SpinBlock b(*lock); // Probably overkill!
    if (archiveFileHook)
    {
        removeContainedFileHook(archiveFileHook);
        // Release the reference obtained from 'new' in installArchiveFileHook - once the pointer
        // has been cleared nothing else in this file can release it, so it would otherwise leak.
        // NOTE(review): assumes addContainedFileHook does not take over ownership of that reference - confirm against jfile
        ::Release(archiveFileHook);
        archiveFileHook = NULL;
    }
}
// Module start-up: create the lock and the compiled archive-signature expression.
// The hook itself is only created when installArchiveFileHook is called.
MODULE_INIT(INIT_PRIORITY_REMOTE_RMTFILE)
{
    archiveFileHook = NULL;
    lock = new SpinLock;
    signature = new RegExpr(ARCHIVE_SIGNATURE);
    return true;
}
MODULE_EXIT()
{
removeArchiveFileHook();
delete signature;
delete lock;
::Release(archiveFileHook);
}
#else
// Built without _USE_LIBARCHIVE: there is no archive hook to install, so this is a no-op
extern REMOTE_API void installArchiveFileHook()
{
}
// Built without _USE_LIBARCHIVE: no archive hook is ever installed, so this is a no-op
extern REMOTE_API void removeArchiveFileHook()
{
}
// Built without _USE_LIBARCHIVE: files inside archives cannot be accessed, so always throws
extern REMOTE_API IFile *createIFileInArchive(const char *containedFileName)
{
throw MakeStringException(0, "System was built without archive file support");
}
#endif