mime.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #pragma warning( disable : 4786 )
  14. #include "esphttp.hpp"
  15. //Jlib
  16. #include "jliball.hpp"
  17. //ESP Bindings
  18. #include "http/platform/mime.hpp"
  19. #include "bindutil.hpp"
  20. #include "httptransport.hpp"
  21. CMimeBodyPart::CMimeBodyPart(const char* content_type, const char* encoding, const char* cid, const char* location, StringBuffer* content, const char* content_disposition)
  22. {
  23. m_content_type.set(content_type);
  24. m_content_disposition.set(content_disposition);
  25. m_encoding.set(encoding);
  26. m_cid.set(cid);
  27. m_location.set(location);
  28. if(content != NULL)
  29. m_content.append(content->length(), content->str());
  30. }
  31. CMimeBodyPart::~CMimeBodyPart()
  32. {
  33. }
  34. const char* CMimeBodyPart::getContentType()
  35. {
  36. return m_content_type.get();
  37. }
  38. const char* CMimeBodyPart::getContentDisposition()
  39. {
  40. return m_content_disposition.get();
  41. }
  42. const char* CMimeBodyPart::getEncoding()
  43. {
  44. return m_encoding.get();
  45. }
  46. const char* CMimeBodyPart::getCid()
  47. {
  48. return m_cid.get();
  49. }
  50. const char* CMimeBodyPart::getLocation()
  51. {
  52. return m_location.get();
  53. }
  54. StringBuffer& CMimeBodyPart::getContent(StringBuffer& content)
  55. {
  56. return content.append(m_content.length(), m_content.str());
  57. }
  58. void CMimeBodyPart::setContent(int len, const char* buffer)
  59. {
  60. m_content.append(len, buffer);
  61. }
  62. void CMimeBodyPart::serialize(StringBuffer & buffer)
  63. {
  64. buffer.append("Content-Type: ").append(m_content_type.get()).append("\r\n");
  65. buffer.append("Content-Transfer-Encoding: ").append(m_encoding.get()).append("\r\n");
  66. buffer.append("Content-ID: <").append(m_cid.get()).append(">\r\n");
  67. buffer.append("\r\n");
  68. buffer.append(m_content.str());
  69. buffer.append("\r\n");
  70. }
  71. /***********************************************************************************/
  72. CMimeMultiPart::CMimeMultiPart(const char* mime_version, const char* content_type, const char* boundary, const char* type, const char* start)
  73. {
  74. m_mime_version.set(mime_version);
  75. m_content_type.set(content_type);
  76. m_boundary.set(boundary);
  77. m_type.set(type);
  78. m_start.set(start);
  79. }
  80. CMimeMultiPart::~CMimeMultiPart()
  81. {
  82. }
  83. const char* CMimeMultiPart::getContentType()
  84. {
  85. return m_content_type.get();
  86. }
  87. void CMimeMultiPart::addBodyPart(CMimeBodyPart* part)
  88. {
  89. m_parts.append(*part);
  90. }
  91. CMimeBodyPart* CMimeMultiPart::getBodyPart(const char* cid)
  92. {
  93. ForEachItemIn(x, m_parts)
  94. {
  95. CMimeBodyPart& onepart = m_parts.item(x);
  96. if(!strcmp(onepart.getCid(), cid))
  97. {
  98. return LINK(&onepart);
  99. }
  100. }
  101. return NULL;
  102. }
  103. CMimeBodyPart* CMimeMultiPart::queryBodyPart(const char* cid)
  104. {
  105. ForEachItemIn(x, m_parts)
  106. {
  107. CMimeBodyPart& onepart = m_parts.item(x);
  108. if(!strcmp(onepart.getCid(), cid))
  109. {
  110. return &onepart;
  111. }
  112. }
  113. return NULL;
  114. }
  115. CMimeBodyPart* CMimeMultiPart::getBodyPart(unsigned int seq)
  116. {
  117. if(seq < m_parts.length())
  118. {
  119. CMimeBodyPart& onepart = m_parts.item(seq);
  120. return LINK(&onepart);
  121. }
  122. else
  123. return NULL;
  124. }
  125. CMimeBodyPart* CMimeMultiPart::queryBodyPart(unsigned int seq)
  126. {
  127. if(seq < m_parts.length())
  128. {
  129. CMimeBodyPart& onepart = m_parts.item(seq);
  130. return &onepart;
  131. }
  132. else
  133. return NULL;
  134. }
  135. void CMimeMultiPart::setRootPart(CMimeBodyPart* part)
  136. {
  137. const char* cid = part->getCid();
  138. m_start.set(cid);
  139. m_parts.append(*part);
  140. }
  141. CMimeBodyPart* CMimeMultiPart::getRootPart()
  142. {
  143. if(m_start.length() > 0)
  144. {
  145. ForEachItemIn(x, m_parts)
  146. {
  147. CMimeBodyPart& onepart = m_parts.item(x);
  148. if(!strcmp(onepart.getCid(), m_start.get()))
  149. {
  150. return LINK(&onepart);
  151. }
  152. }
  153. }
  154. else
  155. {
  156. // If the root part is not specified, return the first one.
  157. return LINK(&m_parts.item(0));
  158. }
  159. return NULL;
  160. }
  161. CMimeBodyPart* CMimeMultiPart::queryRootPart()
  162. {
  163. if(m_start.length() > 0)
  164. {
  165. ForEachItemIn(x, m_parts)
  166. {
  167. CMimeBodyPart& onepart = m_parts.item(x);
  168. if(!strcmp(onepart.getCid(), m_start.get()))
  169. {
  170. return &onepart;
  171. }
  172. }
  173. }
  174. else
  175. {
  176. return &m_parts.item(0);
  177. }
  178. return NULL;
  179. }
  180. int CMimeMultiPart::getBodyCount()
  181. {
  182. return m_parts.ordinality();
  183. }
  184. void CMimeMultiPart::setContentType(const char* content_type)
  185. {
  186. m_content_type.clear();
  187. m_content_type.set(content_type);
  188. }
  189. void CMimeMultiPart::serialize(StringBuffer& contenttype, StringBuffer & buffer)
  190. {
  191. if(m_parts.ordinality() > 1)
  192. {
  193. contenttype.append(m_content_type.get()).append("; boundary=").append(m_boundary.get());
  194. contenttype.append("; type=\"").append(m_type.get()).append("\"; start=\"<").append(m_start.get()).append(">\"");
  195. ForEachItemIn(x, m_parts)
  196. {
  197. buffer.append("--").append(m_boundary.get()).append("\r\n");
  198. CMimeBodyPart& onepart = m_parts.item(x);
  199. onepart.serialize(buffer);
  200. buffer.append("\r\n");
  201. }
  202. buffer.append("--").append(m_boundary.get()).append("\r\n");
  203. }
  204. else
  205. {
  206. contenttype.append(m_content_type.length() == 0 ? HTTP_TYPE_SOAP_UTF8 : m_content_type.get());
  207. CMimeBodyPart* rootpart = queryRootPart();
  208. rootpart->getContent(buffer);
  209. }
  210. }
  211. void CMimeMultiPart::unserialize(const char* contenttype, __int64 text_length, const char* text)
  212. {
  213. char* typebuf = new char[strlen(contenttype) + 1];
  214. strcpy(typebuf, contenttype);
  215. char* ptr = typebuf;
  216. char* oneword = NULL;
  217. // parse content type to get boundary, type and start
  218. ptr = Utils::getWord(ptr, oneword, "; ");
  219. while(oneword != NULL)
  220. {
  221. //DBGLOG(oneword);
  222. if(!Utils::strncasecmp(oneword, "boundary", strlen("boundary")))
  223. {
  224. if(oneword[strlen(oneword) - 1] == '"')
  225. oneword[strlen(oneword) - 1] = '\0';
  226. oneword += strlen("boundary");
  227. while(*oneword != '\0' && (*oneword == ' ' || *oneword == '='))
  228. oneword++;
  229. if(oneword[0] == '"')
  230. oneword++;
  231. m_boundary.set(oneword);
  232. }
  233. else if(!Utils::strncasecmp(oneword, "type", strlen("type")))
  234. {
  235. if(oneword[strlen(oneword) - 1] == '"')
  236. oneword[strlen(oneword) - 1] = '\0';
  237. oneword += strlen("type");
  238. while(*oneword != '\0' && (*oneword == ' ' || *oneword == '='))
  239. oneword++;
  240. if(oneword[0] == '"')
  241. oneword++;
  242. m_type.set(oneword);
  243. }
  244. else if(!Utils::strncasecmp(oneword, "start", strlen("start")))
  245. {
  246. if(oneword[strlen(oneword) - 1] == '"')
  247. oneword[strlen(oneword) - 1] = '\0';
  248. if(oneword[strlen(oneword) - 1] == '>')
  249. oneword[strlen(oneword) - 1] = '\0';
  250. oneword += strlen("start");
  251. while(*oneword != '\0' && (*oneword == ' ' || *oneword == '='))
  252. oneword++;
  253. if(oneword[0] == '"')
  254. oneword++;
  255. if(oneword[0] == '<')
  256. oneword++;
  257. m_start.set(oneword);
  258. //DBGLOG("start=%s", m_start.get());
  259. }
  260. ptr = Utils::getWord(ptr, oneword, "; ");
  261. }
  262. delete [] typebuf;
  263. int oneline_len = 0;
  264. __int64 cur_pos = 0;
  265. __int64 next_pos = Utils::getLine(text_length, cur_pos, text, oneline_len);
  266. const char* curline = text;
  267. int boundarylen = m_boundary.length();
  268. // Skip possible text before the first boundary
  269. while(next_pos < text_length && !(oneline_len >= 2 && !Utils::strncasecmp(m_boundary.get(), curline + 2, boundarylen)))
  270. {
  271. cur_pos = next_pos;
  272. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  273. curline = text + cur_pos;
  274. }
  275. // Parse all the mime parts
  276. while(next_pos < text_length)
  277. {
  278. cur_pos = next_pos;
  279. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  280. if(next_pos >= text_length)
  281. break;
  282. StringBuffer ctype, encoding, cid, body, cdisp;
  283. //parse the headers of one mime part
  284. while(next_pos < text_length && oneline_len != 0)
  285. {
  286. if(!Utils::strncasecmp(text + cur_pos, "Content-Type", strlen("Content-Type")))
  287. {
  288. int namelen = strlen("Content-Type");
  289. cur_pos += namelen;
  290. while(cur_pos < next_pos && (text[cur_pos] == ' ' || text[cur_pos] == ':'))
  291. {
  292. cur_pos++;
  293. namelen++;
  294. }
  295. ctype.append(oneline_len - namelen, text+cur_pos);
  296. }
  297. else if(!Utils::strncasecmp(text + cur_pos, "Content-Transfer-Encoding", strlen("Content-Transfer-Encoding")))
  298. {
  299. int namelen = strlen("Content-Transfer-Encoding");
  300. cur_pos += namelen;
  301. while(cur_pos < next_pos && (text[cur_pos] == ' ' || text[cur_pos] == ':'))
  302. {
  303. cur_pos++;
  304. namelen++;
  305. }
  306. encoding.append(oneline_len - namelen, text+cur_pos);
  307. }
  308. else if(!Utils::strncasecmp(text + cur_pos, "Content-Disposition", strlen("Content-Disposition")))
  309. {
  310. int namelen = strlen("Content-Disposition");
  311. cur_pos += namelen;
  312. while(cur_pos < next_pos && (text[cur_pos] == ' ' || text[cur_pos] == ':'))
  313. {
  314. cur_pos++;
  315. namelen++;
  316. }
  317. cdisp.append(oneline_len - namelen, text+cur_pos);
  318. }
  319. if(!Utils::strncasecmp(text + cur_pos, "Content-ID", strlen("Content-ID")))
  320. {
  321. int namelen = strlen("Content-ID");
  322. cur_pos += namelen;
  323. while(cur_pos < next_pos && (text[cur_pos] == ' ' || text[cur_pos] == ':' || text[cur_pos] == '<'))
  324. {
  325. cur_pos++;
  326. namelen++;
  327. }
  328. int val_len = oneline_len - namelen;
  329. if(text[cur_pos + val_len - 1] == '>')
  330. val_len -= 1;
  331. cid.append(val_len, text+cur_pos);
  332. }
  333. cur_pos = next_pos;
  334. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  335. }
  336. // Got to the end of mulitpart message
  337. if(next_pos >= text_length)
  338. break;
  339. // Read in the content of one mime part
  340. cur_pos = next_pos;
  341. __int64 bb = cur_pos;
  342. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  343. const char* curline = text + cur_pos;
  344. while(next_pos < text_length && !(oneline_len >= 2 && !Utils::strncasecmp(m_boundary.get(), curline + 2, boundarylen)))
  345. {
  346. cur_pos = next_pos;
  347. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  348. curline = text + cur_pos;
  349. }
  350. // Get rid of CR/LF at the end of the content
  351. __int64 be = cur_pos - 1;
  352. if(be >0 && (text[be] == '\r' || text[be] == '\n'))
  353. be--;
  354. //If the body is empty, ignore it
  355. if(be <= bb)
  356. continue;
  357. body.append((unsigned)(be - bb), text + bb);
  358. CMimeBodyPart* onepart = new CMimeBodyPart(ctype.str(), encoding.str(), cid.str(), "", &body, cdisp.str());
  359. addBodyPart(onepart);
  360. }
  361. }
  362. void CMimeMultiPart::parseContentType(const char* contenttype)
  363. {
  364. char* typebuf = new char[strlen(contenttype) + 1];
  365. strcpy(typebuf, contenttype);
  366. char* ptr = typebuf;
  367. char* oneword = NULL;
  368. // parse content type to get boundary, type and start
  369. ptr = Utils::getWord(ptr, oneword, "; ");
  370. while(oneword != NULL)
  371. {
  372. //DBGLOG(oneword);
  373. if(!Utils::strncasecmp(oneword, "boundary", strlen("boundary")))
  374. {
  375. if(oneword[strlen(oneword) - 1] == '"')
  376. oneword[strlen(oneword) - 1] = '\0';
  377. oneword += strlen("boundary");
  378. while(*oneword != '\0' && (*oneword == ' ' || *oneword == '='))
  379. oneword++;
  380. if(oneword[0] == '"')
  381. oneword++;
  382. m_boundary.set(oneword);
  383. }
  384. else if(!Utils::strncasecmp(oneword, "type", strlen("type")))
  385. {
  386. if(oneword[strlen(oneword) - 1] == '"')
  387. oneword[strlen(oneword) - 1] = '\0';
  388. oneword += strlen("type");
  389. while(*oneword != '\0' && (*oneword == ' ' || *oneword == '='))
  390. oneword++;
  391. if(oneword[0] == '"')
  392. oneword++;
  393. m_type.set(oneword);
  394. }
  395. else if(!Utils::strncasecmp(oneword, "start", strlen("start")))
  396. {
  397. if(oneword[strlen(oneword) - 1] == '"')
  398. oneword[strlen(oneword) - 1] = '\0';
  399. if(oneword[strlen(oneword) - 1] == '>')
  400. oneword[strlen(oneword) - 1] = '\0';
  401. oneword += strlen("start");
  402. while(*oneword != '\0' && (*oneword == ' ' || *oneword == '='))
  403. oneword++;
  404. if(oneword[0] == '"')
  405. oneword++;
  406. if(oneword[0] == '<')
  407. oneword++;
  408. m_start.set(oneword);
  409. }
  410. ptr = Utils::getWord(ptr, oneword, "; ");
  411. }
  412. delete [] typebuf;
  413. return;
  414. }
  415. enum BoundaryCheckState { BoundaryNotFound, BoundaryFound, PossibleBoundary };
  416. bool CMimeMultiPart::separateMultiParts(MemoryBuffer& firstPart, MemoryBuffer& remainder, __int64 contentNotRead)
  417. {
  418. int boundaryLen = m_boundary.length();
  419. if (boundaryLen < 1)
  420. return false;
  421. int totalLength = firstPart.length();
  422. if (totalLength < boundaryLen)
  423. return false;
  424. BoundaryCheckState boundaryCheckState = BoundaryNotFound;
  425. const char* startPos = firstPart.toByteArray();
  426. int offset = 0;
  427. while(offset < totalLength)
  428. {
  429. if ((totalLength - offset) < (boundaryLen + 2))
  430. {//Do not check this line now since buffer size is not longer than boundary line.
  431. //The boundary line has two extra '-'s before the boundary ID
  432. if (contentNotRead > 0)
  433. {
  434. boundaryCheckState = PossibleBoundary;//a boundary line may be cut into two parts
  435. }
  436. break;
  437. }
  438. int lineLength = 0;
  439. int nextOffset = Utils::getLine(totalLength, offset, startPos, lineLength);
  440. //skip two extra '-' before checking the boundary
  441. if ((lineLength > 2) && (!Utils::strncasecmp(m_boundary.get(), startPos + offset + 2, boundaryLen)))
  442. {
  443. boundaryCheckState = BoundaryFound;//Found a m_boundary
  444. break;
  445. }
  446. offset = nextOffset;
  447. }
  448. if (boundaryCheckState == BoundaryNotFound)
  449. return false;
  450. offset -= 2;//the crlf in the front of the boundary line should not be included into file content
  451. remainder.append(totalLength - offset, startPos + offset);
  452. firstPart.setLength(offset);
  453. return (boundaryCheckState==BoundaryFound);
  454. }
  455. void CMimeMultiPart::readUploadFileName(MemoryBuffer& fileContent, StringBuffer& fileName)
  456. {
  457. int text_length = fileContent.length();
  458. if (text_length < 1)
  459. return;
  460. MemoryBuffer fileContentIn;
  461. fileContentIn.append(fileContent.length(), fileContent.toByteArray());
  462. char* text = (char*) fileContentIn.toByteArray();
  463. int oneline_len = 0;
  464. int cur_pos = 0;
  465. int next_pos = Utils::getLine(text_length, 0, text, oneline_len);
  466. const char* curline = text;
  467. int boundarylen = m_boundary.length();
  468. // Skip possible text before the first boundary
  469. while(next_pos < text_length && !(oneline_len >= 2 && !Utils::strncasecmp(m_boundary.get(), curline + 2, boundarylen)))
  470. {
  471. cur_pos = next_pos;
  472. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  473. curline = text + cur_pos;
  474. }
  475. // Parse all the mime parts
  476. while(next_pos < text_length)
  477. {
  478. cur_pos = next_pos;
  479. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  480. if(next_pos >= text_length)
  481. break;
  482. //parse the headers of one mime part
  483. while(next_pos < text_length && oneline_len != 0)
  484. {
  485. if(!Utils::strncasecmp(text + cur_pos, "Content-Disposition", strlen("Content-Disposition")))
  486. {
  487. StringBuffer cdisp;
  488. int namelen = strlen("Content-Disposition");
  489. cur_pos += namelen;
  490. while(cur_pos < next_pos && (text[cur_pos] == ' ' || text[cur_pos] == ':'))
  491. {
  492. cur_pos++;
  493. namelen++;
  494. }
  495. cdisp.append(oneline_len - namelen, text+cur_pos);
  496. if (cdisp.length() > 0)
  497. {
  498. char* ptr = (char*) cdisp.str();
  499. while (ptr && *ptr)
  500. {
  501. char* pptr = strchr(ptr, ';');
  502. if (Utils::strncasecmp(ptr, "filename=", 9))
  503. {
  504. if (!pptr)
  505. break;
  506. else
  507. {
  508. ptr = pptr + 1;
  509. while (ptr && (ptr[0] == ' ')) //skip space after ';'
  510. ptr++;
  511. }
  512. }
  513. else
  514. {
  515. fileName.append(ptr + 10); //filename="abc.txt"
  516. unsigned len = fileName.length() - 1;
  517. if (pptr)
  518. {
  519. len = pptr - ptr - 11;
  520. }
  521. fileName.remove(len, fileName.length() - len);
  522. break;
  523. }
  524. }
  525. }
  526. }
  527. cur_pos = next_pos;
  528. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  529. }
  530. // Got to the end of mulitpart message
  531. if(next_pos >= text_length)
  532. break;
  533. // Read in the content of one mime part
  534. cur_pos = next_pos;
  535. int bb = cur_pos;
  536. int be = text_length;
  537. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  538. if (next_pos < text_length)
  539. {
  540. const char* curline = text + cur_pos;
  541. while(next_pos < text_length && !(oneline_len >= 2 && !Utils::strncasecmp(m_boundary.get(), curline + 2, boundarylen)))
  542. {
  543. cur_pos = next_pos;
  544. next_pos = Utils::getLine(text_length, next_pos, text, oneline_len);
  545. curline = text + cur_pos;
  546. }
  547. // Get rid of CR/LF at the end of the content
  548. int be = cur_pos - 1;
  549. if(be >0 && (text[be] == '\r' || text[be] == '\n'))
  550. be--;
  551. }
  552. //If the body is empty, ignore it
  553. if(fileName.length() > 0)
  554. {
  555. fileContent.clear();
  556. if(be > bb)
  557. {
  558. fileContent.append(text_length - bb, text + bb);
  559. }
  560. break;
  561. }
  562. }
  563. return;
  564. }