fabfile.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  4. import json
  5. CONFIG = json.load(open('config.json'))
  6. ## NOTES
  7. ## 1. This assumes that you have already created the S3 bucket whose name
  8. ## is stored in AWS_S3_BUCKET_NAME environment variable.
  9. ## 2. Under that S3 bucket, you have created a folder whose name is stored
  10. ## in config.json as SHORT_PROJECT_NAME.
  11. ## 3. Under that S3 bucket, you have created a folder whose name is stored as
  12. ## SHORT_PROJECT_NAME/assets.
  13. ##### Imports ####################################
  14. import os
  15. import subprocess
  16. import copy
  17. from xmlrpclib import ServerProxy
  18. import boto
  19. import boto.s3.bucket
  20. import boto.s3.key
  21. from bs4 import BeautifulSoup
  22. from fabric.api import task, local
  23. ##### Start with checks ##########################
  24. for chapter in CONFIG['MARKDOWN_FILES']:
  25. assert (chapter['slug'].lower() == chapter['slug']), \
  26. "Slug must be lower case : {}".format(chapter['slug'])
  27. if str(os.environ.get('AWS_ENABLED')).lower() == 'false':
  28. AWS_ENABLED = False
  29. elif os.environ.get('AWS_ACCESS_KEY_ID') is not None \
  30. and len(os.environ['AWS_ACCESS_KEY_ID']) > 0 \
  31. and os.environ.get('AWS_SECRET_ACCESS_KEY') is not None \
  32. and len(os.environ['AWS_SECRET_ACCESS_KEY']) > 0 \
  33. and os.environ.get('AWS_S3_BUCKET_NAME') is not None \
  34. and len(os.environ['AWS_S3_BUCKET_NAME']) > 0:
  35. AWS_ENABLED = True
  36. else:
  37. AWS_ENABLED = False
  38. print("NOTE: S3 uploading is disabled because of missing " +
  39. "AWS key environment variables.")
  40. # In my case, they are the same - 'files.swaroopch.com'
  41. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  42. S3_PUBLIC_URL = os.environ['AWS_S3_BUCKET_NAME']
  43. # else
  44. #S3_PUBLIC_URL = 's3.amazonaws.com/{}'.format(os.environ['AWS_S3_BUCKET_NAME'])
  45. if os.environ.get('WORDPRESS_RPC_URL') is not None \
  46. and len(os.environ['WORDPRESS_RPC_URL']) > 0 \
  47. and os.environ.get('WORDPRESS_BASE_URL') is not None \
  48. and len(os.environ['WORDPRESS_BASE_URL']) > 0 \
  49. and os.environ.get('WORDPRESS_BLOG_ID') is not None \
  50. and len(os.environ['WORDPRESS_BLOG_ID']) > 0 \
  51. and os.environ.get('WORDPRESS_USERNAME') is not None \
  52. and len(os.environ['WORDPRESS_USERNAME']) > 0 \
  53. and os.environ.get('WORDPRESS_PASSWORD') is not None \
  54. and len(os.environ['WORDPRESS_PASSWORD']) > 0 \
  55. and os.environ.get('WORDPRESS_PARENT_PAGE_ID') is not None \
  56. and len(os.environ['WORDPRESS_PARENT_PAGE_ID']) > 0 \
  57. and os.environ.get('WORDPRESS_PARENT_PAGE_SLUG') is not None \
  58. and len(os.environ['WORDPRESS_PARENT_PAGE_SLUG']) > 0:
  59. WORDPRESS_ENABLED = True
  60. else:
  61. WORDPRESS_ENABLED = False
  62. print("NOTE: Wordpress uploading is disabled because of " +
  63. "missing environment variables.")
  64. ##### Helper methods #############################
  65. def _upload_to_s3(filename, key):
  66. """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data"""
  67. conn = boto.connect_s3()
  68. b = boto.s3.bucket.Bucket(conn, os.environ['AWS_S3_BUCKET_NAME'])
  69. k = boto.s3.key.Key(b)
  70. k.key = key
  71. k.set_contents_from_filename(filename)
  72. k.set_acl('public-read')
  73. url = 'http://{}/{}'.format(S3_PUBLIC_URL, key)
  74. print("Uploaded to S3 : {}".format(url))
  75. return url
  76. def upload_output_to_s3(filename):
  77. key = "{}/{}".format(CONFIG['SHORT_PROJECT_NAME'],
  78. filename.split('/')[-1])
  79. return _upload_to_s3(filename, key)
  80. def upload_asset_to_s3(filename):
  81. key = "{}/assets/{}".format(CONFIG['SHORT_PROJECT_NAME'],
  82. filename.split('/')[-1])
  83. return _upload_to_s3(filename, key)
  84. def replace_images_with_s3_urls(text):
  85. """http://www.crummy.com/software/BeautifulSoup/bs4/doc/"""
  86. soup = BeautifulSoup(text)
  87. for image in soup.find_all('img'):
  88. image['src'] = upload_asset_to_s3(image['src'])
  89. return unicode(soup)
  90. def markdown_to_html(source_text, upload_assets_to_s3=False):
  91. """Convert from Markdown to HTML; optional: upload images, etc. to S3."""
  92. args = ['pandoc',
  93. '-f', 'markdown',
  94. '-t', 'html5']
  95. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  96. output = p.communicate(source_text)[0]
  97. # http://wordpress.org/extend/plugins/raw-html/
  98. output = '<!--raw-->\n' + output + '\n<!--/raw-->'
  99. # NOTE: Also assumes that you have added the CSS from
  100. # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme.
  101. if upload_assets_to_s3:
  102. output = replace_images_with_s3_urls(output)
  103. return output
  104. def _wordpress_get_pages():
  105. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  106. print("Fetching list of pages from WP")
  107. return server.wp.getPosts(os.environ['WORDPRESS_BLOG_ID'],
  108. os.environ['WORDPRESS_USERNAME'],
  109. os.environ['WORDPRESS_PASSWORD'],
  110. {
  111. 'post_type': 'page',
  112. 'number': pow(10, 5),
  113. })
  114. def wordpress_new_page(slug, title, content):
  115. """Create a new Wordpress page.
  116. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.newPost
  117. https://codex.wordpress.org/Function_Reference/wp_insert_post
  118. http://docs.python.org/library/xmlrpclib.html
  119. """
  120. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  121. return server.wp.newPost(os.environ['WORDPRESS_BLOG_ID'],
  122. os.environ['WORDPRESS_USERNAME'],
  123. os.environ['WORDPRESS_PASSWORD'],
  124. {
  125. 'post_name': slug,
  126. 'post_content': content,
  127. 'post_title': title,
  128. 'post_parent':
  129. os.environ['WORDPRESS_PARENT_PAGE_ID'],
  130. 'post_type': 'page',
  131. 'post_status': 'publish',
  132. 'comment_status': 'closed',
  133. 'ping_status': 'closed',
  134. })
  135. def wordpress_edit_page(post_id, title, content):
  136. """Edit a Wordpress page.
  137. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.editPost
  138. https://codex.wordpress.org/Function_Reference/wp_insert_post
  139. http://docs.python.org/library/xmlrpclib.html
  140. """
  141. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  142. return server.wp.editPost(os.environ['WORDPRESS_BLOG_ID'],
  143. os.environ['WORDPRESS_USERNAME'],
  144. os.environ['WORDPRESS_PASSWORD'],
  145. post_id,
  146. {
  147. 'post_content': content,
  148. 'post_title': title,
  149. })
  150. def collect_header_anchors(chapter, i, all_headers):
  151. soup = BeautifulSoup(chapter['html'])
  152. for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
  153. if 'id' in header.attrs:
  154. all_headers[header['id']] = i
  155. def fix_links_to_other_chapters(chapter, chapters, all_headers):
  156. """Fix links to other sections with Wordpress page URL."""
  157. soup = BeautifulSoup(chapter['html'])
  158. for link in soup.find_all('a'):
  159. if 'href' in link.attrs:
  160. if link['href'].startswith('#'):
  161. header_id = link['href'][1:]
  162. assert header_id in all_headers, \
  163. "#{} does not exist, referred in {}".format(
  164. header_id, chapter['file'])
  165. other_chapter = chapters[all_headers[header_id]]
  166. link['href'] = '{}#{}'.format(
  167. other_chapter['link'],
  168. header_id)
  169. chapter['html'] = unicode(soup)
  170. ##### Tasks ######################################
  171. @task
  172. def wp():
  173. """https://codex.wordpress.org/XML-RPC_WordPress_API/Posts"""
  174. if WORDPRESS_ENABLED:
  175. chapters = copy.deepcopy(CONFIG['MARKDOWN_FILES'])
  176. # header anchor id -> index in MARKDOWN_FILES
  177. all_headers = {}
  178. # Render html
  179. print("Rendering html")
  180. for (i, chapter) in enumerate(chapters):
  181. chapter['html'] = markdown_to_html(open(chapter['file']).read(),
  182. upload_assets_to_s3=AWS_ENABLED)
  183. collect_header_anchors(chapter, i, all_headers)
  184. chapter['link'] = "{}/{}/{}".format(
  185. os.environ['WORDPRESS_BASE_URL'],
  186. os.environ['WORDPRESS_PARENT_PAGE_SLUG'],
  187. chapter['slug'])
  188. # Fix cross-links
  189. for chapter in chapters:
  190. fix_links_to_other_chapters(chapter, chapters, all_headers)
  191. # Add previous and next links at end of html
  192. for (i, chapter) in enumerate(chapters):
  193. previous_link = None
  194. if i > 0:
  195. previous_link = chapters[i - 1]['link']
  196. next_link = None
  197. if i < len(chapters) - 1:
  198. next_link = chapters[i + 1]['link']
  199. if previous_link is not None or next_link is not None:
  200. chapter['html'] += "\n"
  201. if previous_link is not None:
  202. chapter['html'] += """\
  203. <a href="{}">&lArr; Previous chapter</a>\
  204. """.format(previous_link)
  205. if previous_link is not None and next_link is not None:
  206. chapter['html'] += '&nbsp;' * 5
  207. if next_link is not None:
  208. chapter['html'] += """\
  209. <a href="{}">Next chapter &rArr;</a>\
  210. """.format(next_link)
  211. # Fetch list of pages on the server and determine which already exist
  212. existing_pages = _wordpress_get_pages()
  213. existing_page_slugs = [i.get('post_name') for i in existing_pages]
  214. def page_slug_to_id(slug):
  215. pages = [i for i in existing_pages if i.get('post_name') == slug]
  216. page = pages[0]
  217. return page['post_id']
  218. for chapter in chapters:
  219. if chapter['slug'] in existing_page_slugs:
  220. chapter['page_id'] = page_slug_to_id(chapter['slug'])
  221. # Send to WP
  222. print("Uploading to WordPress")
  223. for chapter in chapters:
  224. if chapter['slug'] in existing_page_slugs:
  225. print("Existing page: {}".format(chapter['link']))
  226. assert wordpress_edit_page(chapter['page_id'],
  227. chapter['title'],
  228. chapter['html'])
  229. else:
  230. print("New page: {}".format(chapter['link']))
  231. assert wordpress_new_page(chapter['slug'],
  232. chapter['title'],
  233. chapter['html'])
  234. @task
  235. def html():
  236. """HTML5 output."""
  237. args = ['pandoc',
  238. '-f', 'markdown',
  239. '-t', 'html5',
  240. '-o', '{}.html'.format(CONFIG['FULL_PROJECT_NAME']),
  241. '-s',
  242. '--toc'] + [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  243. local(' '.join(args))
  244. local('open {}.html'.format(CONFIG['FULL_PROJECT_NAME']))
  245. @task
  246. def epub():
  247. """http://johnmacfarlane.net/pandoc/epub.html"""
  248. args = ['pandoc',
  249. '-f', 'markdown',
  250. '-t', 'epub',
  251. '-o', '{}.epub'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  252. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  253. # TODO --epub-cover-image
  254. # TODO --epub-metadata
  255. # TODO --epub-stylesheet
  256. local(' '.join(args))
  257. if AWS_ENABLED:
  258. upload_output_to_s3('{}.epub'.format(CONFIG['FULL_PROJECT_NAME']))
  259. @task
  260. def pdf():
  261. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  262. args = ['pandoc',
  263. '-f', 'markdown',
  264. # https://github.com/jgm/pandoc/issues/571
  265. #'-t', 'pdf',
  266. '-o', '{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']),
  267. '--toc'] + [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  268. local(' '.join(args))
  269. if AWS_ENABLED:
  270. upload_output_to_s3('{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']))
  271. @task
  272. def docx():
  273. """OOXML document format."""
  274. args = ['pandoc',
  275. '-f', 'markdown',
  276. '-t', 'docx',
  277. '-o', '{}.docx'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  278. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  279. local(' '.join(args))
  280. if AWS_ENABLED:
  281. upload_output_to_s3('{}.docx'.format(CONFIG['FULL_PROJECT_NAME']))
  282. @task
  283. def odt():
  284. """OpenDocument document format."""
  285. args = ['pandoc',
  286. '-f', 'markdown',
  287. '-t', 'odt',
  288. '-o', '{}.odt'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  289. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  290. local(' '.join(args))
  291. if AWS_ENABLED:
  292. upload_output_to_s3('{}.odt'.format(CONFIG['FULL_PROJECT_NAME']))
  293. @task
  294. def clean():
  295. """Remove generated output files"""
  296. possible_outputs = (
  297. '{}.html'.format(CONFIG['FULL_PROJECT_NAME']),
  298. '{}.epub'.format(CONFIG['FULL_PROJECT_NAME']),
  299. '{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']),
  300. '{}.docx'.format(CONFIG['FULL_PROJECT_NAME']),
  301. '{}.odt'.format(CONFIG['FULL_PROJECT_NAME']),
  302. )
  303. for filename in possible_outputs:
  304. if os.path.exists(filename):
  305. os.remove(filename)
  306. print("Removed {}".format(filename))