fabfile.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  4. import json
  5. CONFIG = json.load(open('config.json'))
  6. ## NOTES
  7. ## 1. This assumes that you have already created the S3 bucket whose name
  8. ## is stored in AWS_S3_BUCKET_NAME environment variable.
  9. ## 2. Under that S3 bucket, you have created a folder whose name is stored
  10. ## in config.json as SHORT_PROJECT_NAME.
  11. ## 3. Under that S3 bucket, you have created a folder whose name is stored as
  12. ## SHORT_PROJECT_NAME/assets.
  13. ##### Imports ####################################
  14. import os
  15. import subprocess
  16. import copy
  17. from xmlrpclib import ServerProxy
  18. import boto
  19. import boto.s3.bucket
  20. import boto.s3.key
  21. from bs4 import BeautifulSoup
  22. from fabric.api import task, local
  23. ##### Start with checks ##########################
  24. for chapter in CONFIG['MARKDOWN_FILES']:
  25. assert (chapter['slug'].lower() == chapter['slug']), \
  26. "Slug must be lower case : {}".format(chapter['slug'])
  27. if str(os.environ.get('AWS_ENABLED')).lower() == 'false':
  28. AWS_ENABLED = False
  29. elif os.environ.get('AWS_ACCESS_KEY_ID') is not None \
  30. and len(os.environ['AWS_ACCESS_KEY_ID']) > 0 \
  31. and os.environ.get('AWS_SECRET_ACCESS_KEY') is not None \
  32. and len(os.environ['AWS_SECRET_ACCESS_KEY']) > 0 \
  33. and os.environ.get('AWS_S3_BUCKET_NAME') is not None \
  34. and len(os.environ['AWS_S3_BUCKET_NAME']) > 0:
  35. AWS_ENABLED = True
  36. else:
  37. AWS_ENABLED = False
  38. print("NOTE: S3 uploading is disabled because of missing " +
  39. "AWS key environment variables.")
  40. # In my case, they are the same - 'files.swaroopch.com'
  41. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  42. S3_PUBLIC_URL = os.environ['AWS_S3_BUCKET_NAME']
  43. # else
  44. #S3_PUBLIC_URL = 's3.amazonaws.com/{}'.format(os.environ['AWS_S3_BUCKET_NAME'])
  45. if os.environ.get('WORDPRESS_RPC_URL') is not None \
  46. and len(os.environ['WORDPRESS_RPC_URL']) > 0 \
  47. and os.environ.get('WORDPRESS_BASE_URL') is not None \
  48. and len(os.environ['WORDPRESS_BASE_URL']) > 0 \
  49. and os.environ.get('WORDPRESS_BLOG_ID') is not None \
  50. and len(os.environ['WORDPRESS_BLOG_ID']) > 0 \
  51. and os.environ.get('WORDPRESS_USERNAME') is not None \
  52. and len(os.environ['WORDPRESS_USERNAME']) > 0 \
  53. and os.environ.get('WORDPRESS_PASSWORD') is not None \
  54. and len(os.environ['WORDPRESS_PASSWORD']) > 0 \
  55. and os.environ.get('WORDPRESS_PARENT_PAGE_ID') is not None \
  56. and len(os.environ['WORDPRESS_PARENT_PAGE_ID']) > 0 \
  57. and os.environ.get('WORDPRESS_PARENT_PAGE_SLUG') is not None \
  58. and len(os.environ['WORDPRESS_PARENT_PAGE_SLUG']) > 0:
  59. WORDPRESS_ENABLED = True
  60. else:
  61. WORDPRESS_ENABLED = False
  62. print("NOTE: Wordpress uploading is disabled because of " +
  63. "missing environment variables.")
  64. ##### Helper methods #############################
  65. def _upload_to_s3(filename, key):
  66. """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data"""
  67. conn = boto.connect_s3()
  68. b = boto.s3.bucket.Bucket(conn, os.environ['AWS_S3_BUCKET_NAME'])
  69. k = boto.s3.key.Key(b)
  70. k.key = key
  71. k.set_contents_from_filename(filename)
  72. k.set_acl('public-read')
  73. url = 'http://{}/{}'.format(S3_PUBLIC_URL, key)
  74. print("Uploaded to S3 : {}".format(url))
  75. return url
  76. def upload_output_to_s3(filename):
  77. key = "{}/{}".format(CONFIG['SHORT_PROJECT_NAME'],
  78. filename.split('/')[-1])
  79. return _upload_to_s3(filename, key)
  80. def upload_asset_to_s3(filename):
  81. key = "{}/assets/{}".format(CONFIG['SHORT_PROJECT_NAME'],
  82. filename.split('/')[-1])
  83. return _upload_to_s3(filename, key)
  84. def replace_images_with_s3_urls(text):
  85. """http://www.crummy.com/software/BeautifulSoup/bs4/doc/"""
  86. soup = BeautifulSoup(text)
  87. for image in soup.find_all('img'):
  88. image['src'] = upload_asset_to_s3(image['src'])
  89. return unicode(soup)
  90. def markdown_to_html(source_text, upload_assets_to_s3=False):
  91. """Convert from Markdown to HTML; optional: upload images, etc. to S3."""
  92. args = ['pandoc',
  93. '-f', 'markdown',
  94. '-t', 'html5']
  95. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  96. output = p.communicate(source_text)[0]
  97. # http://wordpress.org/extend/plugins/raw-html/
  98. output = '<!--raw-->\n' + output + '\n<!--/raw-->'
  99. # NOTE: Also assumes that you have added the CSS from
  100. # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme.
  101. if upload_assets_to_s3:
  102. output = replace_images_with_s3_urls(output)
  103. return output
  104. def _wordpress_get_pages():
  105. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  106. print("Fetching list of pages from WordPress")
  107. return server.wp.getPosts(os.environ['WORDPRESS_BLOG_ID'],
  108. os.environ['WORDPRESS_USERNAME'],
  109. os.environ['WORDPRESS_PASSWORD'],
  110. {
  111. 'post_type': 'page',
  112. 'number': pow(10, 5),
  113. })
  114. def wordpress_new_page(slug, title, content):
  115. """Create a new Wordpress page.
  116. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.newPost
  117. https://codex.wordpress.org/Function_Reference/wp_insert_post
  118. http://docs.python.org/library/xmlrpclib.html
  119. """
  120. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  121. return server.wp.newPost(os.environ['WORDPRESS_BLOG_ID'],
  122. os.environ['WORDPRESS_USERNAME'],
  123. os.environ['WORDPRESS_PASSWORD'],
  124. {
  125. 'post_name': slug,
  126. 'post_content': content,
  127. 'post_title': title,
  128. 'post_parent':
  129. os.environ['WORDPRESS_PARENT_PAGE_ID'],
  130. 'post_type': 'page',
  131. 'post_status': 'publish',
  132. 'comment_status': 'closed',
  133. 'ping_status': 'closed',
  134. })
  135. def wordpress_edit_page(post_id, title, content):
  136. """Edit a Wordpress page.
  137. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.editPost
  138. https://codex.wordpress.org/Function_Reference/wp_insert_post
  139. http://docs.python.org/library/xmlrpclib.html
  140. """
  141. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  142. return server.wp.editPost(os.environ['WORDPRESS_BLOG_ID'],
  143. os.environ['WORDPRESS_USERNAME'],
  144. os.environ['WORDPRESS_PASSWORD'],
  145. post_id,
  146. {
  147. 'post_content': content,
  148. 'post_title': title,
  149. })
  150. def collect_header_anchors(chapter, i, all_headers):
  151. soup = BeautifulSoup(chapter['html'])
  152. for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
  153. if 'id' in header.attrs:
  154. all_headers[header['id']] = i
  155. def fix_links_to_other_chapters(chapter, chapters, all_headers):
  156. """Fix links to other sections with Wordpress page URL."""
  157. soup = BeautifulSoup(chapter['html'])
  158. for link in soup.find_all('a'):
  159. if 'href' in link.attrs:
  160. if link['href'].startswith('#'):
  161. header_id = link['href'][1:]
  162. assert header_id in all_headers, \
  163. "#{} does not exist, referred in {}".format(
  164. header_id, chapter['file'])
  165. other_chapter = chapters[all_headers[header_id]]
  166. link['href'] = '{}#{}'.format(
  167. other_chapter['link'],
  168. header_id)
  169. chapter['html'] = unicode(soup)
  170. def add_previous_next_links(chapter, i, chapters):
  171. previous_link = None
  172. if i > 0:
  173. previous_link = chapters[i - 1]['link']
  174. next_link = None
  175. if i < len(chapters) - 1:
  176. next_link = chapters[i + 1]['link']
  177. if previous_link is not None or next_link is not None:
  178. chapter['html'] += "\n"
  179. if previous_link is not None:
  180. chapter['html'] += """\
  181. <a href="{}">&lArr; Previous chapter</a>\
  182. """.format(previous_link)
  183. if previous_link is not None and next_link is not None:
  184. chapter['html'] += '&nbsp;' * 5
  185. if next_link is not None:
  186. chapter['html'] += """\
  187. <a href="{}">Next chapter &rArr;</a>\
  188. """.format(next_link)
  189. ##### Tasks ######################################
  190. @task
  191. def wp():
  192. """https://codex.wordpress.org/XML-RPC_WordPress_API/Posts"""
  193. if WORDPRESS_ENABLED:
  194. chapters = copy.deepcopy(CONFIG['MARKDOWN_FILES'])
  195. # header anchor id -> index in MARKDOWN_FILES
  196. all_headers = {}
  197. # Render html
  198. print("Rendering html")
  199. for (i, chapter) in enumerate(chapters):
  200. chapter['html'] = markdown_to_html(open(chapter['file']).read(),
  201. upload_assets_to_s3=AWS_ENABLED)
  202. collect_header_anchors(chapter, i, all_headers)
  203. chapter['link'] = "{}/{}/{}".format(
  204. os.environ['WORDPRESS_BASE_URL'],
  205. os.environ['WORDPRESS_PARENT_PAGE_SLUG'],
  206. chapter['slug'])
  207. # Fix cross-links
  208. for chapter in chapters:
  209. fix_links_to_other_chapters(chapter, chapters, all_headers)
  210. # Add previous and next links at end of html
  211. for (i, chapter) in enumerate(chapters):
  212. add_previous_next_links(chapter, i, chapters)
  213. # Fetch list of pages on the server and determine which already exist
  214. existing_pages = _wordpress_get_pages()
  215. existing_page_slugs = [i.get('post_name') for i in existing_pages]
  216. def page_slug_to_id(slug):
  217. pages = [i for i in existing_pages if i.get('post_name') == slug]
  218. page = pages[0]
  219. return page['post_id']
  220. for chapter in chapters:
  221. if chapter['slug'] in existing_page_slugs:
  222. chapter['page_id'] = page_slug_to_id(chapter['slug'])
  223. # Send to WP
  224. print("Uploading to WordPress")
  225. for chapter in chapters:
  226. if chapter['slug'] in existing_page_slugs:
  227. print("Existing page: {}".format(chapter['link']))
  228. assert wordpress_edit_page(chapter['page_id'],
  229. chapter['title'],
  230. chapter['html'])
  231. else:
  232. print("New page: {}".format(chapter['link']))
  233. assert wordpress_new_page(chapter['slug'],
  234. chapter['title'],
  235. chapter['html'])
  236. @task
  237. def html():
  238. """HTML5 output."""
  239. args = ['pandoc',
  240. '-f', 'markdown',
  241. '-t', 'html5',
  242. '-o', '{}.html'.format(CONFIG['FULL_PROJECT_NAME']),
  243. '-s',
  244. '--toc'] + [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  245. local(' '.join(args))
  246. local('open {}.html'.format(CONFIG['FULL_PROJECT_NAME']))
  247. @task
  248. def epub():
  249. """http://johnmacfarlane.net/pandoc/epub.html"""
  250. args = ['pandoc',
  251. '-f', 'markdown',
  252. '-t', 'epub',
  253. '-o', '{}.epub'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  254. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  255. # TODO --epub-cover-image
  256. # TODO --epub-metadata
  257. # TODO --epub-stylesheet
  258. local(' '.join(args))
  259. if AWS_ENABLED:
  260. upload_output_to_s3('{}.epub'.format(CONFIG['FULL_PROJECT_NAME']))
  261. @task
  262. def pdf():
  263. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  264. args = ['pandoc',
  265. '-f', 'markdown',
  266. # https://github.com/jgm/pandoc/issues/571
  267. #'-t', 'pdf',
  268. '-o', '{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']),
  269. '--toc'] + [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  270. local(' '.join(args))
  271. if AWS_ENABLED:
  272. upload_output_to_s3('{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']))
  273. @task
  274. def push():
  275. wp()
  276. epub()
  277. pdf()
  278. @task
  279. def docx():
  280. """OOXML document format."""
  281. args = ['pandoc',
  282. '-f', 'markdown',
  283. '-t', 'docx',
  284. '-o', '{}.docx'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  285. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  286. local(' '.join(args))
  287. if AWS_ENABLED:
  288. upload_output_to_s3('{}.docx'.format(CONFIG['FULL_PROJECT_NAME']))
  289. @task
  290. def odt():
  291. """OpenDocument document format."""
  292. args = ['pandoc',
  293. '-f', 'markdown',
  294. '-t', 'odt',
  295. '-o', '{}.odt'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  296. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  297. local(' '.join(args))
  298. if AWS_ENABLED:
  299. upload_output_to_s3('{}.odt'.format(CONFIG['FULL_PROJECT_NAME']))
  300. @task
  301. def clean():
  302. """Remove generated output files"""
  303. possible_outputs = (
  304. '{}.html'.format(CONFIG['FULL_PROJECT_NAME']),
  305. '{}.epub'.format(CONFIG['FULL_PROJECT_NAME']),
  306. '{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']),
  307. '{}.docx'.format(CONFIG['FULL_PROJECT_NAME']),
  308. '{}.odt'.format(CONFIG['FULL_PROJECT_NAME']),
  309. )
  310. for filename in possible_outputs:
  311. if os.path.exists(filename):
  312. os.remove(filename)
  313. print("Removed {}".format(filename))