fabfile.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  4. SHORT_PROJECT_NAME = 'python'
  5. FULL_PROJECT_NAME = 'byte_of_{}'.format(SHORT_PROJECT_NAME)
  6. # NOTE Slugs MUST be lower-case
  7. MARKDOWN_FILES = [
  8. {
  9. 'file': '01-frontpage.md',
  10. 'slug': "python",
  11. 'title': "Python",
  12. },
  13. ]
## NOTES
## 1. This assumes that you have already created the S3 bucket whose name
##    is stored in the AWS_S3_BUCKET_NAME environment variable.
## 2. Under that S3 bucket, you have created a folder whose name is stored
##    above as SHORT_PROJECT_NAME.
## 3. Under that S3 bucket, you have also created a folder named
##    SHORT_PROJECT_NAME/assets.
  21. ##### Imports ####################################
  22. import os
  23. import glob
  24. import subprocess
  25. try:
  26. from xmlrpc.client import ServerProxy
  27. except ImportError:
  28. from xmlrpclib import ServerProxy
  29. from pprint import pprint
  30. import boto
  31. import boto.s3.bucket
  32. import boto.s3.key
  33. from bs4 import BeautifulSoup
  34. from fabric.api import task, local
  35. ##### Start with checks ##########################
  36. for chapter in MARKDOWN_FILES:
  37. assert (chapter['slug'].lower() == chapter['slug']), \
  38. "Slug must be lower case : {}".format(chapter['slug'])
  39. if os.environ.get('AWS_ACCESS_KEY_ID') is not None \
  40. and len(os.environ['AWS_ACCESS_KEY_ID']) > 0 \
  41. and os.environ.get('AWS_SECRET_ACCESS_KEY') is not None \
  42. and len(os.environ['AWS_SECRET_ACCESS_KEY']) > 0 \
  43. and os.environ.get('AWS_S3_BUCKET_NAME') is not None \
  44. and len(os.environ['AWS_S3_BUCKET_NAME']) > 0:
  45. AWS_ENABLED = True
  46. else:
  47. AWS_ENABLED = False
  48. print("NOTE: S3 uploading is disabled because of missing " +
  49. "AWS key environment variables.")
  50. # In my case, they are the same - 'files.swaroopch.com'
  51. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  52. S3_PUBLIC_URL = os.environ['AWS_S3_BUCKET_NAME']
  53. # else
  54. #S3_PUBLIC_URL = 's3.amazonaws.com/{}'.format(os.environ['AWS_S3_BUCKET_NAME'])
  55. if os.environ.get('WORDPRESS_RPC_URL') is not None \
  56. and len(os.environ['WORDPRESS_RPC_URL']) > 0 \
  57. and os.environ.get('WORDPRESS_BASE_URL') is not None \
  58. and len(os.environ['WORDPRESS_BASE_URL']) > 0 \
  59. and os.environ.get('WORDPRESS_BLOG_ID') is not None \
  60. and len(os.environ['WORDPRESS_BLOG_ID']) > 0 \
  61. and os.environ.get('WORDPRESS_USERNAME') is not None \
  62. and len(os.environ['WORDPRESS_USERNAME']) > 0 \
  63. and os.environ.get('WORDPRESS_PASSWORD') is not None \
  64. and len(os.environ['WORDPRESS_PASSWORD']) > 0 \
  65. and os.environ.get('WORDPRESS_PARENT_PAGE_ID') is not None \
  66. and len(os.environ['WORDPRESS_PARENT_PAGE_ID']) > 0 \
  67. and os.environ.get('WORDPRESS_PARENT_PAGE_SLUG') is not None \
  68. and len(os.environ['WORDPRESS_PARENT_PAGE_SLUG']) > 0:
  69. WORDPRESS_ENABLED = True
  70. else:
  71. WORDPRESS_ENABLED = False
  72. print("NOTE: Wordpress uploading is disabled because of " +
  73. "missing environment variables.")
  74. ##### Helper methods #############################
  75. def _upload_to_s3(filename, key):
  76. """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data"""
  77. conn = boto.connect_s3()
  78. b = boto.s3.bucket.Bucket(conn, os.environ['AWS_S3_BUCKET_NAME'])
  79. k = boto.s3.key.Key(b)
  80. k.key = key
  81. k.set_contents_from_filename(filename)
  82. k.set_acl('public-read')
  83. url = 'http://{}/{}'.format(S3_PUBLIC_URL, key)
  84. print("Uploaded to S3 : {}".format(url))
  85. return url
  86. def upload_output_to_s3(filename):
  87. key = "{}/{}".format(SHORT_PROJECT_NAME, filename.split('/')[-1])
  88. return _upload_to_s3(filename, key)
  89. def upload_asset_to_s3(filename):
  90. key = "{}/assets/{}".format(SHORT_PROJECT_NAME, filename.split('/')[-1])
  91. return _upload_to_s3(filename, key)
  92. def replace_images_with_s3_urls(text):
  93. """http://www.crummy.com/software/BeautifulSoup/bs4/doc/"""
  94. soup = BeautifulSoup(text)
  95. for image in soup.find_all('img'):
  96. image['src'] = upload_asset_to_s3(image['src'])
  97. return soup.prettify()
  98. def markdown_to_html(source_text, upload_assets_to_s3=False):
  99. """Convert from Markdown to HTML; optional: upload images, etc. to S3."""
  100. args = ['pandoc',
  101. '-f', 'markdown',
  102. '-t', 'html5',
  103. '-S']
  104. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  105. output = p.communicate(source_text)[0]
  106. if upload_assets_to_s3:
  107. output = replace_images_with_s3_urls(output)
  108. return output
  109. def _wordpress_get_pages():
  110. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  111. print("(Fetching list of pages from WP)")
  112. return server.wp.getPosts(os.environ['WORDPRESS_BLOG_ID'],
  113. os.environ['WORDPRESS_USERNAME'],
  114. os.environ['WORDPRESS_PASSWORD'],
  115. {
  116. 'post_type': 'page',
  117. 'number': pow(10, 5),
  118. })
  119. def wordpress_new_page(slug, title, content):
  120. """Create a new Wordpress page.
  121. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.newPost
  122. https://codex.wordpress.org/Function_Reference/wp_insert_post
  123. http://docs.python.org/library/xmlrpclib.html
  124. """
  125. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  126. return server.wp.newPost(os.environ['WORDPRESS_BLOG_ID'],
  127. os.environ['WORDPRESS_USERNAME'],
  128. os.environ['WORDPRESS_PASSWORD'],
  129. {
  130. 'post_name': slug,
  131. 'post_content': content,
  132. 'post_title': title,
  133. 'post_parent':
  134. os.environ['WORDPRESS_PARENT_PAGE_ID'],
  135. 'post_type': 'page',
  136. 'post_status': 'publish',
  137. 'comment_status': 'closed',
  138. 'ping_status': 'closed',
  139. })
  140. def wordpress_edit_page(post_id, content):
  141. """Edit a Wordpress page.
  142. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.editPost
  143. https://codex.wordpress.org/Function_Reference/wp_insert_post
  144. http://docs.python.org/library/xmlrpclib.html
  145. """
  146. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  147. return server.wp.editPost(os.environ['WORDPRESS_BLOG_ID'],
  148. os.environ['WORDPRESS_USERNAME'],
  149. os.environ['WORDPRESS_PASSWORD'],
  150. post_id,
  151. {
  152. 'post_content': content,
  153. })
  154. ##### Tasks ######################################
  155. @task
  156. def wp():
  157. """https://codex.wordpress.org/XML-RPC_WordPress_API/Posts"""
  158. if WORDPRESS_ENABLED:
  159. existing_pages = _wordpress_get_pages()
  160. existing_page_slugs = [i.get('post_name') for i in existing_pages]
  161. def page_slug_to_id(slug):
  162. pages = [i for i in existing_pages if i.get('post_name') == slug]
  163. page = pages[0]
  164. return page['post_id']
  165. for chapter in MARKDOWN_FILES:
  166. html = markdown_to_html(open(chapter['file']).read(),
  167. upload_assets_to_s3=True)
  168. # TODO Add previous and next links at end of html
  169. if chapter['slug'] in existing_page_slugs:
  170. page_id = page_slug_to_id(chapter['slug'])
  171. print("Existing page to be updated: {} : {}".format(
  172. chapter['slug'],
  173. page_id))
  174. result = wordpress_edit_page(page_id, html)
  175. print("Result: {}".format(result))
  176. else:
  177. print("New page to be created: {}".format(chapter['slug']))
  178. result = wordpress_new_page(chapter['slug'],
  179. chapter['title'],
  180. html)
  181. print("Result: {}".format(result))
  182. page_url = "{}/{}/{}".format(os.environ['WORDPRESS_BASE_URL'],
  183. os.environ['WORDPRESS_PARENT_PAGE_SLUG'],
  184. chapter['slug'])
  185. print(page_url)
  186. print()
  187. @task
  188. def html():
  189. """HTML5 output."""
  190. args = ['pandoc',
  191. '-f', 'markdown',
  192. '-t', 'html5',
  193. '-o', '{}.html'.format(FULL_PROJECT_NAME),
  194. '-S',
  195. '-s',
  196. '--toc'] + [i['file'] for i in MARKDOWN_FILES]
  197. local(' '.join(args))
  198. local('open {}.html'.format(FULL_PROJECT_NAME))
  199. @task
  200. def epub():
  201. """http://johnmacfarlane.net/pandoc/epub.html"""
  202. args = ['pandoc',
  203. '-f', 'markdown',
  204. '-t', 'epub',
  205. '-o', '{}.epub'.format(FULL_PROJECT_NAME),
  206. '-S'] + [i['file'] for i in MARKDOWN_FILES]
  207. # TODO --epub-cover-image
  208. # TODO --epub-metadata
  209. # TODO --epub-stylesheet
  210. local(' '.join(args))
  211. if AWS_ENABLED:
  212. upload_output_to_s3('{}.epub'.format(FULL_PROJECT_NAME))
  213. @task
  214. def pdf():
  215. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  216. args = ['pandoc',
  217. '-f', 'markdown',
  218. # https://github.com/jgm/pandoc/issues/571
  219. #'-t', 'pdf',
  220. '-o', '{}.pdf'.format(FULL_PROJECT_NAME),
  221. '-S'] + [i['file'] for i in MARKDOWN_FILES]
  222. local(' '.join(args))
  223. if AWS_ENABLED:
  224. upload_output_to_s3('{}.pdf'.format(FULL_PROJECT_NAME))
  225. @task
  226. def docx():
  227. """OOXML document format."""
  228. args = ['pandoc',
  229. '-f', 'markdown',
  230. '-t', 'docx',
  231. '-o', '{}.docx'.format(FULL_PROJECT_NAME),
  232. '-S'] + [i['file'] for i in MARKDOWN_FILES]
  233. local(' '.join(args))
  234. if AWS_ENABLED:
  235. upload_output_to_s3('{}.docx'.format(FULL_PROJECT_NAME))
  236. @task
  237. def odt():
  238. """OpenDocument document format."""
  239. args = ['pandoc',
  240. '-f', 'markdown',
  241. '-t', 'odt',
  242. '-o', '{}.odt'.format(FULL_PROJECT_NAME),
  243. '-S'] + [i['file'] for i in MARKDOWN_FILES]
  244. local(' '.join(args))
  245. if AWS_ENABLED:
  246. upload_output_to_s3('{}.odt'.format(FULL_PROJECT_NAME))
  247. @task
  248. def clean():
  249. """Remove generated output files"""
  250. possible_outputs = (
  251. '{}.html'.format(FULL_PROJECT_NAME),
  252. '{}.epub'.format(FULL_PROJECT_NAME),
  253. '{}.pdf'.format(FULL_PROJECT_NAME),
  254. '{}.docx'.format(FULL_PROJECT_NAME),
  255. '{}.odt'.format(FULL_PROJECT_NAME),
  256. )
  257. for filename in possible_outputs:
  258. if os.path.exists(filename):
  259. os.remove(filename)
  260. print("Removed {}".format(filename))