fabfile.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. #!/usr/bin/env python
  2. import os
  3. import glob
  4. import subprocess
  5. try:
  6. from xmlrpc.client import ServerProxy
  7. except ImportError:
  8. from xmlrpclib import ServerProxy
  9. from pprint import pprint
  10. import boto
  11. import boto.s3.bucket
  12. import boto.s3.key
  13. from bs4 import BeautifulSoup
  14. from fabric.api import task, local
  15. ##################################################
  16. SHORT_PROJECT_NAME = 'python'
  17. FULL_PROJECT_NAME = 'byte_of_{}'.format(SHORT_PROJECT_NAME)
  18. # NOTE Slugs MUST be lower-case
  19. MARKDOWN_FILES = [
  20. {
  21. 'file' : '01-frontpage.md',
  22. 'slug' : "python",
  23. 'title' : "Python",
  24. },
  25. {
  26. 'file' : '02-table-of-contents.md',
  27. 'slug' : "python_en-table_of_contents",
  28. 'title' : "Table of Contents of A Byte of Python",
  29. },
  30. ]
  31. ## NOTES
  32. ## 1. This assumes that you have already created the S3 bucket whose name is stored in
  33. ## AWS_S3_BUCKET_NAME environment variable.
  34. ## 2. Under that S3 bucket, you have created a folder whose name is stored above as
  35. ## SHORT_PROJECT_NAME.
  36. ## 3. Under that S3 bucket, you have created a folder whose name is stored as
  37. ## SHORT_PROJECT_NAME/assets.
  38. ##################################################
  39. for chapter in MARKDOWN_FILES:
  40. assert (chapter['slug'].lower() == chapter['slug']), "Slug must be lower case : {}".format(chapter['slug'])
  41. if os.environ.get('AWS_ACCESS_KEY_ID') is not None \
  42. and len(os.environ['AWS_ACCESS_KEY_ID']) > 0 \
  43. and os.environ.get('AWS_SECRET_ACCESS_KEY') is not None \
  44. and len(os.environ['AWS_SECRET_ACCESS_KEY']) > 0 \
  45. and os.environ.get('AWS_S3_BUCKET_NAME') is not None \
  46. and len(os.environ['AWS_S3_BUCKET_NAME']) > 0:
  47. AWS_ENABLED = True
  48. else:
  49. AWS_ENABLED = False
  50. print("NOTE: S3 uploading is disabled because of missing AWS key environment variables.")
  51. # In my case, they are the same - 'files.swaroopch.com'
  52. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  53. S3_PUBLIC_URL = os.environ['AWS_S3_BUCKET_NAME']
  54. # else
  55. #S3_PUBLIC_URL = 's3.amazonaws.com/{}'.format(os.environ['AWS_S3_BUCKET_NAME'])
  56. if os.environ.get('WORDPRESS_RPC_URL') is not None \
  57. and len(os.environ['WORDPRESS_RPC_URL']) > 0 \
  58. and os.environ.get('WORDPRESS_BASE_URL') is not None \
  59. and len(os.environ['WORDPRESS_BASE_URL']) > 0 \
  60. and os.environ.get('WORDPRESS_BLOG_ID') is not None \
  61. and len(os.environ['WORDPRESS_BLOG_ID']) > 0 \
  62. and os.environ.get('WORDPRESS_USERNAME') is not None \
  63. and len(os.environ['WORDPRESS_USERNAME']) > 0 \
  64. and os.environ.get('WORDPRESS_PASSWORD') is not None \
  65. and len(os.environ['WORDPRESS_PASSWORD']) > 0 \
  66. and os.environ.get('WORDPRESS_PARENT_PAGE_ID') is not None \
  67. and len(os.environ['WORDPRESS_PARENT_PAGE_ID']) > 0 \
  68. and os.environ.get('WORDPRESS_PARENT_PAGE_SLUG') is not None \
  69. and len(os.environ['WORDPRESS_PARENT_PAGE_SLUG']) > 0:
  70. WORDPRESS_ENABLED = True
  71. else:
  72. WORDPRESS_ENABLED = False
  73. print("NOTE: Wordpress uploading is disabled because of missing environment variables.")
  74. ##################################################
  75. def _upload_to_s3(filename, key):
  76. conn = boto.connect_s3()
  77. b = boto.s3.bucket.Bucket(conn, os.environ['AWS_S3_BUCKET_NAME'])
  78. k = boto.s3.key.Key(b)
  79. k.key = key
  80. k.set_contents_from_filename(filename)
  81. k.set_acl('public-read')
  82. url = 'http://{}/{}'.format(S3_PUBLIC_URL, key)
  83. print("Uploaded to S3 : {}".format(url))
  84. return url
  85. def upload_output_to_s3(filename):
  86. key = "{}/{}".format(SHORT_PROJECT_NAME, filename.split('/')[-1])
  87. return _upload_to_s3(filename, key)
  88. def upload_asset_to_s3(filename):
  89. key = "{}/assets/{}".format(SHORT_PROJECT_NAME, filename.split('/')[-1])
  90. return _upload_to_s3(filename, key)
  91. def replace_images_with_s3_urls(text):
  92. soup = BeautifulSoup(text)
  93. for image in soup.find_all('img'):
  94. image['src'] = upload_asset_to_s3(image['src'])
  95. return soup.prettify()
  96. def markdown_to_html(source_text, upload_assets_to_s3=False):
  97. """Convert from Markdown to HTML; upload images to S3.
  98. http://www.crummy.com/software/BeautifulSoup/bs4/doc/
  99. """
  100. args = ['pandoc',
  101. '-f', 'markdown',
  102. '-t', 'html',
  103. '-S']
  104. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  105. output = p.communicate(source_text)[0]
  106. if upload_assets_to_s3:
  107. output = replace_images_with_s3_urls(output)
  108. return output
  109. def _wordpress_get_pages():
  110. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  111. print("(Fetching list of pages from WP)")
  112. return server.wp.getPosts(os.environ['WORDPRESS_BLOG_ID'],
  113. os.environ['WORDPRESS_USERNAME'],
  114. os.environ['WORDPRESS_PASSWORD'],
  115. {
  116. 'post_type' : 'page',
  117. 'number' : pow(10, 5),
  118. })
  119. def wordpress_new_page(slug, title, content):
  120. """Create a new Wordpress page.
  121. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.newPost
  122. https://codex.wordpress.org/Function_Reference/wp_insert_post
  123. http://docs.python.org/library/xmlrpclib.html
  124. """
  125. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  126. return server.wp.newPost(os.environ['WORDPRESS_BLOG_ID'],
  127. os.environ['WORDPRESS_USERNAME'],
  128. os.environ['WORDPRESS_PASSWORD'],
  129. {
  130. 'post_name' : slug,
  131. 'post_content' : content,
  132. 'post_title' : title,
  133. 'post_parent' : os.environ['WORDPRESS_PARENT_PAGE_ID'],
  134. 'post_type' : 'page',
  135. 'post_status' : 'publish',
  136. 'comment_status' : 'closed',
  137. 'ping_status' : 'closed',
  138. })
  139. def wordpress_edit_page(post_id, content):
  140. """Edit a Wordpress page.
  141. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.editPost
  142. https://codex.wordpress.org/Function_Reference/wp_insert_post
  143. http://docs.python.org/library/xmlrpclib.html
  144. """
  145. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  146. return server.wp.editPost(os.environ['WORDPRESS_BLOG_ID'],
  147. os.environ['WORDPRESS_USERNAME'],
  148. os.environ['WORDPRESS_PASSWORD'],
  149. post_id,
  150. {
  151. 'post_content' : content,
  152. })
  153. @task
  154. def wp():
  155. """https://codex.wordpress.org/XML-RPC_WordPress_API/Posts"""
  156. if WORDPRESS_ENABLED:
  157. existing_pages = _wordpress_get_pages()
  158. existing_page_slugs = [i.get('post_name') for i in existing_pages]
  159. for chapter in MARKDOWN_FILES:
  160. html = markdown_to_html(open(chapter['file']).read(), upload_assets_to_s3=True)
  161. if chapter['slug'] in existing_page_slugs:
  162. page_id = [i for i in existing_pages \
  163. if i.get('post_name') == chapter['slug']] \
  164. [0] \
  165. ['post_id']
  166. print("Existing page to be updated: {} : {}".format(chapter['slug'], page_id))
  167. result = wordpress_edit_page(page_id, html)
  168. print("Result: {}".format(result))
  169. else:
  170. print("New page to be created: {}".format(chapter['slug']))
  171. result = wordpress_new_page(chapter['slug'], chapter['title'], html)
  172. print("Result: {}".format(result))
  173. page_url = "{}/{}/{}".format(os.environ['WORDPRESS_BASE_URL'],
  174. os.environ['WORDPRESS_PARENT_PAGE_SLUG'],
  175. chapter['slug'])
  176. print(page_url)
  177. print
  178. @task
  179. def epub():
  180. """http://johnmacfarlane.net/pandoc/epub.html"""
  181. args = ['pandoc',
  182. '-f', 'markdown',
  183. '-t', 'epub',
  184. '-o', '{}.epub'.format(FULL_PROJECT_NAME),
  185. '-S'] + [i['file'] for i in MARKDOWN_FILES]
  186. local(' '.join(args))
  187. if AWS_ENABLED:
  188. upload_output_to_s3('{}.epub'.format(FULL_PROJECT_NAME))
  189. @task
  190. def pdf():
  191. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  192. args = ['pandoc',
  193. '-f', 'markdown',
  194. ##'-t', 'pdf', # Intentionally commented out due to https://github.com/jgm/pandoc/issues/571
  195. '-o', '{}.pdf'.format(FULL_PROJECT_NAME),
  196. '-S'] + [i['file'] for i in MARKDOWN_FILES]
  197. local(' '.join(args))
  198. if AWS_ENABLED:
  199. upload_output_to_s3('{}.pdf'.format(FULL_PROJECT_NAME))
  200. POSSIBLE_OUTPUTS = (
  201. '{}.epub'.format(FULL_PROJECT_NAME),
  202. '{}.pdf'.format(FULL_PROJECT_NAME),
  203. )
  204. @task
  205. def clean():
  206. """Remove generated output files"""
  207. for filename in POSSIBLE_OUTPUTS:
  208. if os.path.exists(filename):
  209. os.remove(filename)
  210. print("Removed {}".format(filename))