fabfile.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  4. import json
  5. CONFIG = json.load(open('config.json'))
## NOTES
## 1. This assumes that you have already created the S3 bucket whose name
## is stored in the AWS_S3_BUCKET_NAME environment variable.
## 2. Under that S3 bucket, you have created a folder whose name is stored
## in config.json as SHORT_PROJECT_NAME.
## 3. Under that S3 bucket, you have also created a folder named
## SHORT_PROJECT_NAME/assets.
  13. ##### Imports ####################################
  14. import os
  15. import datetime
  16. import subprocess
  17. import copy
  18. from xmlrpclib import ServerProxy
  19. import boto
  20. import boto.s3.bucket
  21. import boto.s3.key
  22. from bs4 import BeautifulSoup
  23. from fabric.api import task, local
  24. ##### Start with checks ##########################
  25. for chapter in CONFIG['MARKDOWN_FILES']:
  26. assert (chapter['slug'].lower() == chapter['slug']), \
  27. "Slug must be lower case : {}".format(chapter['slug'])
  28. if str(os.environ.get('AWS_ENABLED')).lower() == 'false':
  29. AWS_ENABLED = False
  30. elif os.environ.get('AWS_ACCESS_KEY_ID') is not None \
  31. and len(os.environ['AWS_ACCESS_KEY_ID']) > 0 \
  32. and os.environ.get('AWS_SECRET_ACCESS_KEY') is not None \
  33. and len(os.environ['AWS_SECRET_ACCESS_KEY']) > 0 \
  34. and os.environ.get('AWS_S3_BUCKET_NAME') is not None \
  35. and len(os.environ['AWS_S3_BUCKET_NAME']) > 0:
  36. AWS_ENABLED = True
  37. else:
  38. AWS_ENABLED = False
  39. print("NOTE: S3 uploading is disabled because of missing " +
  40. "AWS key environment variables.")
  41. # In my case, they are the same - 'files.swaroopch.com'
  42. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  43. if AWS_ENABLED:
  44. S3_PUBLIC_URL = os.environ['AWS_S3_BUCKET_NAME']
  45. #else
  46. #S3_PUBLIC_URL = 's3.amazonaws.com/{}'.format(
  47. #os.environ['AWS_S3_BUCKET_NAME'])
  48. if os.environ.get('WORDPRESS_RPC_URL') is not None \
  49. and len(os.environ['WORDPRESS_RPC_URL']) > 0 \
  50. and os.environ.get('WORDPRESS_BASE_URL') is not None \
  51. and len(os.environ['WORDPRESS_BASE_URL']) > 0 \
  52. and os.environ.get('WORDPRESS_BLOG_ID') is not None \
  53. and len(os.environ['WORDPRESS_BLOG_ID']) > 0 \
  54. and os.environ.get('WORDPRESS_USERNAME') is not None \
  55. and len(os.environ['WORDPRESS_USERNAME']) > 0 \
  56. and os.environ.get('WORDPRESS_PASSWORD') is not None \
  57. and len(os.environ['WORDPRESS_PASSWORD']) > 0 \
  58. and os.environ.get('WORDPRESS_PARENT_PAGE_ID') is not None \
  59. and len(os.environ['WORDPRESS_PARENT_PAGE_ID']) > 0 \
  60. and os.environ.get('WORDPRESS_PARENT_PAGE_SLUG') is not None \
  61. and len(os.environ['WORDPRESS_PARENT_PAGE_SLUG']) > 0:
  62. WORDPRESS_ENABLED = True
  63. else:
  64. WORDPRESS_ENABLED = False
  65. print("NOTE: Wordpress uploading is disabled because of " +
  66. "missing environment variables.")
  67. ##### Helper methods #############################
  68. def _upload_to_s3(filename, key):
  69. """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data"""
  70. conn = boto.connect_s3()
  71. b = boto.s3.bucket.Bucket(conn, os.environ['AWS_S3_BUCKET_NAME'])
  72. k = boto.s3.key.Key(b)
  73. k.key = key
  74. k.set_contents_from_filename(filename)
  75. k.set_acl('public-read')
  76. url = 'http://{}/{}'.format(S3_PUBLIC_URL, key)
  77. print("Uploaded to S3 : {}".format(url))
  78. return url
  79. def upload_output_to_s3(filename):
  80. key = "{}/{}".format(CONFIG['SHORT_PROJECT_NAME'],
  81. filename.split('/')[-1])
  82. return _upload_to_s3(filename, key)
  83. def upload_asset_to_s3(filename):
  84. key = "{}/assets/{}".format(CONFIG['SHORT_PROJECT_NAME'],
  85. filename.split('/')[-1])
  86. return _upload_to_s3(filename, key)
  87. def replace_images_with_s3_urls(text):
  88. """http://www.crummy.com/software/BeautifulSoup/bs4/doc/"""
  89. soup = BeautifulSoup(text)
  90. for image in soup.find_all('img'):
  91. image['src'] = upload_asset_to_s3(image['src'])
  92. return unicode(soup)
  93. def markdown_to_html(source_text, upload_assets_to_s3=False):
  94. """Convert from Markdown to HTML; optional: upload images, etc. to S3."""
  95. args = ['pandoc',
  96. '-f', 'markdown',
  97. '-t', 'html5']
  98. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  99. output = p.communicate(source_text)[0]
  100. # http://wordpress.org/extend/plugins/raw-html/
  101. output = '<!--raw-->\n' + output + '\n<!--/raw-->'
  102. # NOTE: Also assumes that you have added the CSS from
  103. # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme.
  104. if upload_assets_to_s3:
  105. output = replace_images_with_s3_urls(output)
  106. return output
  107. def _wordpress_get_pages():
  108. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  109. print("Fetching list of pages from WordPress")
  110. return server.wp.getPosts(os.environ['WORDPRESS_BLOG_ID'],
  111. os.environ['WORDPRESS_USERNAME'],
  112. os.environ['WORDPRESS_PASSWORD'],
  113. {
  114. 'post_type': 'page',
  115. 'number': pow(10, 5),
  116. })
  117. def wordpress_new_page(slug, title, content):
  118. """Create a new Wordpress page.
  119. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.newPost
  120. https://codex.wordpress.org/Function_Reference/wp_insert_post
  121. http://docs.python.org/library/xmlrpclib.html
  122. """
  123. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  124. return server.wp.newPost(os.environ['WORDPRESS_BLOG_ID'],
  125. os.environ['WORDPRESS_USERNAME'],
  126. os.environ['WORDPRESS_PASSWORD'],
  127. {
  128. 'post_name': slug,
  129. 'post_content': content,
  130. 'post_title': title,
  131. 'post_parent':
  132. os.environ['WORDPRESS_PARENT_PAGE_ID'],
  133. 'post_type': 'page',
  134. 'post_status': 'publish',
  135. 'comment_status': 'closed',
  136. 'ping_status': 'closed',
  137. })
  138. def wordpress_edit_page(post_id, title, content):
  139. """Edit a Wordpress page.
  140. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.editPost
  141. https://codex.wordpress.org/Function_Reference/wp_insert_post
  142. http://docs.python.org/library/xmlrpclib.html
  143. """
  144. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  145. return server.wp.editPost(os.environ['WORDPRESS_BLOG_ID'],
  146. os.environ['WORDPRESS_USERNAME'],
  147. os.environ['WORDPRESS_PASSWORD'],
  148. post_id,
  149. {
  150. 'post_content': content,
  151. 'post_title': title,
  152. })
  153. def collect_header_anchors(chapter, i, all_headers):
  154. soup = BeautifulSoup(chapter['html'])
  155. for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
  156. if 'id' in header.attrs:
  157. all_headers[header['id']] = i
  158. def fix_links_to_other_chapters(chapter, chapters, all_headers):
  159. """Fix links to other sections with Wordpress page URL."""
  160. soup = BeautifulSoup(chapter['html'])
  161. for link in soup.find_all('a'):
  162. if 'href' in link.attrs:
  163. if link['href'].startswith('#'):
  164. header_id = link['href'][1:]
  165. assert header_id in all_headers, \
  166. "#{} does not exist, referred in {}".format(
  167. header_id, chapter['file'])
  168. other_chapter = chapters[all_headers[header_id]]
  169. link['href'] = '{}#{}'.format(
  170. other_chapter['link'],
  171. header_id)
  172. chapter['html'] = unicode(soup)
  173. def add_previous_next_links(chapter, i, chapters):
  174. previous_link = None
  175. if i > 0:
  176. previous_link = chapters[i - 1]['link']
  177. next_link = None
  178. if i < len(chapters) - 1:
  179. next_link = chapters[i + 1]['link']
  180. if previous_link is not None or next_link is not None:
  181. chapter['html'] += "\n"
  182. if previous_link is not None:
  183. chapter['html'] += """\
  184. <a href="{}">&lArr; Previous chapter</a>\
  185. """.format(previous_link)
  186. if previous_link is not None and next_link is not None:
  187. chapter['html'] += '&nbsp;' * 5
  188. if next_link is not None:
  189. chapter['html'] += """\
  190. <a href="{}">Next chapter &rArr;</a>\
  191. """.format(next_link)
  192. ##### Tasks ######################################
  193. @task
  194. def prepare():
  195. frontpage = CONFIG['MARKDOWN_FILES'][0]
  196. content = open(frontpage['file']).read()
  197. content = content.replace("$$date$$",
  198. datetime.datetime.now().strftime("%d %b %Y"))
  199. with open(frontpage['file'], 'w') as output:
  200. output.write(content)
  201. @task
  202. def wp():
  203. """https://codex.wordpress.org/XML-RPC_WordPress_API/Posts"""
  204. if WORDPRESS_ENABLED:
  205. chapters = copy.deepcopy(CONFIG['MARKDOWN_FILES'])
  206. # header anchor id -> index in MARKDOWN_FILES
  207. all_headers = {}
  208. # Render html
  209. print("Rendering html")
  210. for (i, chapter) in enumerate(chapters):
  211. chapter['html'] = markdown_to_html(open(chapter['file']).read(),
  212. upload_assets_to_s3=AWS_ENABLED)
  213. collect_header_anchors(chapter, i, all_headers)
  214. chapter['link'] = "{}/{}/{}".format(
  215. os.environ['WORDPRESS_BASE_URL'],
  216. os.environ['WORDPRESS_PARENT_PAGE_SLUG'],
  217. chapter['slug'])
  218. # Fix cross-links
  219. for chapter in chapters:
  220. fix_links_to_other_chapters(chapter, chapters, all_headers)
  221. # Add previous and next links at end of html
  222. for (i, chapter) in enumerate(chapters):
  223. add_previous_next_links(chapter, i, chapters)
  224. # Fetch list of pages on the server and determine which already exist
  225. existing_pages = _wordpress_get_pages()
  226. existing_page_slugs = [i.get('post_name') for i in existing_pages]
  227. def page_slug_to_id(slug):
  228. pages = [i for i in existing_pages if i.get('post_name') == slug]
  229. page = pages[0]
  230. return page['post_id']
  231. for chapter in chapters:
  232. if chapter['slug'] in existing_page_slugs:
  233. chapter['page_id'] = page_slug_to_id(chapter['slug'])
  234. # Send to WP
  235. print("Uploading to WordPress")
  236. for chapter in chapters:
  237. if chapter['slug'] in existing_page_slugs:
  238. print("Existing page: {}".format(chapter['link']))
  239. assert wordpress_edit_page(chapter['page_id'],
  240. chapter['title'],
  241. chapter['html'])
  242. else:
  243. print("New page: {}".format(chapter['link']))
  244. assert wordpress_new_page(chapter['slug'],
  245. chapter['title'],
  246. chapter['html'])
  247. @task
  248. def html():
  249. """HTML5 output."""
  250. args = ['pandoc',
  251. '-f', 'markdown',
  252. '-t', 'html5',
  253. '-o', '{}.html'.format(CONFIG['FULL_PROJECT_NAME']),
  254. '-s',
  255. '--toc'] + [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  256. local(' '.join(args))
  257. local('firefox -new-tab {}.html'.format(CONFIG['FULL_PROJECT_NAME']))
  258. @task
  259. def epub():
  260. """http://johnmacfarlane.net/pandoc/epub.html"""
  261. args = ['pandoc',
  262. '-f', 'markdown',
  263. '-t', 'epub',
  264. '-o', '{}.epub'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  265. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  266. # TODO --epub-cover-image
  267. # TODO --epub-metadata
  268. # TODO --epub-stylesheet
  269. local(' '.join(args))
  270. if AWS_ENABLED:
  271. upload_output_to_s3('{}.epub'.format(CONFIG['FULL_PROJECT_NAME']))
  272. @task
  273. def pdf():
  274. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  275. args = ['pandoc',
  276. '-f', 'markdown',
  277. # https://github.com/jgm/pandoc/issues/571
  278. #'-t', 'pdf',
  279. '-o', '{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']),
  280. '--toc'] + [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  281. local(' '.join(args))
  282. if AWS_ENABLED:
  283. upload_output_to_s3('{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']))
  284. @task
  285. def push():
  286. wp()
  287. epub()
  288. pdf()
  289. @task
  290. def docx():
  291. """OOXML document format."""
  292. args = ['pandoc',
  293. '-f', 'markdown',
  294. '-t', 'docx',
  295. '-o', '{}.docx'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  296. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  297. local(' '.join(args))
  298. if AWS_ENABLED:
  299. upload_output_to_s3('{}.docx'.format(CONFIG['FULL_PROJECT_NAME']))
  300. @task
  301. def odt():
  302. """OpenDocument document format."""
  303. args = ['pandoc',
  304. '-f', 'markdown',
  305. '-t', 'odt',
  306. '-o', '{}.odt'.format(CONFIG['FULL_PROJECT_NAME'])] + \
  307. [i['file'] for i in CONFIG['MARKDOWN_FILES']]
  308. local(' '.join(args))
  309. if AWS_ENABLED:
  310. upload_output_to_s3('{}.odt'.format(CONFIG['FULL_PROJECT_NAME']))
  311. @task
  312. def clean():
  313. """Remove generated output files"""
  314. possible_outputs = (
  315. '{}.html'.format(CONFIG['FULL_PROJECT_NAME']),
  316. '{}.epub'.format(CONFIG['FULL_PROJECT_NAME']),
  317. '{}.pdf'.format(CONFIG['FULL_PROJECT_NAME']),
  318. '{}.docx'.format(CONFIG['FULL_PROJECT_NAME']),
  319. '{}.odt'.format(CONFIG['FULL_PROJECT_NAME']),
  320. )
  321. for filename in possible_outputs:
  322. if os.path.exists(filename):
  323. os.remove(filename)
  324. print("Removed {}".format(filename))