fabfile.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  # Short name of the project; also used as the folder name inside the S3 bucket.
  SHORT_PROJECT_NAME = 'python'
  # Base name (without extension) of every generated output file.
  FULL_PROJECT_NAME = 'byte_of_{}'.format(SHORT_PROJECT_NAME)

  # Chapters of the book, in reading order. Each entry gives:
  #   'file'  - the Markdown source file handed to pandoc,
  #   'slug'  - the WordPress page slug,
  #   'title' - the WordPress page title.
  # NOTE Slugs MUST be lower-case
  MARKDOWN_FILES = [
      {
          'file': '01-frontpage.pd',
          'slug': "python",
          'title': "Python",
      },
      {
          'file': '02-preface.pd',
          'slug': "python_en-preface",
          'title': "Python : Preface",
      },
      {
          'file': '03-intro.pd',
          'slug': "python_en-introduction",
          'title': "Python : Introduction",
      },
      {
          'file': '04-installation.pd',
          'slug': "python_en-installation",
          'title': "Python : Installation",
      },
      {
          'file': '05-first-steps.pd',
          'slug': "python_en-first_steps",
          'title': "Python : First Steps",
      },
      {
          'file': '06-basics.pd',
          'slug': "python_en-basics",
          'title': "Python : Basics",
      },
      {
          'file': '07-operators-expressions.pd',
          'slug': "python_en-operators_and_expressions",
          'title': "Python : Operators and Expressions",
      },
      {
          'file': '08-control-flow.pd',
          'slug': "python_en-control_flow",
          'title': "Python : Control Flow",
      },
      {
          'file': '09-functions.pd',
          'slug': "python_en-functions",
          'title': "Python : Functions",
      },
      {
          'file': '10-modules.pd',
          'slug': "python_en-modules",
          'title': "Python : Modules",
      },
      {
          'file': '11-data-structures.pd',
          'slug': "python_en-data_structures",
          'title': "Python : Data Structures",
      },
      {
          'file': '12-problem-solving.pd',
          'slug': "python_en-problem_solving",
          'title': "Python : Problem Solving",
      },
      {
          'file': '13-oop.pd',
          'slug': "python_en-object_oriented_programming",
          'title': "Python : Object Oriented Programming",
      },
      {
          'file': '14-io.pd',
          'slug': "python_en-input_output",
          'title': "Python : Input Output",
      },
      {
          'file': '15-exceptions.pd',
          'slug': "python_en-exceptions",
          'title': "Python : Exceptions",
      },
      {
          'file': '16-standard-library.pd',
          'slug': "python_en-standard_library",
          'title': "Python : Standard Library",
      },
      {
          'file': '17-more.pd',
          'slug': "python_en-more",
          'title': "Python : More",
      },
      {
          'file': '18-what-next.pd',
          'slug': "python_en-what_next",
          'title': "Python : What Next",
      },
      {
          'file': '19-appendix-floss.pd',
          'slug': "python_en-appendix_floss",
          'title': "Python : Appendix : FLOSS",
      },
      {
          'file': '20-appendix-about.pd',
          'slug': "python_en-appendix_about",
          'title': "Python : Appendix : About",
      },
      {
          'file': '21-revision-history.pd',
          'slug': "python_en-appendix_revision_history",
          'title': "Python : Appendix : Revision History",
      },
  ]
  114. ## NOTES
  115. ## 1. This assumes that you have already created the S3 bucket whose name
  116. ## is stored in AWS_S3_BUCKET_NAME environment variable.
  117. ## 2. Under that S3 bucket, you have created a folder whose name is stored
  118. ## above as SHORT_PROJECT_NAME.
  119. ## 3. Under that S3 bucket, you have created a folder whose name is stored as
  120. ## SHORT_PROJECT_NAME/assets.
  121. ##### Imports ####################################
  122. import os
  123. import subprocess
  124. import copy
  125. from xmlrpclib import ServerProxy
  126. import boto
  127. import boto.s3.bucket
  128. import boto.s3.key
  129. from bs4 import BeautifulSoup
  130. from fabric.api import task, local
  131. ##### Start with checks ##########################
  # Refuse to run with a slug that is not lower case. An explicit raise is
  # used instead of `assert` so the check still runs under `python -O`.
  for chapter in MARKDOWN_FILES:
      if chapter['slug'] != chapter['slug'].lower():
          raise ValueError(
              "Slug must be lower case : {}".format(chapter['slug']))
  # Decide whether S3 uploading is available.
  if str(os.environ.get('AWS_ENABLED')).lower() == 'false':
      # Explicit opt-out through the AWS_ENABLED environment variable.
      AWS_ENABLED = False
  elif all(os.environ.get(name) for name in ('AWS_ACCESS_KEY_ID',
                                             'AWS_SECRET_ACCESS_KEY',
                                             'AWS_S3_BUCKET_NAME')):
      # Every required variable is present and non-empty.
      AWS_ENABLED = True
  else:
      AWS_ENABLED = False
      print("NOTE: S3 uploading is disabled because of missing " +
            "AWS key environment variables.")

  # Host name used to build public URLs of uploaded files.
  # In my case, they are the same - 'files.swaroopch.com'
  # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  # `.get` is used so that loading this file does not fail with KeyError when
  # the bucket name is unset (i.e. when S3 uploading is disabled anyway).
  S3_PUBLIC_URL = os.environ.get('AWS_S3_BUCKET_NAME', '')
  # else
  #S3_PUBLIC_URL = 's3.amazonaws.com/{}'.format(os.environ['AWS_S3_BUCKET_NAME'])
  # WordPress uploading needs every one of these environment variables to be
  # set to a non-empty value.
  if all(os.environ.get(name) for name in ('WORDPRESS_RPC_URL',
                                           'WORDPRESS_BASE_URL',
                                           'WORDPRESS_BLOG_ID',
                                           'WORDPRESS_USERNAME',
                                           'WORDPRESS_PASSWORD',
                                           'WORDPRESS_PARENT_PAGE_ID',
                                           'WORDPRESS_PARENT_PAGE_SLUG')):
      WORDPRESS_ENABLED = True
  else:
      WORDPRESS_ENABLED = False
      print("NOTE: Wordpress uploading is disabled because of " +
            "missing environment variables.")
  172. ##### Helper methods #############################
  def _upload_to_s3(filename, key):
      """Upload a local file to the S3 bucket as a publicly readable object.

      http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data

      Args:
          filename: Path of the local file to upload.
          key: Object key to store the file under inside the bucket named by
              the AWS_S3_BUCKET_NAME environment variable.

      Returns:
          The ``http://`` URL formed from ``S3_PUBLIC_URL`` and ``key``.
      """
      conn = boto.connect_s3()
      bucket = boto.s3.bucket.Bucket(conn, os.environ['AWS_S3_BUCKET_NAME'])
      s3_key = boto.s3.key.Key(bucket)
      s3_key.key = key
      # Apply the canned ACL as part of the upload itself, so the object is
      # never stored without it and no second request is needed.
      s3_key.set_contents_from_filename(filename, policy='public-read')
      url = 'http://{}/{}'.format(S3_PUBLIC_URL, key)
      print("Uploaded to S3 : {}".format(url))
      return url
  def upload_output_to_s3(filename):
      """Upload a generated output file into the project's folder on S3.

      The object key is ``SHORT_PROJECT_NAME/<base name of filename>``.

      Returns:
          The URL returned by ``_upload_to_s3``.
      """
      key = "{}/{}".format(SHORT_PROJECT_NAME, os.path.basename(filename))
      return _upload_to_s3(filename, key)
  def upload_asset_to_s3(filename):
      """Upload an asset (e.g. an image) into the project's assets folder on S3.

      The object key is ``SHORT_PROJECT_NAME/assets/<base name of filename>``.

      Returns:
          The URL returned by ``_upload_to_s3``.
      """
      key = "{}/assets/{}".format(SHORT_PROJECT_NAME,
                                  os.path.basename(filename))
      return _upload_to_s3(filename, key)
  def replace_images_with_s3_urls(text):
      """Upload every locally referenced image to S3 and point its tag there.

      http://www.crummy.com/software/BeautifulSoup/bs4/doc/

      Args:
          text: HTML markup to scan for ``<img>`` tags.

      Returns:
          The markup as a unicode string, with each local image ``src``
          replaced by the URL returned from ``upload_asset_to_s3``.
      """
      # NOTE(review): no parser is named here, so the result presumably
      # depends on which parser Beautiful Soup finds installed - confirm.
      soup = BeautifulSoup(text)
      for image in soup.find_all('img'):
          src = image.get('src')
          # An <img> without a src has nothing to upload, and a src that is
          # already a remote URL is not a local file that could be uploaded.
          if not src or '://' in src or src.startswith('//'):
              continue
          image['src'] = upload_asset_to_s3(src)
      return unicode(soup)
  def markdown_to_html(source_text, upload_assets_to_s3=False):
      """Convert from Markdown to HTML; optional: upload images, etc. to S3.

      Args:
          source_text: Markdown source, fed to pandoc on standard input.
          upload_assets_to_s3: When true, the rendered HTML is passed through
              ``replace_images_with_s3_urls``.

      Returns:
          The HTML5 produced by pandoc, wrapped in ``<!--raw-->`` markers.

      Raises:
          subprocess.CalledProcessError: If pandoc exits with a non-zero
              status.
      """
      args = ['pandoc',
              '-f', 'markdown',
              '-t', 'html5']
      process = subprocess.Popen(args,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
      output = process.communicate(source_text)[0]
      # Do not publish partial or empty output from a failed conversion.
      if process.returncode != 0:
          raise subprocess.CalledProcessError(process.returncode, args)

      # http://wordpress.org/extend/plugins/raw-html/
      output = '<!--raw-->\n' + output + '\n<!--/raw-->'

      # NOTE: Also assumes that you have added the CSS from
      # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme.
      if upload_assets_to_s3:
          output = replace_images_with_s3_urls(output)

      return output
  def _wordpress_get_pages():
      """Fetch the pages of the blog through the WordPress XML-RPC API.

      Calls ``wp.getPosts`` with a ``post_type`` of ``page`` and a ``number``
      limit of 100000, and returns whatever the server responds with.
      """
      server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
      print("Fetching list of pages from WP")
      query = {
          'post_type': 'page',
          'number': 10 ** 5,
      }
      return server.wp.getPosts(os.environ['WORDPRESS_BLOG_ID'],
                                os.environ['WORDPRESS_USERNAME'],
                                os.environ['WORDPRESS_PASSWORD'],
                                query)
  def wordpress_new_page(slug, title, content):
      """Create a new Wordpress page.

      https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.newPost
      https://codex.wordpress.org/Function_Reference/wp_insert_post
      http://docs.python.org/library/xmlrpclib.html

      Args:
          slug: Sent as the page's ``post_name``.
          title: Sent as the page's ``post_title``.
          content: Sent as the page's ``post_content``.

      Returns:
          The result of the ``wp.newPost`` XML-RPC call.
      """
      server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
      # The page is published immediately, under the configured parent page,
      # with comments and pings closed.
      page = {
          'post_name': slug,
          'post_content': content,
          'post_title': title,
          'post_parent': os.environ['WORDPRESS_PARENT_PAGE_ID'],
          'post_type': 'page',
          'post_status': 'publish',
          'comment_status': 'closed',
          'ping_status': 'closed',
      }
      return server.wp.newPost(os.environ['WORDPRESS_BLOG_ID'],
                               os.environ['WORDPRESS_USERNAME'],
                               os.environ['WORDPRESS_PASSWORD'],
                               page)
  def wordpress_edit_page(post_id, title, content):
      """Edit a Wordpress page.

      https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.editPost
      https://codex.wordpress.org/Function_Reference/wp_insert_post
      http://docs.python.org/library/xmlrpclib.html

      Args:
          post_id: Identifier of the existing page to change.
          title: Sent as the page's new ``post_title``.
          content: Sent as the page's new ``post_content``.

      Returns:
          The result of the ``wp.editPost`` XML-RPC call.
      """
      server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
      changes = {
          'post_content': content,
          'post_title': title,
      }
      return server.wp.editPost(os.environ['WORDPRESS_BLOG_ID'],
                                os.environ['WORDPRESS_USERNAME'],
                                os.environ['WORDPRESS_PASSWORD'],
                                post_id,
                                changes)
  256. ##### Tasks ######################################
  def _navigation_html(previous_link, next_link):
      """Return the previous/next chapter links markup ('' if there are none)."""
      links = []
      if previous_link is not None:
          links.append('<a href="{}">&lArr; Previous chapter</a>'
                       .format(previous_link))
      if next_link is not None:
          links.append('<a href="{}">Next chapter &rArr;</a>'
                       .format(next_link))
      if not links:
          return ""
      # The links go on their own line; when both are present they are
      # separated by five non-breaking spaces.
      return "\n" + ('&nbsp;' * 5).join(links)


  @task
  def wp():
      """Render every chapter to HTML and publish it as a WordPress page.

      https://codex.wordpress.org/XML-RPC_WordPress_API/Posts

      Does nothing when ``WORDPRESS_ENABLED`` is false. Chapter images are
      uploaded to S3 only when ``AWS_ENABLED`` is true, matching the other
      tasks in this file. A chapter whose slug already exists on the server
      is edited in place; any other chapter is created as a new page.

      Raises:
          RuntimeError: If the server reports a falsy result for a page.
      """
      if not WORDPRESS_ENABLED:
          return

      chapters = copy.deepcopy(MARKDOWN_FILES)

      # Render html
      print("Rendering html")
      for chapter in chapters:
          with open(chapter['file']) as source:
              markdown = source.read()
          chapter['html'] = markdown_to_html(markdown,
                                             upload_assets_to_s3=AWS_ENABLED)
          chapter['link'] = "{}/{}/{}".format(
              os.environ['WORDPRESS_BASE_URL'],
              os.environ['WORDPRESS_PARENT_PAGE_SLUG'],
              chapter['slug'])

      # Add previous and next links at end of html
      last_index = len(chapters) - 1
      for (i, chapter) in enumerate(chapters):
          previous_link = chapters[i - 1]['link'] if i > 0 else None
          next_link = chapters[i + 1]['link'] if i < last_index else None
          chapter['html'] += _navigation_html(previous_link, next_link)

      # Fetch list of pages on the server and determine which already exist.
      # When several pages share a slug, the first one listed is used.
      existing_by_slug = {}
      for page in _wordpress_get_pages():
          existing_by_slug.setdefault(page.get('post_name'), page)

      # Send to WP
      print("Uploading to WordPress")
      for chapter in chapters:
          # The RPC call is made outside of any `assert`, so that it still
          # happens when Python runs with assertions disabled (-O).
          if chapter['slug'] in existing_by_slug:
              print("Existing page: {}".format(chapter['link']))
              result = wordpress_edit_page(
                  existing_by_slug[chapter['slug']]['post_id'],
                  chapter['title'],
                  chapter['html'])
          else:
              print("New page: {}".format(chapter['link']))
              result = wordpress_new_page(chapter['slug'],
                                          chapter['title'],
                                          chapter['html'])
          if not result:
              raise RuntimeError(
                  "WordPress upload failed: {}".format(chapter['link']))
  @task
  def html():
      """HTML5 output.

      Runs pandoc over all chapters to build a single standalone
      ``FULL_PROJECT_NAME.html`` with a table of contents, then opens it.
      """
      output_file = '{}.html'.format(FULL_PROJECT_NAME)
      args = ['pandoc',
              '-f', 'markdown',
              '-t', 'html5',
              '-o', output_file,
              '-s',
              '--toc']
      args += [chapter['file'] for chapter in MARKDOWN_FILES]
      local(' '.join(args))
      # NOTE(review): `open` is presumably the macOS launcher command -
      # confirm before running this task on another platform.
      local('open {}'.format(output_file))
  @task
  def epub():
      """Build ``FULL_PROJECT_NAME.epub`` with pandoc; upload it if S3 is on.

      http://johnmacfarlane.net/pandoc/epub.html
      """
      output_file = '{}.epub'.format(FULL_PROJECT_NAME)
      args = ['pandoc',
              '-f', 'markdown',
              '-t', 'epub',
              '-o', output_file]
      args += [chapter['file'] for chapter in MARKDOWN_FILES]
      # TODO --epub-cover-image
      # TODO --epub-metadata
      # TODO --epub-stylesheet
      local(' '.join(args))
      if AWS_ENABLED:
          upload_output_to_s3(output_file)
  @task
  def pdf():
      """Build ``FULL_PROJECT_NAME.pdf`` with pandoc; upload it if S3 is on.

      http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf
      """
      output_file = '{}.pdf'.format(FULL_PROJECT_NAME)
      # No '-t' option is passed here; see
      # https://github.com/jgm/pandoc/issues/571
      #'-t', 'pdf',
      args = ['pandoc',
              '-f', 'markdown',
              '-o', output_file]
      args += [chapter['file'] for chapter in MARKDOWN_FILES]
      local(' '.join(args))
      if AWS_ENABLED:
          upload_output_to_s3(output_file)
  @task
  def docx():
      """OOXML document format.

      Builds ``FULL_PROJECT_NAME.docx`` with pandoc and uploads it to S3 when
      ``AWS_ENABLED`` is true.
      """
      output_file = '{}.docx'.format(FULL_PROJECT_NAME)
      args = ['pandoc',
              '-f', 'markdown',
              '-t', 'docx',
              '-o', output_file]
      args += [chapter['file'] for chapter in MARKDOWN_FILES]
      local(' '.join(args))
      if AWS_ENABLED:
          upload_output_to_s3(output_file)
  @task
  def odt():
      """OpenDocument document format.

      Builds ``FULL_PROJECT_NAME.odt`` with pandoc and uploads it to S3 when
      ``AWS_ENABLED`` is true.
      """
      output_file = '{}.odt'.format(FULL_PROJECT_NAME)
      args = ['pandoc',
              '-f', 'markdown',
              '-t', 'odt',
              '-o', output_file]
      args += [chapter['file'] for chapter in MARKDOWN_FILES]
      local(' '.join(args))
      if AWS_ENABLED:
          upload_output_to_s3(output_file)
  @task
  def clean():
      """Remove generated output files.

      Deletes each ``FULL_PROJECT_NAME.<ext>`` file that exists in the
      current directory, for the html, epub, pdf, docx and odt outputs, and
      prints the name of every file it removed.
      """
      for extension in ('html', 'epub', 'pdf', 'docx', 'odt'):
          filename = '{}.{}'.format(FULL_PROJECT_NAME, extension)
          if os.path.exists(filename):
              os.remove(filename)
              print("Removed {}".format(filename))