fabfile.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  4. SHORT_PROJECT_NAME = 'python'
  5. FULL_PROJECT_NAME = 'byte_of_{}'.format(SHORT_PROJECT_NAME)
  6. # NOTE Slugs MUST be lower-case
  7. MARKDOWN_FILES = [
  8. {
  9. 'file': '01-frontpage.pd',
  10. 'slug': "python",
  11. 'title': "Python",
  12. },
  13. {
  14. 'file': '02-preface.pd',
  15. 'slug': "python_en-preface",
  16. 'title': "Python : Preface",
  17. },
  18. {
  19. 'file': '03-intro.pd',
  20. 'slug': "python_en-introduction",
  21. 'title': "Python : Introduction",
  22. },
  23. {
  24. 'file': '04-installation.pd',
  25. 'slug': "python_en-installation",
  26. 'title': "Python : Installation",
  27. },
  28. {
  29. 'file': '05-first-steps.pd',
  30. 'slug': "python_en-first_steps",
  31. 'title': "Python : First Steps",
  32. },
  33. {
  34. 'file': '06-basics.pd',
  35. 'slug': "python_en-basics",
  36. 'title': "Python : Basics",
  37. },
  38. {
  39. 'file': '07-operators-expressions.pd',
  40. 'slug': "python_en-operators_and_expressions",
  41. 'title': "Python : Operators and Expressions",
  42. },
  43. {
  44. 'file': '08-control-flow.pd',
  45. 'slug': "python_en-control_flow",
  46. 'title': "Python : Control Flow",
  47. },
  48. {
  49. 'file': '09-functions.pd',
  50. 'slug': "python_en-functions",
  51. 'title': "Python : Functions",
  52. },
  53. {
  54. 'file': '10-modules.pd',
  55. 'slug': "python_en-modules",
  56. 'title': "Python : Modules",
  57. },
  58. {
  59. 'file': '11-data-structures.pd',
  60. 'slug': "python_en-data_structures",
  61. 'title': "Python : Data Structures",
  62. },
  63. {
  64. 'file': '12-problem-solving.pd',
  65. 'slug': "python_en-problem_solving",
  66. 'title': "Python : Problem Solving",
  67. },
  68. {
  69. 'file': '13-oop.pd',
  70. 'slug': "python_en-object_oriented_programming",
  71. 'title': "Python : Object Oriented Programming",
  72. },
  73. {
  74. 'file': '14-io.pd',
  75. 'slug': "python_en-input_output",
  76. 'title': "Python : Input Output",
  77. },
  78. {
  79. 'file': '15-exceptions.pd',
  80. 'slug': "python_en-exceptions",
  81. 'title': "Python : Exceptions",
  82. },
  83. {
  84. 'file': '16-standard-library.pd',
  85. 'slug': "python_en-standard_library",
  86. 'title': "Python : Standard Library",
  87. },
  88. {
  89. 'file': '17-more.pd',
  90. 'slug': "python_en-more",
  91. 'title': "Python : More",
  92. },
  93. {
  94. 'file': '18-what-next.pd',
  95. 'slug': "python_en-what_next",
  96. 'title': "Python : What Next",
  97. },
  98. {
  99. 'file': '19-appendix-floss.pd',
  100. 'slug': "python_en-appendix_floss",
  101. 'title': "Python : Appendix : FLOSS",
  102. },
  103. {
  104. 'file': '20-appendix-about.pd',
  105. 'slug': "python_en-appendix_about",
  106. 'title': "Python : Appendix : About",
  107. },
  108. {
  109. 'file': '21-revision-history.pd',
  110. 'slug': "python_en-appendix_revision_history",
  111. 'title': "Python : Appendix : Revision History",
  112. },
  113. {
  114. 'file': '22-translations.pd',
  115. 'slug': "python_en-translations",
  116. 'title': "Python : Appendix : Translations",
  117. },
  118. {
  119. 'file': '23-translation-howto.pd',
  120. 'slug': "python_en-translation_howto",
  121. 'title': "Python : Appendix : Translation Howto",
  122. },
  123. ]
## NOTES
## 1. This assumes that you have already created the S3 bucket whose name
##    is stored in the AWS_S3_BUCKET_NAME environment variable.
## 2. Under that S3 bucket, you have created a folder named after the value
##    of SHORT_PROJECT_NAME above.
## 3. Under that S3 bucket, you have also created the folder
##    SHORT_PROJECT_NAME/assets.
  131. ##### Imports ####################################
  132. import os
  133. import subprocess
  134. import copy
  135. from xmlrpclib import ServerProxy
  136. import boto
  137. import boto.s3.bucket
  138. import boto.s3.key
  139. from bs4 import BeautifulSoup
  140. from fabric.api import task, local
  141. ##### Start with checks ##########################
  142. for chapter in MARKDOWN_FILES:
  143. assert (chapter['slug'].lower() == chapter['slug']), \
  144. "Slug must be lower case : {}".format(chapter['slug'])
  145. if str(os.environ.get('AWS_ENABLED')).lower() == 'false':
  146. AWS_ENABLED = False
  147. elif os.environ.get('AWS_ACCESS_KEY_ID') is not None \
  148. and len(os.environ['AWS_ACCESS_KEY_ID']) > 0 \
  149. and os.environ.get('AWS_SECRET_ACCESS_KEY') is not None \
  150. and len(os.environ['AWS_SECRET_ACCESS_KEY']) > 0 \
  151. and os.environ.get('AWS_S3_BUCKET_NAME') is not None \
  152. and len(os.environ['AWS_S3_BUCKET_NAME']) > 0:
  153. AWS_ENABLED = True
  154. else:
  155. AWS_ENABLED = False
  156. print("NOTE: S3 uploading is disabled because of missing " +
  157. "AWS key environment variables.")
  158. # In my case, they are the same - 'files.swaroopch.com'
  159. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  160. S3_PUBLIC_URL = os.environ['AWS_S3_BUCKET_NAME']
  161. # else
  162. #S3_PUBLIC_URL = 's3.amazonaws.com/{}'.format(os.environ['AWS_S3_BUCKET_NAME'])
  163. if os.environ.get('WORDPRESS_RPC_URL') is not None \
  164. and len(os.environ['WORDPRESS_RPC_URL']) > 0 \
  165. and os.environ.get('WORDPRESS_BASE_URL') is not None \
  166. and len(os.environ['WORDPRESS_BASE_URL']) > 0 \
  167. and os.environ.get('WORDPRESS_BLOG_ID') is not None \
  168. and len(os.environ['WORDPRESS_BLOG_ID']) > 0 \
  169. and os.environ.get('WORDPRESS_USERNAME') is not None \
  170. and len(os.environ['WORDPRESS_USERNAME']) > 0 \
  171. and os.environ.get('WORDPRESS_PASSWORD') is not None \
  172. and len(os.environ['WORDPRESS_PASSWORD']) > 0 \
  173. and os.environ.get('WORDPRESS_PARENT_PAGE_ID') is not None \
  174. and len(os.environ['WORDPRESS_PARENT_PAGE_ID']) > 0 \
  175. and os.environ.get('WORDPRESS_PARENT_PAGE_SLUG') is not None \
  176. and len(os.environ['WORDPRESS_PARENT_PAGE_SLUG']) > 0:
  177. WORDPRESS_ENABLED = True
  178. else:
  179. WORDPRESS_ENABLED = False
  180. print("NOTE: Wordpress uploading is disabled because of " +
  181. "missing environment variables.")
  182. ##### Helper methods #############################
  183. def _upload_to_s3(filename, key):
  184. """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data"""
  185. conn = boto.connect_s3()
  186. b = boto.s3.bucket.Bucket(conn, os.environ['AWS_S3_BUCKET_NAME'])
  187. k = boto.s3.key.Key(b)
  188. k.key = key
  189. k.set_contents_from_filename(filename)
  190. k.set_acl('public-read')
  191. url = 'http://{}/{}'.format(S3_PUBLIC_URL, key)
  192. print("Uploaded to S3 : {}".format(url))
  193. return url
  194. def upload_output_to_s3(filename):
  195. key = "{}/{}".format(SHORT_PROJECT_NAME, filename.split('/')[-1])
  196. return _upload_to_s3(filename, key)
  197. def upload_asset_to_s3(filename):
  198. key = "{}/assets/{}".format(SHORT_PROJECT_NAME, filename.split('/')[-1])
  199. return _upload_to_s3(filename, key)
  200. def replace_images_with_s3_urls(text):
  201. """http://www.crummy.com/software/BeautifulSoup/bs4/doc/"""
  202. soup = BeautifulSoup(text)
  203. for image in soup.find_all('img'):
  204. image['src'] = upload_asset_to_s3(image['src'])
  205. return unicode(soup)
  206. def markdown_to_html(source_text, upload_assets_to_s3=False):
  207. """Convert from Markdown to HTML; optional: upload images, etc. to S3."""
  208. args = ['pandoc',
  209. '-f', 'markdown',
  210. '-t', 'html5']
  211. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  212. output = p.communicate(source_text)[0]
  213. # http://wordpress.org/extend/plugins/raw-html/
  214. output = '<!--raw-->\n' + output + '\n<!--/raw-->'
  215. # NOTE: Also assumes that you have added the CSS from
  216. # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme.
  217. if upload_assets_to_s3:
  218. output = replace_images_with_s3_urls(output)
  219. return output
  220. def _wordpress_get_pages():
  221. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  222. print("Fetching list of pages from WP")
  223. return server.wp.getPosts(os.environ['WORDPRESS_BLOG_ID'],
  224. os.environ['WORDPRESS_USERNAME'],
  225. os.environ['WORDPRESS_PASSWORD'],
  226. {
  227. 'post_type': 'page',
  228. 'number': pow(10, 5),
  229. })
  230. def wordpress_new_page(slug, title, content):
  231. """Create a new Wordpress page.
  232. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.newPost
  233. https://codex.wordpress.org/Function_Reference/wp_insert_post
  234. http://docs.python.org/library/xmlrpclib.html
  235. """
  236. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  237. return server.wp.newPost(os.environ['WORDPRESS_BLOG_ID'],
  238. os.environ['WORDPRESS_USERNAME'],
  239. os.environ['WORDPRESS_PASSWORD'],
  240. {
  241. 'post_name': slug,
  242. 'post_content': content,
  243. 'post_title': title,
  244. 'post_parent':
  245. os.environ['WORDPRESS_PARENT_PAGE_ID'],
  246. 'post_type': 'page',
  247. 'post_status': 'publish',
  248. 'comment_status': 'closed',
  249. 'ping_status': 'closed',
  250. })
  251. def wordpress_edit_page(post_id, title, content):
  252. """Edit a Wordpress page.
  253. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.editPost
  254. https://codex.wordpress.org/Function_Reference/wp_insert_post
  255. http://docs.python.org/library/xmlrpclib.html
  256. """
  257. server = ServerProxy(os.environ['WORDPRESS_RPC_URL'])
  258. return server.wp.editPost(os.environ['WORDPRESS_BLOG_ID'],
  259. os.environ['WORDPRESS_USERNAME'],
  260. os.environ['WORDPRESS_PASSWORD'],
  261. post_id,
  262. {
  263. 'post_content': content,
  264. 'post_title': title,
  265. })
  266. def collect_header_anchors(chapter, i, all_headers):
  267. soup = BeautifulSoup(chapter['html'])
  268. for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
  269. if 'id' in header.attrs:
  270. all_headers[header['id']] = i
  271. def fix_links_to_other_chapters(chapter, chapters, all_headers):
  272. """Fix links to other sections with Wordpress page URL."""
  273. soup = BeautifulSoup(chapter['html'])
  274. for link in soup.find_all('a'):
  275. if 'href' in link.attrs:
  276. if link['href'].startswith('#'):
  277. header_id = link['href'][1:]
  278. assert header_id in all_headers, \
  279. "#{} does not exist, referred in {}".format(
  280. header_id, chapter['file'])
  281. other_chapter = chapters[all_headers[header_id]]
  282. previous = link['href']
  283. link['href'] = '{}#{}'.format(
  284. other_chapter['link'],
  285. header_id)
  286. print("Replacing {} with {}".format(previous, link['href']))
  287. chapter['html'] = unicode(soup)
  288. ##### Tasks ######################################
  289. @task
  290. def wp():
  291. """https://codex.wordpress.org/XML-RPC_WordPress_API/Posts"""
  292. if WORDPRESS_ENABLED:
  293. chapters = copy.deepcopy(MARKDOWN_FILES)
  294. # header anchor id -> index in MARKDOWN_FILES
  295. all_headers = {}
  296. # Render html
  297. print("Rendering html")
  298. for (i, chapter) in enumerate(chapters):
  299. chapter['html'] = markdown_to_html(open(chapter['file']).read(),
  300. upload_assets_to_s3=AWS_ENABLED)
  301. collect_header_anchors(chapter, i, all_headers)
  302. chapter['link'] = "{}/{}/{}".format(
  303. os.environ['WORDPRESS_BASE_URL'],
  304. os.environ['WORDPRESS_PARENT_PAGE_SLUG'],
  305. chapter['slug'])
  306. # Fix cross-links
  307. for chapter in chapters:
  308. fix_links_to_other_chapters(chapter, chapters, all_headers)
  309. # Add previous and next links at end of html
  310. for (i, chapter) in enumerate(chapters):
  311. previous_link = None
  312. if i > 0:
  313. previous_link = chapters[i - 1]['link']
  314. next_link = None
  315. if i < len(chapters) - 1:
  316. next_link = chapters[i + 1]['link']
  317. if previous_link is not None or next_link is not None:
  318. chapter['html'] += "\n"
  319. if previous_link is not None:
  320. chapter['html'] += """\
  321. <a href="{}">&lArr; Previous chapter</a>\
  322. """.format(previous_link)
  323. if previous_link is not None and next_link is not None:
  324. chapter['html'] += '&nbsp;' * 5
  325. if next_link is not None:
  326. chapter['html'] += """\
  327. <a href="{}">Next chapter &rArr;</a>\
  328. """.format(next_link)
  329. # Fetch list of pages on the server and determine which already exist
  330. existing_pages = _wordpress_get_pages()
  331. existing_page_slugs = [i.get('post_name') for i in existing_pages]
  332. def page_slug_to_id(slug):
  333. pages = [i for i in existing_pages if i.get('post_name') == slug]
  334. page = pages[0]
  335. return page['post_id']
  336. for chapter in chapters:
  337. if chapter['slug'] in existing_page_slugs:
  338. chapter['page_id'] = page_slug_to_id(chapter['slug'])
  339. # Send to WP
  340. print("Uploading to WordPress")
  341. for chapter in chapters:
  342. if chapter['slug'] in existing_page_slugs:
  343. print("Existing page: {}".format(chapter['link']))
  344. assert wordpress_edit_page(chapter['page_id'],
  345. chapter['title'],
  346. chapter['html'])
  347. else:
  348. print("New page: {}".format(chapter['link']))
  349. assert wordpress_new_page(chapter['slug'],
  350. chapter['title'],
  351. chapter['html'])
  352. @task
  353. def html():
  354. """HTML5 output."""
  355. args = ['pandoc',
  356. '-f', 'markdown',
  357. '-t', 'html5',
  358. '-o', '{}.html'.format(FULL_PROJECT_NAME),
  359. '-s',
  360. '--toc'] + [i['file'] for i in MARKDOWN_FILES]
  361. local(' '.join(args))
  362. local('open {}.html'.format(FULL_PROJECT_NAME))
  363. @task
  364. def epub():
  365. """http://johnmacfarlane.net/pandoc/epub.html"""
  366. args = ['pandoc',
  367. '-f', 'markdown',
  368. '-t', 'epub',
  369. '-o', '{}.epub'.format(FULL_PROJECT_NAME)] + \
  370. [i['file'] for i in MARKDOWN_FILES]
  371. # TODO --epub-cover-image
  372. # TODO --epub-metadata
  373. # TODO --epub-stylesheet
  374. local(' '.join(args))
  375. if AWS_ENABLED:
  376. upload_output_to_s3('{}.epub'.format(FULL_PROJECT_NAME))
  377. @task
  378. def pdf():
  379. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  380. args = ['pandoc',
  381. '-f', 'markdown',
  382. # https://github.com/jgm/pandoc/issues/571
  383. #'-t', 'pdf',
  384. '-o', '{}.pdf'.format(FULL_PROJECT_NAME)] + \
  385. [i['file'] for i in MARKDOWN_FILES]
  386. local(' '.join(args))
  387. if AWS_ENABLED:
  388. upload_output_to_s3('{}.pdf'.format(FULL_PROJECT_NAME))
  389. @task
  390. def docx():
  391. """OOXML document format."""
  392. args = ['pandoc',
  393. '-f', 'markdown',
  394. '-t', 'docx',
  395. '-o', '{}.docx'.format(FULL_PROJECT_NAME)] + \
  396. [i['file'] for i in MARKDOWN_FILES]
  397. local(' '.join(args))
  398. if AWS_ENABLED:
  399. upload_output_to_s3('{}.docx'.format(FULL_PROJECT_NAME))
  400. @task
  401. def odt():
  402. """OpenDocument document format."""
  403. args = ['pandoc',
  404. '-f', 'markdown',
  405. '-t', 'odt',
  406. '-o', '{}.odt'.format(FULL_PROJECT_NAME)] + \
  407. [i['file'] for i in MARKDOWN_FILES]
  408. local(' '.join(args))
  409. if AWS_ENABLED:
  410. upload_output_to_s3('{}.odt'.format(FULL_PROJECT_NAME))
  411. @task
  412. def clean():
  413. """Remove generated output files"""
  414. possible_outputs = (
  415. '{}.html'.format(FULL_PROJECT_NAME),
  416. '{}.epub'.format(FULL_PROJECT_NAME),
  417. '{}.pdf'.format(FULL_PROJECT_NAME),
  418. '{}.docx'.format(FULL_PROJECT_NAME),
  419. '{}.odt'.format(FULL_PROJECT_NAME),
  420. )
  421. for filename in possible_outputs:
  422. if os.path.exists(filename):
  423. os.remove(filename)
  424. print("Removed {}".format(filename))