#!/usr/bin/env python from __future__ import print_function ##### Configuration ############################## SHORT_PROJECT_NAME = 'python' FULL_PROJECT_NAME = 'byte_of_{}'.format(SHORT_PROJECT_NAME) # NOTE Slugs MUST be lower-case MARKDOWN_FILES = [ { 'file': '01-frontpage.pd', 'slug': "python", 'title': "Python", }, { 'file': '02-preface.pd', 'slug': "python_en-preface", 'title': "Python : Preface", }, { 'file': '03-intro.pd', 'slug': "python_en-introduction", 'title': "Python : Introduction", }, { 'file': '04-installation.pd', 'slug': "python_en-installation", 'title': "Python : Installation", }, { 'file': '05-first-steps.pd', 'slug': "python_en-first_steps", 'title': "Python : First Steps", }, { 'file': '06-basics.pd', 'slug': "python_en-basics", 'title': "Python : Basics", }, { 'file': '07-operators-expressions.pd', 'slug': "python_en-operators_and_expressions", 'title': "Python : Operators and Expressions", }, { 'file': '08-control-flow.pd', 'slug': "python_en-control_flow", 'title': "Python : Control Flow", }, { 'file': '09-functions.pd', 'slug': "python_en-functions", 'title': "Python : Functions", }, { 'file': '10-modules.pd', 'slug': "python_en-modules", 'title': "Python : Modules", }, { 'file': '11-data-structures.pd', 'slug': "python_en-data_structures", 'title': "Python : Data Structures", }, { 'file': '12-problem-solving.pd', 'slug': "python_en-problem_solving", 'title': "Python : Problem Solving", }, { 'file': '13-oop.pd', 'slug': "python_en-object_oriented_programming", 'title': "Python : Object Oriented Programming", }, { 'file': '14-io.pd', 'slug': "python_en-input_output", 'title': "Python : Input Output", }, { 'file': '15-exceptions.pd', 'slug': "python_en-exceptions", 'title': "Python : Exceptions", }, { 'file': '16-standard-library.pd', 'slug': "python_en-standard_library", 'title': "Python : Standard Library", }, { 'file': '17-more.pd', 'slug': "python_en-more", 'title': "Python : More", }, { 'file': '18-what-next.pd', 'slug': "python_en-what_next", 'title': "Python : What Next", }, { 'file': '19-appendix-floss.pd', 'slug': "python_en-appendix_floss", 'title': "Python : Appendix : FLOSS", }, { 'file': '20-appendix-about.pd', 'slug': "python_en-appendix_about", 'title': "Python : Appendix : About", }, { 'file': '21-revision-history.pd', 'slug': "python_en-appendix_revision_history", 'title': "Python : Appendix : Revision History", }, { 'file': '22-translations.pd', 'slug': "python_en-translations", 'title': "Python : Appendix : Translations", }, { 'file': '23-translation-howto.pd', 'slug': "python_en-translation_howto", 'title': "Python : Appendix : Translation Howto", }, ] ## NOTES ## 1. This assumes that you have already created the S3 bucket whose name ## is stored in AWS_S3_BUCKET_NAME environment variable. ## 2. Under that S3 bucket, you have created a folder whose name is stored ## above as SHORT_PROJECT_NAME. ## 3. Under that S3 bucket, you have created a folder whose name is stored as ## SHORT_PROJECT_NAME/assets. ##### Imports #################################### import os import subprocess import copy from xmlrpclib import ServerProxy import boto import boto.s3.bucket import boto.s3.key from bs4 import BeautifulSoup from fabric.api import task, local ##### Start with checks ########################## for chapter in MARKDOWN_FILES: assert (chapter['slug'].lower() == chapter['slug']), \ "Slug must be lower case : {}".format(chapter['slug']) if str(os.environ.get('AWS_ENABLED')).lower() == 'false': AWS_ENABLED = False elif os.environ.get('AWS_ACCESS_KEY_ID') is not None \ and len(os.environ['AWS_ACCESS_KEY_ID']) > 0 \ and os.environ.get('AWS_SECRET_ACCESS_KEY') is not None \ and len(os.environ['AWS_SECRET_ACCESS_KEY']) > 0 \ and os.environ.get('AWS_S3_BUCKET_NAME') is not None \ and len(os.environ['AWS_S3_BUCKET_NAME']) > 0: AWS_ENABLED = True else: AWS_ENABLED = False print("NOTE: S3 uploading is disabled because of missing " + "AWS key environment variables.") # In my case, they are the same - 'files.swaroopch.com' # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs S3_PUBLIC_URL = os.environ['AWS_S3_BUCKET_NAME'] # else #S3_PUBLIC_URL = 's3.amazonaws.com/{}'.format(os.environ['AWS_S3_BUCKET_NAME']) if os.environ.get('WORDPRESS_RPC_URL') is not None \ and len(os.environ['WORDPRESS_RPC_URL']) > 0 \ and os.environ.get('WORDPRESS_BASE_URL') is not None \ and len(os.environ['WORDPRESS_BASE_URL']) > 0 \ and os.environ.get('WORDPRESS_BLOG_ID') is not None \ and len(os.environ['WORDPRESS_BLOG_ID']) > 0 \ and os.environ.get('WORDPRESS_USERNAME') is not None \ and len(os.environ['WORDPRESS_USERNAME']) > 0 \ and os.environ.get('WORDPRESS_PASSWORD') is not None \ and len(os.environ['WORDPRESS_PASSWORD']) > 0 \ and os.environ.get('WORDPRESS_PARENT_PAGE_ID') is not None \ and len(os.environ['WORDPRESS_PARENT_PAGE_ID']) > 0 \ and os.environ.get('WORDPRESS_PARENT_PAGE_SLUG') is not None \ and len(os.environ['WORDPRESS_PARENT_PAGE_SLUG']) > 0: WORDPRESS_ENABLED = True else: WORDPRESS_ENABLED = False print("NOTE: Wordpress uploading is disabled because of " + "missing environment variables.") ##### Helper methods ############################# def _upload_to_s3(filename, key): """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data""" conn = boto.connect_s3() b = boto.s3.bucket.Bucket(conn, os.environ['AWS_S3_BUCKET_NAME']) k = boto.s3.key.Key(b) k.key = key k.set_contents_from_filename(filename) k.set_acl('public-read') url = 'http://{}/{}'.format(S3_PUBLIC_URL, key) print("Uploaded to S3 : {}".format(url)) return url def upload_output_to_s3(filename): key = "{}/{}".format(SHORT_PROJECT_NAME, filename.split('/')[-1]) return _upload_to_s3(filename, key) def upload_asset_to_s3(filename): key = "{}/assets/{}".format(SHORT_PROJECT_NAME, filename.split('/')[-1]) return _upload_to_s3(filename, key) def replace_images_with_s3_urls(text): """http://www.crummy.com/software/BeautifulSoup/bs4/doc/""" soup = BeautifulSoup(text) for image in soup.find_all('img'): image['src'] = upload_asset_to_s3(image['src']) return unicode(soup) def markdown_to_html(source_text, upload_assets_to_s3=False): """Convert from Markdown to HTML; optional: upload images, etc. to S3.""" args = ['pandoc', '-f', 'markdown', '-t', 'html5'] p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) output = p.communicate(source_text)[0] # http://wordpress.org/extend/plugins/raw-html/ output = '\n' + output + '\n' # NOTE: Also assumes that you have added the CSS from # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme. if upload_assets_to_s3: output = replace_images_with_s3_urls(output) return output def _wordpress_get_pages(): server = ServerProxy(os.environ['WORDPRESS_RPC_URL']) print("Fetching list of pages from WP") return server.wp.getPosts(os.environ['WORDPRESS_BLOG_ID'], os.environ['WORDPRESS_USERNAME'], os.environ['WORDPRESS_PASSWORD'], { 'post_type': 'page', 'number': pow(10, 5), }) def wordpress_new_page(slug, title, content): """Create a new Wordpress page. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.newPost https://codex.wordpress.org/Function_Reference/wp_insert_post http://docs.python.org/library/xmlrpclib.html """ server = ServerProxy(os.environ['WORDPRESS_RPC_URL']) return server.wp.newPost(os.environ['WORDPRESS_BLOG_ID'], os.environ['WORDPRESS_USERNAME'], os.environ['WORDPRESS_PASSWORD'], { 'post_name': slug, 'post_content': content, 'post_title': title, 'post_parent': os.environ['WORDPRESS_PARENT_PAGE_ID'], 'post_type': 'page', 'post_status': 'publish', 'comment_status': 'closed', 'ping_status': 'closed', }) def wordpress_edit_page(post_id, title, content): """Edit a Wordpress page. https://codex.wordpress.org/XML-RPC_WordPress_API/Posts#wp.editPost https://codex.wordpress.org/Function_Reference/wp_insert_post http://docs.python.org/library/xmlrpclib.html """ server = ServerProxy(os.environ['WORDPRESS_RPC_URL']) return server.wp.editPost(os.environ['WORDPRESS_BLOG_ID'], os.environ['WORDPRESS_USERNAME'], os.environ['WORDPRESS_PASSWORD'], post_id, { 'post_content': content, 'post_title': title, }) def collect_header_anchors(chapter, i, all_headers): soup = BeautifulSoup(chapter['html']) for header in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']): if 'id' in header.attrs: all_headers[header['id']] = i def fix_links_to_other_chapters(chapter, chapters, all_headers): """Fix links to other sections with Wordpress page URL.""" soup = BeautifulSoup(chapter['html']) for link in soup.find_all('a'): if 'href' in link.attrs: if link['href'].startswith('#'): header_id = link['href'][1:] assert header_id in all_headers, \ "#{} does not exist, referred in {}".format( header_id, chapter['file']) other_chapter = chapters[all_headers[header_id]] previous = link['href'] link['href'] = '{}#{}'.format( other_chapter['link'], header_id) print("Replacing {} with {}".format(previous, link['href'])) chapter['html'] = unicode(soup) ##### Tasks ###################################### @task def wp(): """https://codex.wordpress.org/XML-RPC_WordPress_API/Posts""" if WORDPRESS_ENABLED: chapters = copy.deepcopy(MARKDOWN_FILES) # header anchor id -> index in MARKDOWN_FILES all_headers = {} # Render html print("Rendering html") for (i, chapter) in enumerate(chapters): chapter['html'] = markdown_to_html(open(chapter['file']).read(), upload_assets_to_s3=AWS_ENABLED) collect_header_anchors(chapter, i, all_headers) chapter['link'] = "{}/{}/{}".format( os.environ['WORDPRESS_BASE_URL'], os.environ['WORDPRESS_PARENT_PAGE_SLUG'], chapter['slug']) # Fix cross-links for chapter in chapters: fix_links_to_other_chapters(chapter, chapters, all_headers) # Add previous and next links at end of html for (i, chapter) in enumerate(chapters): previous_link = None if i > 0: previous_link = chapters[i - 1]['link'] next_link = None if i < len(chapters) - 1: next_link = chapters[i + 1]['link'] if previous_link is not None or next_link is not None: chapter['html'] += "\n" if previous_link is not None: chapter['html'] += """\ ⇐ Previous chapter\ """.format(previous_link) if previous_link is not None and next_link is not None: chapter['html'] += ' ' * 5 if next_link is not None: chapter['html'] += """\ Next chapter ⇒\ """.format(next_link) # Fetch list of pages on the server and determine which already exist existing_pages = _wordpress_get_pages() existing_page_slugs = [i.get('post_name') for i in existing_pages] def page_slug_to_id(slug): pages = [i for i in existing_pages if i.get('post_name') == slug] page = pages[0] return page['post_id'] for chapter in chapters: if chapter['slug'] in existing_page_slugs: chapter['page_id'] = page_slug_to_id(chapter['slug']) # Send to WP print("Uploading to WordPress") for chapter in chapters: if chapter['slug'] in existing_page_slugs: print("Existing page: {}".format(chapter['link'])) assert wordpress_edit_page(chapter['page_id'], chapter['title'], chapter['html']) else: print("New page: {}".format(chapter['link'])) assert wordpress_new_page(chapter['slug'], chapter['title'], chapter['html']) @task def html(): """HTML5 output.""" args = ['pandoc', '-f', 'markdown', '-t', 'html5', '-o', '{}.html'.format(FULL_PROJECT_NAME), '-s', '--toc'] + [i['file'] for i in MARKDOWN_FILES] local(' '.join(args)) local('open {}.html'.format(FULL_PROJECT_NAME)) @task def epub(): """http://johnmacfarlane.net/pandoc/epub.html""" args = ['pandoc', '-f', 'markdown', '-t', 'epub', '-o', '{}.epub'.format(FULL_PROJECT_NAME)] + \ [i['file'] for i in MARKDOWN_FILES] # TODO --epub-cover-image # TODO --epub-metadata # TODO --epub-stylesheet local(' '.join(args)) if AWS_ENABLED: upload_output_to_s3('{}.epub'.format(FULL_PROJECT_NAME)) @task def pdf(): """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf""" args = ['pandoc', '-f', 'markdown', # https://github.com/jgm/pandoc/issues/571 #'-t', 'pdf', '-o', '{}.pdf'.format(FULL_PROJECT_NAME)] + \ [i['file'] for i in MARKDOWN_FILES] local(' '.join(args)) if AWS_ENABLED: upload_output_to_s3('{}.pdf'.format(FULL_PROJECT_NAME)) @task def docx(): """OOXML document format.""" args = ['pandoc', '-f', 'markdown', '-t', 'docx', '-o', '{}.docx'.format(FULL_PROJECT_NAME)] + \ [i['file'] for i in MARKDOWN_FILES] local(' '.join(args)) if AWS_ENABLED: upload_output_to_s3('{}.docx'.format(FULL_PROJECT_NAME)) @task def odt(): """OpenDocument document format.""" args = ['pandoc', '-f', 'markdown', '-t', 'odt', '-o', '{}.odt'.format(FULL_PROJECT_NAME)] + \ [i['file'] for i in MARKDOWN_FILES] local(' '.join(args)) if AWS_ENABLED: upload_output_to_s3('{}.odt'.format(FULL_PROJECT_NAME)) @task def clean(): """Remove generated output files""" possible_outputs = ( '{}.html'.format(FULL_PROJECT_NAME), '{}.epub'.format(FULL_PROJECT_NAME), '{}.pdf'.format(FULL_PROJECT_NAME), '{}.docx'.format(FULL_PROJECT_NAME), '{}.odt'.format(FULL_PROJECT_NAME), ) for filename in possible_outputs: if os.path.exists(filename): os.remove(filename) print("Removed {}".format(filename))