#!/usr/bin/env python
# radu / python-tutorials

from __future__ import print_function

##### Configuration ##############################

import io
import os
import json

# Exported into the process environment; presumably so that Python child
# processes default to UTF-8 standard streams -- TODO confirm intent.
os.environ["PYTHONIOENCODING"] = "utf-8"

# Project configuration (chapter list, project names). The file must exist.
CONFIG_FILE = "config.json"
with io.open(CONFIG_FILE, "r", encoding="utf-8") as _config_fp:
    CONFIG = json.load(_config_fp)

# OAuth credentials written by the oauth_step2 task. The file is optional;
# OAUTH_CONFIG stays None when it has not been generated yet.
OAUTH_CONFIG_FILE = "oauth.json"
OAUTH_CONFIG = None
if os.path.exists(OAUTH_CONFIG_FILE):
    with io.open(OAUTH_CONFIG_FILE, "r", encoding="utf-8") as _oauth_fp:
        OAUTH_CONFIG = json.load(_oauth_fp)

## NOTES
## 1. This assumes that you have already created the S3 bucket whose name
##    is stored in the AWS_S3_BUCKET_NAME environment variable.
## 2. Under that S3 bucket, you have created a folder whose name is stored
##    in config.json as SHORT_PROJECT_NAME.
## 3. Under that S3 bucket, you have also created the folder
##    SHORT_PROJECT_NAME/assets.


##### Imports ####################################

import datetime
import subprocess
import copy
import webbrowser
import urllib
import time
from functools import wraps

import boto
import boto.s3.bucket
import boto.s3.key
from bs4 import BeautifulSoup
import requests

from fabric.api import task, local
from fabric.utils import abort

import logging


##### Start with checks ##########################

# Log everything from DEBUG upwards; `log` is this module's logger.
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)

# Fail at import time if any configured chapter slug contains
# upper-case characters.
for chapter in CONFIG["MARKDOWN_FILES"]:
    slug = chapter["slug"]
    assert slug == slug.lower(), \
        "Slug must be lower case : {}".format(slug)

# S3 uploading is on only when it has not been switched off explicitly
# (AWS_ENABLED=false, case-insensitive) and all three AWS settings are
# present and non-empty.
_aws_switched_off = str(os.environ.get("AWS_ENABLED")).lower() == "false"
_aws_has_settings = all(
    os.environ.get(name)
    for name in ("AWS_ACCESS_KEY_ID",
                 "AWS_SECRET_ACCESS_KEY",
                 "AWS_S3_BUCKET_NAME"))

AWS_ENABLED = _aws_has_settings and not _aws_switched_off

# Only mention missing variables when the user did not opt out explicitly.
if not (AWS_ENABLED or _aws_switched_off):
    print("NOTE: S3 uploading is disabled because of missing " +
          "AWS key environment variables.")

# Host name used when building public download URLs. The bucket name is used
# directly as the host name (in the author's case "files.swaroopch.com"):
# http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
# Without such a custom host name the equivalent would be
# "s3.amazonaws.com/<bucket name>".
# S3_PUBLIC_URL is left undefined when S3 uploading is disabled.
if AWS_ENABLED:
    S3_PUBLIC_URL = os.environ["AWS_S3_BUCKET_NAME"]


# The oauth_step1/oauth_step2 tasks create oauth.json and need the client
# id/secret *before* that file exists, so these two names must always be
# defined (None when the environment variable is not set).
WORDPRESS_CLIENT_ID = os.environ.get("WORDPRESS_CLIENT_ID")
WORDPRESS_CLIENT_SECRET = os.environ.get("WORDPRESS_CLIENT_SECRET")

if OAUTH_CONFIG is not None:
    # oauth.json is present: uploading is enabled and every WordPress
    # setting is mandatory (a missing one raises KeyError right here).
    WORDPRESS_ENABLED = True
    WORDPRESS_CLIENT_ID = os.environ["WORDPRESS_CLIENT_ID"]
    WORDPRESS_CLIENT_SECRET = os.environ["WORDPRESS_CLIENT_SECRET"]
    WORDPRESS_PARENT_PAGE_ID = int(os.environ["WORDPRESS_PARENT_PAGE_ID"])
    WORDPRESS_PARENT_PAGE_SLUG = os.environ["WORDPRESS_PARENT_PAGE_SLUG"]
    WORDPRESS_BASE_URL = os.environ["WORDPRESS_BASE_URL"]
else:
    WORDPRESS_ENABLED = False
    # The deciding condition is the missing OAuth file, so say so.
    print("NOTE: Wordpress uploading is disabled because {} is missing; "
          "run the oauth_step1 and oauth_step2 tasks to create it."
          .format(OAUTH_CONFIG_FILE))


##### Helper methods #############################

def retry(f):
    """Decorator: call `f` up to five times until it stops raising.

    After a failed attempt the error is logged and, unless it was the last
    attempt, the wrapper sleeps for ``10 * attempt`` seconds before trying
    again.

    Returns whatever `f` returns on the first successful attempt. If every
    attempt fails, a critical message is logged and None is returned (no
    exception is raised), so callers must check the result.

    Only `Exception` subclasses are retried; KeyboardInterrupt and SystemExit
    propagate immediately so that the user can still cancel a run.
    """
    @wraps(f)
    def wrapped_f(*args, **kwargs):
        MAX_ATTEMPTS = 5
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                return f(*args, **kwargs)
            except Exception:
                log.exception("Attempt %s/%s failed : %s",
                              attempt,
                              MAX_ATTEMPTS,
                              (args, kwargs))
                # No point in waiting when there is no attempt left.
                if attempt < MAX_ATTEMPTS:
                    time.sleep(10 * attempt)
        log.critical("All %s attempts failed : %s",
                     MAX_ATTEMPTS,
                     (args, kwargs))
    return wrapped_f


def _upload_to_s3(filename, key):
    """Upload the local file `filename` to S3 under `key`, publicly readable.

    The bucket name is taken from the AWS_S3_BUCKET_NAME environment
    variable. Prints and returns the resulting public URL.

    See http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data
    """
    connection = boto.connect_s3()
    bucket = boto.s3.bucket.Bucket(connection,
                                   os.environ["AWS_S3_BUCKET_NAME"])

    s3_object = boto.s3.key.Key(bucket)
    s3_object.key = key
    s3_object.set_contents_from_filename(filename)
    s3_object.set_acl("public-read")

    public_url = "http://{}/{}".format(S3_PUBLIC_URL, key)
    print("Uploaded to S3 : {}".format(public_url))
    return public_url


def upload_output_to_s3(filename):
    """Upload a generated output file into the project's folder on S3.

    The S3 key is "<SHORT_PROJECT_NAME>/<last '/'-separated part of
    filename>". Returns the public URL reported by _upload_to_s3.
    """
    project_folder = CONFIG["SHORT_PROJECT_NAME"]
    base_name = filename.rsplit("/", 1)[-1]
    return _upload_to_s3(filename,
                         "{}/{}".format(project_folder, base_name))


def upload_asset_to_s3(filename):
    """Upload an asset (e.g. an image) into the project's assets folder on S3.

    The S3 key is "<SHORT_PROJECT_NAME>/assets/<last '/'-separated part of
    filename>". Returns the public URL reported by _upload_to_s3.
    """
    project_folder = CONFIG["SHORT_PROJECT_NAME"]
    base_name = filename.rsplit("/", 1)[-1]
    return _upload_to_s3(filename,
                         "{}/assets/{}".format(project_folder, base_name))


def replace_images_with_s3_urls(text):
    """Upload every <img> referenced in `text` and point it at its S3 URL.

    Each image's `src` is treated as a local file name, uploaded with
    upload_asset_to_s3, and replaced by the returned URL. Returns the
    re-serialised document as `str(soup)`.

    See http://www.crummy.com/software/BeautifulSoup/bs4/doc/
    """
    document = BeautifulSoup(text)
    for img_tag in document.find_all("img"):
        local_path = img_tag["src"]
        img_tag["src"] = upload_asset_to_s3(local_path)
    return str(document)


def markdown_to_html(source_text, upload_assets_to_s3=False):
    """Convert Markdown to HTML5 with pandoc; optionally upload images to S3.

    `source_text` is the Markdown source as text. It is piped to
    `pandoc -f markdown -t html5`, and the result is wrapped in
    <!--raw--> ... <!--/raw--> markers for the WordPress raw-html plugin.

    When `upload_assets_to_s3` is true, every image referenced in the HTML is
    uploaded and its `src` rewritten via replace_images_with_s3_urls.

    Always returns text (unicode), never an encoded byte string.
    """
    command = ["pandoc",
               "-f", "markdown",
               "-t", "html5"]
    process = subprocess.Popen(command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    rendered = process.communicate(source_text.encode("utf-8", "ignore"))[0]

    # http://wordpress.org/extend/plugins/raw-html/
    output = u"<!--raw-->\n" + \
             rendered.decode("utf-8", "ignore") + \
             u"\n<!--/raw-->"

    # NOTE: Also assumes that you have added the CSS from
    # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme.

    if upload_assets_to_s3:
        output = replace_images_with_s3_urls(output)

    # replace_images_with_s3_urls returns str(soup), which is an encoded byte
    # string on Python 2. Decode only in that case: calling .decode() on text
    # that is already unicode triggers an implicit ASCII encode and fails on
    # any non-ASCII character.
    if isinstance(output, bytes):
        output = output.decode("utf-8", "ignore")
    return output


def collect_header_anchors(chapter, i, all_headers):
    """Record which chapter each heading anchor belongs to.

    For every h1-h6 element in chapter["html"] that has an `id` attribute,
    stores ``all_headers[id] = i``. `all_headers` is modified in place;
    nothing is returned.
    """
    heading_tags = ["h1", "h2", "h3", "h4", "h5", "h6"]
    document = BeautifulSoup(chapter["html"])
    all_headers.update(
        (heading["id"], i)
        for heading in document.find_all(heading_tags)
        if "id" in heading.attrs)


def fix_links_to_other_chapters(chapter, chapters, all_headers):
    """Rewrite in-document "#anchor" links to full Wordpress page URLs.

    Every <a href="#id"> in chapter["html"] is replaced by
    "<link of the chapter owning id>#id", where the owner is looked up through
    `all_headers` (anchor id -> index into `chapters`). An anchor that is not
    in `all_headers` fails an assertion naming the offending chapter file.
    chapter["html"] is replaced with the re-serialised document.
    """
    document = BeautifulSoup(chapter["html"])
    for anchor in document.find_all("a"):
        if "href" not in anchor.attrs:
            continue
        target = anchor["href"]
        if not target.startswith("#"):
            # Not a same-document reference; leave it alone.
            continue

        header_id = target[1:]
        assert header_id in all_headers, \
            "#{} does not exist, referred in {}".format(
            header_id, chapter["file"])
        owning_chapter = chapters[all_headers[header_id]]
        anchor["href"] = "{}#{}".format(owning_chapter["link"], header_id)
    chapter["html"] = unicode(document)


def add_previous_next_links(chapter, i, chapters):
    """Append "previous chapter" / "next chapter" links to chapter["html"].

    `i` is the index of `chapter` within `chapters`. The first chapter gets
    only a "next" link, the last only a "previous" link; when both exist they
    are separated by five non-breaking spaces. If any link is added it is
    preceded by a newline. chapter["html"] is left untouched when there is
    neither a previous nor a next chapter.
    """
    previous_link = chapters[i - 1]["link"] if i > 0 else None
    next_link = chapters[i + 1]["link"] if i < len(chapters) - 1 else None

    anchors = []
    if previous_link is not None:
        anchors.append(
            u'<a href="{}">&lArr; Previous chapter</a>'.format(previous_link))
    if next_link is not None:
        anchors.append(
            u'<a href="{}">Next chapter &rArr;</a>'.format(next_link))

    if anchors:
        separator = u"&nbsp;" * 5
        chapter["html"] += u"\n" + separator.join(anchors)


##### Tasks ######################################


@task
def prepare():
    """Stamp today's date into the front page Markdown file.

    The front page is the first entry of CONFIG["MARKDOWN_FILES"]. Every
    occurrence of the placeholder "$$date$$" in it is replaced by the current
    date formatted like "05 Jan 2013", and the file is rewritten in place.
    """
    frontpage = CONFIG["MARKDOWN_FILES"][0]
    # Read inside a `with` block so the handle is closed before the same file
    # is reopened for writing below.
    with io.open(frontpage["file"], "r", encoding="utf-8") as source:
        content = source.read()
    # TODO Can I make this always go change the third line instead?
    # TODO And then go back and change it to "$$date$$" so that it
    # is not inadvertently committed to the git repo.
    today = unicode(datetime.datetime.now().strftime("%d %b %Y"))
    content = content.replace(u"$$date$$", today)
    with io.open(frontpage["file"], "w", encoding="utf-8") as output:
        output.write(content)


@task
def html():
    """HTML5 output.

    Runs prepare(), has pandoc render all chapters into one standalone
    "<FULL_PROJECT_NAME>.html" with a table of contents, then opens the
    result with the `open` command.
    """
    prepare()

    target = "{}.html".format(CONFIG["FULL_PROJECT_NAME"])
    command = ["pandoc",
               "-f", "markdown",
               "-t", "html5",
               "-o", target,
               "-s",
               "--toc"]
    command.extend(entry["file"] for entry in CONFIG["MARKDOWN_FILES"])

    local(" ".join(command))
    local("open {}".format(target))


@task
def epub():
    """EPUB output; uploaded to S3 when AWS is enabled.

    Runs prepare(), then has pandoc render all chapters into
    "<FULL_PROJECT_NAME>.epub".

    http://johnmacfarlane.net/pandoc/epub.html
    """
    prepare()

    target = "{}.epub".format(CONFIG["FULL_PROJECT_NAME"])
    command = ["pandoc",
               "-f", "markdown",
               "-t", "epub",
               "-o", target]
    command.extend(entry["file"] for entry in CONFIG["MARKDOWN_FILES"])
    # TODO --epub-cover-image
    # TODO --epub-metadata
    # TODO --epub-stylesheet
    local(" ".join(command))

    if AWS_ENABLED:
        upload_output_to_s3(target)


@task
def pdf():
    """PDF output; uploaded to S3 when AWS is enabled.

    Runs prepare(), then has pandoc render all chapters into a numbered,
    A4-sized "<FULL_PROJECT_NAME>.pdf" with a table of contents.

    http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf
    """
    prepare()

    target = "{}.pdf".format(CONFIG["FULL_PROJECT_NAME"])
    # No explicit "-t pdf": see https://github.com/jgm/pandoc/issues/571
    command = ["pandoc",
               "-f", "markdown",
               "-o", target,
               "-N",
               # https://github.com/jgm/pandoc/issues/600
               "-V", "papersize:\"a4paper\"",
               "--toc"]
    command.extend(entry["file"] for entry in CONFIG["MARKDOWN_FILES"])
    local(" ".join(command))

    if AWS_ENABLED:
        upload_output_to_s3(target)


@task
def clean():
    """Remove generated output files.

    Deletes "<FULL_PROJECT_NAME>.html", ".epub" and ".pdf" from the current
    directory if they exist, printing the name of each file removed.
    """
    project = CONFIG["FULL_PROJECT_NAME"]
    for extension in ("html", "epub", "pdf"):
        candidate = "{}.{}".format(project, extension)
        if not os.path.exists(candidate):
            continue
        os.remove(candidate)
        print("Removed {}".format(candidate))


@task
def push():
    """Upload Wordpress, EPUB, PDF.

    Removes stale outputs first, then runs the wp, epub and pdf tasks in
    that order.
    """
    for step in (clean, wp, epub, pdf):
        step()


########## WordPress ##########
## http://developer.wordpress.com/docs/api/ ##

@task
def oauth_step1():
    """Fetch OAuth2 token.

    Deletes any existing OAUTH_CONFIG_FILE, prints instructions, and after
    confirmation opens the WordPress.com authorization page in a web browser.
    Aborts the run when the user declines or closes the prompt.

    http://developer.wordpress.com/docs/oauth2/"""
    if os.path.exists(OAUTH_CONFIG_FILE):
        os.remove(OAUTH_CONFIG_FILE)

    request_url = "https://public-api.wordpress.com/oauth2/authorize"
    params = {
        "client_id": WORDPRESS_CLIENT_ID,
        "redirect_uri": "http://swaroopch.com",
        "response_type": "code",
    }
    url = "{}?{}".format(request_url, urllib.urlencode(params))
    print("""\
1. After authorization, it will redirect, for e.g.
   http://swaroopch.com/?code=8D1Gq1tLQy&state
2. Extract the code from the URL and run:
   fab oauth_step2:8D1Gq1tLQy
3. See generated OAUTH_CONFIG_FILE file
""")
    try:
        proceed = raw_input("Proceed? (y/n) ")
    except (EOFError, SyntaxError):
        # raw_input() signals a closed prompt (e.g. Ctrl-D) with EOFError; it
        # never evaluates the answer, so SyntaxError is kept only for
        # backward compatibility.
        abort("Okay, bye.")
    else:
        if proceed.lower().startswith("y"):
            webbrowser.open(url)
        else:
            abort("Okay, bye.")


@task
def oauth_step2(code):
    """Use fetched token to generate OAuth access token.

    `code` is the authorization code taken from the redirect URL of
    oauth_step1. It is exchanged at the WordPress.com token endpoint; the
    JSON response is printed and saved to OAUTH_CONFIG_FILE.

    Raises requests.HTTPError if the token endpoint answers with an error
    status.
    """
    request_url = "https://public-api.wordpress.com/oauth2/token"
    params = {
        "client_id": WORDPRESS_CLIENT_ID,
        "client_secret": WORDPRESS_CLIENT_SECRET,
        "code": code,
        "redirect_uri": "http://swaroopch.com",
        "grant_type": "authorization_code",
    }

    response = requests.post(request_url, data=params)
    response.raise_for_status()
    response = response.json()

    print(response)

    # json.dump() would write byte-string chunks on Python 2, which a text
    # file opened with io.open() rejects with TypeError. Serialise first and
    # make sure the result is text before writing it.
    serialized = json.dumps(response, sort_keys=True, indent=2)
    if isinstance(serialized, bytes):
        serialized = serialized.decode("utf-8")

    with io.open(OAUTH_CONFIG_FILE, "w", encoding="utf-8") as output_file:
        output_file.write(serialized)


@task
def wp():
    """Render every chapter to HTML and publish it as a WordPress page.

    http://developer.wordpress.com/docs/api/

    Does nothing when WORDPRESS_ENABLED is false. Otherwise:

    1. runs prepare() and renders each file of CONFIG["MARKDOWN_FILES"] with
       markdown_to_html (uploading images to S3 when AWS_ENABLED),
    2. rewrites "#anchor" cross-links to the URL of the owning chapter and
       appends previous/next links,
    3. updates the page when one with the chapter's slug already exists on
       the server, and creates a new page otherwise.

    Aborts if the list of existing pages cannot be fetched, and fails an
    assertion if uploading a chapter does not succeed.
    """
    if not WORDPRESS_ENABLED:
        return

    prepare()

    # Work on a copy: the chapters get extra "html" and "link" keys below.
    chapters = copy.deepcopy(CONFIG["MARKDOWN_FILES"])

    # header anchor id -> index in MARKDOWN_FILES
    all_headers = {}

    # Render html
    print("Rendering html")
    for (i, chapter) in enumerate(chapters):
        with io.open(chapter["file"], "r", encoding="utf-8") as chapter_file:
            chapter_content = chapter_file.read()
        chapter["html"] = markdown_to_html(
            chapter_content,
            upload_assets_to_s3=AWS_ENABLED)

        collect_header_anchors(chapter, i, all_headers)

        chapter["link"] = "{}/{}/{}".format(
            WORDPRESS_BASE_URL,
            WORDPRESS_PARENT_PAGE_SLUG,
            chapter["slug"])

    # Fix cross-links; needs the anchors and links of *all* chapters, hence a
    # second pass.
    for chapter in chapters:
        fix_links_to_other_chapters(chapter, chapters, all_headers)

    # Add previous and next links at end of html
    for (i, chapter) in enumerate(chapters):
        add_previous_next_links(chapter, i, chapters)

    # Fetch list of pages on the server and determine which already exist.
    # The retry decorator makes this return None once all attempts failed.
    existing_pages = _wordpress_get_pages()
    if existing_pages is None:
        abort("Could not fetch the list of existing WordPress pages.")
    page_slug_to_id = {page.get("slug"): page.get("ID")
                       for page in existing_pages}

    # Send to WP
    print("Uploading to WordPress")
    for chapter in chapters:
        if chapter["slug"] in page_slug_to_id:
            print("Existing page: {}".format(chapter["link"]))
            result = wordpress_edit_page(page_slug_to_id[chapter["slug"]],
                                         chapter["title"],
                                         chapter["html"])
        else:
            print("New page: {}".format(chapter["link"]))
            result = wordpress_new_page(chapter["slug"],
                                        chapter["title"],
                                        chapter["html"])
        # Call first, check afterwards: with `assert upload(...)` the upload
        # itself would be skipped when Python runs with -O.
        assert result, "Upload failed : {}".format(chapter["link"])


def _wordpress_headers():
    """Return the HTTP headers that authorize a WordPress.com API request.

    Uses the access token stored in OAUTH_CONFIG as a bearer token. Asserts
    that WordPress support is enabled.
    """
    assert WORDPRESS_ENABLED

    access_token = OAUTH_CONFIG["access_token"]
    return {"Authorization": "Bearer {}".format(access_token)}


@retry
def _wordpress_get_pages():
    """Fetch all published pages of the blog that match "python_en".

    Pages are requested 100 at a time until a short batch signals the end.
    Returns the list of post dictionaries from the API responses. Because of
    @retry, a failing request restarts the whole download, and None is
    returned if every attempt fails.
    """
    endpoint = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/"
    endpoint = endpoint.format(OAUTH_CONFIG["blog_id"])

    page_size = 100
    offset = 0
    collected = []

    while True:
        print("offset = {}".format(offset))
        query = {"context": "edit",
                 "type": "page",
                 "status": "publish",
                 "number": page_size,
                 # TODO Use a proper category instead
                 "search": "python_en",
                 "offset": offset}
        response = requests.get(endpoint,
                                params=query,
                                headers=_wordpress_headers())
        response.raise_for_status()

        batch = response.json()["posts"]
        collected.extend(batch)
        if len(batch) < page_size:
            # A batch shorter than requested means there is nothing left.
            return collected
        offset += page_size


@retry
def wordpress_new_page(slug, title, content):
    """Create a new Wordpress page.

    The page is created below WORDPRESS_PARENT_PAGE_ID, tagged with the full
    project name, with comments, pings and publicizing switched off.

    Returns the decoded JSON response. Because of @retry, None is returned if
    every attempt fails.
    """
    endpoint = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/new"
    endpoint = endpoint.format(OAUTH_CONFIG["blog_id"])

    payload = {"slug": slug,
               "title": title,
               "content": content,
               "parent": WORDPRESS_PARENT_PAGE_ID,
               "type": "page",
               # TODO Use a proper category instead
               "tags": [CONFIG["FULL_PROJECT_NAME"]],
               "comments_open": False,
               "pings_open": False,
               "publicize": False}

    response = requests.post(endpoint,
                             data=payload,
                             headers=_wordpress_headers())
    response.raise_for_status()
    return response.json()


@retry
def wordpress_edit_page(post_id, title, content):
    """Edit a Wordpress page.

    Replaces the title and content of the page `post_id` and tags it with the
    full project name.

    Returns the decoded JSON response. Because of @retry, None is returned if
    every attempt fails.
    """
    endpoint = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/{}"
    endpoint = endpoint.format(OAUTH_CONFIG["blog_id"], post_id)

    payload = {"title": title,
               "content": content,
               # TODO Use a proper category instead
               "tags": [CONFIG["FULL_PROJECT_NAME"]]}

    response = requests.post(endpoint,
                             data=payload,
                             headers=_wordpress_headers())
    response.raise_for_status()
    return response.json()