fabfile.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  4. import io
  5. import os
  6. import json
  7. os.environ["PYTHONIOENCODING"] = "utf-8"
  8. CONFIG_FILE = "config.json"
  9. CONFIG = json.load(io.open(CONFIG_FILE, "r", encoding="utf-8"))
  10. OAUTH_CONFIG_FILE = "oauth.json"
  11. OAUTH_CONFIG = None
  12. if os.path.exists(OAUTH_CONFIG_FILE):
  13. OAUTH_CONFIG = json.load(io.open(OAUTH_CONFIG_FILE, "r", encoding="utf-8"))
## NOTES
## 1. This assumes that you have already created the S3 bucket whose name
##    is stored in the AWS_S3_BUCKET_NAME environment variable.
## 2. Under that S3 bucket, you have created a folder named after the
##    SHORT_PROJECT_NAME value in config.json.
## 3. Under that S3 bucket, you have also created the folder
##    SHORT_PROJECT_NAME/assets.
  21. ##### Imports ####################################
  22. import datetime
  23. import subprocess
  24. import copy
  25. import webbrowser
  26. import urllib
  27. import time
  28. from functools import wraps
  29. import boto
  30. import boto.s3.bucket
  31. import boto.s3.key
  32. from bs4 import BeautifulSoup
  33. import requests
  34. from fabric.api import task, local
  35. from fabric.utils import abort
  36. import logging
  37. ##### Start with checks ##########################
  38. logging.basicConfig(level=logging.DEBUG)
  39. log = logging.getLogger(__name__)
  40. for chapter in CONFIG["MARKDOWN_FILES"]:
  41. assert (chapter["slug"].lower() == chapter["slug"]), \
  42. "Slug must be lower case : {}".format(chapter["slug"])
  43. if str(os.environ.get("AWS_ENABLED")).lower() == "false":
  44. AWS_ENABLED = False
  45. elif os.environ.get("AWS_ACCESS_KEY_ID") is not None \
  46. and len(os.environ["AWS_ACCESS_KEY_ID"]) > 0 \
  47. and os.environ.get("AWS_SECRET_ACCESS_KEY") is not None \
  48. and len(os.environ["AWS_SECRET_ACCESS_KEY"]) > 0 \
  49. and os.environ.get("AWS_S3_BUCKET_NAME") is not None \
  50. and len(os.environ["AWS_S3_BUCKET_NAME"]) > 0:
  51. AWS_ENABLED = True
  52. else:
  53. AWS_ENABLED = False
  54. print("NOTE: S3 uploading is disabled because of missing " +
  55. "AWS key environment variables.")
  56. # In my case, they are the same - "files.swaroopch.com"
  57. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  58. if AWS_ENABLED:
  59. S3_PUBLIC_URL = os.environ["AWS_S3_BUCKET_NAME"]
  60. #else
  61. #S3_PUBLIC_URL = "s3.amazonaws.com/{}".format(
  62. #os.environ["AWS_S3_BUCKET_NAME"])
  63. if OAUTH_CONFIG is not None:
  64. WORDPRESS_ENABLED = True
  65. WORDPRESS_CLIENT_ID = os.environ["WORDPRESS_CLIENT_ID"]
  66. WORDPRESS_CLIENT_SECRET = os.environ["WORDPRESS_CLIENT_SECRET"]
  67. WORDPRESS_PARENT_PAGE_ID = int(os.environ["WORDPRESS_PARENT_PAGE_ID"])
  68. WORDPRESS_PARENT_PAGE_SLUG = os.environ["WORDPRESS_PARENT_PAGE_SLUG"]
  69. WORDPRESS_BASE_URL = os.environ["WORDPRESS_BASE_URL"]
  70. else:
  71. WORDPRESS_ENABLED = False
  72. print("NOTE: Wordpress uploading is disabled because of " +
  73. "missing environment variables.")
  74. ##### Helper methods #############################
  75. def retry(f):
  76. @wraps(f)
  77. def wrapped_f(*args, **kwargs):
  78. MAX_ATTEMPTS = 5
  79. for attempt in range(1, MAX_ATTEMPTS + 1):
  80. try:
  81. return f(*args, **kwargs)
  82. except:
  83. log.exception("Attempt %s/%s failed : %s",
  84. attempt,
  85. MAX_ATTEMPTS,
  86. (args, kwargs))
  87. time.sleep(10 * attempt)
  88. log.critical("All %s attempts failed : %s",
  89. MAX_ATTEMPTS,
  90. (args, kwargs))
  91. return wrapped_f
  92. def _upload_to_s3(filename, key):
  93. """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data"""
  94. conn = boto.connect_s3()
  95. b = boto.s3.bucket.Bucket(conn, os.environ["AWS_S3_BUCKET_NAME"])
  96. k = boto.s3.key.Key(b)
  97. k.key = key
  98. k.set_contents_from_filename(filename)
  99. k.set_acl("public-read")
  100. url = "http://{}/{}".format(S3_PUBLIC_URL, key)
  101. print("Uploaded to S3 : {}".format(url))
  102. return url
  103. def upload_output_to_s3(filename):
  104. key = "{}/{}".format(CONFIG["SHORT_PROJECT_NAME"],
  105. filename.split("/")[-1])
  106. return _upload_to_s3(filename, key)
  107. def upload_asset_to_s3(filename):
  108. key = "{}/assets/{}".format(CONFIG["SHORT_PROJECT_NAME"],
  109. filename.split("/")[-1])
  110. return _upload_to_s3(filename, key)
  111. def replace_images_with_s3_urls(text):
  112. """http://www.crummy.com/software/BeautifulSoup/bs4/doc/"""
  113. soup = BeautifulSoup(text)
  114. for image in soup.find_all("img"):
  115. image["src"] = upload_asset_to_s3(image["src"])
  116. return str(soup)
  117. def markdown_to_html(source_text, upload_assets_to_s3=False):
  118. """Convert from Markdown to HTML; optional: upload images, etc. to S3."""
  119. args = ["pandoc",
  120. "-f", "markdown",
  121. "-t", "html5"]
  122. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  123. output = p.communicate(source_text.encode("utf-8", "ignore"))[0]
  124. # http://wordpress.org/extend/plugins/raw-html/
  125. output = u"<!--raw-->\n" + \
  126. output.decode("utf-8", "ignore") + \
  127. u"\n<!--/raw-->"
  128. # NOTE: Also assumes that you have added the CSS from
  129. # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme.
  130. if upload_assets_to_s3:
  131. output = replace_images_with_s3_urls(output)
  132. return output.decode("utf-8", "ignore")
  133. def collect_header_anchors(chapter, i, all_headers):
  134. soup = BeautifulSoup(chapter["html"])
  135. for header in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
  136. if "id" in header.attrs:
  137. all_headers[header["id"]] = i
  138. def fix_links_to_other_chapters(chapter, chapters, all_headers):
  139. """Fix links to other sections with Wordpress page URL."""
  140. soup = BeautifulSoup(chapter["html"])
  141. for link in soup.find_all("a"):
  142. if "href" in link.attrs:
  143. if link["href"].startswith("#"):
  144. header_id = link["href"][1:]
  145. assert header_id in all_headers, \
  146. "#{} does not exist, referred in {}".format(
  147. header_id, chapter["file"])
  148. other_chapter = chapters[all_headers[header_id]]
  149. link["href"] = "{}#{}".format(
  150. other_chapter["link"],
  151. header_id)
  152. chapter["html"] = unicode(soup)
  153. def add_previous_next_links(chapter, i, chapters):
  154. previous_link = None
  155. if i > 0:
  156. previous_link = chapters[i - 1]["link"]
  157. next_link = None
  158. if i < len(chapters) - 1:
  159. next_link = chapters[i + 1]["link"]
  160. if previous_link is not None or next_link is not None:
  161. chapter["html"] += u"\n"
  162. if previous_link is not None:
  163. chapter["html"] += u"""\
  164. <a href="{}">&lArr; Previous chapter</a>\
  165. """.format(previous_link)
  166. if previous_link is not None and next_link is not None:
  167. chapter["html"] += u"&nbsp;" * 5
  168. if next_link is not None:
  169. chapter["html"] += u"""\
  170. <a href="{}">Next chapter &rArr;</a>\
  171. """.format(next_link)
  172. ##### Tasks ######################################
  173. @task
  174. def prepare():
  175. frontpage = CONFIG["MARKDOWN_FILES"][0]
  176. content = io.open(frontpage["file"], "r", encoding="utf-8").read()
  177. # TODO Can I make this always go change the third line instead?
  178. # TODO And then go back and change it to "$$date$$" so that it
  179. # is not inadvertently committed to the git repo.
  180. today = unicode(datetime.datetime.now().strftime("%d %b %Y"))
  181. content = content.replace(u"$$date$$", today)
  182. with io.open(frontpage["file"], "w", encoding="utf-8") as output:
  183. output.write(content)
  184. @task
  185. def html():
  186. """HTML5 output."""
  187. prepare()
  188. args = ["pandoc",
  189. "-f", "markdown",
  190. "-t", "html5",
  191. "-o", "{}.html".format(CONFIG["FULL_PROJECT_NAME"]),
  192. "-s",
  193. "--toc"] + [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  194. local(" ".join(args))
  195. local("open {}.html".format(CONFIG["FULL_PROJECT_NAME"]))
  196. @task
  197. def epub():
  198. """http://johnmacfarlane.net/pandoc/epub.html"""
  199. prepare()
  200. args = ["pandoc",
  201. "-f", "markdown",
  202. "-t", "epub",
  203. "-o", "{}.epub".format(CONFIG["FULL_PROJECT_NAME"])] + \
  204. [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  205. # TODO --epub-cover-image
  206. # TODO --epub-metadata
  207. # TODO --epub-stylesheet
  208. local(" ".join(args))
  209. if AWS_ENABLED:
  210. upload_output_to_s3("{}.epub".format(CONFIG["FULL_PROJECT_NAME"]))
  211. @task
  212. def pdf():
  213. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  214. prepare()
  215. args = ["pandoc",
  216. "-f", "markdown",
  217. # https://github.com/jgm/pandoc/issues/571
  218. #"-t", "pdf",
  219. "-o", "{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]),
  220. "-N",
  221. # https://github.com/jgm/pandoc/issues/600
  222. "-V", "papersize:\"a4paper\"",
  223. "--toc"] + [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  224. local(" ".join(args))
  225. if AWS_ENABLED:
  226. upload_output_to_s3("{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]))
  227. @task
  228. def clean():
  229. """Remove generated output files"""
  230. possible_outputs = (
  231. "{}.html".format(CONFIG["FULL_PROJECT_NAME"]),
  232. "{}.epub".format(CONFIG["FULL_PROJECT_NAME"]),
  233. "{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]),
  234. )
  235. for filename in possible_outputs:
  236. if os.path.exists(filename):
  237. os.remove(filename)
  238. print("Removed {}".format(filename))
  239. @task
  240. def push():
  241. """Upload Wordpress, EPUB, PDF."""
  242. clean()
  243. wp()
  244. epub()
  245. pdf()
  246. ########## WordPress ##########
  247. ## http://developer.wordpress.com/docs/api/ ##
  248. @task
  249. def oauth_step1():
  250. """Fetch OAuth2 token.
  251. http://developer.wordpress.com/docs/oauth2/"""
  252. if os.path.exists(OAUTH_CONFIG_FILE):
  253. os.remove(OAUTH_CONFIG_FILE)
  254. request_url = "https://public-api.wordpress.com/oauth2/authorize"
  255. params = {
  256. "client_id": WORDPRESS_CLIENT_ID,
  257. "redirect_uri": "http://swaroopch.com",
  258. "response_type": "code",
  259. }
  260. url = "{}?{}".format(request_url, urllib.urlencode(params))
  261. print("""\
  262. 1. After authorization, it will redirect, for e.g.
  263. http://swaroopch.com/?code=8D1Gq1tLQy&state
  264. 2. Extract the code from the URL and run:
  265. fab oauth_step2:8D1Gq1tLQy
  266. 3. See generated OAUTH_CONFIG_FILE file
  267. """)
  268. try:
  269. proceed = raw_input("Proceed? (y/n) ")
  270. if proceed.lower().startswith("y"):
  271. webbrowser.open(url)
  272. else:
  273. abort("Okay, bye.")
  274. except SyntaxError:
  275. abort("Okay, bye.")
  276. @task
  277. def oauth_step2(code):
  278. """Use fetched token to generate OAuth access token."""
  279. request_url = "https://public-api.wordpress.com/oauth2/token"
  280. params = {
  281. "client_id": WORDPRESS_CLIENT_ID,
  282. "client_secret": WORDPRESS_CLIENT_SECRET,
  283. "code": code,
  284. "redirect_uri": "http://swaroopch.com",
  285. "grant_type": "authorization_code",
  286. }
  287. response = requests.post(request_url, data=params)
  288. response.raise_for_status()
  289. response = response.json()
  290. print(response)
  291. with io.open(OAUTH_CONFIG_FILE, "w", encoding="utf-8") as output_file:
  292. json.dump(response, output_file, sort_keys=True, indent=2)
  293. @task
  294. def wp():
  295. """http://developer.wordpress.com/docs/api/"""
  296. if WORDPRESS_ENABLED:
  297. prepare()
  298. chapters = copy.deepcopy(CONFIG["MARKDOWN_FILES"])
  299. # header anchor id -> index in MARKDOWN_FILES
  300. all_headers = {}
  301. # Render html
  302. print("Rendering html")
  303. for (i, chapter) in enumerate(chapters):
  304. chapter_content = io.open(chapter["file"],
  305. "r",
  306. encoding="utf-8").read()
  307. chapter["html"] = markdown_to_html(
  308. chapter_content,
  309. upload_assets_to_s3=AWS_ENABLED)
  310. collect_header_anchors(chapter, i, all_headers)
  311. chapter["link"] = "{}/{}/{}".format(
  312. WORDPRESS_BASE_URL,
  313. WORDPRESS_PARENT_PAGE_SLUG,
  314. chapter["slug"])
  315. # Fix cross-links
  316. for chapter in chapters:
  317. fix_links_to_other_chapters(chapter, chapters, all_headers)
  318. # Add previous and next links at end of html
  319. for (i, chapter) in enumerate(chapters):
  320. add_previous_next_links(chapter, i, chapters)
  321. # Fetch list of pages on the server and determine which already exist
  322. existing_pages = _wordpress_get_pages()
  323. page_slug_to_id = dict([(i.get("slug"), i.get("ID"))
  324. for i in existing_pages])
  325. for chapter in chapters:
  326. if chapter["slug"] in page_slug_to_id:
  327. chapter["page_id"] = page_slug_to_id[chapter["slug"]]
  328. # Send to WP
  329. print("Uploading to WordPress")
  330. for chapter in chapters:
  331. if chapter["slug"] in page_slug_to_id:
  332. print("Existing page: {}".format(chapter["link"]))
  333. assert wordpress_edit_page(chapter["page_id"],
  334. chapter["title"],
  335. chapter["html"])
  336. else:
  337. print("New page: {}".format(chapter["link"]))
  338. assert wordpress_new_page(chapter["slug"],
  339. chapter["title"],
  340. chapter["html"])
  341. def _wordpress_headers():
  342. assert WORDPRESS_ENABLED
  343. return {
  344. "Authorization": "Bearer {}".format(OAUTH_CONFIG["access_token"]),
  345. }
  346. @retry
  347. def _wordpress_get_pages():
  348. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/"
  349. url = url.format(OAUTH_CONFIG["blog_id"])
  350. offset = 0
  351. number = 100
  352. posts = []
  353. while True:
  354. print("offset = {}".format(offset))
  355. response = requests.get(url,
  356. params={"context": "edit",
  357. "type": "page",
  358. "status": "publish",
  359. "number": number,
  360. # TODO Use a proper category instead
  361. "search": "python_en",
  362. "offset": offset},
  363. headers=_wordpress_headers())
  364. response.raise_for_status()
  365. new_posts = response.json()["posts"]
  366. posts.extend(new_posts)
  367. if len(new_posts) < number:
  368. break
  369. offset += 100
  370. return posts
  371. @retry
  372. def wordpress_new_page(slug, title, content):
  373. """Create a new Wordpress page."""
  374. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/new"
  375. url = url.format(OAUTH_CONFIG["blog_id"])
  376. response = requests.post(url,
  377. data={"slug": slug,
  378. "title": title,
  379. "content": content,
  380. "parent": WORDPRESS_PARENT_PAGE_ID,
  381. "type": "page",
  382. # TODO Use a proper category instead
  383. "tags": [CONFIG["FULL_PROJECT_NAME"]],
  384. "comments_open": False,
  385. "pings_open": False,
  386. "publicize": False},
  387. headers=_wordpress_headers())
  388. response.raise_for_status()
  389. return response.json()
  390. @retry
  391. def wordpress_edit_page(post_id, title, content):
  392. """Edit a Wordpress page."""
  393. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/{}"
  394. url = url.format(OAUTH_CONFIG["blog_id"], post_id)
  395. response = requests.post(url,
  396. data={"title": title,
  397. "content": content,
  398. # TODO Use a proper category instead
  399. "tags": [CONFIG["FULL_PROJECT_NAME"]]},
  400. headers=_wordpress_headers())
  401. response.raise_for_status()
  402. return response.json()