fabfile.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  4. import codecs
  5. import os
  6. import json
  7. os.environ["PYTHONIOENCODING"] = "utf-8"
  8. CONFIG_FILE = "config.json"
  9. CONFIG = json.load(codecs.open(CONFIG_FILE, "r", "utf-8"))
  10. OAUTH_CONFIG_FILE = "oauth.json"
  11. OAUTH_CONFIG = None
  12. if os.path.exists(OAUTH_CONFIG_FILE):
  13. OAUTH_CONFIG = json.load(codecs.open(OAUTH_CONFIG_FILE, "r", "utf-8"))
## NOTES
## 1. This assumes that you have already created the S3 bucket whose name
##    is stored in the AWS_S3_BUCKET_NAME environment variable.
## 2. Under that S3 bucket, you have created a folder named after the
##    SHORT_PROJECT_NAME value in config.json.
## 3. Under that S3 bucket, you have also created the folder
##    SHORT_PROJECT_NAME/assets.
  21. ##### Imports ####################################
  22. import datetime
  23. import subprocess
  24. import copy
  25. import webbrowser
  26. import urllib
  27. import boto
  28. import boto.s3.bucket
  29. import boto.s3.key
  30. from bs4 import BeautifulSoup
  31. import requests
  32. from fabric.api import task, local
  33. from fabric.utils import abort
  34. ##### Start with checks ##########################
  35. for chapter in CONFIG["MARKDOWN_FILES"]:
  36. assert (chapter["slug"].lower() == chapter["slug"]), \
  37. "Slug must be lower case : {}".format(chapter["slug"])
  38. if str(os.environ.get("AWS_ENABLED")).lower() == "false":
  39. AWS_ENABLED = False
  40. elif os.environ.get("AWS_ACCESS_KEY_ID") is not None \
  41. and len(os.environ["AWS_ACCESS_KEY_ID"]) > 0 \
  42. and os.environ.get("AWS_SECRET_ACCESS_KEY") is not None \
  43. and len(os.environ["AWS_SECRET_ACCESS_KEY"]) > 0 \
  44. and os.environ.get("AWS_S3_BUCKET_NAME") is not None \
  45. and len(os.environ["AWS_S3_BUCKET_NAME"]) > 0:
  46. AWS_ENABLED = True
  47. else:
  48. AWS_ENABLED = False
  49. print("NOTE: S3 uploading is disabled because of missing " +
  50. "AWS key environment variables.")
  51. # In my case, they are the same - "files.swaroopch.com"
  52. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  53. if AWS_ENABLED:
  54. S3_PUBLIC_URL = os.environ["AWS_S3_BUCKET_NAME"]
  55. #else
  56. #S3_PUBLIC_URL = "s3.amazonaws.com/{}".format(
  57. #os.environ["AWS_S3_BUCKET_NAME"])
  58. if OAUTH_CONFIG is not None:
  59. WORDPRESS_ENABLED = True
  60. WORDPRESS_CLIENT_ID = os.environ["WORDPRESS_CLIENT_ID"]
  61. WORDPRESS_CLIENT_SECRET = os.environ["WORDPRESS_CLIENT_SECRET"]
  62. WORDPRESS_PARENT_PAGE_ID = int(os.environ["WORDPRESS_PARENT_PAGE_ID"])
  63. WORDPRESS_PARENT_PAGE_SLUG = os.environ["WORDPRESS_PARENT_PAGE_SLUG"]
  64. WORDPRESS_BASE_URL = os.environ["WORDPRESS_BASE_URL"]
  65. else:
  66. WORDPRESS_ENABLED = False
  67. print("NOTE: Wordpress uploading is disabled because of " +
  68. "missing environment variables.")
  69. ##### Helper methods #############################
  70. def _upload_to_s3(filename, key):
  71. """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data"""
  72. conn = boto.connect_s3()
  73. b = boto.s3.bucket.Bucket(conn, os.environ["AWS_S3_BUCKET_NAME"])
  74. k = boto.s3.key.Key(b)
  75. k.key = key
  76. k.set_contents_from_filename(filename)
  77. k.set_acl("public-read")
  78. url = "http://{}/{}".format(S3_PUBLIC_URL, key)
  79. print("Uploaded to S3 : {}".format(url))
  80. return url
  81. def upload_output_to_s3(filename):
  82. key = "{}/{}".format(CONFIG["SHORT_PROJECT_NAME"],
  83. filename.split("/")[-1])
  84. return _upload_to_s3(filename, key)
  85. def upload_asset_to_s3(filename):
  86. key = "{}/assets/{}".format(CONFIG["SHORT_PROJECT_NAME"],
  87. filename.split("/")[-1])
  88. return _upload_to_s3(filename, key)
  89. def replace_images_with_s3_urls(text):
  90. """http://www.crummy.com/software/BeautifulSoup/bs4/doc/"""
  91. soup = BeautifulSoup(text)
  92. for image in soup.find_all("img"):
  93. image["src"] = upload_asset_to_s3(image["src"])
  94. return str(soup)
  95. def markdown_to_html(source_text, upload_assets_to_s3=False):
  96. """Convert from Markdown to HTML; optional: upload images, etc. to S3."""
  97. args = ["pandoc",
  98. "-f", "markdown",
  99. "-t", "html5"]
  100. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  101. output = p.communicate(source_text.encode("utf-8", "ignore"))[0]
  102. # http://wordpress.org/extend/plugins/raw-html/
  103. output = u"<!--raw-->\n" + \
  104. output.decode("utf-8", "ignore") + \
  105. u"\n<!--/raw-->"
  106. # NOTE: Also assumes that you have added the CSS from
  107. # `pandoc -S -t html5` to the `style.css` of your active Wordpress theme.
  108. if upload_assets_to_s3:
  109. output = replace_images_with_s3_urls(output)
  110. return output.decode("utf-8", "ignore")
  111. def collect_header_anchors(chapter, i, all_headers):
  112. soup = BeautifulSoup(chapter["html"])
  113. for header in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
  114. if "id" in header.attrs:
  115. all_headers[header["id"]] = i
  116. def fix_links_to_other_chapters(chapter, chapters, all_headers):
  117. """Fix links to other sections with Wordpress page URL."""
  118. soup = BeautifulSoup(chapter["html"])
  119. for link in soup.find_all("a"):
  120. if "href" in link.attrs:
  121. if link["href"].startswith("#"):
  122. header_id = link["href"][1:]
  123. assert header_id in all_headers, \
  124. "#{} does not exist, referred in {}".format(
  125. header_id, chapter["file"])
  126. other_chapter = chapters[all_headers[header_id]]
  127. link["href"] = "{}#{}".format(
  128. other_chapter["link"],
  129. header_id)
  130. chapter["html"] = unicode(soup)
  131. def add_previous_next_links(chapter, i, chapters):
  132. previous_link = None
  133. if i > 0:
  134. previous_link = chapters[i - 1]["link"]
  135. next_link = None
  136. if i < len(chapters) - 1:
  137. next_link = chapters[i + 1]["link"]
  138. if previous_link is not None or next_link is not None:
  139. chapter["html"] += u"\n"
  140. if previous_link is not None:
  141. chapter["html"] += u"""\
  142. <a href="{}">&lArr; Previous chapter</a>\
  143. """.format(previous_link)
  144. if previous_link is not None and next_link is not None:
  145. chapter["html"] += u"&nbsp;" * 5
  146. if next_link is not None:
  147. chapter["html"] += u"""\
  148. <a href="{}">Next chapter &rArr;</a>\
  149. """.format(next_link)
  150. ##### Tasks ######################################
  151. @task
  152. def prepare():
  153. frontpage = CONFIG["MARKDOWN_FILES"][0]
  154. content = codecs.open(frontpage["file"], "r", "utf-8").read()
  155. # TODO Can I make this always go change the third line instead?
  156. # TODO And then go back and change it to "$$date$$" so that it
  157. # is not inadvertently committed to the git repo.
  158. today = unicode(datetime.datetime.now().strftime("%d %b %Y"))
  159. content = content.replace(u"$$date$$", today)
  160. with codecs.open(frontpage["file"], "w", "utf-8") as output:
  161. output.write(content)
  162. @task
  163. def html():
  164. """HTML5 output."""
  165. prepare()
  166. args = ["pandoc",
  167. "-f", "markdown",
  168. "-t", "html5",
  169. "-o", "{}.html".format(CONFIG["FULL_PROJECT_NAME"]),
  170. "-s",
  171. "--toc"] + [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  172. local(" ".join(args))
  173. local("open {}.html".format(CONFIG["FULL_PROJECT_NAME"]))
  174. @task
  175. def epub():
  176. """http://johnmacfarlane.net/pandoc/epub.html"""
  177. prepare()
  178. args = ["pandoc",
  179. "-f", "markdown",
  180. "-t", "epub",
  181. "-o", "{}.epub".format(CONFIG["FULL_PROJECT_NAME"])] + \
  182. [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  183. # TODO --epub-cover-image
  184. # TODO --epub-metadata
  185. # TODO --epub-stylesheet
  186. local(" ".join(args))
  187. if AWS_ENABLED:
  188. upload_output_to_s3("{}.epub".format(CONFIG["FULL_PROJECT_NAME"]))
  189. @task
  190. def pdf():
  191. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  192. prepare()
  193. args = ["pandoc",
  194. "-f", "markdown",
  195. # https://github.com/jgm/pandoc/issues/571
  196. #"-t", "pdf",
  197. "-o", "{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]),
  198. "-N",
  199. # https://github.com/jgm/pandoc/issues/600
  200. "-V", "papersize:\"a4paper\"",
  201. "--toc"] + [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  202. local(" ".join(args))
  203. if AWS_ENABLED:
  204. upload_output_to_s3("{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]))
  205. @task
  206. def clean():
  207. """Remove generated output files"""
  208. possible_outputs = (
  209. "{}.html".format(CONFIG["FULL_PROJECT_NAME"]),
  210. "{}.epub".format(CONFIG["FULL_PROJECT_NAME"]),
  211. "{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]),
  212. )
  213. for filename in possible_outputs:
  214. if os.path.exists(filename):
  215. os.remove(filename)
  216. print("Removed {}".format(filename))
  217. @task
  218. def push():
  219. """Upload Wordpress, EPUB, PDF."""
  220. clean()
  221. wp()
  222. epub()
  223. pdf()
  224. ########## WordPress ##########
  225. @task
  226. def oauth_step1():
  227. """Fetch OAuth2 token.
  228. http://developer.wordpress.com/docs/oauth2/"""
  229. if os.path.exists(OAUTH_CONFIG_FILE):
  230. os.remove(OAUTH_CONFIG_FILE)
  231. request_url = "https://public-api.wordpress.com/oauth2/authorize"
  232. params = {
  233. "client_id": WORDPRESS_CLIENT_ID,
  234. "redirect_uri": "http://swaroopch.com",
  235. "response_type": "code",
  236. }
  237. url = "{}?{}".format(request_url, urllib.urlencode(params))
  238. print("""\
  239. 1. After authorization, it will redirect, for e.g.
  240. http://swaroopch.com/?code=8D1Gq1tLQy&state
  241. 2. Extract the code from the URL and run:
  242. fab oauth_step2:8D1Gq1tLQy
  243. 3. See generated OAUTH_CONFIG_FILE file
  244. """)
  245. try:
  246. proceed = raw_input("Proceed? (y/n) ")
  247. if proceed.lower().startswith("y"):
  248. webbrowser.open(url)
  249. else:
  250. abort("Okay, bye.")
  251. except SyntaxError:
  252. abort("Okay, bye.")
  253. @task
  254. def oauth_step2(code):
  255. """Use fetched token to generate OAuth access token."""
  256. request_url = "https://public-api.wordpress.com/oauth2/token"
  257. params = {
  258. "client_id": WORDPRESS_CLIENT_ID,
  259. "client_secret": WORDPRESS_CLIENT_SECRET,
  260. "code": code,
  261. "redirect_uri": "http://swaroopch.com",
  262. "grant_type": "authorization_code",
  263. }
  264. response = requests.post(request_url, data=params)
  265. response.raise_for_status()
  266. response = response.json()
  267. print(response)
  268. with codecs.open(OAUTH_CONFIG_FILE, "w", "utf-8") as output_file:
  269. json.dump(response, output_file, sort_keys=True, indent=2)
  270. @task
  271. def wp():
  272. """http://developer.wordpress.com/docs/api/"""
  273. if WORDPRESS_ENABLED:
  274. prepare()
  275. chapters = copy.deepcopy(CONFIG["MARKDOWN_FILES"])
  276. # header anchor id -> index in MARKDOWN_FILES
  277. all_headers = {}
  278. # Render html
  279. print("Rendering html")
  280. for (i, chapter) in enumerate(chapters):
  281. chapter_content = codecs.open(chapter["file"], "r", "utf-8").read()
  282. chapter["html"] = markdown_to_html(
  283. chapter_content,
  284. upload_assets_to_s3=AWS_ENABLED)
  285. collect_header_anchors(chapter, i, all_headers)
  286. chapter["link"] = "{}/{}/{}".format(
  287. WORDPRESS_BASE_URL,
  288. WORDPRESS_PARENT_PAGE_SLUG,
  289. chapter["slug"])
  290. # Fix cross-links
  291. for chapter in chapters:
  292. fix_links_to_other_chapters(chapter, chapters, all_headers)
  293. # Add previous and next links at end of html
  294. for (i, chapter) in enumerate(chapters):
  295. add_previous_next_links(chapter, i, chapters)
  296. # Fetch list of pages on the server and determine which already exist
  297. existing_pages = _wordpress_get_pages()
  298. page_slug_to_id = dict([(i.get("slug"), i.get("ID"))
  299. for i in existing_pages])
  300. for chapter in chapters:
  301. if chapter["slug"] in page_slug_to_id:
  302. chapter["page_id"] = page_slug_to_id[chapter["slug"]]
  303. # Send to WP
  304. print("Uploading to WordPress")
  305. for chapter in chapters:
  306. if chapter["slug"] in page_slug_to_id:
  307. print("Existing page: {}".format(chapter["link"]))
  308. assert wordpress_edit_page(chapter["page_id"],
  309. chapter["title"],
  310. chapter["html"])
  311. else:
  312. print("New page: {}".format(chapter["link"]))
  313. assert wordpress_new_page(chapter["slug"],
  314. chapter["title"],
  315. chapter["html"])
  316. def _wordpress_headers():
  317. assert WORDPRESS_ENABLED
  318. return {
  319. "Authorization": "Bearer {}".format(OAUTH_CONFIG["access_token"]),
  320. }
  321. def _wordpress_get_pages():
  322. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/"
  323. url = url.format(OAUTH_CONFIG["blog_id"])
  324. offset = 0
  325. number = 100
  326. posts = []
  327. while True:
  328. print("offset = {}".format(offset))
  329. response = requests.get(url,
  330. params={"context": "edit",
  331. "type": "page",
  332. "status": "publish",
  333. "number": number,
  334. "offset": offset},
  335. # TODO "tag": CONFIG["FULL_PROJECT_NAME"]
  336. headers=_wordpress_headers())
  337. response.raise_for_status()
  338. new_posts = response.json()["posts"]
  339. posts.extend(new_posts)
  340. if len(new_posts) < number:
  341. break
  342. offset += 100
  343. return posts
  344. def wordpress_new_page(slug, title, content):
  345. """Create a new Wordpress page."""
  346. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/new"
  347. url = url.format(OAUTH_CONFIG["blog_id"])
  348. response = requests.post(url,
  349. data={"slug": slug,
  350. "title": title,
  351. "content": content,
  352. "parent": WORDPRESS_PARENT_PAGE_ID,
  353. "type": "page",
  354. "tags": [CONFIG["FULL_PROJECT_NAME"]],
  355. "comments_open": False,
  356. "pings_open": False,
  357. "publicize": False},
  358. headers=_wordpress_headers())
  359. response.raise_for_status()
  360. return response.json()
  361. def wordpress_edit_page(post_id, title, content):
  362. """Edit a Wordpress page."""
  363. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/{}"
  364. url = url.format(OAUTH_CONFIG["blog_id"], post_id)
  365. response = requests.post(url,
  366. data={"title": title,
  367. "content": content,
  368. "tags": [CONFIG["FULL_PROJECT_NAME"]]},
  369. headers=_wordpress_headers())
  370. response.raise_for_status()
  371. return response.json()