fabfile.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. #!/usr/bin/env python
  2. from __future__ import print_function
  3. ##### Configuration ##############################
  4. import io
  5. import os
  6. import json
  7. os.environ["PYTHONIOENCODING"] = "utf-8"
  8. CONFIG_FILE = "config.json"
  9. CONFIG = json.load(io.open(CONFIG_FILE, encoding="utf-8"))
  10. OAUTH_CONFIG_FILE = "oauth.json"
  11. OAUTH_CONFIG = None
  12. if os.path.exists(OAUTH_CONFIG_FILE):
  13. OAUTH_CONFIG = json.load(io.open(OAUTH_CONFIG_FILE, encoding="utf-8"))
## NOTES
## 1. This assumes that you have already created the S3 bucket whose name
##    is stored in the AWS_S3_BUCKET_NAME environment variable.
## 2. Under that S3 bucket, you have created a folder whose name is the
##    SHORT_PROJECT_NAME value in config.json.
## 3. Under that S3 bucket, you have also created the folder
##    SHORT_PROJECT_NAME/assets.
  21. ##### Imports ####################################
  22. import datetime
  23. import subprocess
  24. import copy
  25. import webbrowser
  26. import urllib
  27. import time
  28. from functools import wraps
  29. import boto
  30. import boto.s3.bucket
  31. import boto.s3.key
  32. from bs4 import BeautifulSoup
  33. import requests
  34. from fabric.api import task, local
  35. from fabric.utils import abort
  36. import logging
  37. ##### Start with checks ##########################
  38. logging.basicConfig(level=logging.DEBUG)
  39. log = logging.getLogger(__name__)
  40. for chapter in CONFIG["MARKDOWN_FILES"]:
  41. assert (chapter["slug"].lower() == chapter["slug"]), \
  42. "Slug must be lower case : {}".format(chapter["slug"])
  43. if str(os.environ.get("AWS_ENABLED")).lower() == "false":
  44. AWS_ENABLED = False
  45. elif os.environ.get("AWS_ACCESS_KEY_ID") is not None \
  46. and len(os.environ["AWS_ACCESS_KEY_ID"]) > 0 \
  47. and os.environ.get("AWS_SECRET_ACCESS_KEY") is not None \
  48. and len(os.environ["AWS_SECRET_ACCESS_KEY"]) > 0 \
  49. and os.environ.get("AWS_S3_BUCKET_NAME") is not None \
  50. and len(os.environ["AWS_S3_BUCKET_NAME"]) > 0:
  51. AWS_ENABLED = True
  52. else:
  53. AWS_ENABLED = False
  54. print("NOTE: S3 uploading is disabled because of missing " +
  55. "AWS key environment variables.")
  56. # In my case, they are the same - "files.swaroopch.com"
  57. # http://docs.amazonwebservices.com/AmazonS3/latest/dev/VirtualHosting.html#VirtualHostingCustomURLs
  58. if AWS_ENABLED:
  59. S3_PUBLIC_URL = os.environ["AWS_S3_BUCKET_NAME"]
  60. #else
  61. #S3_PUBLIC_URL = "s3.amazonaws.com/{}".format(
  62. #os.environ["AWS_S3_BUCKET_NAME"])
  63. if OAUTH_CONFIG is not None:
  64. WORDPRESS_ENABLED = True
  65. WORDPRESS_CLIENT_ID = os.environ["WORDPRESS_CLIENT_ID"]
  66. WORDPRESS_CLIENT_SECRET = os.environ["WORDPRESS_CLIENT_SECRET"]
  67. WORDPRESS_PARENT_PAGE_ID = int(os.environ["WORDPRESS_PARENT_PAGE_ID"])
  68. WORDPRESS_PARENT_PAGE_SLUG = os.environ["WORDPRESS_PARENT_PAGE_SLUG"]
  69. WORDPRESS_BASE_URL = os.environ["WORDPRESS_BASE_URL"]
  70. else:
  71. WORDPRESS_ENABLED = False
  72. print("NOTE: Wordpress uploading is disabled because of " +
  73. "missing environment variables.")
  74. ##### Helper methods #############################
  75. def retry(f):
  76. @wraps(f)
  77. def wrapped_f(*args, **kwargs):
  78. MAX_ATTEMPTS = 5
  79. for attempt in range(1, MAX_ATTEMPTS + 1):
  80. try:
  81. return f(*args, **kwargs)
  82. except:
  83. log.exception("Attempt %s/%s failed : %s",
  84. attempt,
  85. MAX_ATTEMPTS,
  86. (args, kwargs))
  87. time.sleep(10 * attempt)
  88. log.critical("All %s attempts failed : %s",
  89. MAX_ATTEMPTS,
  90. (args, kwargs))
  91. return wrapped_f
  92. def _upload_to_s3(filename, key):
  93. """http://docs.pythonboto.org/en/latest/s3_tut.html#storing-data"""
  94. conn = boto.connect_s3()
  95. b = boto.s3.bucket.Bucket(conn, os.environ["AWS_S3_BUCKET_NAME"])
  96. k = boto.s3.key.Key(b)
  97. k.key = key
  98. k.set_contents_from_filename(filename)
  99. k.set_acl("public-read")
  100. url = "http://{}/{}".format(S3_PUBLIC_URL, key)
  101. print("Uploaded to S3 : {}".format(url))
  102. return url
  103. def upload_output_to_s3(filename):
  104. key = "{}/{}".format(CONFIG["SHORT_PROJECT_NAME"],
  105. filename.split("/")[-1])
  106. return _upload_to_s3(filename, key)
  107. def upload_asset_to_s3(filename):
  108. key = "{}/assets/{}".format(CONFIG["SHORT_PROJECT_NAME"],
  109. filename.split("/")[-1])
  110. return _upload_to_s3(filename, key)
  111. def replace_images_with_s3_urls(text):
  112. """http://www.crummy.com/software/BeautifulSoup/bs4/doc/"""
  113. soup = BeautifulSoup(text)
  114. for image in soup.find_all("img"):
  115. image["src"] = upload_asset_to_s3(image["src"])
  116. return str(soup)
  117. def markdown_to_html(source_text, upload_assets_to_s3=False):
  118. """Convert from Markdown to HTML; optional: upload images, etc. to S3."""
  119. args = ["pandoc",
  120. "-f", "markdown",
  121. "-t", "html5"]
  122. p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  123. output = p.communicate(source_text.encode("utf-8", "ignore"))[0]
  124. # http://wordpress.org/extend/plugins/raw-html/
  125. output = u"<!--raw-->\n" + \
  126. output.decode("utf-8", "ignore") + \
  127. u"\n<!--/raw-->"
  128. # NOTE: Also assumes that you have added the CSS from
  129. # `pandoc -t html5` to the `style.css` of your active Wordpress theme.
  130. if upload_assets_to_s3:
  131. output = replace_images_with_s3_urls(output)
  132. return output.decode("utf-8", "ignore")
  133. def collect_header_anchors(chapter, i, all_headers):
  134. soup = BeautifulSoup(chapter["html"])
  135. for header in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
  136. if "id" in header.attrs:
  137. all_headers[header["id"]] = i
  138. def fix_links_to_other_chapters(chapter, chapters, all_headers):
  139. """Fix links to other sections with Wordpress page URL."""
  140. soup = BeautifulSoup(chapter["html"])
  141. for link in soup.find_all("a"):
  142. if "href" in link.attrs:
  143. if link["href"].startswith("#"):
  144. header_id = link["href"][1:]
  145. assert header_id in all_headers, \
  146. "#{} does not exist, referred in {}".format(
  147. header_id, chapter["file"])
  148. other_chapter = chapters[all_headers[header_id]]
  149. link["href"] = "{}#{}".format(
  150. other_chapter["link"],
  151. header_id)
  152. chapter["html"] = unicode(soup)
  153. def add_previous_next_links(chapter, i, chapters):
  154. previous_link = None
  155. if i > 0:
  156. previous_link = chapters[i - 1]["link"]
  157. next_link = None
  158. if i < len(chapters) - 1:
  159. next_link = chapters[i + 1]["link"]
  160. if previous_link is not None or next_link is not None:
  161. chapter["html"] += u"\n"
  162. if previous_link is not None:
  163. chapter["html"] += u"""\
  164. <a href="{}">&lArr; Previous chapter</a>\
  165. """.format(previous_link)
  166. if previous_link is not None and next_link is not None:
  167. chapter["html"] += u"&nbsp;" * 5
  168. if next_link is not None:
  169. chapter["html"] += u"""\
  170. <a href="{}">Next chapter &rArr;</a>\
  171. """.format(next_link)
  172. ##### Tasks ######################################
  173. @task
  174. def prepare():
  175. frontpage = CONFIG["MARKDOWN_FILES"][0]
  176. content = io.open(frontpage["file"], encoding="utf-8").read()
  177. # TODO Can I make this always go change the third line instead?
  178. # TODO And then go back and change it to "$$date$$" so that it
  179. # is not inadvertently committed to the git repo.
  180. today = unicode(datetime.datetime.now().strftime("%d %b %Y"))
  181. content = content.replace(u"$$date$$", today)
  182. with io.open(frontpage["file"], "wt", encoding="utf-8") as output:
  183. output.write(content)
  184. @task
  185. def html():
  186. """HTML5 output."""
  187. prepare()
  188. args = ["pandoc",
  189. "-f", "markdown",
  190. "-t", "html5",
  191. "-o", "{}.html".format(CONFIG["FULL_PROJECT_NAME"]),
  192. "-s",
  193. "--toc"] + [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  194. local(" ".join(args))
  195. html_file = "file://" + os.path.abspath(
  196. "{}.html".format(CONFIG["FULL_PROJECT_NAME"]))
  197. print(html_file)
  198. webbrowser.open(html_file)
  199. @task
  200. def epub():
  201. """http://johnmacfarlane.net/pandoc/epub.html"""
  202. prepare()
  203. args = ["pandoc",
  204. "-f", "markdown",
  205. "-t", "epub",
  206. "-o", "{}.epub".format(CONFIG["FULL_PROJECT_NAME"])] + \
  207. [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  208. # TODO --epub-cover-image
  209. # TODO --epub-metadata
  210. # TODO --epub-stylesheet
  211. local(" ".join(args))
  212. if AWS_ENABLED:
  213. upload_output_to_s3("{}.epub".format(CONFIG["FULL_PROJECT_NAME"]))
  214. @task
  215. def pdf():
  216. """http://johnmacfarlane.net/pandoc/README.html#creating-a-pdf"""
  217. prepare()
  218. args = ["pandoc",
  219. "-f", "markdown",
  220. # https://github.com/jgm/pandoc/issues/571
  221. #"-t", "pdf",
  222. "-o", "{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]),
  223. "-N",
  224. # https://github.com/jgm/pandoc/issues/600
  225. "-V", "papersize:\"a4paper\"",
  226. "--toc"] + [i["file"] for i in CONFIG["MARKDOWN_FILES"]]
  227. local(" ".join(args))
  228. if AWS_ENABLED:
  229. upload_output_to_s3("{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]))
  230. @task
  231. def clean():
  232. """Remove generated output files"""
  233. possible_outputs = (
  234. "{}.html".format(CONFIG["FULL_PROJECT_NAME"]),
  235. "{}.epub".format(CONFIG["FULL_PROJECT_NAME"]),
  236. "{}.pdf".format(CONFIG["FULL_PROJECT_NAME"]),
  237. )
  238. for filename in possible_outputs:
  239. if os.path.exists(filename):
  240. os.remove(filename)
  241. print("Removed {}".format(filename))
  242. @task
  243. def push():
  244. """Upload Wordpress, EPUB, PDF."""
  245. clean()
  246. wp()
  247. epub()
  248. pdf()
  249. ########## WordPress ##########
  250. ## http://developer.wordpress.com/docs/api/ ##
  251. @task
  252. def oauth_step1():
  253. """Fetch OAuth2 token.
  254. http://developer.wordpress.com/docs/oauth2/"""
  255. if os.path.exists(OAUTH_CONFIG_FILE):
  256. os.remove(OAUTH_CONFIG_FILE)
  257. request_url = "https://public-api.wordpress.com/oauth2/authorize"
  258. params = {
  259. "client_id": WORDPRESS_CLIENT_ID,
  260. "redirect_uri": "http://swaroopch.com",
  261. "response_type": "code",
  262. }
  263. url = "{}?{}".format(request_url, urllib.urlencode(params))
  264. print("""\
  265. 1. After authorization, it will redirect, for e.g.
  266. http://swaroopch.com/?code=8D1Gq1tLQy&state
  267. 2. Extract the code from the URL and run:
  268. fab oauth_step2:8D1Gq1tLQy
  269. 3. See generated OAUTH_CONFIG_FILE file
  270. """)
  271. try:
  272. proceed = raw_input("Proceed? (y/n) ")
  273. if proceed.lower().startswith("y"):
  274. webbrowser.open(url)
  275. else:
  276. abort("Okay, bye.")
  277. except SyntaxError:
  278. abort("Okay, bye.")
  279. @task
  280. def oauth_step2(code):
  281. """Use fetched token to generate OAuth access token."""
  282. request_url = "https://public-api.wordpress.com/oauth2/token"
  283. params = {
  284. "client_id": WORDPRESS_CLIENT_ID,
  285. "client_secret": WORDPRESS_CLIENT_SECRET,
  286. "code": code,
  287. "redirect_uri": "http://swaroopch.com",
  288. "grant_type": "authorization_code",
  289. }
  290. response = requests.post(request_url, data=params)
  291. response.raise_for_status()
  292. response = response.json()
  293. print(response)
  294. with io.open(OAUTH_CONFIG_FILE, "wt", encoding="utf-8") as output_file:
  295. json.dump(response, output_file, sort_keys=True, indent=2)
  296. @task
  297. def wp():
  298. """http://developer.wordpress.com/docs/api/"""
  299. if WORDPRESS_ENABLED:
  300. prepare()
  301. chapters = copy.deepcopy(CONFIG["MARKDOWN_FILES"])
  302. # header anchor id -> index in MARKDOWN_FILES
  303. all_headers = {}
  304. # Render html
  305. print("Rendering html")
  306. for (i, chapter) in enumerate(chapters):
  307. chapter_content = io.open(chapter["file"],
  308. encoding="utf-8").read()
  309. chapter["html"] = markdown_to_html(
  310. chapter_content,
  311. upload_assets_to_s3=AWS_ENABLED)
  312. collect_header_anchors(chapter, i, all_headers)
  313. chapter["link"] = "{}/{}/{}".format(
  314. WORDPRESS_BASE_URL,
  315. WORDPRESS_PARENT_PAGE_SLUG,
  316. chapter["slug"])
  317. # Fix cross-links
  318. for chapter in chapters:
  319. fix_links_to_other_chapters(chapter, chapters, all_headers)
  320. # Add previous and next links at end of html
  321. for (i, chapter) in enumerate(chapters):
  322. add_previous_next_links(chapter, i, chapters)
  323. # Fetch list of pages on the server and determine which already exist
  324. existing_pages = _wordpress_get_pages()
  325. page_slug_to_id = dict([(i.get("slug"), i.get("ID"))
  326. for i in existing_pages])
  327. for chapter in chapters:
  328. if chapter["slug"] in page_slug_to_id:
  329. chapter["page_id"] = page_slug_to_id[chapter["slug"]]
  330. # Send to WP
  331. print("Uploading to WordPress")
  332. for chapter in chapters:
  333. if chapter["slug"] in page_slug_to_id:
  334. print("Existing page: {}".format(chapter["link"]))
  335. assert wordpress_edit_page(chapter["page_id"],
  336. chapter["title"],
  337. chapter["html"])
  338. else:
  339. print("New page: {}".format(chapter["link"]))
  340. assert wordpress_new_page(chapter["slug"],
  341. chapter["title"],
  342. chapter["html"])
  343. def _wordpress_headers():
  344. assert WORDPRESS_ENABLED
  345. return {
  346. "Authorization": "Bearer {}".format(OAUTH_CONFIG["access_token"]),
  347. }
  348. @retry
  349. def _wordpress_get_pages():
  350. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/"
  351. url = url.format(OAUTH_CONFIG["blog_id"])
  352. offset = 0
  353. number = 100
  354. posts = []
  355. while True:
  356. print("offset = {}".format(offset))
  357. response = requests.get(url,
  358. params={"context": "edit",
  359. "type": "page",
  360. "status": "publish",
  361. "number": number,
  362. "search": CONFIG["BOOK_PAGES_SEARCH"],
  363. "offset": offset},
  364. headers=_wordpress_headers())
  365. response.raise_for_status()
  366. new_posts = response.json()["posts"]
  367. posts.extend(new_posts)
  368. if len(new_posts) < number:
  369. break
  370. offset += 100
  371. return posts
  372. @retry
  373. def wordpress_new_page(slug, title, content):
  374. """Create a new Wordpress page."""
  375. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/new"
  376. url = url.format(OAUTH_CONFIG["blog_id"])
  377. response = requests.post(url,
  378. data={"slug": slug,
  379. "title": title,
  380. "content": content,
  381. "parent": WORDPRESS_PARENT_PAGE_ID,
  382. "type": "page",
  383. "comments_open": False,
  384. "pings_open": False,
  385. "publicize": False},
  386. headers=_wordpress_headers())
  387. response.raise_for_status()
  388. return response.json()
  389. @retry
  390. def wordpress_edit_page(post_id, title, content):
  391. """Edit a Wordpress page."""
  392. url = "https://public-api.wordpress.com/rest/v1/sites/{}/posts/{}"
  393. url = url.format(OAUTH_CONFIG["blog_id"], post_id)
  394. response = requests.post(url,
  395. data={"title": title,
  396. "content": content},
  397. headers=_wordpress_headers())
  398. response.raise_for_status()
  399. return response.json()